Update Dockerfile

Dockerfile CHANGED (+7 -0)
@@ -4,6 +4,12 @@ ENV VLLM_LOGGING_LEVEL=DEBUG
 ENV HF_HOME=/tmp/.cache/huggingface
 ENV OMP_NUM_THREADS=1
 
+# https://github.com/vllm-project/vllm/blob/v0.10.0/docs/getting_started/installation/gpu/rocm.inc.md?plain=1#L124
+ENV VLLM_USE_TRITON_FLASH_ATTN=0
+
+# https://github.com/vllm-project/vllm/blob/v0.10.0/docs/getting_started/quickstart.md?plain=1#L213
+# `FLASH_ATTN` or `FLASHINFER` or `XFORMERS`.
+ENV VLLM_ATTENTION_BACKEND=FLASH_ATTN
 
 RUN apt-get update && apt-get install -y python3 python3-pip git
 RUN pip3 install --upgrade pip
@@ -32,6 +38,7 @@ RUN mkdir -p /tmp/.cache/huggingface
 
 EXPOSE 7860
 
+# Export for runtime environment
 CMD python3 -m vllm.entrypoints.openai.api_server \
     --model "meta-llama/Llama-3.2-3B-Instruct" \
     --task generate \
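The first hunk pins vLLM's attention path through environment variables: Triton flash attention is disabled (per the linked ROCm install note) and the backend is fixed to FLASH_ATTN, with the quickstart comment listing FLASHINFER and XFORMERS as the other options. Since these are plain ENV instructions, they can be overridden at container start without rebuilding the image. A minimal sketch, assuming a local build of this Dockerfile; the vllm-space tag and the --gpus flag are illustrative, not taken from the Space:

    # Build the image locally; the tag is a placeholder.
    docker build -t vllm-space .

    # Override the baked-in backend at run time, e.g. to try XFORMERS,
    # and publish the port the Dockerfile EXPOSEs.
    docker run --gpus all -p 7860:7860 \
      -e VLLM_ATTENTION_BACKEND=XFORMERS \
      vllm-space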
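The CMD in the second hunk launches vLLM's OpenAI-compatible API server, serving meta-llama/Llama-3.2-3B-Instruct on the exposed port 7860. A hedged usage example, assuming the container is reachable at localhost (the URL and the prompt are placeholders):

    # Chat completion against the OpenAI-compatible route.
    curl http://localhost:7860/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{
            "model": "meta-llama/Llama-3.2-3B-Instruct",
            "messages": [{"role": "user", "content": "Say hello."}]
          }'

The same entrypoint also serves the other standard OpenAI-style routes, such as /v1/models and /v1/completions.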