yusufs committed on
Commit
f6ddd47
·
verified ·
1 Parent(s): bc37efd

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +7 -0
Dockerfile CHANGED
@@ -4,6 +4,12 @@ ENV VLLM_LOGGING_LEVEL=DEBUG
4
  ENV HF_HOME=/tmp/.cache/huggingface
5
  ENV OMP_NUM_THREADS=1
6
 
 
 
 
 
 
 
7
 
8
  RUN apt-get update && apt-get install -y python3 python3-pip git
9
  RUN pip3 install --upgrade pip
@@ -32,6 +38,7 @@ RUN mkdir -p /tmp/.cache/huggingface
32
 
33
  EXPOSE 7860
34
 
 
35
  CMD python3 -m vllm.entrypoints.openai.api_server \
36
  --model "meta-llama/Llama-3.2-3B-Instruct" \
37
  --task generate \
 
4
  ENV HF_HOME=/tmp/.cache/huggingface
5
  ENV OMP_NUM_THREADS=1
6
 
7
+ # https://github.com/vllm-project/vllm/blob/v0.10.0/docs/getting_started/installation/gpu/rocm.inc.md?plain=1#L124
8
+ ENV VLLM_USE_TRITON_FLASH_ATTN=0
9
+
10
+ # https://github.com/vllm-project/vllm/blob/v0.10.0/docs/getting_started/quickstart.md?plain=1#L213
11
+ # `FLASH_ATTN` or `FLASHINFER` or `XFORMERS`.
12
+ ENV VLLM_ATTENTION_BACKEND=FLASH_ATTN
13
 
14
  RUN apt-get update && apt-get install -y python3 python3-pip git
15
  RUN pip3 install --upgrade pip
 
38
 
39
  EXPOSE 7860
40
 
41
+ # Export for runtime environment
42
  CMD python3 -m vllm.entrypoints.openai.api_server \
43
  --model "meta-llama/Llama-3.2-3B-Instruct" \
44
  --task generate \