yusufs committed on
Commit
bda8fe4
·
verified ·
1 Parent(s): 55e5b94

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +4 -4
Dockerfile CHANGED
@@ -7,6 +7,10 @@ ENV OMP_NUM_THREADS=1
7
  ENV VLLM_USE_TRITON_FLASH_ATTN=0
8
  ENV VLLM_ATTENTION_BACKEND=XFORMERS
9
 
 
 
 
 
10
  # Create a user and group with the specified ID
11
  RUN groupadd -r myuser --gid 1000 && useradd -r -g myuser --uid 1000 myuser
12
 
@@ -15,10 +19,6 @@ USER myuser
15
 
16
  RUN mkdir -p /tmp/.cache/huggingface
17
 
18
- # e.g. install the `audio` optional dependencies
19
- # NOTE: Make sure the version of vLLM matches the base image!
20
- RUN uv pip install --system vllm[audio]==0.10.0
21
-
22
  ENTRYPOINT ["/bin/bash", "-c", "vllm serve meta-llama/Llama-3.2-3B-Instruct --task generate --revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --code-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --tokenizer-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --seed 42 --host 0.0.0.0 --port 7860 --max-num-batched-tokens 32768 --max-model-len 32768 --dtype float16 --enforce-eager --gpu-memory-utilization 0.9 --enable-prefix-caching --disable-log-requests --trust-remote-code"]
23
 
24
  # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
 
7
  ENV VLLM_USE_TRITON_FLASH_ATTN=0
8
  ENV VLLM_ATTENTION_BACKEND=XFORMERS
9
 
10
+ # e.g. install the `audio` optional dependencies
11
+ # NOTE: Make sure the version of vLLM matches the base image!
12
+ RUN uv pip install --system vllm[audio]==0.10.0
13
+
14
  # Create a user and group with the specified ID
15
  RUN groupadd -r myuser --gid 1000 && useradd -r -g myuser --uid 1000 myuser
16
 
 
19
 
20
  RUN mkdir -p /tmp/.cache/huggingface
21
 
 
 
 
 
22
  ENTRYPOINT ["/bin/bash", "-c", "vllm serve meta-llama/Llama-3.2-3B-Instruct --task generate --revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --code-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --tokenizer-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --seed 42 --host 0.0.0.0 --port 7860 --max-num-batched-tokens 32768 --max-model-len 32768 --dtype float16 --enforce-eager --gpu-memory-utilization 0.9 --enable-prefix-caching --disable-log-requests --trust-remote-code"]
23
 
24
  # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04