yusufs committed on
Commit 55e5b94 · verified · 1 Parent(s): f918ab8

Update Dockerfile

Files changed (1): Dockerfile +6 -6
Dockerfile CHANGED
@@ -7,18 +7,18 @@ ENV OMP_NUM_THREADS=1
 ENV VLLM_USE_TRITON_FLASH_ATTN=0
 ENV VLLM_ATTENTION_BACKEND=XFORMERS
 
-RUN mkdir -p /tmp/.cache/huggingface
-
-# e.g. install the `audio` optional dependencies
-# NOTE: Make sure the version of vLLM matches the base image!
-RUN uv pip install --system vllm[audio]==0.10.0
-
 # Create a user and group with the specified ID
 RUN groupadd -r myuser --gid 1000 && useradd -r -g myuser --uid 1000 myuser
 
 # Now, switch to the newly created user
 USER myuser
 
+RUN mkdir -p /tmp/.cache/huggingface
+
+# e.g. install the `audio` optional dependencies
+# NOTE: Make sure the version of vLLM matches the base image!
+RUN uv pip install --system vllm[audio]==0.10.0
+
 ENTRYPOINT ["/bin/bash", "-c", "vllm serve meta-llama/Llama-3.2-3B-Instruct --task generate --revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --code-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --tokenizer-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --seed 42 --host 0.0.0.0 --port 7860 --max-num-batched-tokens 32768 --max-model-len 32768 --dtype float16 --enforce-eager --gpu-memory-utilization 0.9 --enable-prefix-caching --disable-log-requests --trust-remote-code"]
 
 # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
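
For reference, the ENTRYPOINT above starts vLLM's OpenAI-compatible HTTP server on the port given by --port (7860 here). A minimal smoke test, as a sketch only: it assumes the container's port 7860 is reachable on localhost (the host/port mapping is not part of this commit) and that the model has finished loading.

# Assumption: container port 7860 is published to localhost:7860; adjust to your deployment.
curl http://localhost:7860/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "meta-llama/Llama-3.2-3B-Instruct",
        "messages": [{"role": "user", "content": "Hello"}],
        "max_tokens": 32
      }'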