yusufs committed on
Commit
d968727
·
verified ·
1 Parent(s): b2bcd0b

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +19 -22
Dockerfile CHANGED
@@ -1,32 +1,29 @@
1
 
2
  FROM vllm/vllm-openai:v0.10.0
3
 
 
4
  # e.g. install the `audio` optional dependencies
5
  # NOTE: Make sure the version of vLLM matches the base image!
6
  RUN uv pip install --system vllm[audio]==0.10.0
7
 
8
- ENTRYPOINT [
9
- "python3",
10
- "-m",
11
- "vllm.entrypoints.openai.api_server",
12
- "meta-llama/Llama-3.2-3B-Instruct"
13
- ]
14
- # CMD "meta-llama/Llama-3.2-3B-Instruct" \
15
- # --task generate \
16
- # --revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
17
- # --code-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
18
- # --tokenizer-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
19
- # --seed 42 \
20
- # --host 0.0.0.0 \
21
- # --port 7860 \
22
- # --max-num-batched-tokens 32768 \
23
- # --max-model-len 32768 \
24
- # --dtype float16 \
25
- # --enforce-eager \
26
- # --gpu-memory-utilization 0.9 \
27
- # --enable-prefix-caching \
28
- # --disable-log-requests \
29
- # --trust-remote-code
30
 
31
  # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
32
  # FROM nvidia/cuda:12.9.1-cudnn-runtime-ubuntu24.04
 
1
 
2
  FROM vllm/vllm-openai:v0.10.0
3
 
4
+
5
  # e.g. install the `audio` optional dependencies
6
  # NOTE: Make sure the version of vLLM matches the base image!
7
  RUN uv pip install --system vllm[audio]==0.10.0
8
 
9
+ # Exec form throughout: a bash ENTRYPOINT would receive the shell-form CMD as `/bin/sh -c ...` args and fail to start.
10
+ ENTRYPOINT ["vllm", "serve"]
11
+ CMD ["meta-llama/Llama-3.2-3B-Instruct", \
12
+ "--task", "generate", \
13
+ "--revision", "0cb88a4f764b7a12671c53f0838cd831a0843b95", \
14
+ "--code-revision", "0cb88a4f764b7a12671c53f0838cd831a0843b95", \
15
+ "--tokenizer-revision", "0cb88a4f764b7a12671c53f0838cd831a0843b95", \
16
+ "--seed", "42", \
17
+ "--host", "0.0.0.0", \
18
+ "--port", "7860", \
19
+ "--max-num-batched-tokens", "32768", \
20
+ "--max-model-len", "32768", \
21
+ "--dtype", "float16", \
22
+ "--enforce-eager", \
23
+ "--gpu-memory-utilization", "0.9", \
24
+ "--enable-prefix-caching", \
25
+ "--disable-log-requests", \
26
+ "--trust-remote-code"]
 
 
 
 
27
 
28
  # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
29
  # FROM nvidia/cuda:12.9.1-cudnn-runtime-ubuntu24.04