yusufs committed
Commit 7b16e9f · verified · 1 Parent(s): 4bd51f5

fix(Dockerfile): use cmd single line

Files changed (1)
  1. Dockerfile +17 -20
Dockerfile CHANGED
@@ -16,23 +16,20 @@ FROM docker.io/vllm/vllm-openai:v0.10.0
 
 EXPOSE 7860
 
-ENTRYPOINT ["vllm", "serve"]
-
-CMD [
-"--model", "meta-llama/Llama-3.2-3B-Instruct",
-"--task", "generate",
-"--revision", "0cb88a4f764b7a12671c53f0838cd831a0843b95",
-"--code-revision", "0cb88a4f764b7a12671c53f0838cd831a0843b95",
-"--tokenizer-revision", "0cb88a4f764b7a12671c53f0838cd831a0843b95",
-"--seed", "42",
-"--host", "0.0.0.0",
-"--port", "7860",
-"--max-num-batched-tokens", "32768",
-"--max-model-len", "32768",
-"--dtype", "float16",
-"--enforce-eager",
-"--gpu-memory-utilization", "0.9",
-"--enable-prefix-caching",
-"--disable-log-requests",
-"--trust-remote-code"
-]
+CMD vllm serve \
+    --model "meta-llama/Llama-3.2-3B-Instruct" \
+    --task generate \
+    --revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
+    --code-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
+    --tokenizer-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
+    --seed 42 \
+    --host 0.0.0.0 \
+    --port 7860 \
+    --max-num-batched-tokens 32768 \
+    --max-model-len 32768 \
+    --dtype float16 \
+    --enforce-eager \
+    --gpu-memory-utilization 0.9 \
+    --enable-prefix-caching \
+    --disable-log-requests \
+    --trust-remote-code
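
For quick local verification of the new shell-form CMD, a minimal build-and-run sketch; the image tag vllm-llama32 and the use of HUGGING_FACE_HUB_TOKEN are assumptions for illustration, not part of this commit:

    # Build the image from this Dockerfile (hypothetical tag, assumed here)
    docker build -t vllm-llama32 .

    # Run with GPU access, publishing the port declared by EXPOSE 7860.
    # HUGGING_FACE_HUB_TOKEN is assumed because the Llama 3.2 weights are gated.
    docker run --gpus all -p 7860:7860 \
      -e HUGGING_FACE_HUB_TOKEN=<your_token> \
      vllm-llama32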