yusufs committed on
Commit
89c17e2
·
verified ·
1 Parent(s): b983fc2

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +19 -3
Dockerfile CHANGED
@@ -2,7 +2,6 @@
2
  FROM vllm/vllm-openai:v0.10.0
3
  # FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu22.04
4
 
5
- ENV OMP_NUM_THREADS=2
6
  # ENV VLLM_LOGGING_LEVEL=DEBUG
7
  # ENV HF_HOME=/tmp/.cache/huggingface
8
  # ENV OMP_NUM_THREADS=1
@@ -52,8 +51,25 @@ ENV OMP_NUM_THREADS=2
52
  EXPOSE 7860
53
 
54
  # Export for runtime environment
55
- CMD vllm serve \
56
- --model "meta-llama/Llama-3.2-3B-Instruct" \
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  --task generate \
58
  --revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
59
  --code-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
 
2
  FROM vllm/vllm-openai:v0.10.0
3
  # FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu22.04
4
 
 
5
  # ENV VLLM_LOGGING_LEVEL=DEBUG
6
  # ENV HF_HOME=/tmp/.cache/huggingface
7
  # ENV OMP_NUM_THREADS=1
 
51
  EXPOSE 7860
52
 
53
  # Export for runtime environment
54
+ # CMD vllm serve \
55
+ # --model "meta-llama/Llama-3.2-3B-Instruct" \
56
+ # --task generate \
57
+ # --revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
58
+ # --code-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
59
+ # --tokenizer-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
60
+ # --seed 42 \
61
+ # --host 0.0.0.0 \
62
+ # --port 7860 \
63
+ # --max-num-batched-tokens 32768 \
64
+ # --max-model-len 32768 \
65
+ # --dtype float16 \
66
+ # --enforce-eager \
67
+ # --gpu-memory-utilization 0.9 \
68
+ # --enable-prefix-caching \
69
+ # --disable-log-requests \
70
+ # --trust-remote-code
71
+
72
+ CMD --model "meta-llama/Llama-3.2-3B-Instruct" \
73
  --task generate \
74
  --revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
75
  --code-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \