Spaces:
Paused
Paused
Update Dockerfile
Browse files- Dockerfile +19 -3
Dockerfile
CHANGED
@@ -2,7 +2,6 @@
|
|
2 |
FROM vllm/vllm-openai:v0.10.0
|
3 |
# FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu22.04
|
4 |
|
5 |
-
ENV OMP_NUM_THREADS=2
|
6 |
# ENV VLLM_LOGGING_LEVEL=DEBUG
|
7 |
# ENV HF_HOME=/tmp/.cache/huggingface
|
8 |
# ENV OMP_NUM_THREADS=1
|
@@ -52,8 +51,25 @@ ENV OMP_NUM_THREADS=2
|
|
52 |
EXPOSE 7860
|
53 |
|
54 |
# Export for runtime environment
|
55 |
-
CMD vllm serve \
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
--task generate \
|
58 |
--revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
|
59 |
--code-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
|
|
|
2 |
FROM vllm/vllm-openai:v0.10.0
|
3 |
# FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu22.04
|
4 |
|
|
|
5 |
# ENV VLLM_LOGGING_LEVEL=DEBUG
|
6 |
# ENV HF_HOME=/tmp/.cache/huggingface
|
7 |
# ENV OMP_NUM_THREADS=1
|
|
|
51 |
EXPOSE 7860
|
52 |
|
53 |
# Export for runtime environment
|
54 |
+
# CMD vllm serve \
|
55 |
+
# --model "meta-llama/Llama-3.2-3B-Instruct" \
|
56 |
+
# --task generate \
|
57 |
+
# --revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
|
58 |
+
# --code-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
|
59 |
+
# --tokenizer-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
|
60 |
+
# --seed 42 \
|
61 |
+
# --host 0.0.0.0 \
|
62 |
+
# --port 7860 \
|
63 |
+
# --max-num-batched-tokens 32768 \
|
64 |
+
# --max-model-len 32768 \
|
65 |
+
# --dtype float16 \
|
66 |
+
# --enforce-eager \
|
67 |
+
# --gpu-memory-utilization 0.9 \
|
68 |
+
# --enable-prefix-caching \
|
69 |
+
# --disable-log-requests \
|
70 |
+
# --trust-remote-code
|
71 |
+
|
72 |
+
CMD vllm serve --model "meta-llama/Llama-3.2-3B-Instruct" \
|
73 |
--task generate \
|
74 |
--revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
|
75 |
--code-revision "0cb88a4f764b7a12671c53f0838cd831a0843b95" \
|