Spaces:
Paused
Paused
Update Dockerfile
Browse files
- Dockerfile +6 -6
Dockerfile
CHANGED
@@ -7,18 +7,18 @@ ENV OMP_NUM_THREADS=1
Before (old lines 7-24):
 7 |   ENV VLLM_USE_TRITON_FLASH_ATTN=0
 8 |   ENV VLLM_ATTENTION_BACKEND=XFORMERS
 9 |
10 | - RUN mkdir -p /tmp/.cache/huggingface
11 | -
12 | - # e.g. install the `audio` optional dependencies
13 | - # NOTE: Make sure the version of vLLM matches the base image!
14 | - RUN uv pip install --system vllm[audio]==0.10.0
15 | -
16 |   # Create a user and group with the specified ID
17 |   RUN groupadd -r myuser --gid 1000 && useradd -r -g myuser --uid 1000 myuser
18 |
19 |   # Now, switch to the newly created user
20 |   USER myuser
21 |
22 |   ENTRYPOINT ["/bin/bash", "-c", "vllm serve meta-llama/Llama-3.2-3B-Instruct --task generate --revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --code-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --tokenizer-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --seed 42 --host 0.0.0.0 --port 7860 --max-num-batched-tokens 32768 --max-model-len 32768 --dtype float16 --enforce-eager --gpu-memory-utilization 0.9 --enable-prefix-caching --disable-log-requests --trust-remote-code"]
23 |
24 |   # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
After (new lines 7-24):
 7 |   ENV VLLM_USE_TRITON_FLASH_ATTN=0
 8 |   ENV VLLM_ATTENTION_BACKEND=XFORMERS
 9 |
10 |   # Create a user and group with the specified ID
11 |   RUN groupadd -r myuser --gid 1000 && useradd -r -g myuser --uid 1000 myuser
12 |
13 |   # Now, switch to the newly created user
14 |   USER myuser
15 |
16 | + RUN mkdir -p /tmp/.cache/huggingface
17 | +
18 | + # e.g. install the `audio` optional dependencies
19 | + # NOTE: Make sure the version of vLLM matches the base image!
20 | + RUN uv pip install --system vllm[audio]==0.10.0
21 | +
22 |   ENTRYPOINT ["/bin/bash", "-c", "vllm serve meta-llama/Llama-3.2-3B-Instruct --task generate --revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --code-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --tokenizer-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --seed 42 --host 0.0.0.0 --port 7860 --max-num-batched-tokens 32768 --max-model-len 32768 --dtype float16 --enforce-eager --gpu-memory-utilization 0.9 --enable-prefix-caching --disable-log-requests --trust-remote-code"]
23 |
24 |   # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04