yusufs committed on
Commit 55e5b94 · verified · 1 Parent(s): f918ab8

Update Dockerfile

Files changed (1): Dockerfile +6 -6
Dockerfile CHANGED
@@ -7,18 +7,18 @@ ENV OMP_NUM_THREADS=1
 ENV VLLM_USE_TRITON_FLASH_ATTN=0
 ENV VLLM_ATTENTION_BACKEND=XFORMERS
 
-RUN mkdir -p /tmp/.cache/huggingface
-
-# e.g. install the `audio` optional dependencies
-# NOTE: Make sure the version of vLLM matches the base image!
-RUN uv pip install --system vllm[audio]==0.10.0
-
 # Create a user and group with the specified ID
 RUN groupadd -r myuser --gid 1000 && useradd -r -g myuser --uid 1000 myuser
 
 # Now, switch to the newly created user
 USER myuser
 
+RUN mkdir -p /tmp/.cache/huggingface
+
+# e.g. install the `audio` optional dependencies
+# NOTE: Make sure the version of vLLM matches the base image!
+RUN uv pip install --system vllm[audio]==0.10.0
+
 ENTRYPOINT ["/bin/bash", "-c", "vllm serve meta-llama/Llama-3.2-3B-Instruct --task generate --revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --code-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --tokenizer-revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 --seed 42 --host 0.0.0.0 --port 7860 --max-num-batched-tokens 32768 --max-model-len 32768 --dtype float16 --enforce-eager --gpu-memory-utilization 0.9 --enable-prefix-caching --disable-log-requests --trust-remote-code"]
 
 # # FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
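
For reference, the ENTRYPOINT above starts vLLM's OpenAI-compatible HTTP server on the port given by --port (7860 here). A minimal smoke test, as a sketch only: it assumes the container's port 7860 is reachable on localhost (the host/port mapping is not part of this commit) and that the model has finished loading.

# Assumption: container port 7860 is published to localhost:7860; adjust to your deployment.
curl http://localhost:7860/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "meta-llama/Llama-3.2-3B-Instruct",
        "messages": [{"role": "user", "content": "Hello"}],
        "max_tokens": 32
      }'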