yusufs committed on
Commit
4dd2e29
·
verified ·
1 Parent(s): 46e845e

(feat:vllm serve) Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +22 -24
Dockerfile CHANGED
@@ -1,23 +1,7 @@
1
- FROM python:3.12.7-slim-bookworm
2
 
3
- RUN apt-get update && apt-get install -y \
4
- build-essential \
5
- git \
6
- curl \
7
- wget \
8
- unzip \
9
- gcc
10
-
11
- RUN useradd -m -u 1000 user
12
- USER user
13
- ENV PATH="/home/user/.local/bin:$PATH"
14
-
15
- WORKDIR /app
16
-
17
- COPY --chown=user ./requirements.txt requirements.txt
18
- RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu113
19
-
20
- COPY --chown=user . /app
21
 
22
  # Download at build time,
23
  # to ensure during restart we won't have to wait for the download from HF (only wait for docker pull).
@@ -35,8 +19,22 @@ COPY --chown=user . /app
35
 
36
  EXPOSE 7860
37
 
38
- #CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
39
-
40
- RUN chmod +x /app/runner.sh
41
-
42
- CMD ["/app/runner.sh"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM docker.io/vllm/vllm-openai:v0.10.0
2
 
3
+ ENV MODEL_NAME="meta-llama/Llama-3.2-3B-Instruct"
4
+ ENV MODEL_REV="0cb88a4f764b7a12671c53f0838cd831a0843b95"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  # Download at build time,
7
  # to ensure during restart we won't have to wait for the download from HF (only wait for docker pull).
 
19
 
20
  EXPOSE 7860
21
 
22
+ # Set ENTRYPOINT explicitly: a bare CMD is appended as arguments to any
23
+ # ENTRYPOINT inherited from the base image instead of being run by itself.
24
+ # NOTE(review): vllm-openai is believed to define one - confirm.
25
+ # A shell is needed because exec-form JSON never expands $MODEL_NAME or
26
+ # $MODEL_REV; "exec" makes vllm replace the shell so it gets stop signals.
27
+ # The model is given positionally; the trailing "--" fills $0 so the CMD
28
+ # items below arrive as "$@".
29
+ ENTRYPOINT ["/bin/sh", "-c", "exec vllm serve \"$MODEL_NAME\" --revision \"$MODEL_REV\" --code-revision \"$MODEL_REV\" --tokenizer-revision \"$MODEL_REV\" \"$@\"", "--"]
30
+
31
+ # Default server flags, replaceable at `docker run`. A JSON array must form
32
+ # one logical line, hence the backslash continuations.
33
+ CMD ["--task", "generate", \
34
+      "--seed", "42", \
35
+      "--host", "0.0.0.0", \
36
+      "--port", "7860", \
37
+      "--max-num-batched-tokens", "32768", \
38
+      "--max-model-len", "32768", \
39
+      "--dtype", "float16", \
40
+      "--enforce-eager", \
41
+      "--gpu-memory-utilization", "0.9", \
42
+      "--enable-prefix-caching", \
43
+      "--disable-log-requests", \
44
+      "--trust-remote-code"]