update dockerfile and app.py
- Dockerfile +16 -16
- app.py +6 -24
Dockerfile
CHANGED
@@ -5,7 +5,7 @@ FROM ubuntu:22.04
 ENV DEBIAN_FRONTEND=noninteractive
 ENV TZ=Etc/UTC
 
-# Configure
+# Configure cache paths to a writable /tmp location
 ENV XDG_CACHE_HOME=/tmp/.cache
 ENV HF_HOME=/tmp/.cache/huggingface
 
@@ -17,7 +17,7 @@ RUN echo "tzdata tzdata/Areas select Etc" > /tmp/tzdata.seed && \
     echo "tzdata tzdata/Zones/Etc select UTC" >> /tmp/tzdata.seed && \
     debconf-set-selections /tmp/tzdata.seed
 
-# 1
+# STEP 1: Install OS-level dependencies
 RUN echo "### STEP 1: Installing OS-level dependencies" && \
     apt-get update && \
     apt-get install -y --no-install-recommends \
@@ -32,28 +32,28 @@ RUN echo "### STEP 1: Installing OS-level dependencies" && \
     python3-opencv && \
     rm -rf /var/lib/apt/lists/*
 
-# 2
+# STEP 2: Prepare application directory and copy source code
 WORKDIR /app
 COPY requirements.txt ./
 COPY app.py ./
-#
+# COPY any other source files or directories needed by your app
 
-# 3
-RUN echo "### STEP
-
-
+# STEP 3: Install Python dependencies (ensure huggingface_hub is listed)
+RUN echo "### STEP 3: Installing Python dependencies" && \
+    python3 -m pip install --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
 
-# 4
-RUN echo "### STEP
+# STEP 4: Ensure cache directories are writable
+RUN echo "### STEP 4: Creating and permissioning cache directories" && \
     mkdir -p "$XDG_CACHE_HOME" "$HF_HOME" && \
-    chmod -R a+rwX "$XDG_CACHE_HOME"
+    chmod -R a+rwX "$XDG_CACHE_HOME" "$HF_HOME"
 
-# 5
-RUN echo "### STEP
+# STEP 5: Build and install llama-cpp-python from source with OpenBLAS
+RUN echo "### STEP 5: Building llama-cpp-python with OpenBLAS" && \
     export CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" && \
-
+    pip install --no-cache-dir --force-reinstall --no-binary llama-cpp-python llama-cpp-python==0.2.0
 
-# 6
-RUN echo "### STEP
+# STEP 6: Finalize and launch the application
+RUN echo "### STEP 6: Finalizing Docker image"
 EXPOSE 7860
 CMD ["python3", "app.py"]
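To sanity-check the updated Dockerfile locally, a minimal sketch (the image tag "smolvlm2-space" is an arbitrary placeholder, not part of this repo; port 7860 matches the EXPOSE line above):

    # build the image from the repo root, then run it and map the Gradio port
    docker build -t smolvlm2-space .
    docker run --rm -p 7860:7860 smolvlm2-space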
app.py
CHANGED
@@ -12,7 +12,6 @@ import gc
 import io
 from contextlib import redirect_stdout, redirect_stderr
 import sys, llama_cpp
-import shutil
 
 # ----------------------------------------
 # Model configurations: per-size prefixes and repos
@@ -53,27 +52,12 @@ model_cache = {
     'llm': None
 }
 
-# Helper to download
+# Helper to download weights and return their cache paths
 def ensure_weights(cfg, model_file, clip_file):
-    #
-
-
-
-    try:
-        os.symlink(path, model_file)
-    except (PermissionError, OSError):
-        print(f"⚠️ symlink failed, copying {path} → {model_file}")
-        shutil.copy2(path, model_file)
-
-    # repeat for clip_file…
-    clip_path = hf_hub_download(repo_id=cfg['clip_repo'], filename=clip_file)
-    try:
-        os.symlink(clip_path, clip_file)
-    except (PermissionError, OSError):
-        print(f"⚠️ symlink failed, copying {clip_path} → {clip_file}")
-        shutil.copy2(clip_path, clip_file)
-
-    return model_file, clip_file
+    # Download model and clip into HF cache (writable, e.g. /tmp/.cache)
+    model_path = hf_hub_download(repo_id=cfg['model_repo'], filename=model_file)
+    clip_path = hf_hub_download(repo_id=cfg['clip_repo'], filename=clip_file)
+    return model_path, clip_path
 
 # Custom chat handler
 class SmolVLM2ChatHandler(Llava15ChatHandler):
@@ -102,7 +86,7 @@ class SmolVLM2ChatHandler(Llava15ChatHandler):
 # Load and cache LLM (only on dropdown or verbose change)
 def update_llm(size, model_file, clip_file, verbose_mode):
     if (model_cache['size'], model_cache['model_file'], model_cache['clip_file'], model_cache['verbose']) != (size, model_file, clip_file, verbose_mode):
-        mf, cf = ensure_weights(size, model_file, clip_file)
+        mf, cf = ensure_weights(MODELS[size], model_file, clip_file)
         handler = SmolVLM2ChatHandler(clip_model_path=cf, verbose=verbose_mode)
         llm = Llama(
             model_path=mf,
@@ -173,7 +157,6 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
     debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
 
     t_start = time.time()
-    # right before you call the Llama API:
     buf = io.StringIO()
    with redirect_stdout(buf), redirect_stderr(buf):
         resp = model_cache['llm'].create_chat_completion(
@@ -182,7 +165,6 @@
             temperature=0.1,
             stop=["<end_of_utterance>"]
         )
-    # grab every line the Llama client printed
     for line in buf.getvalue().splitlines():
         timestamp = time.strftime('%H:%M:%S')
         debug_msgs.append(f"[{timestamp}] {line}")
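For reference, the simplified ensure_weights flow now relies on the Hugging Face cache instead of symlinks or copies; a minimal standalone sketch under that assumption (the repo IDs and filenames below are placeholders, the real values come from the MODELS config in app.py):

    from huggingface_hub import hf_hub_download

    # hf_hub_download stores files under HF_HOME (/tmp/.cache/huggingface per the Dockerfile)
    # and returns the cached path, so the old symlink/copy fallback is no longer needed
    model_path = hf_hub_download(repo_id="<model-repo-id>", filename="<model-file.gguf>")
    clip_path = hf_hub_download(repo_id="<clip-repo-id>", filename="<clip-file.gguf>")
    print(model_path, clip_path)  # these paths feed Llama / SmolVLM2ChatHandler in update_llm()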