Luigi committed on
Commit 957ece1 · 1 Parent(s): 08f659b

update dockerfile and app.py

Files changed (2)
  1. Dockerfile +16 -16
  2. app.py +6 -24
Dockerfile CHANGED
@@ -5,7 +5,7 @@ FROM ubuntu:22.04
 ENV DEBIAN_FRONTEND=noninteractive
 ENV TZ=Etc/UTC
 
-# Configure Hugging Face and XDG cache to use a writable /tmp directory
+# Configure cache paths to a writable /tmp location
 ENV XDG_CACHE_HOME=/tmp/.cache
 ENV HF_HOME=/tmp/.cache/huggingface
 
@@ -17,7 +17,7 @@ RUN echo "tzdata tzdata/Areas select Etc" > /tmp/tzdata.seed && \
     echo "tzdata tzdata/Zones/Etc select UTC" >> /tmp/tzdata.seed && \
     debconf-set-selections /tmp/tzdata.seed
 
-# 1. Install OS-level dependencies (including pkg-config and git)
+# STEP 1: Install OS-level dependencies
 RUN echo "### STEP 1: Installing OS-level dependencies" && \
     apt-get update && \
     apt-get install -y --no-install-recommends \
@@ -32,28 +32,28 @@ RUN echo "### STEP 1: Installing OS-level dependencies" && \
     python3-opencv && \
     rm -rf /var/lib/apt/lists/*
 
-# 2. Prepare application directory
+# STEP 2: Prepare application directory and copy source code
 WORKDIR /app
 COPY requirements.txt ./
 COPY app.py ./
-# (Copy any other source files or directories needed)
+# COPY any other source files or directories needed by your app
 
-# 3. Install Python dependencies (excluding llama-cpp-python)
-RUN echo "### STEP 2: Installing Python dependencies" && \
-    pip3 install --upgrade pip && \
-    pip3 install --no-cache-dir -r requirements.txt
+# STEP 3: Install Python dependencies (ensure huggingface_hub is listed)
+RUN echo "### STEP 3: Installing Python dependencies" && \
+    python3 -m pip install --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
 
-# 4. Ensure cache directories are writable by runtime user
-RUN echo "### STEP 3: Creating cache directories" && \
+# STEP 4: Ensure cache directories are writable
+RUN echo "### STEP 4: Creating and permissioning cache directories" && \
     mkdir -p "$XDG_CACHE_HOME" "$HF_HOME" && \
-    chmod -R a+rwX "$XDG_CACHE_HOME"
+    chmod -R a+rwX "$XDG_CACHE_HOME" "$HF_HOME"
 
-# 5. Build and install llama-cpp-python from source with OpenBLAS
-RUN echo "### STEP 4: Building llama-cpp-python with OpenBLAS" && \
+# STEP 5: Build and install llama-cpp-python from source with OpenBLAS
+RUN echo "### STEP 5: Building llama-cpp-python with OpenBLAS" && \
     export CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" && \
-    pip3 install --no-cache-dir --force-reinstall --no-binary llama-cpp-python llama-cpp-python
+    pip install --no-cache-dir --force-reinstall --no-binary llama-cpp-python llama-cpp-python==0.2.0
 
-# 6. Finalize and launch the app
-RUN echo "### STEP 5: Finalizing Docker image"
+# STEP 6: Finalize and launch the application
+RUN echo "### STEP 6: Finalizing Docker image"
 EXPOSE 7860
 CMD ["python3", "app.py"]
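Note: a minimal sketch of how the runtime consumes this cache layout, assuming huggingface_hub is installed via requirements.txt; the fetch_weights name and its arguments are illustrative, mirroring the app's ensure_weights helper rather than reproducing it.

from huggingface_hub import hf_hub_download

def fetch_weights(model_repo, model_file, clip_repo, clip_file):
    # With HF_HOME=/tmp/.cache/huggingface (set in the Dockerfile above), both
    # downloads land in a writable cache, so the app can use the returned paths
    # directly instead of symlinking or copying files into the working directory.
    model_path = hf_hub_download(repo_id=model_repo, filename=model_file)
    clip_path = hf_hub_download(repo_id=clip_repo, filename=clip_file)
    return model_path, clip_path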
app.py CHANGED
@@ -12,7 +12,6 @@ import gc
 import io
 from contextlib import redirect_stdout, redirect_stderr
 import sys, llama_cpp
-import shutil
 
 # ----------------------------------------
 # Model configurations: per-size prefixes and repos
@@ -53,27 +52,12 @@ model_cache = {
     'llm': None
 }
 
-# Helper to download & symlink weights
+# Helper to download weights and return their cache paths
 def ensure_weights(cfg, model_file, clip_file):
-    # download into HF cache (now in /tmp/.cache)
-    path = hf_hub_download(repo_id=cfg['model_repo'], filename=model_file)
-
-    # try to link into your working dir, else copy
-    try:
-        os.symlink(path, model_file)
-    except (PermissionError, OSError):
-        print(f"⚠️ symlink failed, copying {path} → {model_file}")
-        shutil.copy2(path, model_file)
-
-    # repeat for clip_file…
-    clip_path = hf_hub_download(repo_id=cfg['clip_repo'], filename=clip_file)
-    try:
-        os.symlink(clip_path, clip_file)
-    except (PermissionError, OSError):
-        print(f"⚠️ symlink failed, copying {clip_path} → {clip_file}")
-        shutil.copy2(clip_path, clip_file)
-
-    return model_file, clip_file
+    # Download model and clip into HF cache (writable, e.g. /tmp/.cache)
+    model_path = hf_hub_download(repo_id=cfg['model_repo'], filename=model_file)
+    clip_path = hf_hub_download(repo_id=cfg['clip_repo'], filename=clip_file)
+    return model_path, clip_path
 
 # Custom chat handler
 class SmolVLM2ChatHandler(Llava15ChatHandler):
@@ -102,7 +86,7 @@ class SmolVLM2ChatHandler(Llava15ChatHandler):
 # Load and cache LLM (only on dropdown or verbose change)
 def update_llm(size, model_file, clip_file, verbose_mode):
     if (model_cache['size'], model_cache['model_file'], model_cache['clip_file'], model_cache['verbose']) != (size, model_file, clip_file, verbose_mode):
-        mf, cf = ensure_weights(size, model_file, clip_file)
+        mf, cf = ensure_weights(MODELS[size], model_file, clip_file)
         handler = SmolVLM2ChatHandler(clip_model_path=cf, verbose=verbose_mode)
         llm = Llama(
             model_path=mf,
@@ -173,7 +157,6 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
     debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
 
     t_start = time.time()
-    # right before you call the Llama API:
     buf = io.StringIO()
     with redirect_stdout(buf), redirect_stderr(buf):
         resp = model_cache['llm'].create_chat_completion(
@@ -182,7 +165,6 @@
             temperature=0.1,
             stop=["<end_of_utterance>"]
         )
-    # grab every line the Llama client printed
    for line in buf.getvalue().splitlines():
        timestamp = time.strftime('%H:%M:%S')
        debug_msgs.append(f"[{timestamp}] {line}")
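Note: a minimal sketch of the capture pattern kept around the llama-cpp-python call, assuming only that the wrapped callable writes its verbose output through Python's sys.stdout/sys.stderr; the run_and_capture helper is illustrative and not part of the app.

import io
import time
from contextlib import redirect_stdout, redirect_stderr

def run_and_capture(call, *args, **kwargs):
    # Redirect stdout/stderr into a buffer while the call runs, then re-emit
    # every captured line as a timestamped debug message, as caption_frame does.
    buf = io.StringIO()
    with redirect_stdout(buf), redirect_stderr(buf):
        result = call(*args, **kwargs)
    debug_msgs = [
        f"[{time.strftime('%H:%M:%S')}] {line}"
        for line in buf.getvalue().splitlines()
    ]
    return result, debug_msgs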