Luigi committed on
Commit
2881733
·
1 Parent(s): 4decc4b

add verbose mode switch

Browse files
Files changed (1) hide show
  1. app.py +48 -38
app.py CHANGED
@@ -45,6 +45,7 @@ model_cache = {
45
  'size': None,
46
  'model_file': None,
47
  'clip_file': None,
 
48
  'llm': None
49
  }
50
 
@@ -86,19 +87,22 @@ class SmolVLM2ChatHandler(Llava15ChatHandler):
86
  "{% if add_generation_prompt %}Assistant:{% endif %}"
87
  )
88
 
89
- # Load and cache LLM (only on dropdown change)
90
-
91
- def update_llm(size, model_file, clip_file):
92
- if (model_cache['size'], model_cache['model_file'], model_cache['clip_file']) != (size, model_file, clip_file):
93
  mf, cf = ensure_weights(size, model_file, clip_file)
94
- handler = SmolVLM2ChatHandler(clip_model_path=cf, verbose=False)
95
- llm = Llama(model_path=mf, chat_handler=handler, n_ctx=8192,
96
- verbose=False, n_threads=max(2, os.cpu_count()))
97
- model_cache.update({'size': size, 'model_file': mf, 'clip_file': cf, 'llm': llm})
98
- return None # no UI output
 
 
 
 
 
99
 
100
  # Build weight filename lists
101
-
102
  def get_weight_files(size):
103
  cfg = MODELS[size]
104
  model_files = [f"{cfg['model_prefix']}.{v}.gguf" for v in cfg['model_variants']]
@@ -106,14 +110,13 @@ def get_weight_files(size):
106
  return model_files, clip_files
107
 
108
  # Caption using cached llm with real-time debug logs
109
-
110
- def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, usr_prompt,
111
- reset_clip: bool):
112
  debug_msgs = []
113
  timestamp = time.strftime('%H:%M:%S')
 
 
114
  debug_msgs.append(f"[{timestamp}] Received frame shape: {frame.shape}")
115
 
116
- # show which weight files we’re using this run
117
  timestamp = time.strftime('%H:%M:%S')
118
  debug_msgs.append(f"[{timestamp}] Using model weights: {model_file}")
119
  debug_msgs.append(f"[{timestamp}] Using CLIP weights: {clip_file}")
@@ -145,9 +148,8 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
145
 
146
  timestamp = time.strftime('%H:%M:%S')
147
  debug_msgs.append(f"[{timestamp}] Sending prompt of length {len(usr_prompt)} to LLM")
148
- # re-init handler for image
149
  if reset_clip:
150
- model_cache['llm'].chat_handler = SmolVLM2ChatHandler(clip_model_path=clip_file, verbose=False)
151
  timestamp = time.strftime('%H:%M:%S')
152
  debug_msgs.append(f"[{timestamp}] Reinitialized chat handler")
153
 
@@ -176,10 +178,10 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
176
  return content, "\n".join(debug_msgs)
177
 
178
  # Gradio UI
179
-
180
  def main():
181
  logging.basicConfig(level=logging.INFO)
182
  default = '2.2B'
 
183
  mf, cf = get_weight_files(default)
184
 
185
  with gr.Blocks() as demo:
@@ -188,38 +190,46 @@ def main():
188
  size_dd = gr.Dropdown(list(MODELS.keys()), value=default, label='Model Size')
189
  model_dd = gr.Dropdown(mf, value=mf[0], label='Decoder Weights')
190
  clip_dd = gr.Dropdown(cf, value=cf[0], label='CLIP Weights')
 
191
 
192
- # When size changes: update dropdowns AND preload llm with the new first weights
193
- def on_size_change(sz):
194
  mlist, clist = get_weight_files(sz)
195
- # update dropdown choices and default values
196
- update_ui = (
197
- gr.update(choices=mlist, value=mlist[0]),
198
- gr.update(choices=clist, value=clist[0])
199
- )
200
- # preload with first weights
201
- update_llm(sz, mlist[0], clist[0])
202
- return update_ui
203
  size_dd.change(
204
  fn=on_size_change,
205
- inputs=[size_dd],
206
  outputs=[model_dd, clip_dd]
207
  )
208
- model_dd.change(lambda sz, mf, cf: update_llm(sz, mf, cf), inputs=[size_dd, model_dd, clip_dd], outputs=[])
209
- clip_dd.change(lambda sz, mf, cf: update_llm(sz, mf, cf), inputs=[size_dd, model_dd, clip_dd], outputs=[])
210
- update_llm(default, mf[0], cf[0])
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
- interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
213
- sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
214
- usr_p = gr.Textbox(lines=1, value="What is happening in this image?", label='User Prompt')
215
  reset_clip = gr.Checkbox(value=True, label="Reset CLIP handler each frame")
216
- cam = gr.Image(sources=['webcam'], streaming=True, label='Webcam Feed')
217
- cap = gr.Textbox(interactive=False, label='Caption')
218
- log_box = gr.Textbox(lines=8, interactive=False, label='Debug Log')
219
 
220
  cam.stream(
221
  fn=caption_frame,
222
- inputs=[cam, size_dd, model_dd, clip_dd, interval, sys_p, usr_p, reset_clip],
223
  outputs=[cap, log_box],
224
  time_limit=600,
225
  )
 
45
  'size': None,
46
  'model_file': None,
47
  'clip_file': None,
48
+ 'verbose': None,
49
  'llm': None
50
  }
51
 
 
87
  "{% if add_generation_prompt %}Assistant:{% endif %}"
88
  )
89
 
90
+ # Load and cache LLM (only on dropdown or verbose change)
91
+ def update_llm(size, model_file, clip_file, verbose_mode):
92
+ if (model_cache['size'], model_cache['model_file'], model_cache['clip_file'], model_cache['verbose']) != (size, model_file, clip_file, verbose_mode):
 
93
  mf, cf = ensure_weights(size, model_file, clip_file)
94
+ handler = SmolVLM2ChatHandler(clip_model_path=cf, verbose=verbose_mode)
95
+ llm = Llama(
96
+ model_path=mf,
97
+ chat_handler=handler,
98
+ n_ctx=8192,
99
+ verbose=verbose_mode,
100
+ n_threads=max(2, os.cpu_count())
101
+ )
102
+ model_cache.update({'size': size, 'model_file': mf, 'clip_file': cf, 'verbose': verbose_mode, 'llm': llm})
103
+ return None
104
 
105
  # Build weight filename lists
 
106
  def get_weight_files(size):
107
  cfg = MODELS[size]
108
  model_files = [f"{cfg['model_prefix']}.{v}.gguf" for v in cfg['model_variants']]
 
110
  return model_files, clip_files
111
 
112
  # Caption using cached llm with real-time debug logs
113
+ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, usr_prompt, reset_clip, verbose_mode):
 
 
114
  debug_msgs = []
115
  timestamp = time.strftime('%H:%M:%S')
116
+ debug_msgs.append(f"[{timestamp}] Verbose mode: {verbose_mode}")
117
+ timestamp = time.strftime('%H:%M:%S')
118
  debug_msgs.append(f"[{timestamp}] Received frame shape: {frame.shape}")
119
 
 
120
  timestamp = time.strftime('%H:%M:%S')
121
  debug_msgs.append(f"[{timestamp}] Using model weights: {model_file}")
122
  debug_msgs.append(f"[{timestamp}] Using CLIP weights: {clip_file}")
 
148
 
149
  timestamp = time.strftime('%H:%M:%S')
150
  debug_msgs.append(f"[{timestamp}] Sending prompt of length {len(usr_prompt)} to LLM")
 
151
  if reset_clip:
152
+ model_cache['llm'].chat_handler = SmolVLM2ChatHandler(clip_model_path=clip_file, verbose=verbose_mode)
153
  timestamp = time.strftime('%H:%M:%S')
154
  debug_msgs.append(f"[{timestamp}] Reinitialized chat handler")
155
 
 
178
  return content, "\n".join(debug_msgs)
179
 
180
  # Gradio UI
 
181
  def main():
182
  logging.basicConfig(level=logging.INFO)
183
  default = '2.2B'
184
+ default_verbose = False
185
  mf, cf = get_weight_files(default)
186
 
187
  with gr.Blocks() as demo:
 
190
  size_dd = gr.Dropdown(list(MODELS.keys()), value=default, label='Model Size')
191
  model_dd = gr.Dropdown(mf, value=mf[0], label='Decoder Weights')
192
  clip_dd = gr.Dropdown(cf, value=cf[0], label='CLIP Weights')
193
+ verbose_cb= gr.Checkbox(value=default_verbose, label='Verbose Mode')
194
 
195
+ def on_size_change(sz, verbose):
 
196
  mlist, clist = get_weight_files(sz)
197
+ update_llm(sz, mlist[0], clist[0], verbose)
198
+ return gr.update(choices=mlist, value=mlist[0]), gr.update(choices=clist, value=clist[0])
199
+
 
 
 
 
 
200
  size_dd.change(
201
  fn=on_size_change,
202
+ inputs=[size_dd, verbose_cb],
203
  outputs=[model_dd, clip_dd]
204
  )
205
+ model_dd.change(
206
+ fn=lambda sz, mf, cf, verbose: update_llm(sz, mf, cf, verbose),
207
+ inputs=[size_dd, model_dd, clip_dd, verbose_cb],
208
+ outputs=[]
209
+ )
210
+ clip_dd.change(
211
+ fn=lambda sz, mf, cf, verbose: update_llm(sz, mf, cf, verbose),
212
+ inputs=[size_dd, model_dd, clip_dd, verbose_cb],
213
+ outputs=[]
214
+ )
215
+ verbose_cb.change(
216
+ fn=lambda sz, mf, cf, verbose: update_llm(sz, mf, cf, verbose),
217
+ inputs=[size_dd, model_dd, clip_dd, verbose_cb],
218
+ outputs=[]
219
+ )
220
+ update_llm(default, mf[0], cf[0], default_verbose)
221
 
222
+ interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
223
+ sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
224
+ usr_p = gr.Textbox(lines=1, value="What is happening in this image?", label='User Prompt')
225
  reset_clip = gr.Checkbox(value=True, label="Reset CLIP handler each frame")
226
+ cam = gr.Image(sources=['webcam'], streaming=True, label='Webcam Feed')
227
+ cap = gr.Textbox(interactive=False, label='Caption')
228
+ log_box = gr.Textbox(lines=8, interactive=False, label='Debug Log')
229
 
230
  cam.stream(
231
  fn=caption_frame,
232
+ inputs=[cam, size_dd, model_dd, clip_dd, interval, sys_p, usr_p, reset_clip, verbose_cb],
233
  outputs=[cap, log_box],
234
  time_limit=600,
235
  )