Luigi committed on
Commit
5462ff3
·
1 Parent(s): a459bee

Add UI component to allow the user to enable or disable reset_clip per frame

Browse files
Files changed (1) hide show
  1. app.py +10 -6
app.py CHANGED
@@ -107,7 +107,8 @@ def get_weight_files(size):
107
 
108
  # Caption using cached llm with real-time debug logs
109
 
110
- def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, usr_prompt):
 
111
  debug_msgs = []
112
  timestamp = time.strftime('%H:%M:%S')
113
  debug_msgs.append(f"[{timestamp}] Received frame shape: {frame.shape}")
@@ -145,10 +146,12 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
145
  timestamp = time.strftime('%H:%M:%S')
146
  debug_msgs.append(f"[{timestamp}] Sending prompt of length {len(usr_prompt)} to LLM")
147
  # re-init handler for image
148
- model_cache['llm'].chat_handler = SmolVLM2ChatHandler(clip_model_path=clip_file, verbose=False)
149
- timestamp = time.strftime('%H:%M:%S')
150
- debug_msgs.append(f"[{timestamp}] Reinitialized chat handler")
 
151
 
 
152
  debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
153
 
154
  t_start = time.time()
@@ -209,15 +212,16 @@ def main():
209
  interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
210
  sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
211
  usr_p = gr.Textbox(lines=1, value="What is happening in this image?", label='User Prompt')
 
212
  cam = gr.Image(sources=['webcam'], streaming=True, label='Webcam Feed')
213
  cap = gr.Textbox(interactive=False, label='Caption')
214
  log_box = gr.Textbox(lines=8, interactive=False, label='Debug Log')
215
 
216
  cam.stream(
217
  fn=caption_frame,
218
- inputs=[cam, size_dd, model_dd, clip_dd, interval, sys_p, usr_p],
219
  outputs=[cap, log_box],
220
- time_limit=600
221
  )
222
 
223
  demo.launch()
 
107
 
108
  # Caption using cached llm with real-time debug logs
109
 
110
+ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, usr_prompt,
111
+ reset_clip: bool):
112
  debug_msgs = []
113
  timestamp = time.strftime('%H:%M:%S')
114
  debug_msgs.append(f"[{timestamp}] Received frame shape: {frame.shape}")
 
146
  timestamp = time.strftime('%H:%M:%S')
147
  debug_msgs.append(f"[{timestamp}] Sending prompt of length {len(usr_prompt)} to LLM")
148
  # re-init handler for image
149
+ if reset_clip:
150
+ model_cache['llm'].chat_handler = SmolVLM2ChatHandler(clip_model_path=clip_file, verbose=False)
151
+ timestamp = time.strftime('%H:%M:%S')
152
+ debug_msgs.append(f"[{timestamp}] Reinitialized chat handler")
153
 
154
+ timestamp = time.strftime('%H:%M:%S')
155
  debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
156
 
157
  t_start = time.time()
 
212
  interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
213
  sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
214
  usr_p = gr.Textbox(lines=1, value="What is happening in this image?", label='User Prompt')
215
+ reset_clip = gr.Checkbox(value=True, label="Reset CLIP handler each frame")
216
  cam = gr.Image(sources=['webcam'], streaming=True, label='Webcam Feed')
217
  cap = gr.Textbox(interactive=False, label='Caption')
218
  log_box = gr.Textbox(lines=8, interactive=False, label='Debug Log')
219
 
220
  cam.stream(
221
  fn=caption_frame,
222
+ inputs=[cam, size_dd, model_dd, clip_dd, interval, sys_p, usr_p, reset_clip],
223
  outputs=[cap, log_box],
224
+ time_limit=600,
225
  )
226
 
227
  demo.launch()