Luigi committed on
Commit
65efb90
·
1 Parent(s): 3be8e88

default to smallest model with q8 precision, enable verbose mode, disable reset clip

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -21,7 +21,7 @@ MODELS = {
21
  "clip_repo": "ggml-org/SmolVLM2-256M-Video-Instruct-GGUF",
22
  "model_prefix": "SmolVLM2-256M-Video-Instruct",
23
  "clip_prefix": "mmproj-SmolVLM2-256M-Video-Instruct",
24
- "model_variants": ["Q2_K","Q8_0", "f16"],
25
  "clip_variants": ["Q8_0", "f16"],
26
  },
27
  "500M": {
@@ -29,7 +29,7 @@ MODELS = {
29
  "clip_repo": "ggml-org/SmolVLM2-500M-Video-Instruct-GGUF",
30
  "model_prefix": "SmolVLM2-500M-Video-Instruct",
31
  "clip_prefix": "mmproj-SmolVLM2-500M-Video-Instruct",
32
- "model_variants": ["Q2_K","Q8_0", "f16"],
33
  "clip_variants": ["Q8_0", "f16"],
34
  },
35
  "2.2B": {
@@ -37,7 +37,7 @@ MODELS = {
37
  "clip_repo": "ggml-org/SmolVLM2-2.2B-Instruct-GGUF",
38
  "model_prefix": "SmolVLM2-2.2B-Instruct",
39
  "clip_prefix": "mmproj-SmolVLM2-2.2B-Instruct",
40
- "model_variants": ["Q2_K","Q4_K_M", "Q8_0", "f16"],
41
  "clip_variants": ["Q8_0", "f16"],
42
  },
43
  }
@@ -194,8 +194,8 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
194
  # Gradio UI
195
  def main():
196
  logging.basicConfig(level=logging.INFO)
197
- default = '2.2B'
198
- default_verbose = False
199
  mf, cf = get_weight_files(default)
200
 
201
  with gr.Blocks() as demo:
@@ -236,7 +236,7 @@ def main():
236
  interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
237
  sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
238
  usr_p = gr.Textbox(lines=1, value="What is happening in this image?", label='User Prompt')
239
- reset_clip = gr.Checkbox(value=True, label="Reset CLIP handler each frame")
240
  cam = gr.Image(sources=['webcam'], streaming=True, label='Webcam Feed')
241
  cap = gr.Textbox(interactive=False, label='Caption')
242
  log_box = gr.Textbox(lines=8, interactive=False, label='Debug Log')
 
21
  "clip_repo": "ggml-org/SmolVLM2-256M-Video-Instruct-GGUF",
22
  "model_prefix": "SmolVLM2-256M-Video-Instruct",
23
  "clip_prefix": "mmproj-SmolVLM2-256M-Video-Instruct",
24
+ "model_variants": ["Q8_0", "Q2_K", "f16"],
25
  "clip_variants": ["Q8_0", "f16"],
26
  },
27
  "500M": {
 
29
  "clip_repo": "ggml-org/SmolVLM2-500M-Video-Instruct-GGUF",
30
  "model_prefix": "SmolVLM2-500M-Video-Instruct",
31
  "clip_prefix": "mmproj-SmolVLM2-500M-Video-Instruct",
32
+ "model_variants": ["Q8_0", "Q2_K", "f16"],
33
  "clip_variants": ["Q8_0", "f16"],
34
  },
35
  "2.2B": {
 
37
  "clip_repo": "ggml-org/SmolVLM2-2.2B-Instruct-GGUF",
38
  "model_prefix": "SmolVLM2-2.2B-Instruct",
39
  "clip_prefix": "mmproj-SmolVLM2-2.2B-Instruct",
40
+ "model_variants": ["Q8_0", "Q2_K","Q4_K_M", "f16"],
41
  "clip_variants": ["Q8_0", "f16"],
42
  },
43
  }
 
194
  # Gradio UI
195
  def main():
196
  logging.basicConfig(level=logging.INFO)
197
+ default = '256M'
198
+ default_verbose = True
199
  mf, cf = get_weight_files(default)
200
 
201
  with gr.Blocks() as demo:
 
236
  interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
237
  sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
238
  usr_p = gr.Textbox(lines=1, value="What is happening in this image?", label='User Prompt')
239
+ reset_clip = gr.Checkbox(value=False, label="Reset CLIP handler each frame")
240
  cam = gr.Image(sources=['webcam'], streaming=True, label='Webcam Feed')
241
  cap = gr.Textbox(interactive=False, label='Caption')
242
  log_box = gr.Textbox(lines=8, interactive=False, label='Debug Log')