Spaces:
Running
Running
default to smallest model with q8 precision, enable verbose mode, disable reset clip
Browse files
app.py
CHANGED
@@ -21,7 +21,7 @@ MODELS = {
|
|
21 |
"clip_repo": "ggml-org/SmolVLM2-256M-Video-Instruct-GGUF",
|
22 |
"model_prefix": "SmolVLM2-256M-Video-Instruct",
|
23 |
"clip_prefix": "mmproj-SmolVLM2-256M-Video-Instruct",
|
24 |
-
"model_variants": ["
|
25 |
"clip_variants": ["Q8_0", "f16"],
|
26 |
},
|
27 |
"500M": {
|
@@ -29,7 +29,7 @@ MODELS = {
|
|
29 |
"clip_repo": "ggml-org/SmolVLM2-500M-Video-Instruct-GGUF",
|
30 |
"model_prefix": "SmolVLM2-500M-Video-Instruct",
|
31 |
"clip_prefix": "mmproj-SmolVLM2-500M-Video-Instruct",
|
32 |
-
"model_variants": ["
|
33 |
"clip_variants": ["Q8_0", "f16"],
|
34 |
},
|
35 |
"2.2B": {
|
@@ -37,7 +37,7 @@ MODELS = {
|
|
37 |
"clip_repo": "ggml-org/SmolVLM2-2.2B-Instruct-GGUF",
|
38 |
"model_prefix": "SmolVLM2-2.2B-Instruct",
|
39 |
"clip_prefix": "mmproj-SmolVLM2-2.2B-Instruct",
|
40 |
-
"model_variants": ["
|
41 |
"clip_variants": ["Q8_0", "f16"],
|
42 |
},
|
43 |
}
|
@@ -194,8 +194,8 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
|
|
194 |
# Gradio UI
|
195 |
def main():
|
196 |
logging.basicConfig(level=logging.INFO)
|
197 |
-
default = '
|
198 |
-
default_verbose =
|
199 |
mf, cf = get_weight_files(default)
|
200 |
|
201 |
with gr.Blocks() as demo:
|
@@ -236,7 +236,7 @@ def main():
|
|
236 |
interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
|
237 |
sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
|
238 |
usr_p = gr.Textbox(lines=1, value="What is happening in this image?", label='User Prompt')
|
239 |
-
reset_clip = gr.Checkbox(value=
|
240 |
cam = gr.Image(sources=['webcam'], streaming=True, label='Webcam Feed')
|
241 |
cap = gr.Textbox(interactive=False, label='Caption')
|
242 |
log_box = gr.Textbox(lines=8, interactive=False, label='Debug Log')
|
|
|
21 |
"clip_repo": "ggml-org/SmolVLM2-256M-Video-Instruct-GGUF",
|
22 |
"model_prefix": "SmolVLM2-256M-Video-Instruct",
|
23 |
"clip_prefix": "mmproj-SmolVLM2-256M-Video-Instruct",
|
24 |
+
"model_variants": ["Q8_0", "Q2_K", "f16"],
|
25 |
"clip_variants": ["Q8_0", "f16"],
|
26 |
},
|
27 |
"500M": {
|
|
|
29 |
"clip_repo": "ggml-org/SmolVLM2-500M-Video-Instruct-GGUF",
|
30 |
"model_prefix": "SmolVLM2-500M-Video-Instruct",
|
31 |
"clip_prefix": "mmproj-SmolVLM2-500M-Video-Instruct",
|
32 |
+
"model_variants": ["Q8_0", "Q2_K", "f16"],
|
33 |
"clip_variants": ["Q8_0", "f16"],
|
34 |
},
|
35 |
"2.2B": {
|
|
|
37 |
"clip_repo": "ggml-org/SmolVLM2-2.2B-Instruct-GGUF",
|
38 |
"model_prefix": "SmolVLM2-2.2B-Instruct",
|
39 |
"clip_prefix": "mmproj-SmolVLM2-2.2B-Instruct",
|
40 |
+
"model_variants": ["Q8_0", "Q2_K","Q4_K_M", "f16"],
|
41 |
"clip_variants": ["Q8_0", "f16"],
|
42 |
},
|
43 |
}
|
|
|
194 |
# Gradio UI
|
195 |
def main():
|
196 |
logging.basicConfig(level=logging.INFO)
|
197 |
+
default = '256M'
|
198 |
+
default_verbose = True
|
199 |
mf, cf = get_weight_files(default)
|
200 |
|
201 |
with gr.Blocks() as demo:
|
|
|
236 |
interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
|
237 |
sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
|
238 |
usr_p = gr.Textbox(lines=1, value="What is happening in this image?", label='User Prompt')
|
239 |
+
reset_clip = gr.Checkbox(value=False, label="Reset CLIP handler each frame")
|
240 |
cam = gr.Image(sources=['webcam'], streaming=True, label='Webcam Feed')
|
241 |
cap = gr.Textbox(interactive=False, label='Caption')
|
242 |
log_box = gr.Textbox(lines=8, interactive=False, label='Debug Log')
|