Spaces:

Luigi
/

SmolVLM2-on-llama.cpp

Running

App Files Files Community

Luigi committed 28 days ago

Commit

65efb90

1 Parent(s): 3be8e88

default to smallest model with q8 precision, enable verbose mode, disable reset clip

Browse files

Files changed (1) hide show

app.py +6 -6

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ MODELS = {
         "clip_repo":  "ggml-org/SmolVLM2-256M-Video-Instruct-GGUF",
         "model_prefix": "SmolVLM2-256M-Video-Instruct",
         "clip_prefix":  "mmproj-SmolVLM2-256M-Video-Instruct",
-        "model_variants": ["Q2_K","Q8_0", "f16"],
         "clip_variants":  ["Q8_0", "f16"],
     },
     "500M": {
@@ -29,7 +29,7 @@ MODELS = {
         "clip_repo":  "ggml-org/SmolVLM2-500M-Video-Instruct-GGUF",
         "model_prefix": "SmolVLM2-500M-Video-Instruct",
         "clip_prefix":  "mmproj-SmolVLM2-500M-Video-Instruct",
-        "model_variants": ["Q2_K","Q8_0", "f16"],
         "clip_variants":  ["Q8_0", "f16"],
     },
     "2.2B": {
@@ -37,7 +37,7 @@ MODELS = {
         "clip_repo":  "ggml-org/SmolVLM2-2.2B-Instruct-GGUF",
         "model_prefix": "SmolVLM2-2.2B-Instruct",
         "clip_prefix":  "mmproj-SmolVLM2-2.2B-Instruct",
-        "model_variants": ["Q2_K","Q4_K_M", "Q8_0", "f16"],
         "clip_variants":  ["Q8_0", "f16"],
     },
 }
@@ -194,8 +194,8 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
 # Gradio UI
 def main():
     logging.basicConfig(level=logging.INFO)
-    default = '2.2B'
-    default_verbose = False
     mf, cf = get_weight_files(default)
     with gr.Blocks() as demo:
@@ -236,7 +236,7 @@ def main():
         interval   = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
         sys_p      = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
         usr_p      = gr.Textbox(lines=1, value="What is happening in this image?", label='User Prompt')
-        reset_clip = gr.Checkbox(value=True, label="Reset CLIP handler each frame")
         cam        = gr.Image(sources=['webcam'], streaming=True, label='Webcam Feed')
         cap        = gr.Textbox(interactive=False, label='Caption')
         log_box    = gr.Textbox(lines=8, interactive=False, label='Debug Log')

         "clip_repo":  "ggml-org/SmolVLM2-256M-Video-Instruct-GGUF",
         "model_prefix": "SmolVLM2-256M-Video-Instruct",
         "clip_prefix":  "mmproj-SmolVLM2-256M-Video-Instruct",
+        "model_variants": ["Q8_0", "Q2_K", "f16"],
         "clip_variants":  ["Q8_0", "f16"],
     },
     "500M": {
         "clip_repo":  "ggml-org/SmolVLM2-500M-Video-Instruct-GGUF",
         "model_prefix": "SmolVLM2-500M-Video-Instruct",
         "clip_prefix":  "mmproj-SmolVLM2-500M-Video-Instruct",
+        "model_variants": ["Q8_0", "Q2_K", "f16"],
         "clip_variants":  ["Q8_0", "f16"],
     },
     "2.2B": {
         "clip_repo":  "ggml-org/SmolVLM2-2.2B-Instruct-GGUF",
         "model_prefix": "SmolVLM2-2.2B-Instruct",
         "clip_prefix":  "mmproj-SmolVLM2-2.2B-Instruct",
+        "model_variants": ["Q8_0", "Q2_K","Q4_K_M", "f16"],
         "clip_variants":  ["Q8_0", "f16"],
     },
 }
 # Gradio UI
 def main():
     logging.basicConfig(level=logging.INFO)
+    default = '256M'
+    default_verbose = True
     mf, cf = get_weight_files(default)
     with gr.Blocks() as demo:
         interval   = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
         sys_p      = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
         usr_p      = gr.Textbox(lines=1, value="What is happening in this image?", label='User Prompt')
+        reset_clip = gr.Checkbox(value=False, label="Reset CLIP handler each frame")
         cam        = gr.Image(sources=['webcam'], streaming=True, label='Webcam Feed')
         cap        = gr.Textbox(interactive=False, label='Caption')
         log_box    = gr.Textbox(lines=8, interactive=False, label='Debug Log')