apply in-memory encoding instead of temp files
app.py
CHANGED
@@ -2,12 +2,12 @@ import time
 import logging
 import gradio as gr
 import cv2
-import tempfile
 import os
 from pathlib import Path
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 from llama_cpp.llama_chat_format import Llava15ChatHandler
+import base64

 # ----------------------------------------
 # Model configurations: per-size prefixes and repos
@@ -110,24 +110,23 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
     llm = model_cache['llm']
     time.sleep(interval_ms / 1000)
     img = cv2.resize(frame.copy(), (384, 384))
-    # [18 lines removed: the previous temp-file based version of this block]
+    success, jpeg = cv2.imencode('.jpg', img)
+    uri = 'data:image/jpeg;base64,' + base64.b64encode(jpeg.tobytes()).decode()
+    messages = [
+        {"role": "system", "content": sys_prompt},
+        {"role": "user", "content": [
+            {"type": "image_url", "image_url": uri},
+            {"type": "text", "text": usr_prompt}
+        ]}
+    ]
+    # re-init handler
+    llm.chat_handler = SmolVLM2ChatHandler(clip_model_path=clip_file, verbose=False)
+    resp = llm.create_chat_completion(
+        messages=messages,
+        max_tokens=128,
+        temperature=0.1,
+        stop=["<end_of_utterance>"]
+    )

     import gc
     gc.collect()
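The heart of the change is swapping a temp-file round trip for an in-memory JPEG encode wrapped in a base64 data URI. A minimal standalone sketch of that step, assuming a BGR frame as produced by OpenCV (the helper name frame_to_data_uri and the synthetic test frame are illustrative, not part of app.py):

import base64
import cv2
import numpy as np

def frame_to_data_uri(frame: np.ndarray, size: int = 384) -> str:
    """Resize a BGR frame, JPEG-encode it in memory, and return a base64 data URI."""
    img = cv2.resize(frame, (size, size))
    ok, jpeg = cv2.imencode('.jpg', img)      # JPEG bytes live in a numpy buffer, not on disk
    if not ok:
        raise RuntimeError("JPEG encoding failed")
    return 'data:image/jpeg;base64,' + base64.b64encode(jpeg.tobytes()).decode()

# A synthetic gray frame stands in for a real webcam capture.
dummy = np.full((480, 640, 3), 128, dtype=np.uint8)
print(frame_to_data_uri(dummy)[:60] + '...')

Unlike the patch, which captures success from cv2.imencode without checking it, the sketch raises on a failed encode; in both versions nothing is ever written to disk.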
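For context, the data URI is consumed by llama-cpp-python's LLaVA-style chat handlers as an image_url content part. A hedged sketch of the full call shape, reusing frame_to_data_uri and dummy from the sketch above, substituting the imported Llava15ChatHandler for the app's SmolVLM2ChatHandler (defined elsewhere in app.py), and using the OpenAI-style nested {"url": ...} form where the patch passes the URI string directly; the model paths are hypothetical placeholders for files the app fetches with hf_hub_download:

from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

MODEL_PATH = "smolvlm2.gguf"   # hypothetical path; the app downloads the real file
CLIP_PATH = "mmproj.gguf"      # hypothetical path for the vision projector weights

# Attach a multimodal chat handler so image_url message parts are understood.
handler = Llava15ChatHandler(clip_model_path=CLIP_PATH, verbose=False)
llm = Llama(model_path=MODEL_PATH, chat_handler=handler, n_ctx=4096, verbose=False)

uri = frame_to_data_uri(dummy)  # in-memory data URI from the previous sketch
resp = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "Describe the image in one sentence."},
        {"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": uri}},
            {"type": "text", "text": "What is happening in this frame?"},
        ]},
    ],
    max_tokens=128,
    temperature=0.1,
)
print(resp["choices"][0]["message"]["content"])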