inject verbose message to debug window
app.py CHANGED
@@ -9,6 +9,8 @@ from llama_cpp import Llama
 from llama_cpp.llama_chat_format import Llava15ChatHandler
 import base64
 import gc
+import io
+from contextlib import redirect_stdout, redirect_stderr
 
 # ----------------------------------------
 # Model configurations: per-size prefixes and repos
@@ -157,12 +159,20 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
     debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
 
     t_start = time.time()
-
-
-
-
-
-
+    # right before you call the Llama API:
+    buf = io.StringIO()
+    with redirect_stdout(buf), redirect_stderr(buf):
+        resp = model_cache['llm'].create_chat_completion(
+            messages=messages,
+            max_tokens=128,
+            temperature=0.1,
+            stop=["<end_of_utterance>"]
+        )
+    # grab every line the Llama client printed
+    for line in buf.getvalue().splitlines():
+        timestamp = time.strftime('%H:%M:%S')
+        debug_msgs.append(f"[{timestamp}] {line}")
+
     elapsed = (time.time() - t_start) * 1000
     timestamp = time.strftime('%H:%M:%S')
     debug_msgs.append(f"[{timestamp}] LLM response in {elapsed:.1f} ms")