inject verbose message to debug window
app.py CHANGED
@@ -9,6 +9,8 @@ from llama_cpp import Llama
 from llama_cpp.llama_chat_format import Llava15ChatHandler
 import base64
 import gc
+import io
+from contextlib import redirect_stdout, redirect_stderr
 
 # ----------------------------------------
 # Model configurations: per-size prefixes and repos
@@ -157,12 +159,20 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
     debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
 
     t_start = time.time()
-
-
-
-
-
-
+    # right before you call the Llama API:
+    buf = io.StringIO()
+    with redirect_stdout(buf), redirect_stderr(buf):
+        resp = model_cache['llm'].create_chat_completion(
+            messages=messages,
+            max_tokens=128,
+            temperature=0.1,
+            stop=["<end_of_utterance>"]
+        )
+    # grab every line the Llama client printed
+    for line in buf.getvalue().splitlines():
+        timestamp = time.strftime('%H:%M:%S')
+        debug_msgs.append(f"[{timestamp}] {line}")
+
     elapsed = (time.time() - t_start) * 1000
     timestamp = time.strftime('%H:%M:%S')
     debug_msgs.append(f"[{timestamp}] LLM response in {elapsed:.1f} ms")