Luigi committed on
Commit 69c8775 · 1 Parent(s): 2881733

inject verbose messages into debug window

Files changed (1)
  1. app.py +16 -6
app.py CHANGED
@@ -9,6 +9,8 @@ from llama_cpp import Llama
 from llama_cpp.llama_chat_format import Llava15ChatHandler
 import base64
 import gc
+import io
+from contextlib import redirect_stdout, redirect_stderr
 
 # ----------------------------------------
 # Model configurations: per-size prefixes and repos
@@ -157,12 +159,20 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
     debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
 
     t_start = time.time()
-    resp = model_cache['llm'].create_chat_completion(
-        messages=messages,
-        max_tokens=128,
-        temperature=0.1,
-        stop=["<end_of_utterance>"]
-    )
+    # right before you call the Llama API:
+    buf = io.StringIO()
+    with redirect_stdout(buf), redirect_stderr(buf):
+        resp = model_cache['llm'].create_chat_completion(
+            messages=messages,
+            max_tokens=128,
+            temperature=0.1,
+            stop=["<end_of_utterance>"]
+        )
+    # grab every line the Llama client printed
+    for line in buf.getvalue().splitlines():
+        timestamp = time.strftime('%H:%M:%S')
+        debug_msgs.append(f"[{timestamp}] {line}")
+
     elapsed = (time.time() - t_start) * 1000
     timestamp = time.strftime('%H:%M:%S')
     debug_msgs.append(f"[{timestamp}] LLM response in {elapsed:.1f} ms")
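For context, the pattern this commit relies on is contextlib.redirect_stdout / redirect_stderr, which temporarily swap Python's sys.stdout and sys.stderr for any writable object, so output emitted inside the with block lands in a StringIO buffer instead of the console. Below is a minimal standalone sketch of that pattern; noisy_call is a hypothetical stand-in for the create_chat_completion call, not part of the actual app.

import io
import sys
import time
from contextlib import redirect_stdout, redirect_stderr

def noisy_call():
    # Hypothetical stand-in for model_cache['llm'].create_chat_completion(...)
    print("loading weights ...")                 # written to stdout
    print("ggml backend: CPU", file=sys.stderr)  # written to stderr
    return {"choices": [{"message": {"content": "ok"}}]}

debug_msgs = []
buf = io.StringIO()
with redirect_stdout(buf), redirect_stderr(buf):
    resp = noisy_call()

# Re-emit every captured line with a timestamp, mirroring the loop in app.py
for line in buf.getvalue().splitlines():
    debug_msgs.append(f"[{time.strftime('%H:%M:%S')}] {line}")

print(debug_msgs)

Note that this captures anything written through Python's sys.stdout / sys.stderr (e.g. print calls) while the with block is active, which is what lets the captured lines be appended to debug_msgs and shown in the debug window.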