apply in-memory encoding instead of temp files
app.py
CHANGED
@@ -2,12 +2,12 @@ import time
 import logging
 import gradio as gr
 import cv2
-import tempfile
 import os
 from pathlib import Path
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 from llama_cpp.llama_chat_format import Llava15ChatHandler
+import base64

 # ----------------------------------------
 # Model configurations: per-size prefixes and repos
@@ -110,24 +110,23 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
     llm = model_cache['llm']
     time.sleep(interval_ms / 1000)
     img = cv2.resize(frame.copy(), (384, 384))
-    # [18 lines removed: the previous temp-file based version of this block]
+    success, jpeg = cv2.imencode('.jpg', img)
+    uri = 'data:image/jpeg;base64,' + base64.b64encode(jpeg.tobytes()).decode()
+    messages = [
+        {"role": "system", "content": sys_prompt},
+        {"role": "user", "content": [
+            {"type": "image_url", "image_url": uri},
+            {"type": "text", "text": usr_prompt}
+        ]}
+    ]
+    # re-init handler
+    llm.chat_handler = SmolVLM2ChatHandler(clip_model_path=clip_file, verbose=False)
+    resp = llm.create_chat_completion(
+        messages=messages,
+        max_tokens=128,
+        temperature=0.1,
+        stop=["<end_of_utterance>"]
+    )

     import gc
     gc.collect()
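The heart of the change is swapping a temp-file round trip for an in-memory JPEG encode wrapped in a base64 data URI. A minimal standalone sketch of that step, assuming a BGR frame as produced by OpenCV (the helper name frame_to_data_uri and the synthetic test frame are illustrative, not part of app.py):

import base64
import cv2
import numpy as np

def frame_to_data_uri(frame: np.ndarray, size: int = 384) -> str:
    """Resize a BGR frame, JPEG-encode it in memory, and return a base64 data URI."""
    img = cv2.resize(frame, (size, size))
    ok, jpeg = cv2.imencode('.jpg', img)      # JPEG bytes live in a numpy buffer, not on disk
    if not ok:
        raise RuntimeError("JPEG encoding failed")
    return 'data:image/jpeg;base64,' + base64.b64encode(jpeg.tobytes()).decode()

# A synthetic gray frame stands in for a real webcam capture.
dummy = np.full((480, 640, 3), 128, dtype=np.uint8)
print(frame_to_data_uri(dummy)[:60] + '...')

Unlike the patch, which captures success from cv2.imencode without checking it, the sketch raises on a failed encode; in both versions nothing is ever written to disk.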
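For context, the data URI is consumed by llama-cpp-python's LLaVA-style chat handlers as an image_url content part. A hedged sketch of the full call shape, reusing frame_to_data_uri and dummy from the sketch above, substituting the imported Llava15ChatHandler for the app's SmolVLM2ChatHandler (defined elsewhere in app.py), and using the OpenAI-style nested {"url": ...} form where the patch passes the URI string directly; the model paths are hypothetical placeholders for files the app fetches with hf_hub_download:

from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

MODEL_PATH = "smolvlm2.gguf"   # hypothetical path; the app downloads the real file
CLIP_PATH = "mmproj.gguf"      # hypothetical path for the vision projector weights

# Attach a multimodal chat handler so image_url message parts are understood.
handler = Llava15ChatHandler(clip_model_path=CLIP_PATH, verbose=False)
llm = Llama(model_path=MODEL_PATH, chat_handler=handler, n_ctx=4096, verbose=False)

uri = frame_to_data_uri(dummy)  # in-memory data URI from the previous sketch
resp = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "Describe the image in one sentence."},
        {"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": uri}},
            {"type": "text", "text": "What is happening in this frame?"},
        ]},
    ],
    max_tokens=128,
    temperature=0.1,
)
print(resp["choices"][0]["message"]["content"])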