Spaces: Running
add debug messages
app.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 import gradio as gr
 import cv2
 import tempfile
@@ -8,6 +9,13 @@ from llama_cpp import Llama
 from llama_cpp.llama_chat_format import Llava15ChatHandler
 from termcolor import cprint
 
+# Configure logging
+logging.basicConfig(
+    level=logging.DEBUG,
+    format='[%(asctime)s] %(levelname)s: %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S'
+)
+
 # ─────────────────────────────────────────
 # 1) Inline definition & registration of SmolVLM2ChatHandler
 class SmolVLM2ChatHandler(Llava15ChatHandler):
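Review note: logging.basicConfig() attaches a stderr handler to the root logger, so with level=logging.DEBUG every message added in this commit is emitted; format and datefmt control the prefix. A minimal sketch of the resulting output (timestamp illustrative):

    import logging
    logging.basicConfig(
        level=logging.DEBUG,
        format='[%(asctime)s] %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )
    logging.debug("Ensuring model files are present...")
    # -> [2025-05-01 12:00:00] DEBUG: Ensuring model files are present...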
@@ -41,35 +49,58 @@ MODEL_REPO = "mradermacher/SmolVLM2-500M-Video-Instruct-GGUF"
 CLIP_REPO = "ggml-org/SmolVLM2-500M-Video-Instruct-GGUF"
 
 def ensure_models():
+    logging.debug("Ensuring model files are present...")
     if not os.path.exists(MODEL_FILE):
+        logging.info(f"Downloading model file {MODEL_FILE} from {MODEL_REPO}...")
         path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
         os.symlink(path, MODEL_FILE)
+        logging.info(f"Created symlink: {path} -> {MODEL_FILE}")
+    else:
+        logging.debug(f"Model file {MODEL_FILE} already exists.")
+
     if not os.path.exists(CLIP_FILE):
+        logging.info(f"Downloading CLIP file {CLIP_FILE} from {CLIP_REPO}...")
         path = hf_hub_download(repo_id=CLIP_REPO, filename=CLIP_FILE)
         os.symlink(path, CLIP_FILE)
+        logging.info(f"Created symlink: {path} -> {CLIP_FILE}")
+    else:
+        logging.debug(f"CLIP file {CLIP_FILE} already exists.")
 
 ensure_models()
 
+
 def load_llm():
+    logging.debug("Loading Llama model with SmolVLM2ChatHandler...")
     handler = SmolVLM2ChatHandler(clip_model_path=CLIP_FILE, verbose=False)
-    return Llama(
+    llm = Llama(
         model_path=MODEL_FILE,
         chat_handler=handler,
         n_ctx=8192,
         verbose=False,
     )
+    logging.info("Llama model loaded successfully.")
+    return llm
 
 llm = load_llm()
 
 # ─────────────────────────────────────────
 # 4) Captioning helper (stateless prompt)
 def caption_frame(frame):
+    logging.debug("caption_frame called.")
     # make a writable copy
     frame = frame.copy()
+    logging.debug(f"Frame shape: {frame.shape}, dtype: {frame.dtype}")
+
     # save frame to temporary file for URI
     with tempfile.NamedTemporaryFile(suffix='.jpg') as f:
-        cv2.imwrite(f.name, frame)
+        success = cv2.imwrite(f.name, frame)
+        if not success:
+            logging.error(f"Failed to write frame to {f.name}")
+        else:
+            logging.debug(f"Frame written to temp file: {f.name}")
+
         uri = Path(f.name).absolute().as_uri()
+        logging.debug(f"Frame URI: {uri}")
 
     # build a single prompt string
     messages = [
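Review note: hf_hub_download() returns the file's location in the local Hugging Face cache (skipping the download when it is already cached), and the symlink exposes it in the working directory under the bare filename the loader expects. The same pattern factored into a helper (a sketch; ensure_file is a hypothetical name):

    import os
    from huggingface_hub import hf_hub_download

    def ensure_file(repo_id: str, filename: str) -> None:
        # Resolve via the HF cache (downloading on first use), then
        # link it into the working directory under the plain filename.
        if not os.path.exists(filename):
            cached = hf_hub_download(repo_id=repo_id, filename=filename)
            os.symlink(cached, filename)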
@@ -89,20 +120,25 @@ def caption_frame(frame):
             ],
         },
     ]
+    logging.debug(f"Constructed messages: {messages}")
 
     # stateless completion call
+    logging.debug("Resetting LLM and clearing cache.")
     llm.chat_handler = SmolVLM2ChatHandler(clip_model_path=CLIP_FILE, verbose=False)
     llm.reset()  # reset n_tokens back to 0
-    llm._ctx.kv_cache_clear()
+    llm._ctx.kv_cache_clear()  # clear any cached key/values
+    logging.debug("Sending chat completion request...")
     resp = llm.create_chat_completion(
-        messages,
+        messages=messages,
         max_tokens=256,
         temperature=0.1,
         stop=["<end_of_utterance>"],
     )
+    logging.debug(f"LLM raw response: {resp}")
 
     # extract caption
-    caption = (resp.get("choices", [])[0][
+    caption = (resp.get("choices", [])[0]["message"].get("content", "") or "").strip()
+    logging.debug(f"Extracted caption: {caption}")
     return caption
 
 # ─────────────────────────────────────────
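Review note: the success check plus the new debug lines make caption_frame easy to smoke-test in isolation. A hypothetical harness (importing app also runs ensure_models() and load_llm(), so the models are downloaded and loaded first; the zero-filled array stands in for a real video frame):

    import numpy as np
    from app import caption_frame

    # A black 640x480 3-channel frame, shaped like what OpenCV/Gradio deliver.
    frame = np.zeros((480, 640, 3), dtype=np.uint8)
    print(caption_frame(frame))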
@@ -123,4 +159,5 @@ with demo:
     )
 
 if __name__ == "__main__":
+    logging.debug("Launching Gradio demo...")
     demo.launch()
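Review note: once the Space is stable, the DEBUG output (raw responses, full message payloads) gets noisy. The standard logging API can raise the threshold without touching any of the new call sites:

    import logging
    logging.getLogger().setLevel(logging.INFO)  # keep INFO and above, drop DEBUG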