Spaces:
Running
Running
minor update then add todos
Browse files
app.py
CHANGED
@@ -43,10 +43,10 @@ class SmolVLM2ChatHandler(Llava15ChatHandler):
|
|
43 |
|
44 |
# ─────────────────────────────────────────
|
45 |
# 2) Model & CLIP files — download if missing
|
46 |
-
MODEL_FILE = "SmolVLM2-
|
47 |
-
CLIP_FILE = "mmproj-SmolVLM2-
|
48 |
-
MODEL_REPO = "mradermacher/SmolVLM2-
|
49 |
-
CLIP_REPO = "ggml-org/SmolVLM2-
|
50 |
|
51 |
def ensure_models():
|
52 |
logging.debug("Ensuring model files are present...")
|
@@ -125,9 +125,7 @@ def caption_frame(frame):
|
|
125 |
|
126 |
# stateless completion call
|
127 |
logging.debug("Resetting LLM and clearing cache.")
|
128 |
-
llm.chat_handler
|
129 |
-
llm.reset() # reset n_tokens back to 0
|
130 |
-
llm._ctx.kv_cache_clear() # clear any cached key/values
|
131 |
logging.debug("Sending chat completion request...")
|
132 |
resp = llm.create_chat_completion(
|
133 |
messages=messages,
|
@@ -162,3 +160,8 @@ with demo:
|
|
162 |
if __name__ == "__main__":
|
163 |
logging.debug("Launching Gradio demo...")
|
164 |
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
# ─────────────────────────────────────────
|
45 |
# 2) Model & CLIP files — download if missing
|
46 |
+
MODEL_FILE = "SmolVLM2-2.2B-Instruct.IQ4_XS.gguf"
|
47 |
+
CLIP_FILE = "mmproj-SmolVLM2-2.2B-Instruct-Q8_0.gguf"
|
48 |
+
MODEL_REPO = "mradermacher/SmolVLM2-2.2B-Instruct-GGUF"
|
49 |
+
CLIP_REPO = "ggml-org/SmolVLM2-2.2B-Instruct-GGUF"
|
50 |
|
51 |
def ensure_models():
|
52 |
logging.debug("Ensuring model files are present...")
|
|
|
125 |
|
126 |
# stateless completion call
|
127 |
logging.debug("Resetting LLM and clearing cache.")
|
128 |
+
llm.chat_handler.__init__(clip_model_path=CLIP_FILE, verbose=False)
|
|
|
|
|
129 |
logging.debug("Sending chat completion request...")
|
130 |
resp = llm.create_chat_completion(
|
131 |
messages=messages,
|
|
|
160 |
if __name__ == "__main__":
|
161 |
logging.debug("Launching Gradio demo...")
|
162 |
demo.launch()
|
163 |
+
|
164 |
+
# todos:
|
165 |
+
# 1. add a list of models to choose from: SmolVLM2 256M, 500M, 2.2B at various precisions
|
166 |
+
# 2. customizable interval
|
167 |
+
# 3. customizable system and user prompts
|