Luigi committed on
Commit
65b3c3a
·
1 Parent(s): c1d8038

minor update then add todos

Browse files
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -43,10 +43,10 @@ class SmolVLM2ChatHandler(Llava15ChatHandler):
43
 
44
  # —————————————————————————————————————————
45
  # 2) Model & CLIP files — download if missing
46
- MODEL_FILE = "SmolVLM2-500M-Video-Instruct.Q8_0.gguf"
47
- CLIP_FILE = "mmproj-SmolVLM2-500M-Video-Instruct-Q8_0.gguf"
48
- MODEL_REPO = "mradermacher/SmolVLM2-500M-Video-Instruct-GGUF"
49
- CLIP_REPO = "ggml-org/SmolVLM2-500M-Video-Instruct-GGUF"
50
 
51
  def ensure_models():
52
  logging.debug("Ensuring model files are present...")
@@ -125,9 +125,7 @@ def caption_frame(frame):
125
 
126
  # stateless completion call
127
  logging.debug("Resetting LLM and clearing cache.")
128
- llm.chat_handler = SmolVLM2ChatHandler(clip_model_path=CLIP_FILE, verbose=False)
129
- llm.reset() # reset n_tokens back to 0
130
- llm._ctx.kv_cache_clear() # clear any cached key/values
131
  logging.debug("Sending chat completion request...")
132
  resp = llm.create_chat_completion(
133
  messages=messages,
@@ -162,3 +160,8 @@ with demo:
162
  if __name__ == "__main__":
163
  logging.debug("Launching Gradio demo...")
164
  demo.launch()
 
 
 
 
 
 
43
 
44
  # —————————————————————————————————————————
45
  # 2) Model & CLIP files — download if missing
46
+ MODEL_FILE = "SmolVLM2-2.2B-Instruct.IQ4_XS.gguf"
47
+ CLIP_FILE = "mmproj-SmolVLM2-2.2B-Instruct-Q8_0.gguf"
48
+ MODEL_REPO = "mradermacher/SmolVLM2-2.2B-Instruct-GGUF"
49
+ CLIP_REPO = "ggml-org/SmolVLM2-2.2B-Instruct-GGUF"
50
 
51
  def ensure_models():
52
  logging.debug("Ensuring model files are present...")
 
125
 
126
  # stateless completion call
127
  logging.debug("Resetting LLM and clearing cache.")
128
+ llm.chat_handler.__init__(clip_model_path=CLIP_FILE, verbose=False)
 
 
129
  logging.debug("Sending chat completion request...")
130
  resp = llm.create_chat_completion(
131
  messages=messages,
 
160
  if __name__ == "__main__":
161
  logging.debug("Launching Gradio demo...")
162
  demo.launch()
163
+
164
+ # todos:
165
+ # 1. add list of models: smolvlm2 256m, 500m, 2.2b with various precisions to choose from
166
+ # 2. customizable interval
167
+ # 3. customizable system and user prompts