Luigi committed on
Commit
45c2159
·
1 Parent(s): 238a95a

apply in-memory encoding instead of temp files

Browse files
Files changed (1) hide show
  1. app.py +18 -19
app.py CHANGED
@@ -2,12 +2,12 @@ import time
2
  import logging
3
  import gradio as gr
4
  import cv2
5
- import tempfile
6
  import os
7
  from pathlib import Path
8
  from huggingface_hub import hf_hub_download
9
  from llama_cpp import Llama
10
  from llama_cpp.llama_chat_format import Llava15ChatHandler
 
11
 
12
  # ----------------------------------------
13
  # Model configurations: per-size prefixes and repos
@@ -110,24 +110,23 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
110
  llm = model_cache['llm']
111
  time.sleep(interval_ms / 1000)
112
  img = cv2.resize(frame.copy(), (384, 384))
113
- with tempfile.NamedTemporaryFile(suffix='.jpg') as tmp:
114
- cv2.imwrite(tmp.name, img)
115
- uri = Path(tmp.name).absolute().as_uri()
116
- messages = [
117
- {"role": "system", "content": sys_prompt},
118
- {"role": "user", "content": [
119
- {"type": "image_url", "image_url": uri},
120
- {"type": "text", "text": usr_prompt}
121
- ]}
122
- ]
123
- # re-init handler
124
- llm.chat_handler = SmolVLM2ChatHandler(clip_model_path=clip_file, verbose=False)
125
- resp = llm.create_chat_completion(
126
- messages=messages,
127
- max_tokens=128,
128
- temperature=0.1,
129
- stop=["<end_of_utterance>"]
130
- )
131
 
132
  import gc
133
  gc.collect()
 
2
  import logging
3
  import gradio as gr
4
  import cv2
 
5
  import os
6
  from pathlib import Path
7
  from huggingface_hub import hf_hub_download
8
  from llama_cpp import Llama
9
  from llama_cpp.llama_chat_format import Llava15ChatHandler
10
+ import base64
11
 
12
  # ----------------------------------------
13
  # Model configurations: per-size prefixes and repos
 
110
  llm = model_cache['llm']
111
  time.sleep(interval_ms / 1000)
112
  img = cv2.resize(frame.copy(), (384, 384))
113
+ success, jpeg = cv2.imencode('.jpg', img)
114
+ uri = 'data:image/jpeg;base64,' + base64.b64encode(jpeg.tobytes()).decode()
115
+ messages = [
116
+ {"role": "system", "content": sys_prompt},
117
+ {"role": "user", "content": [
118
+ {"type": "image_url", "image_url": uri},
119
+ {"type": "text", "text": usr_prompt}
120
+ ]}
121
+ ]
122
+ # re-init handler
123
+ llm.chat_handler = SmolVLM2ChatHandler(clip_model_path=clip_file, verbose=False)
124
+ resp = llm.create_chat_completion(
125
+ messages=messages,
126
+ max_tokens=128,
127
+ temperature=0.1,
128
+ stop=["<end_of_utterance>"]
129
+ )
 
130
 
131
  import gc
132
  gc.collect()