Spaces:
Running
Running
Expose n_threads so it can be set by the user
Browse files
app.py
CHANGED
@@ -49,6 +49,7 @@ model_cache = {
|
|
49 |
'model_file': None,
|
50 |
'clip_file': None,
|
51 |
'verbose': None,
|
|
|
52 |
'llm': None
|
53 |
}
|
54 |
|
@@ -83,9 +84,10 @@ class SmolVLM2ChatHandler(Llava15ChatHandler):
|
|
83 |
"{% if add_generation_prompt %}Assistant:{% endif %}"
|
84 |
)
|
85 |
|
86 |
-
# Load and cache LLM (only on dropdown or verbose change)
|
87 |
-
def update_llm(size, model_file, clip_file, verbose_mode):
|
88 |
-
|
|
|
89 |
mf, cf = ensure_weights(MODELS[size], model_file, clip_file)
|
90 |
handler = SmolVLM2ChatHandler(clip_model_path=cf, verbose=verbose_mode)
|
91 |
llm = Llama(
|
@@ -93,9 +95,9 @@ def update_llm(size, model_file, clip_file, verbose_mode):
|
|
93 |
chat_handler=handler,
|
94 |
n_ctx=512,
|
95 |
verbose=verbose_mode,
|
96 |
-
n_threads=
|
97 |
)
|
98 |
-
model_cache.update({'size': size, 'model_file': mf, 'clip_file': cf, 'verbose': verbose_mode, 'llm': llm})
|
99 |
return None
|
100 |
|
101 |
# Build weight filename lists
|
@@ -155,6 +157,8 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
|
|
155 |
|
156 |
timestamp = time.strftime('%H:%M:%S')
|
157 |
debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
|
|
|
|
|
158 |
|
159 |
t_start = time.time()
|
160 |
buf = io.StringIO()
|
@@ -188,6 +192,7 @@ def main():
|
|
188 |
logging.basicConfig(level=logging.INFO)
|
189 |
default = '256M'
|
190 |
default_verbose = True
|
|
|
191 |
mf, cf = get_weight_files(default)
|
192 |
|
193 |
with gr.Blocks() as demo:
|
@@ -197,33 +202,40 @@ def main():
|
|
197 |
model_dd = gr.Dropdown(mf, value=mf[0], label='Decoder Weights')
|
198 |
clip_dd = gr.Dropdown(cf, value=cf[0], label='CLIP Weights')
|
199 |
verbose_cb= gr.Checkbox(value=default_verbose, label='Verbose Mode')
|
|
|
200 |
|
201 |
-
def on_size_change(sz, verbose):
|
202 |
mlist, clist = get_weight_files(sz)
|
203 |
-
update_llm(sz, mlist[0], clist[0], verbose)
|
204 |
return gr.update(choices=mlist, value=mlist[0]), gr.update(choices=clist, value=clist[0])
|
205 |
|
206 |
size_dd.change(
|
207 |
fn=on_size_change,
|
208 |
-
inputs=[size_dd, verbose_cb],
|
209 |
outputs=[model_dd, clip_dd]
|
210 |
)
|
211 |
model_dd.change(
|
212 |
-
fn=lambda sz, mf, cf, verbose: update_llm(sz, mf, cf, verbose),
|
213 |
-
inputs=[size_dd, model_dd, clip_dd, verbose_cb],
|
214 |
outputs=[]
|
215 |
)
|
216 |
clip_dd.change(
|
217 |
-
fn=lambda sz, mf, cf, verbose: update_llm(sz, mf, cf, verbose),
|
218 |
-
inputs=[size_dd, model_dd, clip_dd, verbose_cb],
|
219 |
outputs=[]
|
220 |
)
|
221 |
verbose_cb.change(
|
222 |
-
fn=lambda sz, mf, cf, verbose: update_llm(sz, mf, cf, verbose),
|
223 |
-
inputs=[size_dd, model_dd, clip_dd, verbose_cb],
|
224 |
outputs=[]
|
225 |
)
|
226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
|
228 |
interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
|
229 |
sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
|
|
|
49 |
'model_file': None,
|
50 |
'clip_file': None,
|
51 |
'verbose': None,
|
52 |
+
'n_threads': None,
|
53 |
'llm': None
|
54 |
}
|
55 |
|
|
|
84 |
"{% if add_generation_prompt %}Assistant:{% endif %}"
|
85 |
)
|
86 |
|
87 |
+
# Load and cache LLM (only on dropdown or verbose or thread change)
|
88 |
+
def update_llm(size, model_file, clip_file, verbose_mode, n_threads):
|
89 |
+
# Only reload if any of the parameters changed
|
90 |
+
if (model_cache['size'], model_cache['model_file'], model_cache['clip_file'], model_cache['verbose'], model_cache['n_threads']) != (size, model_file, clip_file, verbose_mode, n_threads):
|
91 |
mf, cf = ensure_weights(MODELS[size], model_file, clip_file)
|
92 |
handler = SmolVLM2ChatHandler(clip_model_path=cf, verbose=verbose_mode)
|
93 |
llm = Llama(
|
|
|
95 |
chat_handler=handler,
|
96 |
n_ctx=512,
|
97 |
verbose=verbose_mode,
|
98 |
+
n_threads=n_threads
|
99 |
)
|
100 |
+
model_cache.update({'size': size, 'model_file': mf, 'clip_file': cf, 'verbose': verbose_mode, 'n_threads': n_threads, 'llm': llm})
|
101 |
return None
|
102 |
|
103 |
# Build weight filename lists
|
|
|
157 |
|
158 |
timestamp = time.strftime('%H:%M:%S')
|
159 |
debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
|
160 |
+
if model_cache.get('n_threads') is not None:
|
161 |
+
debug_msgs.append(f"[{timestamp}] llama_cpp n_threads = {model_cache['n_threads']}")
|
162 |
|
163 |
t_start = time.time()
|
164 |
buf = io.StringIO()
|
|
|
192 |
logging.basicConfig(level=logging.INFO)
|
193 |
default = '256M'
|
194 |
default_verbose = True
|
195 |
+
default_threads = os.cpu_count() or 1
|
196 |
mf, cf = get_weight_files(default)
|
197 |
|
198 |
with gr.Blocks() as demo:
|
|
|
202 |
model_dd = gr.Dropdown(mf, value=mf[0], label='Decoder Weights')
|
203 |
clip_dd = gr.Dropdown(cf, value=cf[0], label='CLIP Weights')
|
204 |
verbose_cb= gr.Checkbox(value=default_verbose, label='Verbose Mode')
|
205 |
+
thread_dd = gr.Slider(minimum=1, maximum=default_threads, step=1, value=default_threads, label='CPU Threads (n_threads)')
|
206 |
|
207 |
+
def on_size_change(sz, verbose, n_threads):
|
208 |
mlist, clist = get_weight_files(sz)
|
209 |
+
update_llm(sz, mlist[0], clist[0], verbose, n_threads)
|
210 |
return gr.update(choices=mlist, value=mlist[0]), gr.update(choices=clist, value=clist[0])
|
211 |
|
212 |
size_dd.change(
|
213 |
fn=on_size_change,
|
214 |
+
inputs=[size_dd, verbose_cb, thread_dd],
|
215 |
outputs=[model_dd, clip_dd]
|
216 |
)
|
217 |
model_dd.change(
|
218 |
+
fn=lambda sz, mf, cf, verbose, n_threads: update_llm(sz, mf, cf, verbose, n_threads),
|
219 |
+
inputs=[size_dd, model_dd, clip_dd, verbose_cb, thread_dd],
|
220 |
outputs=[]
|
221 |
)
|
222 |
clip_dd.change(
|
223 |
+
fn=lambda sz, mf, cf, verbose, n_threads: update_llm(sz, mf, cf, verbose, n_threads),
|
224 |
+
inputs=[size_dd, model_dd, clip_dd, verbose_cb, thread_dd],
|
225 |
outputs=[]
|
226 |
)
|
227 |
verbose_cb.change(
|
228 |
+
fn=lambda sz, mf, cf, verbose, n_threads: update_llm(sz, mf, cf, verbose, n_threads),
|
229 |
+
inputs=[size_dd, model_dd, clip_dd, verbose_cb, thread_dd],
|
230 |
outputs=[]
|
231 |
)
|
232 |
+
thread_dd.change(
|
233 |
+
fn=lambda sz, mf, cf, verbose, n_threads: update_llm(sz, mf, cf, verbose, n_threads),
|
234 |
+
inputs=[size_dd, model_dd, clip_dd, verbose_cb, thread_dd],
|
235 |
+
outputs=[]
|
236 |
+
)
|
237 |
+
# Initial load
|
238 |
+
update_llm(default, mf[0], cf[0], default_verbose, default_threads)
|
239 |
|
240 |
interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
|
241 |
sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
|