Luigi committed on
Commit
22b94a2
·
1 Parent(s): 427620d

Allow n_threads to be set by the user

Browse files
Files changed (1) hide show
  1. app.py +27 -15
app.py CHANGED
@@ -49,6 +49,7 @@ model_cache = {
49
  'model_file': None,
50
  'clip_file': None,
51
  'verbose': None,
 
52
  'llm': None
53
  }
54
 
@@ -83,9 +84,10 @@ class SmolVLM2ChatHandler(Llava15ChatHandler):
83
  "{% if add_generation_prompt %}Assistant:{% endif %}"
84
  )
85
 
86
- # Load and cache LLM (only on dropdown or verbose change)
87
- def update_llm(size, model_file, clip_file, verbose_mode):
88
- if (model_cache['size'], model_cache['model_file'], model_cache['clip_file'], model_cache['verbose']) != (size, model_file, clip_file, verbose_mode):
 
89
  mf, cf = ensure_weights(MODELS[size], model_file, clip_file)
90
  handler = SmolVLM2ChatHandler(clip_model_path=cf, verbose=verbose_mode)
91
  llm = Llama(
@@ -93,9 +95,9 @@ def update_llm(size, model_file, clip_file, verbose_mode):
93
  chat_handler=handler,
94
  n_ctx=512,
95
  verbose=verbose_mode,
96
- n_threads=max(2, os.cpu_count())
97
  )
98
- model_cache.update({'size': size, 'model_file': mf, 'clip_file': cf, 'verbose': verbose_mode, 'llm': llm})
99
  return None
100
 
101
  # Build weight filename lists
@@ -155,6 +157,8 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
155
 
156
  timestamp = time.strftime('%H:%M:%S')
157
  debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
 
 
158
 
159
  t_start = time.time()
160
  buf = io.StringIO()
@@ -188,6 +192,7 @@ def main():
188
  logging.basicConfig(level=logging.INFO)
189
  default = '256M'
190
  default_verbose = True
 
191
  mf, cf = get_weight_files(default)
192
 
193
  with gr.Blocks() as demo:
@@ -197,33 +202,40 @@ def main():
197
  model_dd = gr.Dropdown(mf, value=mf[0], label='Decoder Weights')
198
  clip_dd = gr.Dropdown(cf, value=cf[0], label='CLIP Weights')
199
  verbose_cb= gr.Checkbox(value=default_verbose, label='Verbose Mode')
 
200
 
201
- def on_size_change(sz, verbose):
202
  mlist, clist = get_weight_files(sz)
203
- update_llm(sz, mlist[0], clist[0], verbose)
204
  return gr.update(choices=mlist, value=mlist[0]), gr.update(choices=clist, value=clist[0])
205
 
206
  size_dd.change(
207
  fn=on_size_change,
208
- inputs=[size_dd, verbose_cb],
209
  outputs=[model_dd, clip_dd]
210
  )
211
  model_dd.change(
212
- fn=lambda sz, mf, cf, verbose: update_llm(sz, mf, cf, verbose),
213
- inputs=[size_dd, model_dd, clip_dd, verbose_cb],
214
  outputs=[]
215
  )
216
  clip_dd.change(
217
- fn=lambda sz, mf, cf, verbose: update_llm(sz, mf, cf, verbose),
218
- inputs=[size_dd, model_dd, clip_dd, verbose_cb],
219
  outputs=[]
220
  )
221
  verbose_cb.change(
222
- fn=lambda sz, mf, cf, verbose: update_llm(sz, mf, cf, verbose),
223
- inputs=[size_dd, model_dd, clip_dd, verbose_cb],
224
  outputs=[]
225
  )
226
- update_llm(default, mf[0], cf[0], default_verbose)
 
 
 
 
 
 
227
 
228
  interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
229
  sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
 
49
  'model_file': None,
50
  'clip_file': None,
51
  'verbose': None,
52
+ 'n_threads': None,
53
  'llm': None
54
  }
55
 
 
84
  "{% if add_generation_prompt %}Assistant:{% endif %}"
85
  )
86
 
87
+ # Load and cache LLM (only on dropdown or verbose or thread change)
88
+ def update_llm(size, model_file, clip_file, verbose_mode, n_threads):
89
+ # Only reload if any of parameters changed
90
+ if (model_cache['size'], model_cache['model_file'], model_cache['clip_file'], model_cache['verbose'], model_cache['n_threads']) != (size, model_file, clip_file, verbose_mode, n_threads):
91
  mf, cf = ensure_weights(MODELS[size], model_file, clip_file)
92
  handler = SmolVLM2ChatHandler(clip_model_path=cf, verbose=verbose_mode)
93
  llm = Llama(
 
95
  chat_handler=handler,
96
  n_ctx=512,
97
  verbose=verbose_mode,
98
+ n_threads=n_threads
99
  )
100
+ model_cache.update({'size': size, 'model_file': mf, 'clip_file': cf, 'verbose': verbose_mode, 'n_threads': n_threads, 'llm': llm})
101
  return None
102
 
103
  # Build weight filename lists
 
157
 
158
  timestamp = time.strftime('%H:%M:%S')
159
  debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
160
+ if model_cache.get('n_threads') is not None:
161
+ debug_msgs.append(f"[{timestamp}] llama_cpp n_threads = {model_cache['n_threads']}")
162
 
163
  t_start = time.time()
164
  buf = io.StringIO()
 
192
  logging.basicConfig(level=logging.INFO)
193
  default = '256M'
194
  default_verbose = True
195
+ default_threads = os.cpu_count() or 1
196
  mf, cf = get_weight_files(default)
197
 
198
  with gr.Blocks() as demo:
 
202
  model_dd = gr.Dropdown(mf, value=mf[0], label='Decoder Weights')
203
  clip_dd = gr.Dropdown(cf, value=cf[0], label='CLIP Weights')
204
  verbose_cb= gr.Checkbox(value=default_verbose, label='Verbose Mode')
205
+ thread_dd = gr.Slider(minimum=1, maximum=default_threads, step=1, value=default_threads, label='CPU Threads (n_threads)')
206
 
207
+ def on_size_change(sz, verbose, n_threads):
208
  mlist, clist = get_weight_files(sz)
209
+ update_llm(sz, mlist[0], clist[0], verbose, n_threads)
210
  return gr.update(choices=mlist, value=mlist[0]), gr.update(choices=clist, value=clist[0])
211
 
212
  size_dd.change(
213
  fn=on_size_change,
214
+ inputs=[size_dd, verbose_cb, thread_dd],
215
  outputs=[model_dd, clip_dd]
216
  )
217
  model_dd.change(
218
+ fn=lambda sz, mf, cf, verbose, n_threads: update_llm(sz, mf, cf, verbose, n_threads),
219
+ inputs=[size_dd, model_dd, clip_dd, verbose_cb, thread_dd],
220
  outputs=[]
221
  )
222
  clip_dd.change(
223
+ fn=lambda sz, mf, cf, verbose, n_threads: update_llm(sz, mf, cf, verbose, n_threads),
224
+ inputs=[size_dd, model_dd, clip_dd, verbose_cb, thread_dd],
225
  outputs=[]
226
  )
227
  verbose_cb.change(
228
+ fn=lambda sz, mf, cf, verbose, n_threads: update_llm(sz, mf, cf, verbose, n_threads),
229
+ inputs=[size_dd, model_dd, clip_dd, verbose_cb, thread_dd],
230
  outputs=[]
231
  )
232
+ thread_dd.change(
233
+ fn=lambda sz, mf, cf, verbose, n_threads: update_llm(sz, mf, cf, verbose, n_threads),
234
+ inputs=[size_dd, model_dd, clip_dd, verbose_cb, thread_dd],
235
+ outputs=[]
236
+ )
237
+ # Initial load
238
+ update_llm(default, mf[0], cf[0], default_verbose, default_threads)
239
 
240
  interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
241
  sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')