alakxender committed
Commit 81d025b · 1 Parent(s): d5e9410
Files changed (1):
  1. app.py +28 -14
app.py CHANGED
@@ -70,7 +70,12 @@ def process_single_line(image, model_name):
     prompt = MODELS[model_name]["prompt"]
     # Add image token to prompt
     prompt = f"<image>{prompt}"
-    model_inputs = processor(text=prompt, images=image, return_tensors="pt").to(torch.bfloat16).to("cuda")
+
+    # First prepare inputs without moving to CUDA
+    model_inputs = processor(text=prompt, images=image, return_tensors="pt")
+
+    # Then move to CUDA in a separate step
+    model_inputs = {k: v.to(torch.bfloat16).to("cuda") for k, v in model_inputs.items()}
 
     outputs = model.generate(
         **model_inputs,
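Note: the new dict comprehension moves every tensor returned by the processor to bfloat16, including integer fields such as input_ids; the replaced one-liner relied on BatchFeature.to, which in recent transformers releases applies a dtype argument only to floating-point tensors. A minimal dtype-aware sketch of the same two-step transfer (the helper name is illustrative and not part of this commit):

import torch

def move_inputs(model_inputs, device="cuda", dtype=torch.bfloat16):
    """Sketch: move processor outputs to the GPU, casting only float tensors.

    Integer tensors such as input_ids and attention_mask keep their dtype so
    that embedding lookups still receive integer indices.
    """
    moved = {}
    for name, tensor in model_inputs.items():
        if torch.is_floating_point(tensor):
            moved[name] = tensor.to(device=device, dtype=dtype)
        else:
            moved[name] = tensor.to(device=device)
    return moved

# Hypothetical usage inside process_single_line:
# model_inputs = processor(text=prompt, images=image, return_tensors="pt")
# model_inputs = move_inputs(model_inputs)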
@@ -156,8 +161,30 @@ def process_multi_line(image, model_name, progress=gr.Progress()):
     progress(1.0, desc="Done!")
     return "\n".join(all_text), [bbox_image]  # Return as list for gallery
 
+@spaces.GPU
+def process_image(model_name, image, progress=gr.Progress()):
+    """Process a single image"""
+    if image is None:
+        return "", []
+
+    # Load model if different model selected
+    if model_name != current_model_name:
+        progress(0, desc="Loading model...")
+        load_model(model_name)
+
+    return process_multi_line(image, model_name, progress)
+
+@spaces.GPU
 def process_pdf(pdf_path, model_name, progress=gr.Progress()):
     """Process a PDF file"""
+    if pdf_path is None:
+        return "", []
+
+    # Load model if different model selected
+    if model_name != current_model_name:
+        progress(0, desc="Loading model...")
+        load_model(model_name)
+
     # Create temporary directory
     with tempfile.TemporaryDirectory() as temp_dir:
         # Initialize detector with temp directory
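Note: the guards above call load_model and read current_model_name, both defined earlier in app.py and not shown in this diff. A minimal sketch of the lazy-loading pattern the guard implies, with a placeholder checkpoint and a hypothetical "model_id" key (only the "prompt" key is confirmed by this commit):

import torch
from transformers import AutoProcessor, AutoModelForVision2Seq  # assumed classes

# Stand-in for the MODELS config in app.py; "model_id" is a hypothetical key.
MODELS = {
    "Example OCR model": {"model_id": "org/checkpoint", "prompt": "Read the text in this image."},
}

model = None
processor = None
current_model_name = None

def load_model(model_name):
    """Load the selected checkpoint once and cache it at module level."""
    global model, processor, current_model_name
    if model_name == current_model_name:
        return  # already loaded, nothing to do
    checkpoint = MODELS[model_name]["model_id"]
    processor = AutoProcessor.from_pretrained(checkpoint)
    model = AutoModelForVision2Seq.from_pretrained(checkpoint, torch_dtype=torch.bfloat16)
    model.to("cuda")
    current_model_name = model_name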
@@ -236,19 +263,6 @@ def process_pdf(pdf_path, model_name, progress=gr.Progress()):
     progress(1.0, desc="Done!")
     return "\n".join(all_text), bbox_images  # Return list of bbox images
 
-@spaces.GPU
-def process_image(model_name, image, progress=gr.Progress()):
-    """Process a single image"""
-    if image is None:
-        return "", None
-
-    # Load model if different model selected
-    if model_name != current_model_name:
-        progress(0, desc="Loading model...")
-        load_model(model_name)
-
-    return process_multi_line(image, model_name, progress)
-
 # Example images with descriptions
 examples = [
     ["type_1_sl.png", "Typed Dhivehi text sample 1"],
 
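Note: the relocated process_image returns ("", []) instead of ("", None) when no image is supplied, which matches a gallery-style output that expects a list of images. The UI wiring itself is not part of this commit; a minimal sketch of how the decorated function might be hooked up in Gradio, with illustrative component names:

import gradio as gr

# Sketch only: the real Blocks layout lives elsewhere in app.py; component
# names here are illustrative, and MODELS / process_image come from app.py.
with gr.Blocks() as demo:
    model_choice = gr.Dropdown(choices=list(MODELS.keys()), label="Model")
    image_input = gr.Image(type="pil", label="Input image")
    text_output = gr.Textbox(label="Recognized text")
    bbox_gallery = gr.Gallery(label="Detected lines")  # expects a list, hence return "", []

    run_button = gr.Button("Run OCR")
    run_button.click(
        fn=process_image,  # the @spaces.GPU-decorated function added in this commit
        inputs=[model_choice, image_input],  # order matches process_image(model_name, image)
        outputs=[text_output, bbox_gallery],
    )

demo.launch()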