prithivMLmods committed on
Commit
b1b6025
·
verified ·
1 Parent(s): 401889a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -10
app.py CHANGED
@@ -44,15 +44,6 @@ model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
44
  torch_dtype=torch.float16
45
  ).to(device).eval()
46
 
47
- # Load SmolDocling-256M-preview
48
- MODEL_ID_X = "ds4sd/SmolDocling-256M-preview"
49
- processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
50
- model_x = AutoModelForVision2Seq.from_pretrained(
51
- MODEL_ID_X,
52
- trust_remote_code=True,
53
- torch_dtype=torch.float16
54
- ).to(device).eval()
55
-
56
  # Load MonkeyOCR
57
  MODEL_ID_G = "echo840/MonkeyOCR"
58
  SUBFOLDER = "Recognition"
@@ -68,6 +59,26 @@ model_g = Qwen2_5_VLForConditionalGeneration.from_pretrained(
68
  torch_dtype=torch.float16
69
  ).to(device).eval()
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  # Preprocessing functions for SmolDocling-256M
72
  def add_random_padding(image, min_percent=0.1, max_percent=0.10):
73
  """Add random padding to an image based on its size."""
@@ -132,6 +143,9 @@ def generate_image(model_name: str, text: str, image: Image.Image,
132
  elif model_name == "SmolDocling-256M-preview":
133
  processor = processor_x
134
  model = model_x
 
 
 
135
  else:
136
  yield "Invalid model selected."
137
  return
@@ -216,6 +230,9 @@ def generate_video(model_name: str, text: str, video_path: str,
216
  elif model_name == "SmolDocling-256M-preview":
217
  processor = processor_x
218
  model = model_x
 
 
 
219
  else:
220
  yield "Invalid model selected."
221
  return
@@ -336,7 +353,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
336
  with gr.Column():
337
  output = gr.Textbox(label="Output", interactive=False, lines=3, scale=2)
338
  model_choice = gr.Radio(
339
- choices=["Nanonets-OCR-s", "MonkeyOCR-Recognition", "SmolDocling-256M-preview"],
340
  label="Select Model",
341
  value="Nanonets-OCR-s"
342
  )
 
44
  torch_dtype=torch.float16
45
  ).to(device).eval()
46
 
 
 
 
 
 
 
 
 
 
47
  # Load MonkeyOCR
48
  MODEL_ID_G = "echo840/MonkeyOCR"
49
  SUBFOLDER = "Recognition"
 
59
  torch_dtype=torch.float16
60
  ).to(device).eval()
61
 
62
+ # Load NuExtract-2.0-8B
63
+ MODEL_ID_L = "numind/NuExtract-2.0-8B"
64
+ processor_l = AutoProcessor.from_pretrained(MODEL_ID_L, trust_remote_code=True)
65
+ model_l = Qwen2_5_VLForConditionalGeneration.from_pretrained(
66
+ MODEL_ID_L,
67
+ trust_remote_code=True,
68
+ torch_dtype=torch.float16
69
+ ).to(device).eval()
70
+
71
+ #--------------------------------------------------#
72
+ # Load SmolDocling-256M-preview
73
+ MODEL_ID_X = "ds4sd/SmolDocling-256M-preview"
74
+ processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
75
+ model_x = AutoModelForVision2Seq.from_pretrained(
76
+ MODEL_ID_X,
77
+ trust_remote_code=True,
78
+ torch_dtype=torch.float16
79
+ ).to(device).eval()
80
+ #--------------------------------------------------#
81
+
82
  # Preprocessing functions for SmolDocling-256M
83
  def add_random_padding(image, min_percent=0.1, max_percent=0.10):
84
  """Add random padding to an image based on its size."""
 
143
  elif model_name == "SmolDocling-256M-preview":
144
  processor = processor_x
145
  model = model_x
146
+ elif model_name == "NuExtract-2.0-8B":
147
+ processor = processor_l
148
+ model = model_l
149
  else:
150
  yield "Invalid model selected."
151
  return
 
230
  elif model_name == "SmolDocling-256M-preview":
231
  processor = processor_x
232
  model = model_x
233
+ elif model_name == "NuExtract-2.0-8B":
234
+ processor = processor_l
235
+ model = model_l
236
  else:
237
  yield "Invalid model selected."
238
  return
 
353
  with gr.Column():
354
  output = gr.Textbox(label="Output", interactive=False, lines=3, scale=2)
355
  model_choice = gr.Radio(
356
+ choices=["Nanonets-OCR-s", "MonkeyOCR-Recognition", "SmolDocling-256M-preview", "NuExtract-2.0-8B"],
357
  label="Select Model",
358
  value="Nanonets-OCR-s"
359
  )