prithivMLmods committed on
Commit
3e8f362
·
verified ·
1 Parent(s): 4ef8c51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -1
app.py CHANGED
@@ -16,6 +16,7 @@ import cv2
16
  from transformers import (
17
  Qwen2VLForConditionalGeneration,
18
  Qwen2_5_VLForConditionalGeneration,
 
19
  AutoModelForVision2Seq,
20
  AutoProcessor,
21
  TextIteratorStreamer,
@@ -77,6 +78,16 @@ model_x = AutoModelForVision2Seq.from_pretrained(
77
  torch_dtype=torch.float16
78
  ).to(device).eval()
79
 
 
 
 
 
 
 
 
 
 
 
80
  # Preprocessing functions for SmolDocling-256M
81
  def add_random_padding(image, min_percent=0.1, max_percent=0.10):
82
  """Add random padding to an image based on its size."""
@@ -143,6 +154,9 @@ def generate_image(model_name: str, text: str, image: Image.Image,
143
  elif model_name == "Typhoon-OCR-7B":
144
  processor = processor_l
145
  model = model_l
 
 
 
146
  else:
147
  yield "Invalid model selected.", "Invalid model selected."
148
  return
@@ -221,6 +235,9 @@ def generate_video(model_name: str, text: str, video_path: str,
221
  elif model_name == "Typhoon-OCR-7B":
222
  processor = processor_l
223
  model = model_l
 
 
 
224
  else:
225
  yield "Invalid model selected.", "Invalid model selected."
226
  return
@@ -351,7 +368,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
351
  formatted_output = gr.Markdown(label="(Result.md)")
352
 
353
  model_choice = gr.Radio(
354
- choices=["Nanonets-OCR-s", "MonkeyOCR-Recognition", "SmolDocling-256M-preview", "Typhoon-OCR-7B"],
355
  label="Select Model",
356
  value="Nanonets-OCR-s"
357
  )
 
16
  from transformers import (
17
  Qwen2VLForConditionalGeneration,
18
  Qwen2_5_VLForConditionalGeneration,
19
+ AutoModelForCausalLM,
20
  AutoModelForVision2Seq,
21
  AutoProcessor,
22
  TextIteratorStreamer,
 
78
  torch_dtype=torch.float16
79
  ).to(device).eval()
80
 
81
+ # Load dots.ocr
82
+ MODEL_ID_D = "rednote-hilab/dots.ocr"
83
+ processor_d = AutoProcessor.from_pretrained(MODEL_ID_D, trust_remote_code=True)
84
+ model_d = AutoModelForCausalLM.from_pretrained(
85
+ MODEL_ID_D,
86
+ attn_implementation="flash_attention_2",
87
+ trust_remote_code=True,
88
+ torch_dtype=torch.float16
89
+ ).to(device).eval()
90
+
91
  # Preprocessing functions for SmolDocling-256M
92
  def add_random_padding(image, min_percent=0.1, max_percent=0.10):
93
  """Add random padding to an image based on its size."""
 
154
  elif model_name == "Typhoon-OCR-7B":
155
  processor = processor_l
156
  model = model_l
157
+ elif model_name == "rednote-dots.ocr":
158
+ processor = processor_d
159
+ model = model_d
160
  else:
161
  yield "Invalid model selected.", "Invalid model selected."
162
  return
 
235
  elif model_name == "Typhoon-OCR-7B":
236
  processor = processor_l
237
  model = model_l
238
+ elif model_name == "rednote-dots.ocr":
239
+ processor = processor_d
240
+ model = model_d
241
  else:
242
  yield "Invalid model selected.", "Invalid model selected."
243
  return
 
368
  formatted_output = gr.Markdown(label="(Result.md)")
369
 
370
  model_choice = gr.Radio(
371
+ choices=["Nanonets-OCR-s", "MonkeyOCR-Recognition", "rednote-dots.ocr", "Typhoon-OCR-7B", "SmolDocling-256M-preview"],
372
  label="Select Model",
373
  value="Nanonets-OCR-s"
374
  )