prithivMLmods committed on
Commit
d5cb2af
·
verified ·
1 Parent(s): 0c22ab6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -1
app.py CHANGED
@@ -16,6 +16,7 @@ import cv2
16
  from transformers import (
17
  Qwen2VLForConditionalGeneration,
18
  Qwen2_5_VLForConditionalGeneration,
 
19
  AutoModelForVision2Seq,
20
  AutoProcessor,
21
  TextIteratorStreamer,
@@ -44,6 +45,16 @@ model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
44
  torch_dtype=torch.float16
45
  ).to(device).eval()
46
 
 
 
 
 
 
 
 
 
 
 
47
  # Load SmolDocling-256M-preview
48
  MODEL_ID_X = "ds4sd/SmolDocling-256M-preview"
49
  processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
@@ -52,6 +63,7 @@ model_x = AutoModelForVision2Seq.from_pretrained(
52
  trust_remote_code=True,
53
  torch_dtype=torch.float16
54
  ).to(device).eval()
 
55
 
56
  # Load MonkeyOCR
57
  MODEL_ID_G = "echo840/MonkeyOCR"
@@ -132,6 +144,9 @@ def generate_image(model_name: str, text: str, image: Image.Image,
132
  elif model_name == "SmolDocling-256M-preview":
133
  processor = processor_x
134
  model = model_x
 
 
 
135
  else:
136
  yield "Invalid model selected."
137
  return
@@ -216,6 +231,9 @@ def generate_video(model_name: str, text: str, video_path: str,
216
  elif model_name == "SmolDocling-256M-preview":
217
  processor = processor_x
218
  model = model_x
 
 
 
219
  else:
220
  yield "Invalid model selected."
221
  return
@@ -336,7 +354,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
336
  with gr.Column():
337
  output = gr.Textbox(label="Output", interactive=False, lines=3, scale=2)
338
  model_choice = gr.Radio(
339
- choices=["Nanonets-OCR-s", "SmolDocling-256M-preview", "MonkeyOCR-Recognition"],
340
  label="Select Model",
341
  value="Nanonets-OCR-s"
342
  )
 
16
  from transformers import (
17
  Qwen2VLForConditionalGeneration,
18
  Qwen2_5_VLForConditionalGeneration,
19
+ VisionEncoderDecoderModel,
20
  AutoModelForVision2Seq,
21
  AutoProcessor,
22
  TextIteratorStreamer,
 
45
  torch_dtype=torch.float16
46
  ).to(device).eval()
47
 
48
+ # Load ByteDance's Dolphin
49
+ MODEL_ID_K = "ByteDance/Dolphin"
50
+ processor_k = AutoProcessor.from_pretrained(MODEL_ID_K, trust_remote_code=True)
51
+ model_k = VisionEncoderDecoderModel.from_pretrained(
52
+ MODEL_ID_K,
53
+ trust_remote_code=True,
54
+ torch_dtype=torch.float16
55
+ ).to(device).eval()
56
+
57
+ #------------------------------------------------#
58
  # Load SmolDocling-256M-preview
59
  MODEL_ID_X = "ds4sd/SmolDocling-256M-preview"
60
  processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
 
63
  trust_remote_code=True,
64
  torch_dtype=torch.float16
65
  ).to(device).eval()
66
+ #------------------------------------------------#
67
 
68
  # Load MonkeyOCR
69
  MODEL_ID_G = "echo840/MonkeyOCR"
 
144
  elif model_name == "SmolDocling-256M-preview":
145
  processor = processor_x
146
  model = model_x
147
+ elif model_name == "ByteDance-s-Dolphin":
148
+ processor = processor_k
149
+ model = model_k
150
  else:
151
  yield "Invalid model selected."
152
  return
 
231
  elif model_name == "SmolDocling-256M-preview":
232
  processor = processor_x
233
  model = model_x
234
+ elif model_name == "ByteDance-s-Dolphin":
235
+ processor = processor_k
236
+ model = model_k
237
  else:
238
  yield "Invalid model selected."
239
  return
 
354
  with gr.Column():
355
  output = gr.Textbox(label="Output", interactive=False, lines=3, scale=2)
356
  model_choice = gr.Radio(
357
+ choices=["Nanonets-OCR-s", "SmolDocling-256M-preview", "MonkeyOCR-Recognition", "ByteDance-s-Dolphin"],
358
  label="Select Model",
359
  value="Nanonets-OCR-s"
360
  )