Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -16,6 +16,7 @@ import cv2
|
|
16 |
from transformers import (
|
17 |
Qwen2VLForConditionalGeneration,
|
18 |
Qwen2_5_VLForConditionalGeneration,
|
|
|
19 |
AutoModelForVision2Seq,
|
20 |
AutoProcessor,
|
21 |
TextIteratorStreamer,
|
@@ -77,6 +78,16 @@ model_x = AutoModelForVision2Seq.from_pretrained(
|
|
77 |
torch_dtype=torch.float16
|
78 |
).to(device).eval()
|
79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
# Preprocessing functions for SmolDocling-256M
|
81 |
def add_random_padding(image, min_percent=0.1, max_percent=0.10):
|
82 |
"""Add random padding to an image based on its size."""
|
@@ -143,6 +154,9 @@ def generate_image(model_name: str, text: str, image: Image.Image,
|
|
143 |
elif model_name == "Typhoon-OCR-7B":
|
144 |
processor = processor_l
|
145 |
model = model_l
|
|
|
|
|
|
|
146 |
else:
|
147 |
yield "Invalid model selected.", "Invalid model selected."
|
148 |
return
|
@@ -221,6 +235,9 @@ def generate_video(model_name: str, text: str, video_path: str,
|
|
221 |
elif model_name == "Typhoon-OCR-7B":
|
222 |
processor = processor_l
|
223 |
model = model_l
|
|
|
|
|
|
|
224 |
else:
|
225 |
yield "Invalid model selected.", "Invalid model selected."
|
226 |
return
|
@@ -351,7 +368,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
|
|
351 |
formatted_output = gr.Markdown(label="(Result.md)")
|
352 |
|
353 |
model_choice = gr.Radio(
|
354 |
-
choices=["Nanonets-OCR-s", "MonkeyOCR-Recognition", "Typhoon-OCR-7B", "SmolDocling-256M-preview"],
|
355 |
label="Select Model",
|
356 |
value="Nanonets-OCR-s"
|
357 |
)
|
|
|
16 |
from transformers import (
|
17 |
Qwen2VLForConditionalGeneration,
|
18 |
Qwen2_5_VLForConditionalGeneration,
|
19 |
+
AutoModelForCausalLM,
|
20 |
AutoModelForVision2Seq,
|
21 |
AutoProcessor,
|
22 |
TextIteratorStreamer,
|
|
|
78 |
torch_dtype=torch.float16
|
79 |
).to(device).eval()
|
80 |
|
81 |
+
# Load dots.ocr
|
82 |
+
MODEL_ID_D = "rednote-hilab/dots.ocr"
|
83 |
+
processor_d = AutoProcessor.from_pretrained(MODEL_ID_D, trust_remote_code=True)
|
84 |
+
model_d = AutoModelForCausalLM.from_pretrained(
|
85 |
+
MODEL_ID_D,
|
86 |
+
attn_implementation="flash_attention_2",
|
87 |
+
trust_remote_code=True,
|
88 |
+
torch_dtype=torch.float16
|
89 |
+
).to(device).eval()
|
90 |
+
|
91 |
# Preprocessing functions for SmolDocling-256M
|
92 |
def add_random_padding(image, min_percent=0.1, max_percent=0.10):
|
93 |
"""Add random padding to an image based on its size."""
|
|
|
154 |
elif model_name == "Typhoon-OCR-7B":
|
155 |
processor = processor_l
|
156 |
model = model_l
|
157 |
+
elif model_name == "rednote-dots.ocr":
|
158 |
+
processor = processor_d
|
159 |
+
model = model_d
|
160 |
else:
|
161 |
yield "Invalid model selected.", "Invalid model selected."
|
162 |
return
|
|
|
235 |
elif model_name == "Typhoon-OCR-7B":
|
236 |
processor = processor_l
|
237 |
model = model_l
|
238 |
+
elif model_name == "rednote-dots.ocr":
|
239 |
+
processor = processor_d
|
240 |
+
model = model_d
|
241 |
else:
|
242 |
yield "Invalid model selected.", "Invalid model selected."
|
243 |
return
|
|
|
368 |
formatted_output = gr.Markdown(label="(Result.md)")
|
369 |
|
370 |
model_choice = gr.Radio(
|
371 |
+
choices=["Nanonets-OCR-s", "MonkeyOCR-Recognition", "rednote-dots.ocr", "Typhoon-OCR-7B", "SmolDocling-256M-preview"],
|
372 |
label="Select Model",
|
373 |
value="Nanonets-OCR-s"
|
374 |
)
|