Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
83e370e
1
Parent(s):
003891a
improve ui
Browse files
app.py
CHANGED
@@ -151,7 +151,7 @@ def parse_xml_for_text(xml_file_path):
|
|
151 |
elif xml_format == "ALTO":
|
152 |
return parse_alto_xml_for_text(xml_file_path)
|
153 |
else:
|
154 |
-
return
|
155 |
|
156 |
except Exception as e:
|
157 |
return f"Error determining XML format: {str(e)}"
|
@@ -282,11 +282,24 @@ def process_files(image_path, xml_path, model_name):
|
|
282 |
img_to_display = None
|
283 |
xml_text_output = "XML not provided or not processed."
|
284 |
hf_ocr_text_output = "Image not provided or OCR not run."
|
|
|
|
|
285 |
|
286 |
if image_path:
|
287 |
try:
|
288 |
img_to_display = Image.open(image_path).convert("RGB")
|
289 |
hf_ocr_text_output = run_hf_ocr(image_path, model_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
290 |
except Exception as e:
|
291 |
img_to_display = None # Clear image if it failed to load
|
292 |
hf_ocr_text_output = f"Error loading image or running {model_name} OCR: {e}"
|
@@ -295,6 +308,17 @@ def process_files(image_path, xml_path, model_name):
|
|
295 |
|
296 |
if xml_path:
|
297 |
xml_text_output = parse_xml_for_text(xml_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
else:
|
299 |
xml_text_output = "No XML file uploaded."
|
300 |
|
@@ -303,16 +327,16 @@ def process_files(image_path, xml_path, model_name):
|
|
303 |
img_to_display = None # No image to display
|
304 |
hf_ocr_text_output = "Upload an image to perform OCR."
|
305 |
|
306 |
-
return img_to_display, xml_text_output, hf_ocr_text_output
|
307 |
|
308 |
|
309 |
# --- Create Gradio App ---
|
310 |
|
311 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
312 |
-
gr.Markdown("# OCR
|
313 |
gr.Markdown(
|
314 |
-
"
|
315 |
-
"
|
316 |
)
|
317 |
|
318 |
with gr.Row():
|
@@ -321,7 +345,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
321 |
choices=AVAILABLE_MODELS,
|
322 |
value="RolmOCR",
|
323 |
label="Select OCR Model",
|
324 |
-
info="
|
325 |
)
|
326 |
image_input = gr.File(
|
327 |
label="Upload Image (PNG, JPG, etc.)", type="filepath"
|
@@ -329,7 +353,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
329 |
xml_input = gr.File(
|
330 |
label="Upload XML File (Optional, ALTO or PAGE format)", type="filepath"
|
331 |
)
|
332 |
-
submit_button = gr.Button("
|
333 |
|
334 |
with gr.Row():
|
335 |
with gr.Column(scale=1):
|
@@ -338,20 +362,28 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
338 |
)
|
339 |
with gr.Column(scale=1):
|
340 |
hf_ocr_output_textbox = gr.Markdown(
|
341 |
-
label="OCR Output",
|
342 |
show_copy_button=True,
|
343 |
)
|
|
|
|
|
|
|
|
|
344 |
xml_output_textbox = gr.Textbox(
|
345 |
-
label="
|
346 |
lines=15,
|
347 |
interactive=False,
|
348 |
show_copy_button=True,
|
349 |
)
|
|
|
|
|
|
|
|
|
350 |
|
351 |
submit_button.click(
|
352 |
fn=process_files,
|
353 |
inputs=[image_input, xml_input, model_selector],
|
354 |
-
outputs=[output_image_display, xml_output_textbox, hf_ocr_output_textbox],
|
355 |
)
|
356 |
|
357 |
gr.Markdown("---")
|
|
|
151 |
elif xml_format == "ALTO":
|
152 |
return parse_alto_xml_for_text(xml_file_path)
|
153 |
else:
|
154 |
+
return "Error: Unsupported XML format. Expected ALTO or PAGE XML."
|
155 |
|
156 |
except Exception as e:
|
157 |
return f"Error determining XML format: {str(e)}"
|
|
|
282 |
img_to_display = None
|
283 |
xml_text_output = "XML not provided or not processed."
|
284 |
hf_ocr_text_output = "Image not provided or OCR not run."
|
285 |
+
ocr_download = gr.DownloadButton(visible=False)
|
286 |
+
xml_download = gr.DownloadButton(visible=False)
|
287 |
|
288 |
if image_path:
|
289 |
try:
|
290 |
img_to_display = Image.open(image_path).convert("RGB")
|
291 |
hf_ocr_text_output = run_hf_ocr(image_path, model_name)
|
292 |
+
|
293 |
+
# Create download file for OCR output
|
294 |
+
if hf_ocr_text_output and not hf_ocr_text_output.startswith("Error"):
|
295 |
+
ocr_filename = f"vlm_ocr_output_{model_name}.txt"
|
296 |
+
with open(ocr_filename, "w", encoding="utf-8") as f:
|
297 |
+
f.write(hf_ocr_text_output)
|
298 |
+
ocr_download = gr.DownloadButton(
|
299 |
+
label="Download VLM OCR",
|
300 |
+
value=ocr_filename,
|
301 |
+
visible=True
|
302 |
+
)
|
303 |
except Exception as e:
|
304 |
img_to_display = None # Clear image if it failed to load
|
305 |
hf_ocr_text_output = f"Error loading image or running {model_name} OCR: {e}"
|
|
|
308 |
|
309 |
if xml_path:
|
310 |
xml_text_output = parse_xml_for_text(xml_path)
|
311 |
+
|
312 |
+
# Create download file for XML text
|
313 |
+
if xml_text_output and not xml_text_output.startswith("Error"):
|
314 |
+
xml_filename = "traditional_ocr_output.txt"
|
315 |
+
with open(xml_filename, "w", encoding="utf-8") as f:
|
316 |
+
f.write(xml_text_output)
|
317 |
+
xml_download = gr.DownloadButton(
|
318 |
+
label="Download XML Text",
|
319 |
+
value=xml_filename,
|
320 |
+
visible=True
|
321 |
+
)
|
322 |
else:
|
323 |
xml_text_output = "No XML file uploaded."
|
324 |
|
|
|
327 |
img_to_display = None # No image to display
|
328 |
hf_ocr_text_output = "Upload an image to perform OCR."
|
329 |
|
330 |
+
return img_to_display, xml_text_output, hf_ocr_text_output, ocr_download, xml_download
|
331 |
|
332 |
|
333 |
# --- Create Gradio App ---
|
334 |
|
335 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
336 |
+
gr.Markdown("# OCR Comparison Tool: Traditional vs VLM-based")
|
337 |
gr.Markdown(
|
338 |
+
"Compare traditional OCR outputs (ALTO/PAGE XML) with modern Vision-Language Model OCR that produces clean Markdown. "
|
339 |
+
"Upload an image and its XML file to see how VLMs simplify document text extraction."
|
340 |
)
|
341 |
|
342 |
with gr.Row():
|
|
|
345 |
choices=AVAILABLE_MODELS,
|
346 |
value="RolmOCR",
|
347 |
label="Select OCR Model",
|
348 |
+
info="RolmOCR: Fast extraction, clean readable output | Nanonets-OCR-s: Detailed extraction with tables/math support, outputs structured Markdown"
|
349 |
)
|
350 |
image_input = gr.File(
|
351 |
label="Upload Image (PNG, JPG, etc.)", type="filepath"
|
|
|
353 |
xml_input = gr.File(
|
354 |
label="Upload XML File (Optional, ALTO or PAGE format)", type="filepath"
|
355 |
)
|
356 |
+
submit_button = gr.Button("Compare OCR Methods", variant="primary")
|
357 |
|
358 |
with gr.Row():
|
359 |
with gr.Column(scale=1):
|
|
|
362 |
)
|
363 |
with gr.Column(scale=1):
|
364 |
hf_ocr_output_textbox = gr.Markdown(
|
365 |
+
label="VLM OCR Output (Markdown)",
|
366 |
show_copy_button=True,
|
367 |
)
|
368 |
+
ocr_download_btn = gr.DownloadButton(
|
369 |
+
label="Download VLM OCR",
|
370 |
+
visible=False
|
371 |
+
)
|
372 |
xml_output_textbox = gr.Textbox(
|
373 |
+
label="Traditional OCR (XML Reading Order)",
|
374 |
lines=15,
|
375 |
interactive=False,
|
376 |
show_copy_button=True,
|
377 |
)
|
378 |
+
xml_download_btn = gr.DownloadButton(
|
379 |
+
label="Download XML Text",
|
380 |
+
visible=False
|
381 |
+
)
|
382 |
|
383 |
submit_button.click(
|
384 |
fn=process_files,
|
385 |
inputs=[image_input, xml_input, model_selector],
|
386 |
+
outputs=[output_image_display, xml_output_textbox, hf_ocr_output_textbox, ocr_download_btn, xml_download_btn],
|
387 |
)
|
388 |
|
389 |
gr.Markdown("---")
|