davanstrien HF Staff committed on
Commit
83e370e
·
1 Parent(s): 003891a

improve ui

Browse files
Files changed (1) hide show
  1. app.py +42 -10
app.py CHANGED
@@ -151,7 +151,7 @@ def parse_xml_for_text(xml_file_path):
151
  elif xml_format == "ALTO":
152
  return parse_alto_xml_for_text(xml_file_path)
153
  else:
154
- return f"Error: Unsupported XML format. Expected ALTO or PAGE XML."
155
 
156
  except Exception as e:
157
  return f"Error determining XML format: {str(e)}"
@@ -282,11 +282,24 @@ def process_files(image_path, xml_path, model_name):
282
  img_to_display = None
283
  xml_text_output = "XML not provided or not processed."
284
  hf_ocr_text_output = "Image not provided or OCR not run."
 
 
285
 
286
  if image_path:
287
  try:
288
  img_to_display = Image.open(image_path).convert("RGB")
289
  hf_ocr_text_output = run_hf_ocr(image_path, model_name)
 
 
 
 
 
 
 
 
 
 
 
290
  except Exception as e:
291
  img_to_display = None # Clear image if it failed to load
292
  hf_ocr_text_output = f"Error loading image or running {model_name} OCR: {e}"
@@ -295,6 +308,17 @@ def process_files(image_path, xml_path, model_name):
295
 
296
  if xml_path:
297
  xml_text_output = parse_xml_for_text(xml_path)
 
 
 
 
 
 
 
 
 
 
 
298
  else:
299
  xml_text_output = "No XML file uploaded."
300
 
@@ -303,16 +327,16 @@ def process_files(image_path, xml_path, model_name):
303
  img_to_display = None # No image to display
304
  hf_ocr_text_output = "Upload an image to perform OCR."
305
 
306
- return img_to_display, xml_text_output, hf_ocr_text_output
307
 
308
 
309
  # --- Create Gradio App ---
310
 
311
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
312
- gr.Markdown("# OCR Viewer and Extractor")
313
  gr.Markdown(
314
- "Upload an image to perform OCR using a Hugging Face model. "
315
- "Optionally, upload its corresponding ALTO or PAGE XML file to compare the extracted text."
316
  )
317
 
318
  with gr.Row():
@@ -321,7 +345,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
321
  choices=AVAILABLE_MODELS,
322
  value="RolmOCR",
323
  label="Select OCR Model",
324
- info="Choose between RolmOCR (fast, general purpose) or Nanonets-OCR-s (detailed extraction)"
325
  )
326
  image_input = gr.File(
327
  label="Upload Image (PNG, JPG, etc.)", type="filepath"
@@ -329,7 +353,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
329
  xml_input = gr.File(
330
  label="Upload XML File (Optional, ALTO or PAGE format)", type="filepath"
331
  )
332
- submit_button = gr.Button("Process Image and XML", variant="primary")
333
 
334
  with gr.Row():
335
  with gr.Column(scale=1):
@@ -338,20 +362,28 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
338
  )
339
  with gr.Column(scale=1):
340
  hf_ocr_output_textbox = gr.Markdown(
341
- label="OCR Output",
342
  show_copy_button=True,
343
  )
 
 
 
 
344
  xml_output_textbox = gr.Textbox(
345
- label="Text from XML",
346
  lines=15,
347
  interactive=False,
348
  show_copy_button=True,
349
  )
 
 
 
 
350
 
351
  submit_button.click(
352
  fn=process_files,
353
  inputs=[image_input, xml_input, model_selector],
354
- outputs=[output_image_display, xml_output_textbox, hf_ocr_output_textbox],
355
  )
356
 
357
  gr.Markdown("---")
 
151
  elif xml_format == "ALTO":
152
  return parse_alto_xml_for_text(xml_file_path)
153
  else:
154
+ return "Error: Unsupported XML format. Expected ALTO or PAGE XML."
155
 
156
  except Exception as e:
157
  return f"Error determining XML format: {str(e)}"
 
282
  img_to_display = None
283
  xml_text_output = "XML not provided or not processed."
284
  hf_ocr_text_output = "Image not provided or OCR not run."
285
+ ocr_download = gr.DownloadButton(visible=False)
286
+ xml_download = gr.DownloadButton(visible=False)
287
 
288
  if image_path:
289
  try:
290
  img_to_display = Image.open(image_path).convert("RGB")
291
  hf_ocr_text_output = run_hf_ocr(image_path, model_name)
292
+
293
+ # Create download file for OCR output
294
+ if hf_ocr_text_output and not hf_ocr_text_output.startswith("Error"):
295
+ ocr_filename = f"vlm_ocr_output_{model_name}.txt"
296
+ with open(ocr_filename, "w", encoding="utf-8") as f:
297
+ f.write(hf_ocr_text_output)
298
+ ocr_download = gr.DownloadButton(
299
+ label="Download VLM OCR",
300
+ value=ocr_filename,
301
+ visible=True
302
+ )
303
  except Exception as e:
304
  img_to_display = None # Clear image if it failed to load
305
  hf_ocr_text_output = f"Error loading image or running {model_name} OCR: {e}"
 
308
 
309
  if xml_path:
310
  xml_text_output = parse_xml_for_text(xml_path)
311
+
312
+ # Create download file for XML text
313
+ if xml_text_output and not xml_text_output.startswith("Error"):
314
+ xml_filename = "traditional_ocr_output.txt"
315
+ with open(xml_filename, "w", encoding="utf-8") as f:
316
+ f.write(xml_text_output)
317
+ xml_download = gr.DownloadButton(
318
+ label="Download XML Text",
319
+ value=xml_filename,
320
+ visible=True
321
+ )
322
  else:
323
  xml_text_output = "No XML file uploaded."
324
 
 
327
  img_to_display = None # No image to display
328
  hf_ocr_text_output = "Upload an image to perform OCR."
329
 
330
+ return img_to_display, xml_text_output, hf_ocr_text_output, ocr_download, xml_download
331
 
332
 
333
  # --- Create Gradio App ---
334
 
335
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
336
+ gr.Markdown("# OCR Comparison Tool: Traditional vs VLM-based")
337
  gr.Markdown(
338
+ "Compare traditional OCR outputs (ALTO/PAGE XML) with modern Vision-Language Model OCR that produces clean Markdown. "
339
+ "Upload an image and its XML file to see how VLMs simplify document text extraction."
340
  )
341
 
342
  with gr.Row():
 
345
  choices=AVAILABLE_MODELS,
346
  value="RolmOCR",
347
  label="Select OCR Model",
348
+ info="RolmOCR: Fast extraction, clean readable output | Nanonets-OCR-s: Detailed extraction with tables/math support, outputs structured Markdown"
349
  )
350
  image_input = gr.File(
351
  label="Upload Image (PNG, JPG, etc.)", type="filepath"
 
353
  xml_input = gr.File(
354
  label="Upload XML File (Optional, ALTO or PAGE format)", type="filepath"
355
  )
356
+ submit_button = gr.Button("Compare OCR Methods", variant="primary")
357
 
358
  with gr.Row():
359
  with gr.Column(scale=1):
 
362
  )
363
  with gr.Column(scale=1):
364
  hf_ocr_output_textbox = gr.Markdown(
365
+ label="VLM OCR Output (Markdown)",
366
  show_copy_button=True,
367
  )
368
+ ocr_download_btn = gr.DownloadButton(
369
+ label="Download VLM OCR",
370
+ visible=False
371
+ )
372
  xml_output_textbox = gr.Textbox(
373
+ label="Traditional OCR (XML Reading Order)",
374
  lines=15,
375
  interactive=False,
376
  show_copy_button=True,
377
  )
378
+ xml_download_btn = gr.DownloadButton(
379
+ label="Download XML Text",
380
+ visible=False
381
+ )
382
 
383
  submit_button.click(
384
  fn=process_files,
385
  inputs=[image_input, xml_input, model_selector],
386
+ outputs=[output_image_display, xml_output_textbox, hf_ocr_output_textbox, ocr_download_btn, xml_download_btn],
387
  )
388
 
389
  gr.Markdown("---")