|
""" |
|
File: module_ocr.py |
|
Description: Use a vision language model for Optical Character Recognition (OCR) tasks. |
|
Author: Didier Guillevic |
|
Date: 2025-04-06 |
|
""" |
|
|
|
import gradio as gr |
|
import ocr |
|
import pdf2image |
|
import tempfile |
|
import os |
|
|
|
|
|
|
|
|
|
def process(input_file: str):
    """Run OCR on the given file.

    Args:
        input_file: The file to run OCR on; it is forwarded unchanged to
            ``ocr.process_file``.

    Returns:
        Whatever ``ocr.process_file`` returns for that file.
    """
    ocr_result = ocr.process_file(input_file)
    return ocr_result
|
|
|
|
|
|
|
|
|
def preview_file(file):
    """Build a preview for an uploaded file.

    Image files are previewed as-is. For a PDF, the first page is rendered at
    150 dpi and written to a temporary PNG file. Any other file type only
    produces a status message.

    Args:
        file: The uploaded file: either a path string, or an object whose
            ``name`` attribute holds the path. ``None`` means "no file".

    Returns:
        A ``(image_path, status_html)`` tuple. ``image_path`` is the path of
        the image to show, or ``None`` when there is nothing to show.
        ``status_html`` is a status message meant to be rendered as HTML, or
        ``None`` when there is nothing to report.
    """
    import html  # local import: only needed to escape text rendered as HTML

    if file is None:
        return None, None

    # Accept a plain path as well as an object exposing its path as `.name`.
    file_path = getattr(file, 'name', file)

    # splitext only inspects the last path component, so a dot in a directory
    # name (or a name without any dot) is not mistaken for an extension.
    file_extension = os.path.splitext(file_path)[1].lower().lstrip('.')

    if file_extension in ('jpg', 'jpeg', 'png', 'gif', 'bmp'):
        return file_path, None

    if file_extension != 'pdf':
        if file_extension:
            # The extension comes from the uploaded file name: escape it
            # before embedding it in HTML.
            kind = f"{html.escape(file_extension)} files"
        else:
            kind = "files without an extension"
        return None, f"<p>Preview not available for {kind}</p>"

    try:
        # Only the first page is needed for the preview.
        pages = pdf2image.convert_from_path(
            file_path,
            first_page=1,
            last_page=1,
            dpi=150,
        )
        if not pages:
            return None, "<p>Could not convert PDF to image</p>"

        # delete=False: the PNG must outlive this call so that the caller can
        # display it. Nothing in this function removes it afterwards.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp_file:
            preview_path = tmp_file.name
        try:
            pages[0].save(preview_path, 'PNG')
        except Exception:
            # Do not leave an unusable temporary file behind.
            os.unlink(preview_path)
            raise

        pdf_name = html.escape(os.path.basename(file_path))
        return preview_path, f"PDF Preview: {pdf_name}"
    except Exception as e:
        # UI boundary: report the failure in the status area instead of
        # letting the exception propagate.
        return None, f"<p>Error previewing PDF: {html.escape(str(e))}</p>"
|
|
|
|
|
|
|
|
|
|
|
# Gradio user interface: upload a file, preview it, run OCR on demand.
with gr.Blocks() as demo:

    # Upload widget and preview next to the OCR result.
    with gr.Row():
        with gr.Column():
            input_file = gr.File(
                label="Upload a PDF or image file",
                file_types=[".pdf", ".jpg", ".jpeg", ".png", ".gif", ".bmp"],
                scale=1,
            )
            preview_image = gr.Image(label="Preview", show_label=True)
            preview_text = gr.HTML(label="Status")
        # NOTE(review): placed beside the column (not inside it) so that
        # scale=2 has an effect -- confirm this is the intended layout.
        output_text = gr.Textbox(label="OCR output", scale=2)

    # Action buttons.
    with gr.Row():
        ocr_btn = gr.Button(value="OCR", variant="primary")
        clear_btn = gr.Button(value="Clear", variant="secondary")

    # Sample documents, collapsed by default.
    with gr.Accordion(label="Examples", open=False):
        examples = gr.Examples(
            examples=[
                ["./scanned_doc.pdf"],
                ["./passport_jp.png"],
            ],
            inputs=[input_file],
            outputs=[output_text],
            fn=process,
            cache_examples=False,
            label="Examples",
        )

    # Refresh the preview whenever the selected file changes.
    input_file.change(
        fn=preview_file,
        inputs=[input_file],
        outputs=[preview_image, preview_text],
    )

    # Run OCR on the selected file.
    ocr_btn.click(
        fn=process,
        inputs=[input_file],
        outputs=[output_text],
    )

    # Reset the selected file and the OCR output.
    clear_btn.click(
        fn=lambda: (None, ""),
        inputs=[],
        outputs=[input_file, output_text],
    )
|
|
|
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|