Spaces:

Didier
/

Optical_character_recognition

Running

App Files Files Community

Optical_character_recognition / module_ocr2.py

Didier

Update module_ocr2.py

1d8c673 verified 16 days ago

raw

history blame contribute delete

4.26 kB

	"""
	File: module_ocr2.py

	Description: module to interact with OCR deep learning models.

	Author: Didier Guillevic
	Date: 2025-04-07
	"""

	import gradio as gr
	import os
	import magic

	import pdf2image
	import tempfile

	import ocr2 # OCR with software 2.0 models

	#
	# Get file type: PDF or Image or something else
	#
	def get_file_type(file_path):
	    """Classify a file as 'PDF', 'Image', 'PowerPoint' or 'Other'.

	    The decision combines the lower-cased file extension with the MIME type
	    that the ``magic`` module reports for the file; a match on either one
	    is enough to select a category.
	    """
	    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif')
	    pptx_mime = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'

	    # Gather both signals up front, then test the categories in priority order.
	    _, suffix = os.path.splitext(file_path)
	    suffix = suffix.lower()
	    detected_mime = magic.Magic(mime=True).from_file(file_path)

	    if suffix == '.pdf' or detected_mime == 'application/pdf':
	        return 'PDF'
	    if suffix in image_suffixes or detected_mime.startswith('image/'):
	        return 'Image'
	    if suffix == '.pptx' or detected_mime == pptx_mime:
	        return 'PowerPoint'
	    return 'Other'

	#
	# Process one file
	#
	def process(input_file: str):
	    """Run OCR on an uploaded file and return the result.

	    Args:
	        input_file: Path of the file to process. May be None or empty when
	            no file has been uploaded.

	    Returns:
	        The value returned by ``ocr2.process_pdf`` for a PDF or by
	        ``ocr2.process_image`` for an image; otherwise a message string
	        explaining that no file was given or that the type is unsupported.
	    """
	    # Guard against a missing upload: get_file_type() cannot handle None.
	    if not input_file:
	        return "No file provided. Please upload a PDF, or an image file."

	    file_type = get_file_type(input_file)
	    if file_type == 'PDF':
	        return ocr2.process_pdf(input_file)
	    if file_type == 'Image':
	        return ocr2.process_image(input_file)
	    return "Unsupported file type. Please upload a PDF, or an image file."

	#
	# Preview the document (image or PDF)
	#
	def preview_file(file):
	    """Build a preview for an uploaded image or PDF.

	    Args:
	        file: The uploaded file, given either as a path string or as an
	            object exposing the path through ``.name``; None when nothing
	            has been uploaded.

	    Returns:
	        A ``(image_path, status)`` pair:
	        - ``(None, None)`` when ``file`` is None;
	        - ``(path, None)`` for jpg/jpeg/png/gif/bmp files;
	        - ``(png_path, caption)`` for a PDF whose first page was rendered
	          to a temporary PNG file;
	        - ``(None, html_message)`` when no preview could be produced.
	    """
	    import html  # local import: only used to escape text embedded in HTML

	    if file is None:
	        return None, None

	    # Accept a plain path as well as a file-like object carrying ``.name``.
	    file_path = file if isinstance(file, str) else file.name
	    # splitext looks only at the last path component, so a dot in a
	    # directory name is not mistaken for an extension.
	    file_extension = os.path.splitext(file_path)[1].lower().lstrip('.')

	    if file_extension in ('jpg', 'jpeg', 'png', 'gif', 'bmp'):
	        # Images can be displayed directly from their own path.
	        return file_path, None

	    if file_extension != 'pdf':
	        # The extension comes from the uploaded file name: escape it.
	        return None, f"<p>Preview not available for {html.escape(file_extension)} files</p>"

	    try:
	        # Render only the first page; 150 dpi is enough for a preview.
	        pages = pdf2image.convert_from_path(
	            file_path,
	            first_page=1,
	            last_page=1,
	            dpi=150,
	        )
	        if not pages:
	            return None, "<p>Could not convert PDF to image</p>"

	        # Reserve a temp file name, then close the handle before writing:
	        # re-opening a still-open NamedTemporaryFile by name is not portable.
	        # delete=False keeps the file so the caller can read it afterwards.
	        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp_file:
	            preview_path = tmp_file.name
	        pages[0].save(preview_path, 'PNG')
	        return preview_path, f"PDF Preview: {html.escape(os.path.basename(file_path))}"
	    except Exception as e:
	        # Deliberately broad: any failure is reported as a status message
	        # instead of propagating to the caller.
	        return None, f"<p>Error previewing PDF: {html.escape(str(e))}</p>"


	#
	# User interface
	#
	# Build the Gradio interface: a file upload, an OCR output text box,
	# OCR / Clear buttons and a collapsible list of example files.
	with gr.Blocks() as demo:

	# Upload file to process
	with gr.Row():
	with gr.Column():
	# File picker limited to PDF and the listed image extensions.
	input_file = gr.File(
	label="Upload a PDF or an image file",
	file_types=[".pdf", ".jpg", ".jpeg", ".png", ".gif", ".bmp"],
	scale=1)
	# Preview widgets are currently disabled (see the commented-out
	# input_file.change handler further down).
	#preview_image = gr.Image(label="Preview", show_label=True)
	#preview_text = gr.HTML(label="Status")

	# Text box that receives the value returned by process().
	output_text = gr.Textbox(label="OCR output", scale=2)

	# Buttons
	with gr.Row():
	ocr_btn = gr.Button(value="OCR", variant="primary")
	clear_btn = gr.Button("Clear", variant="secondary")

	# Examples
	# Collapsed by default (open=False); each example supplies one file path
	# for input_file and is wired to process() with caching turned off.
	with gr.Accordion("Examples", open=False):
	examples = gr.Examples(
	[
	['./scanned_doc.pdf',],
	['./passport_jp.png']
	],
	inputs=[input_file,],
	outputs=[output_text,],
	fn=process,
	cache_examples=False,
	label="Examples"
	)
	# Update preview when file is uploaded
	# (disabled together with the preview widgets above)
	#input_file.change(
	# fn=preview_file,
	# inputs=[input_file],
	# outputs=[preview_image, preview_text]
	#)

	# Functions
	# OCR button: pass the uploaded file to process() and show its result.
	ocr_btn.click(
	fn=process,
	inputs=[input_file,],
	outputs=[output_text,]
	)
	# Clear button: reset the file input to None and the output text to ''.
	clear_btn.click(
	fn=lambda : (None, ''),
	inputs=[],
	outputs=[input_file, output_text] # same order as the lambda's return tuple
	)

	# Launch the app only when this module is run as a script.
	if __name__ == '__main__':
	demo.launch()