Spaces:

Didier
/

Optical_character_recognition

Running

App Files Files Community

Didier committed 19 days ago

Commit

7798dc4

verified ·

1 Parent(s): 17c6e9f

Update module_ocr.py

Browse files

Files changed (1) hide show

module_ocr.py +51 -0

module_ocr.py CHANGED Viewed

@@ -14,6 +14,7 @@ import shutil
 import threading
 import time
 import pathlib
 import ocr
 import lang_codes
@@ -105,6 +106,47 @@ def process(
     return output_text, output_pdf
 #
 # User interface
 #
@@ -120,6 +162,8 @@ with gr.Blocks() as demo:
                 label="Upload an image or a PDF file of a scanned document",
                 height=160
             )
             output_file = gr.File(
                 label="Download OCR'ed PDF",
                 visible=False # Initially not visible
@@ -168,6 +212,13 @@ with gr.Blocks() as demo:
         gr.Markdown(f"""
             - Model: using the tesseract package for OCR 1.0 (traditional)
         """)
     # Functions
     ocr_btn.click(

 import threading
 import time
 import pathlib
+import pdf2image
 import ocr
 import lang_codes
     return output_text, output_pdf
+def preview_file(file):
+    """Build a preview image and a status message for an uploaded file.
+
+    Images are previewed as-is. For a PDF, only the first page is rendered
+    (at 150 dpi) and written as a PNG into ``output_dir``.
+
+    Args:
+        file: The uploaded file, or None when nothing is uploaded. Either a
+            path string or an object exposing the path as ``.name``.
+            NOTE(review): which of the two Gradio passes depends on the
+            upload component's configuration -- confirm against its
+            definition.
+
+    Returns:
+        A ``(image_path, status_html)`` tuple. ``image_path`` is None when no
+        preview could be produced; ``status_html`` is None when there is
+        nothing to report.
+    """
+    # Local import: the status string is rendered as HTML, so any text that
+    # comes from the upload (file name, error message) is escaped first.
+    import html
+
+    if file is None:
+        return None, None
+    # Accept both a plain path string and a file wrapper exposing `.name`.
+    file_path = getattr(file, "name", file)
+    file_name = os.path.basename(file_path)
+    # Split the base name, not the whole path, so a dot in a directory name
+    # is never mistaken for the extension separator.
+    base_filename, extension = os.path.splitext(file_name)
+    file_extension = extension.lower().lstrip('.')
+    if file_extension in ('jpg', 'jpeg', 'png', 'gif', 'bmp'):
+        # For images, return the image directly
+        return file_path, None
+    if file_extension != 'pdf':
+        shown_extension = html.escape(file_extension) or "unknown"
+        return None, f"<p>Preview not available for {shown_extension} files</p>"
+    # For PDFs, convert first page to image using pdf2image
+    try:
+        # Convert only the first page for preview
+        pages = pdf2image.convert_from_path(
+            file_path,
+            first_page=1,
+            last_page=1,
+            dpi=150  # Good quality for preview
+        )
+        if not pages:
+            return None, "<p>Could not convert PDF to image</p>"
+        # Save the first page as a temporary image; the uuid keeps previews
+        # of equally named uploads from overwriting each other.
+        # NOTE(review): output_dir is expected to be defined at module level
+        # outside this hunk -- confirm.
+        output_path = os.path.join(
+            output_dir, f"{base_filename}_{uuid.uuid4()}.png"
+        )
+        pages[0].save(output_path, 'PNG')
+        return output_path, f"PDF Preview: {html.escape(file_name)}"
+    except Exception as e:
+        # Best effort: a failed preview is reported in the status area
+        # instead of propagating out of the UI callback.
+        return None, f"<p>Error previewing PDF: {html.escape(str(e))}</p>"
 #
 # User interface
 #
                 label="Upload an image or a PDF file of a scanned document",
                 height=160
             )
+            preview_image = gr.Image(label="Preview", show_label=True)
+            preview_text = gr.HTML(label="Status")
             output_file = gr.File(
                 label="Download OCR'ed PDF",
                 visible=False # Initially not visible
         gr.Markdown(f"""
             - Model: using the tesseract package for OCR 1.0 (traditional)
         """)
+    # Update preview when file is uploaded
+    # NOTE(review): the handler is attached to `input_file` but reads its
+    # input from `file_input`; the upload component's definition is not
+    # visible in this hunk -- confirm both names exist, otherwise one of
+    # them raises NameError when this block is executed.
+    input_file.change(
+        fn=preview_file,
+        inputs=[file_input],
+        outputs=[preview_image, preview_text]
+    )
     # Functions
     ocr_btn.click(