File size: 4,264 Bytes
8ba2238
17c6e9f
8ba2238
17c6e9f
8ba2238
 
 
 
 
 
 
 
 
843ddbc
 
 
8ba2238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
843ddbc
 
 
 
 
 
 
 
 
 
 
 
 
8ba2238
843ddbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ba2238
 
 
 
 
 
 
843ddbc
 
 
 
 
1d8c673
 
843ddbc
8ba2238
 
 
 
 
 
 
 
 
 
 
 
17c6e9f
8ba2238
 
 
 
 
 
 
843ddbc
1d8c673
 
 
 
 
8ba2238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
"""
File: module_ocr2.py

Description: module to interact with OCR deep learning models.

Author: Didier Guillevic
Date: 2025-04-07
"""

import gradio as gr
import os
import magic

import pdf2image
import tempfile

import ocr2 # OCR with software 2.0 models

#
# Get file type: PDF or Image or something else
#
def get_file_type(file_path):
    """Classify a file as 'PDF', 'Image', 'PowerPoint' or 'Other'.

    Both the file name extension and the MIME type reported by libmagic
    are consulted; a category matches when either of the two agrees.
    Categories are tested in the order PDF, Image, PowerPoint, and the
    first match wins.

    Args:
        file_path: Path of the file to inspect. The file is read by
            ``magic`` to determine its MIME type.

    Returns:
        One of the strings 'PDF', 'Image', 'PowerPoint' or 'Other'.
    """
    image_suffixes = ['.jpg', '.jpeg', '.png', '.gif']
    pptx_mime = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'

    # Lower-cased extension, including the leading dot ('' when absent)
    _, suffix = os.path.splitext(file_path)
    suffix = suffix.lower()

    # MIME type as detected from the file content
    detected = magic.Magic(mime=True).from_file(file_path)

    if suffix == '.pdf' or detected == 'application/pdf':
        return 'PDF'
    if suffix in image_suffixes or detected.startswith('image/'):
        return 'Image'
    if suffix == '.pptx' or detected == pptx_mime:
        return 'PowerPoint'
    return 'Other'

#
# Process one file
#
def process(input_file: str):
    """Run OCR on one uploaded file and return the result.

    The file is classified with ``get_file_type``; PDFs are handed to
    ``ocr2.process_pdf`` and images to ``ocr2.process_image``.

    Args:
        input_file: Path of the file to process. An object exposing the
            path through a ``name`` attribute is accepted as well. ``None``
            or an empty value means that no file was uploaded.

    Returns:
        The value returned by the selected ``ocr2`` function, or a
        user-facing message string when no file was given or the file
        type is not supported.
    """
    # Clicking "OCR" without an upload passes None; answer with a message
    # instead of failing inside the file type detection.
    if not input_file:
        return "No file provided. Please upload a PDF, or an image file."

    # Accept either a plain path or a file wrapper carrying the path in .name
    file_path = getattr(input_file, 'name', input_file)

    file_type = get_file_type(file_path)
    if file_type == 'PDF':
        return ocr2.process_pdf(file_path)
    if file_type == 'Image':
        return ocr2.process_image(file_path)
    return "Unsupported file type. Please upload a PDF, or an image file."

#
# Preview the document (image or PDF)
#
def preview_file(file):
    """Build a preview for an uploaded image or PDF.

    Args:
        file: The uploaded file, either as a path string or as an object
            exposing the path through a ``name`` attribute. ``None`` means
            that nothing is uploaded.

    Returns:
        A ``(image_path, status)`` tuple:
          - ``(None, None)`` when ``file`` is ``None``;
          - ``(file_path, None)`` for image files;
          - ``(png_path, "PDF Preview: <name>")`` for PDFs, where
            ``png_path`` is a temporary PNG rendering of the first page
            (the file is not deleted by this function);
          - ``(None, "<p>...</p>")`` with an HTML message when the preview
            cannot be produced.
    """
    if file is None:
        return None, None

    # Accept either a plain path or a file wrapper carrying the path in .name
    file_path = getattr(file, 'name', file)

    # splitext only looks at the last path component, so a dot in a
    # directory name is never mistaken for an extension.
    file_extension = os.path.splitext(file_path)[1].lower().lstrip('.')

    if file_extension in ['jpg', 'jpeg', 'png', 'gif', 'bmp']:
        # For images, return the image directly
        return file_path, None

    if file_extension == 'pdf':
        # For PDFs, convert first page to image using pdf2image
        try:
            # Convert only the first page for preview
            pages = pdf2image.convert_from_path(
                file_path,
                first_page=1,
                last_page=1,
                dpi=150  # Good quality for preview
            )
            if not pages:
                return None, "<p>Could not convert PDF to image</p>"

            # Reserve a temporary file name, then close the handle before
            # writing: saving by name into a still-open temporary file
            # fails on Windows.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp_file:
                preview_path = tmp_file.name
            try:
                pages[0].save(preview_path, 'PNG')
            except Exception:
                # Do not leave an empty temporary file behind
                try:
                    os.remove(preview_path)
                except OSError:
                    pass
                raise
            return preview_path, f"PDF Preview: {os.path.basename(file_path)}"

        except Exception as e:
            # UI boundary: report the failure in the status area
            return None, f"<p>Error previewing PDF: {str(e)}</p>"

    return None, f"<p>Preview not available for {file_extension or 'unknown'} files</p>"


#
# User interface
#
# Build the Gradio interface. Components are created inside nested layout
# context managers (Blocks > Row > Column), and the event handlers are
# attached at the end of the Blocks context.
with gr.Blocks() as demo:

    # Upload file to process
    with gr.Row():
        with gr.Column():
            # File upload limited to the extensions listed in file_types
            input_file = gr.File(
                label="Upload a PDF or an image file",
                file_types=[".pdf", ".jpg", ".jpeg", ".png", ".gif", ".bmp"],
                scale=1)
            # Preview widgets are currently disabled (see the disabled
            # input_file.change handler further down).
            #preview_image = gr.Image(label="Preview", show_label=True)
            #preview_text = gr.HTML(label="Status")
        
        # Text area receiving the value returned by process()
        output_text = gr.Textbox(label="OCR output", scale=2)

    # Buttons
    with gr.Row():
        ocr_btn = gr.Button(value="OCR", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")
    
    # Examples
    with gr.Accordion("Examples", open=False):
        # Sample files referenced by relative path.
        # NOTE(review): presumably these files sit next to this script and
        # the app is started from that directory -- confirm.
        examples = gr.Examples(
            [
                ['./scanned_doc.pdf',],
                ['./passport_jp.png']
            ],
            inputs=[input_file,],
            outputs=[output_text,],
            fn=process,
            cache_examples=False,
            label="Examples"
        )
    # Update preview when file is uploaded
    # (disabled together with the preview widgets above)
    #input_file.change(
    #    fn=preview_file,
    #    inputs=[input_file],
    #    outputs=[preview_image, preview_text]
    #)
    
    # Functions
    # "OCR" button: run process() on the uploaded file and show the result
    ocr_btn.click(
        fn=process,
        inputs=[input_file,],
        outputs=[output_text,]
    )
    # "Clear" button: reset both components; the returned tuple is matched
    # positionally with the outputs list.
    clear_btn.click(
        fn=lambda : (None, ''),
        inputs=[],
        outputs=[input_file, output_text] # None -> input_file, '' -> output_text
    )

# Start the app only when this module is run as a script
if __name__ == '__main__':
    demo.launch()