Update module_ocr.py
Browse files- module_ocr.py +51 -0
module_ocr.py
CHANGED
@@ -14,6 +14,7 @@ import shutil
|
|
14 |
import threading
|
15 |
import time
|
16 |
import pathlib
|
|
|
17 |
|
18 |
import ocr
|
19 |
import lang_codes
|
@@ -105,6 +106,47 @@ def process(
|
|
105 |
|
106 |
return output_text, output_pdf
|
107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
#
|
109 |
# User interface
|
110 |
#
|
@@ -120,6 +162,8 @@ with gr.Blocks() as demo:
|
|
120 |
label="Upload an image or a PDF file of a scanned document",
|
121 |
height=160
|
122 |
)
|
|
|
|
|
123 |
output_file = gr.File(
|
124 |
label="Download OCR'ed PDF",
|
125 |
visible=False # Initially not visible
|
@@ -168,6 +212,13 @@ with gr.Blocks() as demo:
|
|
168 |
gr.Markdown(f"""
|
169 |
- Model: using the tesseract package for OCR 1.0 (traditional)
|
170 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
# Functions
|
173 |
ocr_btn.click(
|
|
|
14 |
import threading
|
15 |
import time
|
16 |
import pathlib
|
17 |
+
import pdf2image
|
18 |
|
19 |
import ocr
|
20 |
import lang_codes
|
|
|
106 |
|
107 |
return output_text, output_pdf
|
108 |
|
109 |
+
|
110 |
+
def preview_file(file):
    """Build a preview for an uploaded image or PDF file.

    Args:
        file: The uploaded file. Either an object exposing the file path as
            ``.name`` or a plain path string; ``None`` when nothing has been
            uploaded.

    Returns:
        A ``(image_path, status_html)`` tuple:

        * ``(None, None)`` when ``file`` is ``None``.
        * ``(file_path, None)`` for jpg/jpeg/png/gif/bmp files.
        * ``(png_path, "PDF Preview: <name>")`` for a PDF whose first page
          was rendered to a PNG inside ``output_dir``.
        * ``(None, "<p>...</p>")`` when the PDF could not be rendered or the
          file type is not supported.
    """
    import html  # local import: only this function needs HTML escaping

    if file is None:
        return None, None

    # Accept both a path string and a file-like object carrying `.name`.
    file_path = file if isinstance(file, str) else file.name
    # splitext looks only at the final path component, so a dot in a
    # directory name is never mistaken for an extension separator.
    file_extension = os.path.splitext(file_path)[1].lower().lstrip('.')

    if file_extension in ('jpg', 'jpeg', 'png', 'gif', 'bmp'):
        # For images, return the image directly
        return file_path, None

    if file_extension == 'pdf':
        # For PDFs, convert first page to image using pdf2image
        try:
            # Convert only the first page for preview. The file imports the
            # module as `import pdf2image`, so the call must be qualified.
            pages = pdf2image.convert_from_path(
                file_path,
                first_page=1,
                last_page=1,
                dpi=150  # Good quality for preview
            )

            if not pages:
                return None, "<p>Could not convert PDF to image</p>"

            # Save the first page as a uniquely named temporary image.
            base_filename, _ = os.path.splitext(os.path.basename(file_path))
            output_path = os.path.join(
                output_dir, f"{base_filename}_{uuid.uuid4()}.png"
            )
            # `output_path` is a plain string, so it is used as-is.
            pages[0].save(output_path, 'PNG')
            # The second value is rendered as HTML, so the user-supplied
            # file name is escaped.
            return (
                output_path,
                f"PDF Preview: {html.escape(os.path.basename(file_path))}"
            )

        except Exception as e:
            # Best-effort preview: report the failure in the status area
            # instead of breaking the upload handler.
            return None, f"<p>Error previewing PDF: {html.escape(str(e))}</p>"

    shown_extension = html.escape(file_extension) if file_extension else "unknown"
    return None, f"<p>Preview not available for {shown_extension} files</p>"
|
148 |
+
|
149 |
+
|
150 |
#
|
151 |
# User interface
|
152 |
#
|
|
|
162 |
label="Upload an image or a PDF file of a scanned document",
|
163 |
height=160
|
164 |
)
|
165 |
+
preview_image = gr.Image(label="Preview", show_label=True)
|
166 |
+
preview_text = gr.HTML(label="Status")
|
167 |
output_file = gr.File(
|
168 |
label="Download OCR'ed PDF",
|
169 |
visible=False # Initially not visible
|
|
|
212 |
gr.Markdown(f"""
|
213 |
- Model: using the tesseract package for OCR 1.0 (traditional)
|
214 |
""")
|
215 |
+
|
216 |
+
# Update preview when file is uploaded
|
217 |
+
input_file.change(
|
218 |
+
fn=preview_file,
|
219 |
+
inputs=[file_input],
|
220 |
+
outputs=[preview_image, preview_text]
|
221 |
+
)
|
222 |
|
223 |
# Functions
|
224 |
ocr_btn.click(
|