|
import os |
|
|
|
|
|
# Point EasyOCR's model storage at a writable location. This has to stay above
# `import easyocr`, because the library picks its storage directory up from
# the environment.
os.environ["EASYOCR_HOME"] = "/tmp/.easyocr"
# EasyOCR documents EASYOCR_MODULE_PATH (then MODULE_PATH) as the variables it
# consults for the model directory; EASYOCR_HOME alone is not among them.
# Mirror the value there, but let an explicit deployment setting win.
os.environ.setdefault("EASYOCR_MODULE_PATH", os.environ["EASYOCR_HOME"])
|
|
|
import cv2 |
|
import numpy as np |
|
import easyocr |
|
from PIL import Image |
|
|
|
|
|
# Build the shared OCR reader once, at import time. A failure here is reported
# and recorded in `ocr_available` rather than aborting the import, so callers
# can degrade gracefully.
try:
    reader = easyocr.Reader(['en'], download_enabled=True)
except Exception as e:
    # Keep `reader` defined so code importing this module never hits a
    # NameError; `ocr_available` is the flag callers should check.
    reader = None
    ocr_available = False
    print(f"❌ EasyOCR initialization failed: {e}")
    print("Text extraction may not work properly.")
else:
    ocr_available = True
    print("✅ EasyOCR initialized")
|
|
|
def _join_ocr_text(results):
    """Join the text field (index 1) of each OCR result with single spaces."""
    return ' '.join(item[1] for item in results).strip()


def extract_text_from_image(image_path):
    """Extract text from the image at ``image_path`` with the shared OCR reader.

    The image is loaded with OpenCV, passed through ``preprocess_image`` and
    written to a scratch file named ``temp_<basename>`` next to the original;
    OCR runs on that file. If no text comes back, OCR is retried on the
    original file. Progress is reported with ``print``.

    Relies on the module-level ``reader`` and ``ocr_available`` and on
    ``preprocess_image`` defined elsewhere in this module.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The recognised text fragments joined by single spaces;
        ``"Image could not be read."`` when OpenCV cannot load the file; or
        ``"Text extraction failed. Please enter text manually."`` when OCR is
        unavailable, finds no text, or any step raises (exceptions are caught
        and reported, not propagated).
    """
    failure_message = "Text extraction failed. Please enter text manually."
    try:
        # NOTE(review): the message icons were mis-encoded in the source. The
        # ones on "Reading image" and "OCR Raw Output" could not be recovered
        # and are best guesses -- confirm against the original file.
        print(f"📂 Reading image from: {image_path}")
        if not ocr_available:
            print("❌ EasyOCR not available.")
            return failure_message

        image = cv2.imread(image_path)
        if image is None:
            print("❌ cv2 could not read image.")
            return "Image could not be read."

        processed_image = preprocess_image(image)
        temp_path = os.path.join(
            os.path.dirname(image_path),
            f"temp_{os.path.basename(image_path)}",
        )
        try:
            cv2.imwrite(temp_path, processed_image)
            print(f"🖼️ Temp processed image saved at: {temp_path}")
            results = reader.readtext(temp_path)
            print("🔍 OCR Raw Output:", results)
        finally:
            # Remove the scratch file even when writing or OCR raised, so a
            # failed run does not leave it behind. It may be missing if the
            # write itself failed.
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass

        text = _join_ocr_text(results)
        if not text:
            print("⚠️ No text found, retrying with original image...")
            results = reader.readtext(image_path)
            text = _join_ocr_text(results)

        print("✅ Extracted Text:", text)
        return text or failure_message
    except Exception as e:
        # Catch-all boundary: callers get a user-facing message, never a raise.
        print(f"OCR failed: {e}")
        return failure_message
|
|