Spaces:
Sleeping
Sleeping
import os | |
# Set environment variable before importing easyocr
os.environ["EASYOCR_HOME"] = "/tmp/.easyocr" | |
import cv2 | |
import numpy as np | |
import easyocr | |
from PIL import Image | |
# Initialize the EasyOCR reader once at import time so every OCR call shares it.
#
# EasyOCR's documentation lists EASYOCR_MODULE_PATH / MODULE_PATH (not
# EASYOCR_HOME) as the variables that select its storage root, so the
# directories are passed to the Reader explicitly instead of relying on the
# environment. The layout mirrors EasyOCR's default: <root>/model and
# <root>/user_network.
_easyocr_root = os.environ.get("EASYOCR_HOME", "/tmp/.easyocr")

# Bind both names up front so they always exist, even when initialization
# fails; callers must check `ocr_available` before using `reader`.
reader = None
ocr_available = False

try:
    reader = easyocr.Reader(
        ['en'],
        download_enabled=True,
        model_storage_directory=os.path.join(_easyocr_root, "model"),
        user_network_directory=os.path.join(_easyocr_root, "user_network"),
    )
except Exception as e:
    # Best effort: the module stays importable and OCR is reported as unavailable.
    print(f"β EasyOCR initialization failed: {str(e)}")
    print("Text extraction may not work properly.")
else:
    print("β EasyOCR initialized")
    ocr_available = True
def _join_ocr_text(results):
    """Join the text field (index 1) of each OCR result with single spaces."""
    return ' '.join(entry[1] for entry in results).strip()


def extract_text_from_image(image_path):
    """Run OCR on the image at *image_path* and return the recognized text.

    The image is loaded with OpenCV, passed through ``preprocess_image`` and
    written to a uniquely named temporary file that EasyOCR reads. When that
    pass yields no text, OCR is retried on the original, unprocessed file.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The recognized text fragments joined with single spaces, or a fixed
        fallback message: ``"Image could not be read."`` when OpenCV cannot
        load the file, and ``"Text extraction failed. Please enter text
        manually."`` when OCR is unavailable, finds no text, or any step
        raises. Exceptions are caught, printed, and converted into the
        failure message rather than propagated.
    """
    import tempfile  # local import: only needed for the scratch file below

    failure_message = "Text extraction failed. Please enter text manually."
    try:
        print(f"π Reading image from: {image_path}")

        if not ocr_available:
            print("β EasyOCR not available.")
            return failure_message

        image = cv2.imread(image_path)
        if image is None:
            print("β cv2 could not read image.")
            return "Image could not be read."

        processed_image = preprocess_image(image)

        # Keep the source extension so OpenCV picks the same encoder; fall
        # back to PNG when the path has none. A unique name in the system
        # temp directory avoids overwriting a real "temp_<name>" file next to
        # the input and avoids collisions between concurrent calls.
        suffix = os.path.splitext(image_path)[1] or ".png"
        fd, temp_path = tempfile.mkstemp(prefix="temp_", suffix=suffix)
        os.close(fd)  # OpenCV reopens the path itself
        try:
            # imwrite signals some failures by returning False instead of raising.
            if not cv2.imwrite(temp_path, processed_image):
                raise OSError(f"could not write processed image to {temp_path}")
            print(f"πΌοΈ Temp processed image saved at: {temp_path}")
            results = reader.readtext(temp_path)
        finally:
            # Always clean up, including when OCR raises.
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                print(f"Could not remove temp file {temp_path}: {cleanup_error}")

        print("π OCR Raw Output:", results)
        text = _join_ocr_text(results)

        if not text:
            # Preprocessing may have destroyed the glyphs; try the untouched file.
            print("β οΈ No text found, retrying with original image...")
            results = reader.readtext(image_path)
            text = _join_ocr_text(results)

        print("β Extracted Text:", text)
        return text or failure_message
    except Exception as e:
        print(f"OCR failed: {e}")
        return failure_message