File size: 2,134 Bytes
a03a20f
 
 
3497db5
a03a20f
436a6c3
a03a20f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436a6c3
5888c6a
a03a20f
 
 
21202e9
5888c6a
a03a20f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a1c667
a03a20f
 
 
 
8ceac8e
cd39c9c
 
986c935
5888c6a
a03a20f
cd39c9c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import cv2
import numpy as np
from PIL import Image
import easyocr

# Writable directory for EasyOCR model storage (created at import time if
# it does not exist yet).
EASYOCR_DIR = "/tmp/.easyocr"
os.makedirs(EASYOCR_DIR, exist_ok=True)

# Kept for backward compatibility with anything that reads this variable.
# NOTE: `easyocr` is already imported at this point, so this assignment cannot
# influence the library's import-time defaults; the storage directories are
# therefore passed explicitly to `easyocr.Reader` below.
os.environ["EASYOCR_HOME"] = EASYOCR_DIR

# Initialize the shared EasyOCR reader once, at import time.
# `reader` is always bound (None when initialization fails) so that importers
# never hit a NameError; `ocr_available` records whether it is usable.
reader = None
try:
    reader = easyocr.Reader(
        ['en'],
        model_storage_directory=EASYOCR_DIR,
        # Keep the user-network directory under the writable location as well;
        # when omitted, EasyOCR falls back to its default module path.
        user_network_directory=os.path.join(EASYOCR_DIR, "user_network"),
    )
    ocr_available = True
    print("✅ EasyOCR initialized.")
except Exception as e:
    # Best-effort: the module must stay importable even without a working OCR
    # backend; callers check `ocr_available` before using `reader`.
    print(f"❌ EasyOCR initialization failed: {e}")
    ocr_available = False

def preprocess_image(image):
    """
    Return a denoised, binarized version of ``image`` for OCR.

    A 3-dimensional input is converted from BGR to grayscale first; any other
    input is used as-is. The grayscale image is then denoised with non-local
    means (h=10) and binarized with a Gaussian adaptive threshold
    (max value 255, block size 11, constant 2).
    """
    has_channel_axis = len(image.shape) == 3
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if has_channel_axis else image

    smoothed = cv2.fastNlMeansDenoising(grayscale, h=10)

    return cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )

# Message returned whenever no text could be extracted from an image.
_OCR_FAILURE_MESSAGE = "Text extraction failed. Please enter text manually."


def _join_ocr_text(results):
    """Join the text element (index 1) of each OCR result into one string."""
    return ' '.join(res[1] for res in results).strip()


def extract_text_from_image(image_path):
    """
    Extract text from an image file using EasyOCR.

    The image is loaded with OpenCV, run through ``preprocess_image`` and
    passed to the module-level ``reader``. If the preprocessed image yields no
    text, OCR is retried on the original file.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The recognized text fragments joined by single spaces, or the message
        "Text extraction failed. Please enter text manually." when OCR is
        unavailable, the image cannot be read, no text is found, or any
        error occurs. This function does not raise for OCR failures.
    """
    print(f"📂 Reading image from: {image_path}")
    try:
        if not ocr_available:
            raise ValueError("EasyOCR not available")

        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Could not read image at {image_path}")

        # Hand the preprocessed array to EasyOCR directly instead of writing a
        # temporary file next to the input: nothing is left behind if OCR
        # raises, it works when the input directory is read-only, and the
        # binarized image is not degraded by lossy re-encoding.
        processed = preprocess_image(image)
        text = _join_ocr_text(reader.readtext(processed))

        # Fallback to the original image if preprocessing removed all text.
        if not text:
            text = _join_ocr_text(reader.readtext(image_path))

        print("📝 Extracted Text:", text)
        return text or _OCR_FAILURE_MESSAGE

    except Exception as e:
        # Deliberate best-effort boundary: report the problem and let the
        # caller fall back to manual text entry.
        print(f"❌ OCR failed: {e}")
        return _OCR_FAILURE_MESSAGE