File size: 1,894 Bytes
6aedaf3
dff818a
3497db5
 
 
0a99f12
 
3497db5
 
436a6c3
3497db5
 
c8944a1
3497db5
 
 
 
 
 
436a6c3
5888c6a
 
986c935
6aedaf3
986c935
 
 
3497db5
6aedaf3
986c935
 
3497db5
986c935
3497db5
6aedaf3
986c935
3497db5
6aedaf3
986c935
3497db5
986c935
3497db5
 
6aedaf3
986c935
6aedaf3
3497db5
 
986c935
3497db5
986c935
5888c6a
6aedaf3
3497db5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os

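# ✅ Set EASYOCR_HOME before importing easyocr so the variable is already in the environment when the library loads.
# NOTE(review): EasyOCR's storage-directory override appears to be EASYOCR_MODULE_PATH — confirm EASYOCR_HOME is actually honored.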
os.environ["EASYOCR_HOME"] = "/tmp/.easyocr"

import cv2
import numpy as np
import easyocr
from PIL import Image

# Build the shared EasyOCR reader a single time at import. `ocr_available`
# starts out False and is only flipped once the reader has been constructed,
# so any failure during setup leaves OCR marked as unavailable.
ocr_available = False
try:
    reader = easyocr.Reader(['en'], download_enabled=True)
    print("βœ… EasyOCR initialized")
    ocr_available = True
except Exception as init_error:
    print(f"❌ EasyOCR initialization failed: {init_error!s}")
    print("Text extraction may not work properly.")

def extract_text_from_image(image_path):
    """Run EasyOCR on an image file and return the recognized text.

    The image is loaded with OpenCV, passed through ``preprocess_image``
    (defined elsewhere in this module), written to a temporary file next to
    the input, and read with the module-level ``reader``. If that pass yields
    no text, OCR is retried on the original, unprocessed file.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The recognized text fragments joined by single spaces. Returns
        ``"Image could not be read."`` when OpenCV cannot load the file, and
        ``"Text extraction failed. Please enter text manually."`` when OCR is
        unavailable, finds no text, or any step raises.
    """
    failure_message = "Text extraction failed. Please enter text manually."
    try:
        print(f"πŸ“‚ Reading image from: {image_path}")
        if not ocr_available:
            print("❌ EasyOCR not available.")
            return failure_message

        image = cv2.imread(image_path)
        if image is None:
            print("❌ cv2 could not read image.")
            return "Image could not be read."

        processed_image = preprocess_image(image)
        # The temp file keeps the original basename (and therefore extension),
        # which cv2.imwrite uses to pick the output format.
        temp_path = os.path.join(
            os.path.dirname(image_path),
            f"temp_{os.path.basename(image_path)}",
        )
        try:
            cv2.imwrite(temp_path, processed_image)
            print(f"πŸ–ΌοΈ Temp processed image saved at: {temp_path}")

            results = reader.readtext(temp_path)
            print("πŸ” OCR Raw Output:", results)
        finally:
            # Always clean up, even when writing or OCR raises; otherwise the
            # temp file would be left behind beside the caller's image.
            if os.path.exists(temp_path):
                os.remove(temp_path)

        # Index 1 of each readtext entry is used as the recognized string.
        text = ' '.join(result[1] for result in results).strip()
        if not text:
            print("⚠️ No text found, retrying with original image...")
            results = reader.readtext(image_path)
            text = ' '.join(result[1] for result in results).strip()

        print("βœ… Extracted Text:", text)
        return text or failure_message

    except Exception as e:
        # Deliberate best-effort boundary: callers always get a string back.
        print(f"OCR failed: {e}")
        return failure_message