File size: 1,693 Bytes
1a1c667
23fad62
1a1c667
3497db5
1a1c667
 
436a6c3
1a1c667
3497db5
706dc31
 
1a1c667
 
 
3497db5
1a1c667
 
 
 
 
 
 
 
 
 
 
 
 
 
436a6c3
5888c6a
706dc31
1471ff9
706dc31
5888c6a
1471ff9
1a1c667
1471ff9
 
8ceac8e
1471ff9
 
3497db5
1471ff9
986c935
1471ff9
 
5888c6a
1471ff9
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import os
import cv2
import numpy as np
from PIL import Image
import torch
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# ✅ Load TrOCR model and processor once
# Pre-bind every module-level name so that a failed load leaves them defined
# (as None / False) instead of raising NameError on first use.
processor = None
model = None
trocr_available = False
try:
    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
    model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
    model.eval()  # inference only: switch off dropout and other training-mode layers
    print("✅ TrOCR model loaded.")
    trocr_available = True
except Exception as e:
    # Best-effort load: report the problem and keep the module importable.
    print(f"❌ Failed to load TrOCR: {e}")
    # Reset both objects so a partial load (processor ok, model failed) does
    # not leave the module in a half-initialised state.
    processor = None
    model = None
    trocr_available = False

# Alias so that code referring to the model as `trocr_model` resolves to the
# same object as `model` (None when loading failed).
trocr_model = model

def preprocess_image(image, denoise_strength=10, block_size=11, threshold_offset=2):
    """
    Preprocess an image for OCR: grayscale, denoise, then binarize.

    Args:
        image: Image array. A 3-D array is converted with
            ``cv2.COLOR_BGR2GRAY`` (i.e. it is treated as BGR channel
            order); a 2-D array is taken to be grayscale already and is
            used as-is.
        denoise_strength: Filter strength ``h`` passed to
            ``cv2.fastNlMeansDenoising``.
        block_size: Neighbourhood size passed to ``cv2.adaptiveThreshold``
            (OpenCV requires an odd value greater than 1).
        threshold_offset: Constant ``C`` passed to
            ``cv2.adaptiveThreshold``.

    Returns:
        The single-channel result of the Gaussian adaptive threshold
        (``THRESH_BINARY`` with a maximum value of 255).
    """
    # A 2-D array has no channel axis, so BGR->gray conversion would fail on
    # it; skip the conversion instead of crashing on grayscale input.
    if getattr(image, "ndim", None) == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Denoise before thresholding so the threshold is computed on the
    # smoothed image rather than the raw one.
    denoised = cv2.fastNlMeansDenoising(gray, h=denoise_strength)

    return cv2.adaptiveThreshold(
        denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, block_size, threshold_offset,
    )

def extract_text_from_image(image_path):
    """
    Extract handwritten text from an image file using TrOCR.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The decoded text with surrounding whitespace stripped. The string
        "Text extraction failed." is returned instead when the TrOCR model
        is not loaded, when the decoded text is empty, or when any step
        raises an exception (the error is printed, never propagated).
    """
    failure_message = "Text extraction failed."
    try:
        print(f"📂 Reading image from: {image_path}")  # log file path

        if not trocr_available:
            # Routed through the handler below so every failure is logged
            # in the same format.
            raise RuntimeError("TrOCR model is not loaded")

        # The context manager closes the file handle deterministically;
        # convert() returns an independent, fully loaded RGB copy.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")
        pixel_values = processor(images=image, return_tensors="pt").pixel_values

        # The module binds the loaded network as `model`.
        generated_ids = model.generate(pixel_values)
        text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()

        print(f"📝 Extracted Text from {os.path.basename(image_path)}: {text}")  # print extracted text

        # An empty decode is reported the same way as a failure.
        return text or failure_message

    except Exception as e:
        # Deliberate best-effort boundary: callers always get a string back.
        print(f"❌ OCR failed on {image_path}: {str(e)}")
        return failure_message