yeswanthvarma committed on
Commit
cd39c9c
·
verified ·
1 Parent(s): b410261

Update utils/image_processor.py

Browse files
Files changed (1) hide show
  1. utils/image_processor.py +55 -20
utils/image_processor.py CHANGED
@@ -2,34 +2,69 @@ import os
2
  import cv2
3
  import numpy as np
4
  from PIL import Image
5
- import torch
6
- from transformers import TrOCRProcessor, VisionEncoderDecoderModel
7
 
8
- # ✅ Load TrOCR processor and model once
9
- try:
10
- processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
11
- trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
 
 
12
 
13
- print("✅ TrOCR model loaded.")
 
 
 
 
14
  except Exception as e:
15
- print(f"❌ Failed to load TrOCR model: {e}")
16
- processor = None
17
- trocr_model = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  def extract_text_from_image(image_path):
 
 
 
20
  print(f"📂 Reading image from: {image_path}")
21
  try:
22
- if trocr_model is None or processor is None:
23
- raise RuntimeError("TrOCR model or processor not loaded.")
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- image = Image.open(image_path).convert("RGB")
26
- pixel_values = processor(images=image, return_tensors="pt").pixel_values
 
 
27
 
28
- generated_ids = trocr_model.generate(pixel_values)
29
- generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
30
 
31
- return generated_text.strip() if generated_text else "Text extraction failed."
32
-
33
  except Exception as e:
34
- print(f"❌ OCR failed on {image_path}: {e}")
35
- return "Text extraction failed."
 
2
  import cv2
3
  import numpy as np
4
  from PIL import Image
5
+ import easyocr
 
6
 
7
+ # ✅ Safe directory for OCR model storage
8
+ EASYOCR_DIR = "/tmp/.easyocr"
9
+ os.makedirs(EASYOCR_DIR, exist_ok=True)
10
+
11
+ # ✅ Set environment variable before import (optional but safe)
12
+ os.environ["EASYOCR_HOME"] = EASYOCR_DIR
13
 
14
+ # ✅ Initialize EasyOCR
15
+ try:
16
+ reader = easyocr.Reader(['en'], model_storage_directory=EASYOCR_DIR)
17
+ ocr_available = True
18
+ print("✅ EasyOCR initialized.")
19
  except Exception as e:
20
+ print(f"❌ EasyOCR initialization failed: {str(e)}")
21
+ ocr_available = False
22
+
23
+ def preprocess_image(image):
24
+ """
25
+ Preprocess image to improve OCR accuracy
26
+ """
27
+ if len(image.shape) == 3:
28
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
29
+ else:
30
+ gray = image
31
+ denoised = cv2.fastNlMeansDenoising(gray, h=10)
32
+ processed = cv2.adaptiveThreshold(
33
+ denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
34
+ cv2.THRESH_BINARY, 11, 2
35
+ )
36
+ return processed
37
 
38
  def extract_text_from_image(image_path):
39
+ """
40
+ Extract text from image using EasyOCR
41
+ """
42
  print(f"📂 Reading image from: {image_path}")
43
  try:
44
+ if not ocr_available:
45
+ raise ValueError("EasyOCR not available")
46
+
47
+ image = cv2.imread(image_path)
48
+ if image is None:
49
+ raise ValueError(f"Could not read image at {image_path}")
50
+
51
+ processed = preprocess_image(image)
52
+ temp_path = os.path.join(os.path.dirname(image_path), f"temp_{os.path.basename(image_path)}")
53
+ cv2.imwrite(temp_path, processed)
54
+
55
+ results = reader.readtext(temp_path)
56
+ os.remove(temp_path)
57
+
58
+ text = ' '.join([res[1] for res in results]).strip()
59
 
60
+ # Fallback to original if empty
61
+ if not text:
62
+ results = reader.readtext(image_path)
63
+ text = ' '.join([res[1] for res in results]).strip()
64
 
65
+ print("📝 Extracted Text:", text)
66
+ return text or "Text extraction failed. Please enter text manually."
67
 
 
 
68
  except Exception as e:
69
+ print(f"❌ OCR failed: {e}")
70
+ return "Text extraction failed. Please enter text manually."