yeswanthvarma committed on
Commit
1a1c667
·
verified ·
1 Parent(s): 23fad62

Update utils/image_processor.py

Browse files
Files changed (1) hide show
  1. utils/image_processor.py +38 -35
utils/image_processor.py CHANGED
@@ -1,58 +1,61 @@
1
- import torch
2
  import cv2
3
- from transformers import TrOCRProcessor, VisionEncoderDecoderModel
4
  from PIL import Image
5
- import os
 
6
 
7
# Load the TrOCR processor and model once, at import time, so that every OCR
# call reuses the same objects instead of reloading them.
_TROCR_CHECKPOINT = "microsoft/trocr-base-handwritten"

try:
    processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
    model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
    model.to("cpu")  # You can set to "cuda" if running with GPU
    print("✅ TrOCR model loaded successfully.")
    ocr_available = True
except Exception as e:
    # Best-effort load: importing this module must not fail just because the
    # model could not be loaded. Bind the names to None so later code sees a
    # defined value, and let callers check `ocr_available` before using them.
    processor = None
    model = None
    print(f"❌ TrOCR initialization failed: {e}")
    ocr_available = False
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
def extract_text_from_image(image_path):
    """
    Extract handwritten text from an image file using the TrOCR model.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The recognised text with surrounding whitespace removed, or the
        message "Text extraction failed. Please enter text manually." when
        the model is unavailable, the image cannot be read, nothing is
        recognised, or any other ordinary exception occurs (exceptions are
        logged with print and not propagated).
    """
    fallback = "Text extraction failed. Please enter text manually."
    try:
        print(f"📂 Reading image from: {image_path}")

        if not ocr_available:
            raise ValueError("TrOCR is not available")

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            raise ValueError(f"Could not read image at {image_path}")

        # OpenCV decodes into BGR channel order; convert to RGB before
        # handing the pixels to PIL and the TrOCR processor.
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(rgb_image)

        pixel_values = processor(images=pil_image, return_tensors="pt").pixel_values

        # Inference only: no gradients are needed.
        with torch.no_grad():
            generated_ids = model.generate(pixel_values)
        decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
        text = decoded[0].strip()

        # Log extracted text
        print(f"📝 Extracted text from {os.path.basename(image_path)}:\n{text}\n")

        return text or fallback

    except Exception as e:
        print(f"OCR failed: {e}")
        return fallback
58
-
 
1
+ import os
2
  import cv2
3
+ import numpy as np
4
  from PIL import Image
5
+ import torch
6
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
7
 
8
# Load the TrOCR model and processor once, at import time, so that every OCR
# call reuses the same objects instead of reloading them.
_TROCR_CHECKPOINT = "microsoft/trocr-base-handwritten"

try:
    processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
    model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
    model.eval()  # put the model in evaluation mode; it is only used for inference
    print("✅ TrOCR model loaded.")
    trocr_available = True
except Exception as e:
    # Best-effort load: importing this module must not fail just because the
    # model could not be loaded. Bind the names to None so later code sees a
    # defined value, and let callers check `trocr_available` before using them.
    processor = None
    model = None
    print(f"❌ Failed to load TrOCR: {e}")
    trocr_available = False
18
+
19
def preprocess_image(image, *, denoise_strength=10, block_size=11, threshold_offset=2):
    """
    Preprocess an image for OCR: grayscale, denoise, then binarise.

    The image is reduced to a single channel, passed through OpenCV's
    non-local-means denoiser, and converted to a black/white image with a
    Gaussian adaptive threshold.

    Args:
        image: Image array as returned by cv2.imread (BGR). Single-channel
            and 4-channel (BGRA) arrays are also accepted.
            NOTE(review): assumes 8-bit pixel data — confirm for other callers.
        denoise_strength: Filter strength `h` passed to
            cv2.fastNlMeansDenoising.
        block_size: Neighbourhood size passed to cv2.adaptiveThreshold;
            OpenCV requires an odd value greater than 1.
        threshold_offset: Constant `C` passed to cv2.adaptiveThreshold.

    Returns:
        A single-channel array whose pixels are either 0 or 255.

    Raises:
        ValueError: If `image` is None or contains no pixels.
    """
    if image is None or getattr(image, "size", 0) == 0:
        raise ValueError("Cannot preprocess an empty image.")

    # Pick the grayscale conversion that matches the channel layout so the
    # helper also works for inputs that are already gray or carry alpha.
    if image.ndim == 2:
        gray = image
    elif image.shape[2] == 1:
        gray = image[:, :, 0]
    elif image.shape[2] == 4:
        gray = cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    denoised = cv2.fastNlMeansDenoising(gray, h=denoise_strength)
    processed = cv2.adaptiveThreshold(
        denoised,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        block_size,
        threshold_offset,
    )
    return processed
30
 
31
def extract_text_from_image(image_path):
    """
    Extract text from a handwritten image using TrOCR.

    The image is read with OpenCV, cleaned up by preprocess_image, and then
    decoded by the module-level TrOCR processor and model.

    NOTE(review): the trocr-*-handwritten checkpoints are documented for
    single text-line images; confirm callers pass line crops rather than
    full pages.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The recognised text with surrounding whitespace removed, or the
        message "Text extraction failed. Please enter text manually." when
        the model is unavailable, the image cannot be read, nothing is
        recognised, or any other ordinary exception occurs (exceptions are
        logged with print and not propagated).
    """
    fallback = "Text extraction failed. Please enter text manually."
    try:
        if not trocr_available:
            raise ValueError("TrOCR is not available.")

        print(f"📂 Reading image from: {image_path}")

        # Read and preprocess image
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            raise ValueError(f"Could not load image: {image_path}")

        processed_image = preprocess_image(image)
        # preprocess_image yields a single-channel image; expand it back to
        # three channels before handing it to the processor.
        pil_image = Image.fromarray(processed_image).convert("RGB")

        # The processor turns the PIL image into the tensor the model expects.
        pixel_values = processor(images=pil_image, return_tensors="pt").pixel_values
        # Keep the input on the same device as the model so generation still
        # works if the model is moved off the CPU.
        pixel_values = pixel_values.to(model.device)

        # Disable gradient tracking for inference
        with torch.no_grad():
            generated_ids = model.generate(pixel_values)
        decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)

        return decoded[0].strip() or fallback

    except Exception as e:
        print(f"OCR failed: {e}")
        return fallback