yeswanthvarma committed on
Commit
21202e9
·
verified ·
1 Parent(s): 7a89229

Update utils/image_processor.py

Browse files
Files changed (1) hide show
  1. utils/image_processor.py +12 -28
utils/image_processor.py CHANGED
@@ -5,46 +5,30 @@ from PIL import Image
5
  import torch
6
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
7
 
8
- # ✅ Load TrOCR model and processor once
9
  try:
10
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
11
- model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
12
- model.eval()
13
  print("βœ… TrOCR model loaded.")
14
- trocr_available = True
15
  except Exception as e:
16
- print(f"❌ Failed to load TrOCR: {e}")
17
- trocr_available = False
18
-
19
- def preprocess_image(image):
20
- """
21
- Preprocess image for OCR: convert to grayscale and enhance contrast.
22
- """
23
- gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
24
- denoised = cv2.fastNlMeansDenoising(gray, h=10)
25
- processed = cv2.adaptiveThreshold(
26
- denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
27
- cv2.THRESH_BINARY, 11, 2
28
- )
29
- return processed
30
 
31
  def extract_text_from_image(image_path):
32
- """
33
- Extract handwritten text from an image using TrOCR
34
- """
35
  try:
36
- print(f"πŸ“‚ Reading image from: {image_path}") # βœ… log file path
 
37
 
38
  image = Image.open(image_path).convert("RGB")
39
  pixel_values = processor(images=image, return_tensors="pt").pixel_values
40
 
41
  generated_ids = trocr_model.generate(pixel_values)
42
- text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
43
-
44
- print(f"πŸ“ Extracted Text from {os.path.basename(image_path)}: {text}") # βœ… PRINT EXTRACTED TEXT
45
 
46
- return text or "Text extraction failed."
47
 
48
  except Exception as e:
49
- print(f"❌ OCR failed on {image_path}: {str(e)}")
50
- return "Text extraction failed."
 
5
  import torch
6
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
7
 
8
+ # ✅ Load TrOCR processor and model once
9
  try:
10
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
11
+ trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
 
12
  print("βœ… TrOCR model loaded.")
 
13
  except Exception as e:
14
+ print(f"❌ Failed to load TrOCR model: {e}")
15
+ processor = None
16
+ trocr_model = None
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  def extract_text_from_image(image_path):
19
+ print(f"πŸ“‚ Reading image from: {image_path}")
 
 
20
  try:
21
+ if trocr_model is None or processor is None:
22
+ raise RuntimeError("TrOCR model or processor not loaded.")
23
 
24
  image = Image.open(image_path).convert("RGB")
25
  pixel_values = processor(images=image, return_tensors="pt").pixel_values
26
 
27
  generated_ids = trocr_model.generate(pixel_values)
28
+ generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
 
29
 
30
+ return generated_text.strip() if generated_text else "Text extraction failed."
31
 
32
  except Exception as e:
33
+ print(f"❌ OCR failed on {image_path}: {e}")
34
+ return "Text extraction failed."