davanstrien HF Staff committed on
Commit
864e5c4
·
1 Parent(s): c2a6750

Refactor OCR processing by introducing a GPU-accelerated predict function and updating the run_hf_ocr method to utilize it

Browse files
Files changed (1) hide show
  1. app.py +11 -6
app.py CHANGED
@@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET
4
  import os
5
  import torch
6
  from transformers import AutoProcessor, AutoModelForImageTextToText, pipeline
7
-
8
  # --- Global Model and Processor Initialization ---
9
  # Load the OCR model and processor once when the app starts
10
  try:
@@ -85,11 +85,7 @@ def run_hf_ocr(image_path):
85
  # [{'generated_text': [{'role': 'user', ...}, {'role': 'assistant', 'content': "TEXT..."}]}]
86
  # This suggests the pipeline is returning a conversational style output.
87
  # We will try to call the pipeline with the image and prompt directly.
88
- ocr_results = HF_PIPE(
89
- pil_image,
90
- prompt="Return the plain text representation of this document as if you were reading it naturally.\n"
91
- # The pipeline should handle formatting this into messages if needed by the model.
92
- )
93
 
94
  # Parse the output based on the user's example structure
95
  if isinstance(ocr_results, list) and ocr_results and 'generated_text' in ocr_results[0]:
@@ -129,6 +125,15 @@ def run_hf_ocr(image_path):
129
  except Exception as e:
130
  print(f"Error during Hugging Face OCR: {e}")
131
  return f"Error during Hugging Face OCR: {str(e)}"
 
 
 
 
 
 
 
 
 
132
 
133
  # --- Gradio Interface Function ---
134
 
 
4
  import os
5
  import torch
6
  from transformers import AutoProcessor, AutoModelForImageTextToText, pipeline
7
+ import spaces
8
  # --- Global Model and Processor Initialization ---
9
  # Load the OCR model and processor once when the app starts
10
  try:
 
85
  # [{'generated_text': [{'role': 'user', ...}, {'role': 'assistant', 'content': "TEXT..."}]}]
86
  # This suggests the pipeline is returning a conversational style output.
87
  # We will try to call the pipeline with the image and prompt directly.
88
+ ocr_results = predict(pil_image)
 
 
 
 
89
 
90
  # Parse the output based on the user's example structure
91
  if isinstance(ocr_results, list) and ocr_results and 'generated_text' in ocr_results[0]:
 
125
  except Exception as e:
126
  print(f"Error during Hugging Face OCR: {e}")
127
  return f"Error during Hugging Face OCR: {str(e)}"
128
@spaces.GPU
def predict(
    pil_image,
    prompt="Return the plain text representation of this document as if you were reading it naturally.\n",
):
    """Run the global OCR pipeline on an image inside a GPU-allocated worker.

    The @spaces.GPU decorator asks Hugging Face Spaces (ZeroGPU hardware) to
    allocate a GPU for the duration of this call, which is why the pipeline
    invocation was factored out of run_hf_ocr into this function.

    Args:
        pil_image: The PIL image to OCR.
        prompt: Instruction text passed to the image-text-to-text pipeline.
            Defaults to the original plain-text extraction prompt, so existing
            callers (e.g. run_hf_ocr) are unaffected.

    Returns:
        The raw pipeline output, unmodified. Per the caller's parsing logic,
        this is expected to look like
        [{'generated_text': [..., {'role': 'assistant', 'content': "..."}]}]
        — TODO confirm against the actual model's pipeline output.
    """
    # The pipeline is expected to format the prompt into chat-style messages
    # internally if the underlying model requires it.
    ocr_results = HF_PIPE(
        pil_image,
        prompt=prompt,
    )

    return ocr_results
137
 
138
  # --- Gradio Interface Function ---
139