Spaces: Running on Zero
Commit · 864e5c4
Parent(s): c2a6750
Refactor OCR processing by introducing a GPU-accelerated predict function and updating the run_hf_ocr method to utilize it
app.py CHANGED

@@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET
 import os
 import torch
 from transformers import AutoProcessor, AutoModelForImageTextToText, pipeline
-
+import spaces
 # --- Global Model and Processor Initialization ---
 # Load the OCR model and processor once when the app starts
 try:
@@ -85,11 +85,7 @@ def run_hf_ocr(image_path):
         # [{'generated_text': [{'role': 'user', ...}, {'role': 'assistant', 'content': "TEXT..."}]}]
         # This suggests the pipeline is returning a conversational style output.
         # We will try to call the pipeline with the image and prompt directly.
-        ocr_results = HF_PIPE(
-            pil_image,
-            prompt="Return the plain text representation of this document as if you were reading it naturally.\n"
-            # The pipeline should handle formatting this into messages if needed by the model.
-        )
+        ocr_results = predict(pil_image)
 
         # Parse the output based on the user's example structure
         if isinstance(ocr_results, list) and ocr_results and 'generated_text' in ocr_results[0]:
@@ -129,6 +125,15 @@ def run_hf_ocr(image_path):
     except Exception as e:
         print(f"Error during Hugging Face OCR: {e}")
         return f"Error during Hugging Face OCR: {str(e)}"
+@spaces.GPU
+def predict(pil_image):
+    ocr_results = HF_PIPE(
+        pil_image,
+        prompt="Return the plain text representation of this document as if you were reading it naturally.\n"
+        # The pipeline should handle formatting this into messages if needed by the model.
+    )
+
+    return ocr_results
 
 # --- Gradio Interface Function ---
 
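Context on the pattern: this Space runs on ZeroGPU ("Running on Zero"), where a GPU is attached only while a function decorated with @spaces.GPU is executing. That is why the commit moves the HF_PIPE call out of run_hf_ocr and into a dedicated predict function. Below is a minimal, self-contained sketch of the same pattern, assuming a ZeroGPU Space; the pipeline task and model id are placeholders, not taken from this repository:

import spaces
import torch
from transformers import pipeline

# Build the pipeline once at startup. On ZeroGPU, CUDA is only usable
# inside @spaces.GPU-decorated calls, so keep module-level work CPU-safe.
HF_PIPE = pipeline(
    "image-text-to-text",              # task name is an assumption
    model="your-org/your-ocr-model",   # hypothetical placeholder model id
    torch_dtype=torch.bfloat16,
)

@spaces.GPU  # requests a GPU for the duration of each call, then releases it
def predict(pil_image):
    # All GPU-bound inference happens inside the decorated function.
    return HF_PIPE(
        pil_image,
        prompt="Return the plain text representation of this document as if you were reading it naturally.\n",
    )

CPU-side callers such as run_hf_ocr invoke predict like any ordinary function; the decorator handles GPU allocation per call.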