|
import os
|
|
import requests
|
|
import base64
|
|
import json
|
|
import gradio as gr
|
|
from PIL import Image, ImageDraw, ImageFont
|
|
|
|
|
|
# Runtime configuration, read once at import time from the environment.
# Each value is None when the corresponding variable is not set.
API_KEY = os.getenv("API_KEY")  # credential placed in the Authorization header
OCR_API_URL = os.getenv("OCR_API_URL")  # endpoint the OCR request is POSTed to
OCR_TASK = os.getenv("OCR_TASK")  # task name sent in the payload and used as result key
OCR_FUNCTION_NAME = os.getenv("OCR_FUNCTION_NAME")  # sent as "function_name" in the payload
|
|
|
|
# Upper bound (seconds) on how long to wait for the OCR service. Without an
# explicit timeout, `requests` may block indefinitely on an unresponsive server.
_OCR_REQUEST_TIMEOUT = 60


def _quad_to_points(quad):
    """Pair a flat ``[x0, y0, x1, y1, ...]`` sequence into ``(x, y)`` tuples."""
    # zip() stops at the shorter slice, so a dangling odd coordinate is
    # dropped instead of raising IndexError.
    return list(zip(quad[0::2], quad[1::2]))


def call_ocr(image_path: str):
    """Run the remote OCR service on an image and draw its detections.

    The image file is base64-encoded and POSTed as JSON to ``OCR_API_URL``
    together with ``OCR_TASK`` and ``OCR_FUNCTION_NAME``. The response is
    read as ``{"data": {OCR_TASK: {"quad_boxes": [...], "labels": [...]}}}``;
    missing or null fields are treated as "no detections". Each quad box is
    interpreted as a flat list of alternating x/y coordinates.

    Args:
        image_path: Filesystem path of the image to analyse.

    Returns:
        A ``(image, text)`` tuple: a PIL image with every detected region
        outlined in red and labelled in blue, and the cleaned labels joined
        with newlines.

    Raises:
        gr.Error: If no image path was supplied.
        requests.HTTPError: If the OCR service answers with an error status.
        requests.Timeout: If the service does not answer within
            ``_OCR_REQUEST_TIMEOUT`` seconds.
    """
    # Gradio passes None when the user submits without uploading anything;
    # surface a readable message instead of a TypeError from open().
    if not image_path:
        raise gr.Error("Please upload an image before running OCR.")

    with open(image_path, "rb") as image_file:
        base64_string = base64.b64encode(image_file.read()).decode("utf-8")

    payload = {
        "image": base64_string,
        "task": OCR_TASK,
        "function_name": OCR_FUNCTION_NAME,
    }
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Authorization": f"Basic {API_KEY}",
    }

    response = requests.post(
        OCR_API_URL,
        json=payload,
        headers=headers,
        timeout=_OCR_REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    response_json = response.json()

    # `or` fallbacks also cover keys that are present but null in the JSON.
    data = (response_json.get("data") or {}).get(OCR_TASK) or {}
    quad_boxes = data.get("quad_boxes") or []
    labels = data.get("labels") or []

    # Strip the end-of-sequence marker the service leaves in label text.
    cleaned_labels = [label.replace("</s>", "").strip() for label in labels]

    # Load the pixels inside a context manager so the file handle is released,
    # and make sure the canvas can show coloured annotations: on grayscale or
    # palette images "red"/"blue" would otherwise be mapped to grey shades.
    with Image.open(image_path) as source_image:
        if source_image.mode in ("RGB", "RGBA"):
            image = source_image.copy()
        else:
            image = source_image.convert("RGB")
    draw = ImageDraw.Draw(image)

    try:
        font = ImageFont.truetype("arial.ttf", 16)
    except OSError:
        # Font file not available on this system; passing font=None lets
        # ImageDraw fall back to its built-in default font.
        font = None

    for quad, label in zip(quad_boxes, cleaned_labels):
        points = _quad_to_points(quad)
        if len(points) < 2:
            # Not enough coordinates to draw anything; the label is still
            # reported in the text output.
            continue
        draw.polygon(points, outline="red", width=2)
        draw.text(points[0], label, fill="blue", font=font)

    output_labels = "\n".join(cleaned_labels)
    return image, output_labels
|
|
|
|
# Gradio UI: one image upload in, the annotated image and the recognised
# text out. The upload is handed to call_ocr as a file path.
iface = gr.Interface(
    title="Optical Character Recognition (OCR)",
    fn=call_ocr,
    inputs=gr.Image(type="filepath", label="Upload Image"),
    outputs=[
        gr.Image(label="Annotated Image"),
        gr.Textbox(label="Detected Labels"),
    ],
)
|
|
|
|
# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch(debug=True)
|
|
|