Spaces:

muhammadsalmanalfaridzi
/

Optical-Character-Recognition

Running

App Files Files Community

Optical-Character-Recognition / app.py

muhammadsalmanalfaridzi

Upload 2 files

792db51 verified 2 months ago

raw

history blame contribute delete

2.22 kB

	import os
	import requests
	import base64
	import json
	import gradio as gr
	from PIL import Image, ImageDraw, ImageFont

	# Service credentials and routing come from the process environment
	# (set them as secrets in the Hugging Face Space). Each is None when unset.
	API_KEY = os.getenv("API_KEY")
	OCR_API_URL = os.getenv("OCR_API_URL")
	OCR_TASK = os.getenv("OCR_TASK")
	OCR_FUNCTION_NAME = os.getenv("OCR_FUNCTION_NAME")

	def call_ocr(image_path: str):
	    """Run OCR on an image file and return it annotated with the detections.

	    The image is base64-encoded and POSTed as JSON to ``OCR_API_URL`` together
	    with ``OCR_TASK`` and ``OCR_FUNCTION_NAME``. The response is read as
	    ``response["data"][OCR_TASK]``, from which ``quad_boxes`` (flat coordinate
	    lists ``[x0, y0, x1, y1, ...]``) and ``labels`` are taken. Each box is
	    outlined in red and its label is written in blue at the box's first vertex.

	    Args:
	        image_path: Filesystem path of the uploaded image.

	    Returns:
	        A ``(image, text)`` tuple: the annotated PIL image and the cleaned
	        labels joined with newlines.

	    Raises:
	        gr.Error: If no image was supplied or ``OCR_API_URL`` is not set.
	        requests.HTTPError: If the OCR service answers with an error status.
	        requests.Timeout: If the OCR service does not answer in time.
	    """
	    # Gradio passes None when the user submits without uploading anything.
	    if not image_path:
	        raise gr.Error("Please upload an image before running OCR.")
	    if not OCR_API_URL:
	        raise gr.Error("OCR_API_URL is not configured for this Space.")

	    # Read and encode the image to base64
	    with open(image_path, "rb") as image_file:
	        base64_string = base64.b64encode(image_file.read()).decode("utf-8")

	    payload = {
	        "image": base64_string,
	        "task": OCR_TASK,
	        "function_name": OCR_FUNCTION_NAME,
	    }

	    headers = {
	        "Content-Type": "application/json",
	        "Accept": "application/json",
	        "Authorization": f"Basic {API_KEY}",
	    }

	    # Without a timeout a stalled service would block this worker forever.
	    response = requests.post(
	        OCR_API_URL, json=payload, headers=headers, timeout=60
	    )
	    response.raise_for_status()
	    response_json = response.json()

	    # Extract quad_boxes and labels. `or {}` / `or []` also covers keys that
	    # are present but null, which a plain .get(key, default) does not.
	    data = (response_json.get("data") or {}).get(OCR_TASK) or {}
	    quad_boxes = data.get("quad_boxes") or []
	    labels = data.get("labels") or []

	    # Clean up labels
	    cleaned_labels = [label.replace("</s>", "").strip() for label in labels]

	    # Load the pixels inside a context manager so the file handle is closed.
	    # Grayscale/palette images are promoted to RGB so the red and blue
	    # annotations keep their colour; RGB/RGBA images are copied as they are.
	    with Image.open(image_path) as source_image:
	        if source_image.mode in ("RGB", "RGBA"):
	            image = source_image.copy()
	        else:
	            image = source_image.convert("RGB")
	    draw = ImageDraw.Draw(image)

	    # Use a true-type font if available; None selects Pillow's default font.
	    try:
	        font = ImageFont.truetype("arial.ttf", 16)
	    except OSError:
	        font = None

	    for quad, label in zip(quad_boxes, cleaned_labels):
	        # Pair up (x, y) coordinates; a trailing unpaired value is ignored.
	        polygon = list(zip(quad[0::2], quad[1::2]))
	        if len(polygon) < 2:
	            # Too few points to draw an outline; skip instead of crashing.
	            continue
	        draw.polygon(polygon, outline="red", width=2)
	        draw.text(polygon[0], label, fill="blue", font=font)

	    output_labels = "\n".join(cleaned_labels)
	    return image, output_labels

	# Gradio UI: one uploaded image in, the annotated image and the detected
	# text out. The upload is handed to call_ocr as a file path.
	iface = gr.Interface(
	    title="Optical Character Recognition (OCR)",
	    fn=call_ocr,
	    inputs=gr.Image(label="Upload Image", type="filepath"),
	    outputs=[
	        gr.Image(label="Annotated Image"),
	        gr.Textbox(label="Detected Labels"),
	    ],
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch(debug=True)