from transformers import DetrImageProcessor, DetrForObjectDetection
from PIL import Image, ImageDraw
import torch
import gradio as gr

# Checkpoint shared by the processor and the model. Keeping the name in one
# place guarantees both are always loaded from the same pretrained checkpoint.
MODEL_NAME = "facebook/detr-resnet-50"

# Load model and processor
processor = DetrImageProcessor.from_pretrained(MODEL_NAME)
model = DetrForObjectDetection.from_pretrained(MODEL_NAME)

# Label ID that detections are filtered on. NOTE: despite the legacy
# ``FACE_CLASS_INDEX`` name, this is the 'person' class, not a face class.
PERSON_CLASS_INDEX = 1  # COCO class ID for 'person'
FACE_CLASS_INDEX = PERSON_CLASS_INDEX  # legacy alias; detect_faces reads this name

def detect_faces(img: Image.Image, *, threshold: float = 0.8):
    """Detect people in *img* and draw a labelled box around each one.

    Despite its name, this function keeps detections whose label ID equals
    ``FACE_CLASS_INDEX`` (documented in this file as the COCO 'person'
    class); it does not detect faces.

    Args:
        img: Input image. ``None`` (for example, the UI was submitted without
            an image) is tolerated and short-circuits the function.
        threshold: Minimum confidence score a detection must reach to be
            kept by post-processing. Defaults to 0.8.

    Returns:
        An ``(annotated_image, summary)`` tuple. ``annotated_image`` is an
        RGB copy of the input with a lime rectangle and confidence score per
        kept detection, or ``None`` when no image was given. ``summary`` is a
        human-readable count string.
    """
    if img is None:
        return None, "No image provided."

    # convert() returns a new image, so the caller's image is never drawn on.
    # Normalising to RGB also means grayscale/RGBA/palette inputs reach the
    # processor as three-channel images.
    img_draw = img.convert("RGB")

    # Preprocess and predict. Gradients are never needed for inference, so
    # skip building the autograd graph.
    inputs = processor(images=img_draw, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Post-processing wants (height, width); PIL's .size is (width, height).
    target_sizes = torch.tensor([img_draw.size[::-1]])
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=threshold
    )[0]

    draw = ImageDraw.Draw(img_draw)
    count = 0
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        if label.item() != FACE_CLASS_INDEX:
            continue
        count += 1
        x0, y0, x1, y1 = (round(v, 2) for v in box.tolist())
        draw.rectangle((x0, y0, x1, y1), outline="lime", width=3)
        # Clamp so the score stays visible when the box touches the top edge.
        draw.text((x0, max(y0 - 10, 0)), f"{score.item():.2f}", fill="lime")

    return img_draw, f"Total Persons Detected: {count}"

# Gradio Interface: one uploaded image in; annotated image plus a text
# summary out. Components are named first so the Interface call reads as a
# plain wiring step.
_image_input = gr.Image(type="pil")
_annotated_output = gr.Image(type="pil")
_summary_output = gr.Text()

_APP_TITLE = "Person Detection with DETR"
_APP_DESCRIPTION = "Uses DETR model to detect people (class 1 - COCO dataset). Note: not specialized for face detection."

iface = gr.Interface(
    fn=detect_faces,
    inputs=_image_input,
    outputs=[_annotated_output, _summary_output],
    title=_APP_TITLE,
    description=_APP_DESCRIPTION,
)

# Start the web UI.
iface.launch()