Spaces:

pratikshahp
/

object-detection-app

Sleeping

File size: 1,518 Bytes

f9d3b76
 
fcb3849
f9d3b76
fcb3849
43f3f52
f9d3b76
 
 
43f3f52
f9d3b76
 
43f3f52
 
 
 
 
f9d3b76
eb9abdb
 
43f3f52
f9d3b76
43f3f52
eb9abdb
43f3f52
5f8adcf
43f3f52
 
f9d3b76
43f3f52
 
f9d3b76
43f3f52
f9d3b76
43f3f52
f9d3b76
 
 
43f3f52
 
 
f9d3b76

from transformers import DetrImageProcessor, DetrForObjectDetection
from PIL import Image, ImageDraw
import torch
import gradio as gr

# Label ID kept by detect_faces. The name is historical: per the COCO label
# map used here, ID 1 is 'person', so this selects people rather than faces.
FACE_CLASS_INDEX = 1

# Single source of truth for the pretrained checkpoint, so the image
# processor and the detection model are always loaded from the same weights.
_DETR_CHECKPOINT = "facebook/detr-resnet-50"

# Loaded once at import time and shared by every request.
processor = DetrImageProcessor.from_pretrained(_DETR_CHECKPOINT)
model = DetrForObjectDetection.from_pretrained(_DETR_CHECKPOINT)

def detect_faces(img: "Image.Image | None"):
    """Detect people in *img* and return an annotated copy plus a summary.

    Despite the name, this keeps the detections whose label equals
    ``FACE_CLASS_INDEX`` (marked in this module as the 'person' class), not
    faces.

    Args:
        img: The PIL image to analyse, or ``None`` when no image was
            supplied (e.g. the form was submitted empty).

    Returns:
        A ``(annotated_image, summary)`` tuple. ``annotated_image`` is an RGB
        copy of the input with a lime rectangle and confidence score drawn
        for each kept detection, or ``None`` when *img* is ``None``.
        ``summary`` is a human-readable count of the kept detections.
    """
    # Guard against an empty submission instead of failing on img.copy().
    if img is None:
        return None, "No image provided."

    # Normalise to 3-channel RGB so RGBA / greyscale / palette images are
    # handled the same way as ordinary photos; also lets the lime overlay
    # render in colour.
    rgb = img if img.mode == "RGB" else img.convert("RGB")

    # Draw on a copy so the caller's image is never mutated.
    img_draw = rgb.copy()
    draw = ImageDraw.Draw(img_draw)

    # Preprocess and predict. Inference only, so skip autograd bookkeeping.
    inputs = processor(images=rgb, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports (width, height); reverse it to pass (height, width) so the
    # boxes are rescaled to the original image's pixel coordinates.
    target_sizes = torch.tensor([rgb.size[::-1]])
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=0.8
    )[0]

    count = 0
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        if label.item() != FACE_CLASS_INDEX:
            continue
        count += 1
        x0, y0, x1, y1 = (round(v, 2) for v in box.tolist())
        draw.rectangle((x0, y0, x1, y1), outline="lime", width=3)
        # Place the score just above the box, clamped so it stays on-canvas
        # when the box touches the top edge.
        draw.text((x0, max(y0 - 10, 0)), f"{score.item():.2f}", fill="lime")

    return img_draw, f"Total Persons Detected: {count}"

# Web UI: one uploaded image in; the annotated image and a one-line summary out.
_image_input = gr.Image(type="pil")
_result_outputs = [gr.Image(type="pil"), gr.Text()]
_app_title = "Person Detection with DETR"
_app_description = "Uses DETR model to detect people (class 1 - COCO dataset). Note: not specialized for face detection."

iface = gr.Interface(
    fn=detect_faces,
    inputs=_image_input,
    outputs=_result_outputs,
    title=_app_title,
    description=_app_description,
)

# Start serving the interface as soon as the module is executed.
iface.launch()