"""Gradio demo that detects people in an image with the DETR object detector."""

from typing import Optional, Tuple

import gradio as gr
import torch
from PIL import Image, ImageDraw
from transformers import DetrForObjectDetection, DetrImageProcessor

# Load model and processor once at import time so every request reuses them.
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

# COCO class ID for 'person'. The name is historical: the model detects whole
# persons, not faces.
FACE_CLASS_INDEX = 1

# Minimum confidence a detection needs to be kept by post-processing.
SCORE_THRESHOLD = 0.8

# Vertical offset (in pixels) of the score label above its bounding box.
_LABEL_OFFSET = 10


def detect_faces(img: Optional[Image.Image]) -> Tuple[Optional[Image.Image], str]:
    """Draw a box around every detected person and report how many were found.

    Args:
        img: Input image. ``None`` is accepted because the UI can invoke the
            callback without an uploaded image.

    Returns:
        A pair ``(annotated_image, summary)``. ``annotated_image`` is an RGB
        copy of ``img`` with one lime rectangle and confidence score per
        detected person; it is ``None`` when no image was supplied.
        ``summary`` is a human-readable count of the detections.
    """
    if img is None:
        # Avoid an AttributeError when the callback fires without an image.
        return None, "No image provided."

    # The processor is fed three-channel input; normalise palette, grayscale
    # and alpha images up front instead of passing them through unchanged.
    rgb = img.convert("RGB")

    # Draw on a separate copy so the caller's image is never modified.
    img_draw = rgb.copy()
    draw = ImageDraw.Draw(img_draw)

    # Preprocess and predict. Gradients are never needed for inference, so
    # disable autograd to avoid building the graph.
    inputs = processor(images=rgb, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports (width, height); post-processing expects (height, width).
    target_sizes = torch.tensor([rgb.size[::-1]])
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=SCORE_THRESHOLD
    )[0]

    count = 0
    for score, label, box in zip(
        results["scores"], results["labels"], results["boxes"]
    ):
        if label.item() != FACE_CLASS_INDEX:
            continue
        count += 1
        box = [round(coord, 2) for coord in box.tolist()]
        draw.rectangle(box, outline="lime", width=3)
        # Clamp the label so it stays visible for boxes touching the top edge.
        label_y = max(box[1] - _LABEL_OFFSET, 0)
        draw.text((box[0], label_y), f"{score.item():.2f}", fill="lime")

    return img_draw, f"Total Persons Detected: {count}"


# Gradio Interface
iface = gr.Interface(
    fn=detect_faces,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Image(type="pil"), gr.Text()],
    title="Person Detection with DETR",
    description="Uses DETR model to detect people (class 1 - COCO dataset). Note: not specialized for face detection.",
)

if __name__ == "__main__":
    # Only start the web server when executed as a script, so importing this
    # module (e.g. to reuse detect_faces) does not block on a running server.
    iface.launch()