"""Live webcam object detection demo built with Gradio and Transformers."""

import traceback

import gradio as gr
from transformers import pipeline
from PIL import Image, ImageDraw, ImageFont
import torch
import spaces
import numpy as np

# Models selectable in the UI, mapped to their Hugging Face Hub ids.
REALTIME_MODELS = {
    "YOLOS (tiny-sized) model": "hustvl/yolos-tiny",
    "RT-DETR": "PekingU/rtdetr_r18vd"
}

# Module-level cache of the most recently loaded pipeline and its UI name.
current_detector = None
current_model_name = None

# Font used for all overlay text; a built-in font is used when it is missing.
FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"

# Longest image side (in pixels) sent to the detector.
MAX_INFERENCE_SIZE = 640

# Size of the placeholder shown while no webcam frame is available.
WAITING_IMAGE_SIZE = (640, 480)

# Vertical distance between a box's top edge and its label origin.
LABEL_OFFSET = 35

# Padding of the filled rectangle drawn behind each label.
LABEL_PADDING = 5

# Outline colors, cycled by detection index.
BOX_COLORS = ["#FF0000", "#00FF00", "#0000FF", "#FFFF00", "#FF00FF"]


def _load_font(size):
    """Return the overlay font at ``size``, or Pillow's default font.

    Only ``OSError`` (raised when the font file cannot be read) triggers
    the fallback, so unrelated errors and interrupts are not swallowed.
    """
    try:
        return ImageFont.truetype(FONT_PATH, size)
    except OSError:
        return ImageFont.load_default()


def _waiting_image():
    """Build the placeholder image returned while no frame is available."""
    width, height = WAITING_IMAGE_SIZE
    waiting_img = Image.new('RGB', (width, height), color='#1f2937')
    draw = ImageDraw.Draw(waiting_img)
    font = _load_font(32)
    text = "🎥 En attente de la webcam..."
    bbox = draw.textbbox((0, 0), text, font=font)
    # Center the text using the right/bottom edges of its bounding box.
    x = (width - bbox[2]) // 2
    y = (height - bbox[3]) // 2
    draw.text((x, y), text, fill='white', font=font)
    return waiting_img


def _resize_for_inference(pil_image, max_size=MAX_INFERENCE_SIZE):
    """Downscale ``pil_image`` so its longest side is at most ``max_size``.

    Returns:
        A ``(image, ratio)`` tuple. ``ratio`` is the applied scale factor
        and is ``1.0`` when the image is returned unchanged.
    """
    width, height = pil_image.size
    longest = max(width, height)
    if longest <= max_size:
        return pil_image, 1.0
    ratio = max_size / longest
    # Clamp to 1 so an extreme aspect ratio cannot yield a zero-sized image.
    new_size = (max(1, int(width * ratio)), max(1, int(height * ratio)))
    return pil_image.resize(new_size), ratio


def _scale_detection(det, ratio):
    """Return a copy of ``det`` with its box mapped back to original size."""
    box = det['box']
    scaled_box = {
        key: int(box[key] / ratio)
        for key in ('xmin', 'ymin', 'xmax', 'ymax')
    }
    return {**det, 'box': scaled_box}


@spaces.GPU
def load_detector(model_name):
    """Return the detection pipeline for ``model_name``.

    The pipeline is rebuilt only when ``model_name`` differs from the one
    recorded in the module-level cache.

    Args:
        model_name: A key of ``REALTIME_MODELS``.

    Returns:
        The object-detection pipeline for the requested model.

    Raises:
        KeyError: If ``model_name`` is not a key of ``REALTIME_MODELS``.
    """
    global current_detector, current_model_name

    if current_model_name != model_name:
        print(f"🔄 Chargement du modèle: {model_name}")
        model_id = REALTIME_MODELS[model_name]
        current_detector = pipeline(
            "object-detection",
            model=model_id,
            verbose=False,
            device=0 if torch.cuda.is_available() else -1
        )
        current_model_name = model_name
        print(f"✅ Modèle chargé: {model_name}")

    return current_detector


@spaces.GPU
def process_webcam(image, model_choice, confidence_threshold):
    """Run object detection on one webcam frame and return it annotated.

    Args:
        image: The frame as a PIL image or NumPy array, or ``None`` when
            no frame has been received yet.
        model_choice: A key of ``REALTIME_MODELS``.
        confidence_threshold: Minimum score a detection needs to be drawn.

    Returns:
        The annotated PIL image, a placeholder image when ``image`` is
        ``None``, or the unmodified input when processing fails.
    """
    print(f"🎥 Frame reçue - Type: {type(image)}, Shape: {getattr(image, 'size', 'N/A')}")

    if image is None:
        print("❌ Image None reçue - webcam pas encore initialisée")
        return _waiting_image()

    try:
        # Make sure we are working with a PIL image.
        if isinstance(image, np.ndarray):
            pil_image = Image.fromarray(image)
        else:
            pil_image = image

        print(f"📏 Taille image: {pil_image.size}")

        detector = load_detector(model_choice)

        # Detect on a downscaled copy for speed.
        resized_image, ratio = _resize_for_inference(pil_image)

        print(f"🔍 Lancement détection avec seuil: {confidence_threshold}")

        # The pipeline applies its own default threshold (0.5) unless one is
        # given, which would silently discard everything below 0.5 and make
        # lower slider values ineffective.
        detections = detector(resized_image, threshold=confidence_threshold)

        print(f"🎯 Détections brutes: {len(detections)}")

        # Defensive filter; also tolerates entries without a score.
        filtered_detections = [
            det for det in detections
            if det.get('score', 0) >= confidence_threshold
        ]

        print(f"✅ Détections filtrées: {len(filtered_detections)}")

        for det in filtered_detections:
            print(f"  - {det['label']}: {det['score']:.3f}")

        # Map boxes back to the original resolution without mutating the
        # dictionaries returned by the pipeline.
        if ratio != 1.0:
            filtered_detections = [
                _scale_detection(det, ratio) for det in filtered_detections
            ]

        result_image = draw_detections(pil_image, filtered_detections)
        print(f"🎨 Image annotée créée")

        return result_image

    except Exception as e:
        # Top-level boundary of the streaming callback: log the failure and
        # keep the stream alive by returning the frame unannotated.
        print(f"❌ Erreur dans process_webcam: {e}")
        traceback.print_exc()
        return image


def draw_detections(image, detections):
    """Return a copy of ``image`` with boxes and labels drawn on it.

    Args:
        image: The PIL image to annotate; it is not modified.
        detections: Iterable of dicts with ``'box'`` (``xmin``, ``ymin``,
            ``xmax``, ``ymax``), ``'label'`` and ``'score'`` entries.

    Returns:
        The annotated copy.
    """
    img_copy = image.copy()
    draw = ImageDraw.Draw(img_copy)
    font = _load_font(24)

    for i, detection in enumerate(detections):
        box = detection['box']
        label = detection['label']
        score = detection['score']

        x1, y1 = box['xmin'], box['ymin']
        x2, y2 = box['xmax'], box['ymax']

        color = BOX_COLORS[i % len(BOX_COLORS)]

        # Thick outline so the box stays visible on busy frames.
        draw.rectangle([x1, y1, x2, y2], outline=color, width=5)

        # Label on a filled background; clamp the origin so labels of boxes
        # touching the top edge are not drawn outside the image.
        text = f"{label} {score:.2f}"
        text_origin = (x1, max(0, y1 - LABEL_OFFSET))
        bbox = draw.textbbox(text_origin, text, font=font)
        draw.rectangle(
            [
                bbox[0] - LABEL_PADDING,
                bbox[1] - LABEL_PADDING,
                bbox[2] + LABEL_PADDING,
                bbox[3] + LABEL_PADDING,
            ],
            fill=color,
        )
        draw.text(text_origin, text, fill="white", font=font)

    return img_copy


# Streaming interface: webcam frames in, annotated frames out.
demo = gr.Interface(
    fn=process_webcam,
    inputs=[
        gr.Image(
            sources=["webcam"],
            streaming=True,
            type="pil",
            show_download_button=False,
            interactive=True
        ),
        gr.Dropdown(
            choices=list(REALTIME_MODELS.keys()),
            value="YOLOS (tiny-sized) model",
            label="Modèle"
        ),
        gr.Slider(0.1, 1.0, 0.1, step=0.1, label="Confiance")
    ],
    outputs=gr.Image(streaming=True, type="pil", show_download_button=False),
    live=True,
    title="🎥 Détection Live",
    description="🚨 IMPORTANT: Activez la webcam en cliquant sur l'icône caméra dans la zone Image",
    flagging_mode="never"
)

if __name__ == "__main__":
    demo.launch()