import gradio as gr
from transformers import pipeline
from PIL import Image, ImageDraw, ImageFont
import torch
import spaces
import numpy as np

# Detection checkpoints offered in the UI, chosen for real-time use.
# Keys are the labels shown to the user; values are the model identifiers
# handed to the `transformers` pipeline. Insertion order is the display order.
REALTIME_MODELS = {
    "YOLOS Tiny (ultra-rapide)": "hustvl/yolos-tiny",
    "DETR ResNet-50": "facebook/detr-resnet-50",
    "YOLOS Small": "hustvl/yolos-small",
    "Conditional DETR": "microsoft/conditional-detr-resnet-50",
}

# Module-level cache of the most recently loaded detector.
current_model_name = None  # label of the cached model, None until first load
current_detector = None  # the cached pipeline object, None until first load

@spaces.GPU
def load_detector(model_name):
    """Return the object-detection pipeline for ``model_name``.

    The last pipeline built is kept in the module-level globals
    ``current_detector`` / ``current_model_name`` and is reused as long as
    the same name is requested again; a different name triggers a reload.

    Args:
        model_name: A key of ``REALTIME_MODELS``.

    Returns:
        The cached or freshly built pipeline object.

    Raises:
        KeyError: If ``model_name`` is not a key of ``REALTIME_MODELS``.
    """
    global current_detector, current_model_name

    # Cache hit: the requested model is the one already loaded.
    if current_model_name == model_name:
        return current_detector

    print(f"🔄 Chargement du modèle: {model_name}")
    checkpoint = REALTIME_MODELS[model_name]
    # Device 0 is the first CUDA GPU; -1 selects the CPU.
    device_index = 0 if torch.cuda.is_available() else -1
    current_detector = pipeline(
        "object-detection",
        model=checkpoint,
        device=device_index,
    )
    # Record the name only after the pipeline was built, so a failed load
    # is retried on the next call instead of being treated as cached.
    current_model_name = model_name
    print(f"✅ Modèle chargé: {model_name}")

    return current_detector

@spaces.GPU
def detect_objects_live(image, model_choice, confidence_threshold):
    """Detect objects in one webcam frame and return the annotated frame.

    The frame is downscaled so that its longest side is at most 480 px
    before inference, and the detected boxes are mapped back onto the
    full-size frame before drawing.

    Args:
        image: The frame, as a NumPy array or a PIL image. ``None`` when no
            frame is available.
        model_choice: A key of ``REALTIME_MODELS`` selecting the detector.
        confidence_threshold: Minimum score a detection needs to be kept.

    Returns:
        ``None`` if ``image`` is ``None``; otherwise the frame as a PIL
        image with boxes and labels drawn on it. If anything fails, the
        error is printed and the input ``image`` is returned unchanged so
        the stream keeps running.
    """
    if image is None:
        return None

    try:
        detector = load_detector(model_choice)

        # Work on a PIL image regardless of what the caller handed over.
        if isinstance(image, np.ndarray):
            pil_image = Image.fromarray(image)
        else:
            pil_image = image

        # Downscale large frames: smaller inputs mean faster inference.
        max_size = 480
        width, height = pil_image.size
        longest_side = max(width, height)

        if longest_side > max_size:
            ratio = max_size / longest_side
            # Clamp to 1 px so an extreme aspect ratio cannot yield a
            # zero-length side, which resize() rejects.
            new_width = max(1, int(width * ratio))
            new_height = max(1, int(height * ratio))
            resized_image = pil_image.resize((new_width, new_height))
            # Use the scale that was actually applied on each axis; the
            # integer truncation above makes it differ slightly from
            # 1 / ratio.
            scale_x = width / new_width
            scale_y = height / new_height
        else:
            resized_image = pil_image
            scale_x = scale_y = 1.0

        # The pipeline applies its own score cutoff (0.5 by default) before
        # returning results, so the requested threshold must be passed in;
        # filtering afterwards cannot bring back detections below 0.5.
        filtered_detections = detector(
            resized_image, threshold=confidence_threshold
        )

        print(f"🎯 Détections trouvées: {len(filtered_detections)}")

        # Map the boxes from the resized frame back to the original frame.
        if resized_image is not pil_image:
            for det in filtered_detections:
                box = det['box']
                box['xmin'] = int(box['xmin'] * scale_x)
                box['ymin'] = int(box['ymin'] * scale_y)
                box['xmax'] = int(box['xmax'] * scale_x)
                box['ymax'] = int(box['ymax'] * scale_y)

        return draw_detections(pil_image, filtered_detections)

    except Exception as e:
        # Deliberately broad: this is the per-frame boundary of the stream,
        # and one bad frame should not stop the feed.
        print(f"❌ Erreur: {e}")
        return image

# Bright outline colours, cycled through in detection order.
_BOX_COLORS = ("#FF0000", "#00FF00", "#0000FF", "#FFFF00", "#FF00FF", "#00FFFF")
# Thick outline so boxes stay visible on a busy frame.
_BOX_OUTLINE_WIDTH = 4
_LABEL_FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
_LABEL_FONT_SIZE = 20
# Vertical room reserved above a box for its label.
_LABEL_HEIGHT = 30
# Padding of the coloured background around the label text.
_LABEL_PADDING = 2

# Label font, loaded on first use and then reused for every frame.
_label_font = None


def _get_label_font():
    """Return the label font, loading it once and caching it."""
    global _label_font
    if _label_font is None:
        try:
            _label_font = ImageFont.truetype(_LABEL_FONT_PATH, _LABEL_FONT_SIZE)
        except (OSError, ImportError):
            # Font file missing/unreadable, or Pillow built without
            # FreeType support: fall back to the built-in font.
            _label_font = ImageFont.load_default()
    return _label_font


def draw_detections(image, detections):
    """Draw detection boxes and their labels on a copy of ``image``.

    Args:
        image: The PIL image to annotate. It is not modified.
        detections: Sequence of dicts with the keys ``'box'`` (itself a
            dict with ``'xmin'``, ``'ymin'``, ``'xmax'``, ``'ymax'``),
            ``'label'`` and ``'score'``.

    Returns:
        ``image`` itself when ``detections`` is empty or ``None``;
        otherwise an annotated copy.
    """
    if not detections:
        return image

    # Draw on a copy so the caller's image stays untouched.
    img_copy = image.copy()
    draw = ImageDraw.Draw(img_copy)
    font = _get_label_font()

    for i, detection in enumerate(detections):
        box = detection['box']
        x1, y1 = box['xmin'], box['ymin']
        x2, y2 = box['xmax'], box['ymax']

        color = _BOX_COLORS[i % len(_BOX_COLORS)]
        draw.rectangle([x1, y1, x2, y2], outline=color, width=_BOX_OUTLINE_WIDTH)

        text = f"{detection['label']} ({detection['score']:.2f})"

        # The label normally sits just above the box. When the box touches
        # the top edge there is no room there, so put the label inside the
        # box, below the outline, instead of drawing it off-canvas.
        if y1 >= _LABEL_HEIGHT:
            text_y = y1 - _LABEL_HEIGHT
        else:
            text_y = max(y1, 0) + _BOX_OUTLINE_WIDTH
        text_origin = (max(x1, 0), text_y)

        # Coloured background behind the text for readability.
        left, top, right, bottom = draw.textbbox(text_origin, text, font=font)
        draw.rectangle(
            [
                left - _LABEL_PADDING,
                top - _LABEL_PADDING,
                right + _LABEL_PADDING,
                bottom + _LABEL_PADDING,
            ],
            fill=color,
        )
        draw.text(text_origin, text, fill="white", font=font)

    return img_copy

# Simplified Gradio interface: a Blocks page holding a header, a row with the
# detection controls and an info panel, and a live Interface that feeds
# webcam frames to detect_objects_live.
with gr.Blocks(title="🎥 Détection Live", theme=gr.themes.Soft()) as demo:
    
    gr.Markdown("""
    # 🎥 Détection d'Objets en Temps Réel
    
    **Autorisez l'accès à votre webcam** et la détection se fera automatiquement !
    """)
    
    with gr.Row():
        with gr.Column():
            # Controls: model choice and score threshold. Both components are
            # passed to the Interface below as its 2nd and 3rd inputs.
            model_dropdown = gr.Dropdown(
                choices=list(REALTIME_MODELS.keys()),
                value="YOLOS Tiny (ultra-rapide)",
                label="🤖 Modèle de détection"
            )
            
            confidence_slider = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.5,
                step=0.1,
                label="🎯 Seuil de confiance minimum"
            )
        
        with gr.Column():
            # Static info panel shown next to the controls.
            gr.Markdown("""
            ### 📊 Info
            - **Streaming automatique** activé
            - **Détection en continu** sur chaque frame
            - **Ajustements en temps réel**
            """)
    
    # Main streaming interface. Inputs map positionally onto
    # detect_objects_live(image, model_choice, confidence_threshold).
    # NOTE(review): model_dropdown and confidence_slider were already created
    # inside the Row above; presumably Gradio moves them into this Interface's
    # layout — confirm the rendered page looks as intended.
    # NOTE(review): streaming=True on the output Image looks unnecessary; the
    # flag is documented for webcam inputs — confirm before relying on it.
    webcam_interface = gr.Interface(
        fn=detect_objects_live,
        inputs=[
            gr.Image(sources=["webcam"], streaming=True, label="📹 Webcam Live"),
            model_dropdown,
            confidence_slider
        ],
        outputs=gr.Image(streaming=True, label="🎯 Détection en Temps Réel"),
        live=True,
        allow_flagging="never",
        title=None,
        description="La détection se fait automatiquement sur chaque frame de la webcam"
    )

# Start the app only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()