import gradio as gr
from transformers import pipeline
from PIL import Image, ImageDraw, ImageFont
import torch
import spaces
import numpy as np

# Models optimized for real-time use. Maps the label offered in the UI
# dropdown to the model id handed to `pipeline(...)` in load_detector.
REALTIME_MODELS = {
    "YOLOS (tiny-sized) model": "hustvl/yolos-tiny",
    "RT-DETR": "PekingU/rtdetr_r18vd"
}

# Global variables: a single-entry cache maintained by load_detector.
# current_detector holds the most recently built pipeline (None until the
# first load); current_model_name is the REALTIME_MODELS key it was built for.
current_detector = None
current_model_name = None

@spaces.GPU
def load_detector(model_name):
    """Return the object-detection pipeline for ``model_name``.

    A single pipeline is kept in the module-level ``current_detector`` /
    ``current_model_name`` pair. It is rebuilt only when ``model_name``
    differs from the name recorded for the cached pipeline.

    Args:
        model_name: A key of ``REALTIME_MODELS``.

    Returns:
        The cached or freshly built pipeline.

    Raises:
        KeyError: If ``model_name`` is not a key of ``REALTIME_MODELS``.
    """
    global current_detector, current_model_name

    # Cache hit: the requested model is the one already loaded.
    if current_model_name == model_name:
        return current_detector

    print(f"🔄 Chargement du modèle: {model_name}")
    checkpoint = REALTIME_MODELS[model_name]
    # Device 0 when CUDA is available, otherwise -1 (CPU).
    device_index = 0 if torch.cuda.is_available() else -1
    current_detector = pipeline(
        "object-detection",
        model=checkpoint,
        verbose=False,
        device=device_index,
    )
    # Record the name only after the pipeline was built successfully.
    current_model_name = model_name
    print(f"✅ Modèle chargé: {model_name}")
    return current_detector

def _waiting_frame(width=640, height=480):
    """Build the placeholder frame returned while no webcam image is available."""
    frame = Image.new('RGB', (width, height), color='#1f2937')
    draw = ImageDraw.Draw(frame)
    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 32)
    except (OSError, ImportError):
        # Font file missing/unreadable or FreeType unavailable: use the
        # built-in Pillow font instead.
        font = ImageFont.load_default()

    text = "🎥 En attente de la webcam..."
    try:
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        # Center the rendered text box, accounting for its own offset.
        x = (width - (right - left)) // 2 - left
        y = (height - (bottom - top)) // 2 - top
        draw.text((x, y), text, fill='white', font=font)
    except (UnicodeEncodeError, ValueError) as err:
        # NOTE(review): presumably only reachable with the fallback bitmap
        # font (which may not support the emoji or textbbox) — confirm.
        # The text is decorative, so return the plain frame rather than fail.
        print(f"⚠️ Texte d'attente non dessiné: {err}")
    return frame


def _fit_within(image, max_side):
    """Downscale ``image`` so its longest side is at most ``max_side``.

    Returns a ``(image, ratio)`` pair where ``ratio`` is the applied scale
    factor (1.0 when the image was already small enough).
    """
    width, height = image.size
    longest = max(width, height)
    if longest <= max_side:
        return image, 1.0
    ratio = max_side / longest
    # Clamp to 1 pixel so extreme aspect ratios never yield a zero-sized side.
    new_size = (max(1, int(width * ratio)), max(1, int(height * ratio)))
    return image.resize(new_size), ratio


@spaces.GPU
def process_webcam(image, model_choice, confidence_threshold):
    """Run object detection on one webcam frame and return the annotated frame.

    Args:
        image: The incoming frame as a PIL image or a NumPy array, or None
            when no frame is available yet.
        model_choice: Key of ``REALTIME_MODELS`` selecting the detector.
        confidence_threshold: Minimum score a detection needs to be drawn.

    Returns:
        A placeholder image when ``image`` is None, the annotated copy of the
        frame on success, or the unmodified input ``image`` if anything fails
        during processing (the error is printed with its traceback).
    """
    print(f"🎥 Frame reçue - Type: {type(image)}, Shape: {getattr(image, 'size', 'N/A')}")

    if image is None:
        print("❌ Image None reçue - webcam pas encore initialisée")
        return _waiting_frame()

    try:
        # Make sure we work on a PIL image.
        pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image
        print(f"📏 Taille image: {pil_image.size}")

        detector = load_detector(model_choice)

        # Detect on a downscaled copy for speed; boxes are scaled back below.
        resized_image, ratio = _fit_within(pil_image, 640)

        print(f"🔍 Lancement détection avec seuil: {confidence_threshold}")

        # Pass the threshold explicitly: without it the pipeline applies its
        # own default cut-off, so lower slider values would have no effect.
        detections = detector(resized_image, threshold=confidence_threshold)
        print(f"🎯 Détections brutes: {len(detections)}")

        # Defensive re-filter; also keeps the log output meaningful.
        filtered_detections = [
            det for det in detections
            if det.get('score', 0) >= confidence_threshold
        ]

        print(f"✅ Détections filtrées: {len(filtered_detections)}")
        for det in filtered_detections:
            print(f"   - {det['label']}: {det['score']:.3f}")

        # Map box coordinates from the resized image back to the original.
        if ratio != 1.0:
            for det in filtered_detections:
                box = det['box']
                for key in ('xmin', 'ymin', 'xmax', 'ymax'):
                    box[key] = int(box[key] / ratio)

        result_image = draw_detections(pil_image, filtered_detections)

        print("🎨 Image annotée créée")
        return result_image

    except Exception as e:
        # Top-level boundary for a streaming callback: log and hand back the
        # raw frame so the stream keeps running.
        print(f"❌ Erreur dans process_webcam: {e}")
        import traceback
        traceback.print_exc()
        return image

def draw_detections(image, detections):
    """Return a copy of ``image`` with one box and label drawn per detection.

    Args:
        image: PIL image to annotate; it is copied, not modified.
        detections: Iterable of dicts with a ``'box'`` dict (``xmin``,
            ``ymin``, ``xmax``, ``ymax``), a ``'label'`` and a ``'score'``.

    Returns:
        The annotated copy. With no detections it is an unmodified copy.
    """
    img_copy = image.copy()
    draw = ImageDraw.Draw(img_copy)

    # High-visibility colors, cycled through by detection index.
    colors = ["#FF0000", "#00FF00", "#0000FF", "#FFFF00", "#FF00FF"]

    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 24)
    except (OSError, ImportError):
        # Font file missing/unreadable or FreeType unavailable: use the
        # built-in Pillow font instead.
        font = ImageFont.load_default()

    for i, detection in enumerate(detections):
        box = detection['box']
        label = detection['label']
        score = detection['score']

        x1, y1 = box['xmin'], box['ymin']
        x2, y2 = box['xmax'], box['ymax']

        color = colors[i % len(colors)]

        # Thick outline around the detected object.
        draw.rectangle([x1, y1, x2, y2], outline=color, width=5)

        # The label normally sits 35 px above the box. Clamp the anchor to
        # the frame so boxes touching the top or left edge keep a visible
        # label instead of having it drawn off-canvas.
        label_x = max(x1, 0)
        label_y = max(y1 - 35, 0)

        text = f"{label} {score:.2f}"
        bbox = draw.textbbox((label_x, label_y), text, font=font)
        # Filled background, padded by 5 px, behind the text.
        draw.rectangle([bbox[0]-5, bbox[1]-5, bbox[2]+5, bbox[3]+5], fill=color)
        draw.text((label_x, label_y), text, fill="white", font=font)

    return img_copy

# Interface wiring: a streaming webcam input, a model selector and a
# confidence slider feed process_webcam; its result is shown as an image.
webcam_input = gr.Image(
    sources=["webcam"],
    streaming=True,
    type="pil",
    show_download_button=False,
    interactive=True,
)
model_selector = gr.Dropdown(
    choices=list(REALTIME_MODELS.keys()),
    value="YOLOS (tiny-sized) model",
    label="Modèle",
)
confidence_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.1,
    step=0.1,
    label="Confiance",
)
annotated_output = gr.Image(
    streaming=True,
    type="pil",
    show_download_button=False,
)

demo = gr.Interface(
    fn=process_webcam,
    inputs=[webcam_input, model_selector, confidence_slider],
    outputs=annotated_output,
    live=True,
    title="🎥 Détection Live",
    description="🚨 IMPORTANT: Activez la webcam en cliquant sur l'icône caméra dans la zone Image",
    flagging_mode="never",
)

# Start the app only when executed as a script.
if __name__ == "__main__":
    demo.launch()