# hoololi's picture
# Upload 2 files
# d26af75 verified
import gradio as gr
from transformers import pipeline
from PIL import Image, ImageDraw, ImageFont
import torch
import spaces
import numpy as np
# Models suited to real-time use: display name shown in the UI -> Hugging Face model id.
REALTIME_MODELS = {
    "YOLOS (tiny-sized) model": "hustvl/yolos-tiny",
    "RT-DETR": "PekingU/rtdetr_r18vd",
}

# Module-level cache: the currently loaded pipeline and the display name it was
# loaded for. Both stay None until a detector has been loaded.
current_detector = current_model_name = None
@spaces.GPU
def load_detector(model_name):
    """Return the object-detection pipeline for ``model_name``, with caching.

    The pipeline is stored in the module globals ``current_detector`` /
    ``current_model_name`` and is rebuilt only when a different model name
    is requested.

    Args:
        model_name: A key of ``REALTIME_MODELS``.

    Returns:
        The cached or newly created pipeline object.

    Raises:
        KeyError: If ``model_name`` is not a key of ``REALTIME_MODELS``.
    """
    global current_detector, current_model_name

    # Cache hit: the requested model is the one already loaded.
    if current_model_name == model_name:
        return current_detector

    print(f"🔄 Chargement du modèle: {model_name}")
    hub_id = REALTIME_MODELS[model_name]
    # Device 0 is the first CUDA device; -1 selects the CPU.
    device_index = 0 if torch.cuda.is_available() else -1
    current_detector = pipeline(
        "object-detection",
        model=hub_id,
        verbose=False,
        device=device_index,
    )
    # Record the name only after the pipeline was built, so a failed load
    # is retried on the next call.
    current_model_name = model_name
    print(f"✅ Modèle chargé: {model_name}")
    return current_detector
@spaces.GPU
def process_webcam(image, model_choice, confidence_threshold):
    """Run object detection on one webcam frame and return the annotated frame.

    Args:
        image: The frame as a PIL image or a NumPy array, or ``None`` when no
            frame has been received yet.
        model_choice: Key of ``REALTIME_MODELS`` selecting the detector.
        confidence_threshold: Minimum score a detection needs to be kept.

    Returns:
        A PIL image with the detections drawn on it. When ``image`` is
        ``None`` a 640x480 placeholder image is returned instead. If any
        step of the detection fails, the error is printed and the input
        ``image`` is returned unchanged.
    """
    print(f"🎥 Frame reçue - Type: {type(image)}, Shape: {getattr(image, 'size', 'N/A')}")

    if image is None:
        print("❌ Image None reçue - webcam pas encore initialisée")
        # Build a placeholder frame with a centered waiting message.
        waiting_img = Image.new('RGB', (640, 480), color='#1f2937')
        draw = ImageDraw.Draw(waiting_img)
        try:
            font = ImageFont.truetype(
                "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 32
            )
        except (OSError, ImportError):
            # Font file missing/unreadable or FreeType unavailable.
            font = ImageFont.load_default()
        text = "🎥 En attente de la webcam..."
        bbox = draw.textbbox((0, 0), text, font=font)
        x = (640 - bbox[2]) // 2
        y = (480 - bbox[3]) // 2
        draw.text((x, y), text, fill='white', font=font)
        return waiting_img

    try:
        # Make sure we work on a PIL image.
        if isinstance(image, np.ndarray):
            pil_image = Image.fromarray(image)
        else:
            pil_image = image
        print(f"📏 Taille image: {pil_image.size}")

        detector = load_detector(model_choice)

        # Downscale large frames so the longest side is at most max_size;
        # detection runs on the smaller copy for speed.
        max_size = 640
        original_size = pil_image.size
        if max(original_size) > max_size:
            ratio = max_size / max(original_size)
            new_size = (int(original_size[0] * ratio), int(original_size[1] * ratio))
            resized_image = pil_image.resize(new_size)
        else:
            resized_image = pil_image
            ratio = 1.0

        print(f"🔍 Lancement détection avec seuil: {confidence_threshold}")
        # Pass the threshold to the pipeline explicitly: its built-in default
        # is 0.5, which would otherwise discard every detection below 0.5
        # before the filter below ever sees it, making lower slider values
        # ineffective.
        detections = detector(resized_image, threshold=confidence_threshold)
        print(f"🎯 Détections brutes: {len(detections)}")

        # Keep only detections at or above the requested confidence.
        filtered_detections = [
            det for det in detections
            if det.get('score', 0) >= confidence_threshold
        ]
        print(f"✅ Détections filtrées: {len(filtered_detections)}")
        for det in filtered_detections:
            print(f" - {det['label']}: {det['score']:.3f}")

        # Map box coordinates from the resized frame back to the original.
        if ratio != 1.0:
            for det in filtered_detections:
                box = det['box']
                for key in ('xmin', 'ymin', 'xmax', 'ymax'):
                    box[key] = int(box[key] / ratio)

        result_image = draw_detections(pil_image, filtered_detections)
        print(f"🎨 Image annotée créée")
        return result_image
    except Exception as e:
        # Top-level boundary for the streaming callback: log the failure and
        # fall back to the raw frame so the stream keeps running.
        print(f"❌ Erreur dans process_webcam: {e}")
        import traceback
        traceback.print_exc()
        return image
def draw_detections(image, detections):
    """Draw bounding boxes and labels for ``detections`` on a copy of ``image``.

    Args:
        image: PIL image to annotate; it is not modified.
        detections: Iterable of dicts with keys ``'box'`` (a dict with
            ``'xmin'``, ``'ymin'``, ``'xmax'``, ``'ymax'``), ``'label'`` and
            ``'score'``.

    Returns:
        A new PIL image with one colored rectangle and one
        ``"<label> <score>"`` caption per detection.
    """
    img_copy = image.copy()
    draw = ImageDraw.Draw(img_copy)

    # High-contrast colors, cycled through by detection index.
    colors = ["#FF0000", "#00FF00", "#0000FF", "#FFFF00", "#FF00FF"]

    try:
        font = ImageFont.truetype(
            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 24
        )
    except (OSError, ImportError):
        # Font file missing/unreadable or FreeType unavailable.
        font = ImageFont.load_default()

    for i, detection in enumerate(detections):
        box = detection['box']
        label = detection['label']
        score = detection['score']
        x1, y1 = box['xmin'], box['ymin']
        x2, y2 = box['xmax'], box['ymax']
        color = colors[i % len(colors)]

        # Thick outline so the box stays visible.
        draw.rectangle([x1, y1, x2, y2], outline=color, width=5)

        # Caption on a filled background. It normally sits 35 px above the
        # box; when the box is too close to the top edge for that, the
        # caption is placed just inside the box so it stays on the canvas.
        text = f"{label} {score:.2f}"
        text_y = y1 - 35 if y1 >= 40 else max(y1, 0) + 5
        bbox = draw.textbbox((x1, text_y), text, font=font)
        draw.rectangle([bbox[0]-5, bbox[1]-5, bbox[2]+5, bbox[3]+5], fill=color)
        draw.text((x1, text_y), text, fill="white", font=font)

    return img_copy
# Gradio interface: a streaming webcam input, a model selector and a confidence
# slider feed process_webcam; the annotated frame is shown as a streaming image.
webcam_input = gr.Image(
    sources=["webcam"],
    streaming=True,
    type="pil",
    show_download_button=False,
    interactive=True,
)
model_selector = gr.Dropdown(
    choices=list(REALTIME_MODELS.keys()),
    value="YOLOS (tiny-sized) model",
    label="Modèle",
)
confidence_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.1,
    step=0.1,
    label="Confiance",
)
annotated_output = gr.Image(streaming=True, type="pil", show_download_button=False)

demo = gr.Interface(
    fn=process_webcam,
    inputs=[webcam_input, model_selector, confidence_slider],
    outputs=annotated_output,
    live=True,
    title="🎥 Détection Live",
    description="🚨 IMPORTANT: Activez la webcam en cliquant sur l'icône caméra dans la zone Image",
    flagging_mode="never",
)

# Start the app only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()