import cv2
import torch
import numpy as np
from PIL import Image
import torchvision.transforms as transforms
from ultralytics import YOLO
import os
import tempfile
import threading
from flask import Flask, request, jsonify
import gradio as gr
# Initialize Flask app and Gradio interface
app = Flask(__name__)
# Global variable to store detection history
detection_history = []
# Emotion labels
emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
# Load models (cache in Hugging Face Space)
def load_models():
    # Face detection model
    face_model = YOLO('yolov8n-face.pt')

    # Emotion model (simplified version of your CNN)
    class EmotionCNN(torch.nn.Module):
        def __init__(self, num_classes=7):
            super().__init__()
            self.features = torch.nn.Sequential(
                torch.nn.Conv2d(1, 64, 3, padding=1),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(2),
                torch.nn.Conv2d(64, 128, 3, padding=1),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(2),
                torch.nn.Conv2d(128, 256, 3, padding=1),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(2)
            )
            self.classifier = torch.nn.Sequential(
                torch.nn.Dropout(0.5),
                # 48x48 input -> 6x6 feature map after three 2x2 poolings
                torch.nn.Linear(256 * 6 * 6, 1024),
                torch.nn.ReLU(),
                torch.nn.Dropout(0.5),
                torch.nn.Linear(1024, num_classes)
            )

        def forward(self, x):
            x = self.features(x)
            x = torch.flatten(x, 1)
            x = self.classifier(x)
            return x

    emotion_model = EmotionCNN()
    # Load your pretrained weights here
    # emotion_model.load_state_dict(torch.load('emotion_model.pth'))
    emotion_model.eval()
    return face_model, emotion_model
face_model, emotion_model = load_models()
# Preprocessing function
def preprocess_face(face_img):
    transform = transforms.Compose([
        transforms.Resize((48, 48)),
        transforms.Grayscale(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])
    face_pil = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB))
    return transform(face_pil).unsqueeze(0)
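# Sanity note (added for illustration): a BGR crop of any size becomes a
# (1, 1, 48, 48) grayscale tensor here, which is the input shape EmotionCNN expects,
# e.g. preprocess_face(np.zeros((100, 80, 3), dtype=np.uint8)).shape == torch.Size([1, 1, 48, 48])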
# Process video function
def process_video(video_path):
    global detection_history
    detection_history = []

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return {"error": "Could not open video"}

    frame_count = 0
    fps = cap.get(cv2.CAP_PROP_FPS) or 30  # fall back if FPS metadata is missing
    frame_skip = max(1, int(fps / 3))  # Process ~3 frames per second

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_count += 1
        if frame_count % frame_skip != 0:
            continue

        # Face detection
        results = face_model(frame)
        for result in results:
            boxes = result.boxes
            if len(boxes) == 0:
                continue
            for box in boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
                # Clamp to the frame so the crop is never indexed with negatives
                x1, y1 = max(0, x1), max(0, y1)
                face_img = frame[y1:y2, x1:x2]
                if face_img.size == 0:
                    continue

                # Emotion prediction
                face_tensor = preprocess_face(face_img)
                with torch.no_grad():
                    output = emotion_model(face_tensor)
                    prob = torch.nn.functional.softmax(output, dim=1)[0]
                    pred_idx = torch.argmax(output).item()
                    confidence = prob[pred_idx].item()

                detection_history.append({
                    "frame": frame_count,
                    "time": frame_count / fps,
                    "emotion": emotions[pred_idx],
                    "confidence": confidence,
                    "box": [x1, y1, x2, y2]
                })

    cap.release()
    if not detection_history:
        return {"error": "No faces detected"}

    return {
        "detections": detection_history,
        "summary": {
            "total_frames": frame_count,
            "fps": fps,
            "duration": frame_count / fps
        }
    }
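# Illustrative helper (an addition, not used elsewhere in this app): pick the most
# frequently detected emotion from a process_video() result dict.
def dominant_emotion(result):
    from collections import Counter
    counts = Counter(d["emotion"] for d in result.get("detections", []))
    return counts.most_common(1)[0][0] if counts else None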
# Flask API endpoint
@app.route('/api/predict', methods=['POST'])
def api_predict():
    if 'file' not in request.files:
        return jsonify({"error": "No file provided"}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    # Save to temp file
    temp_path = os.path.join(tempfile.gettempdir(), file.filename)
    file.save(temp_path)
    try:
        # Process video
        result = process_video(temp_path)
    finally:
        # Clean up
        os.remove(temp_path)
    return jsonify(result)
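# Illustrative client for the endpoint above (an addition, not called by the app).
# Assumes the `requests` package is installed and that the Flask API is reachable
# at base_url (port 5000 matches the __main__ block below).
def example_api_call(video_path, base_url="http://localhost:5000"):
    import requests
    with open(video_path, "rb") as fh:
        resp = requests.post(f"{base_url}/api/predict", files={"file": fh})
    resp.raise_for_status()
    return resp.json()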
# Gradio interface
def gradio_predict(video):
    # gr.Video hands the function a filepath for the uploaded video
    if video is None:
        return None, {"error": "No video provided"}

    result = process_video(video)
    if "error" in result:
        return None, result

    # Create visualization: draw the last detection onto the first frame
    cap = cv2.VideoCapture(video)
    ret, frame = cap.read()
    cap.release()
    if ret:
        last_det = result["detections"][-1]
        x1, y1, x2, y2 = last_det["box"]
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, f"{last_det['emotion']} ({last_det['confidence']:.2f})",
                    (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        # Convert to RGB for Gradio
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        return frame, result

    return None, result
# Create Gradio interface
demo = gr.Interface(
    fn=gradio_predict,
    inputs=gr.Video(label="Upload Video"),
    outputs=[
        gr.Image(label="Detection Preview"),
        gr.JSON(label="Results")
    ],
    title="Video Emotion Detection",
    description="Upload a video to detect emotions in faces"
)
# gr.mount_gradio_app expects a FastAPI/Starlette app rather than a Flask one, so
# the Flask API is served from a background thread (port 5000, an arbitrary choice)
# and the Gradio UI is launched directly on the Space's port.
if __name__ == "__main__":
    threading.Thread(target=lambda: app.run(host="0.0.0.0", port=5000), daemon=True).start()
    demo.launch(server_name="0.0.0.0", server_port=7860)