Rivalcoder committed on
Commit 4168c5d · 1 Parent(s): 4dd5fdd
Files changed (3)
  1. app.py +120 -158
  2. models +0 -0
  3. requirements.txt +5 -5
app.py CHANGED
@@ -4,208 +4,170 @@ import numpy as np
 from PIL import Image
 import torchvision.transforms as transforms
 from ultralytics import YOLO
+import tempfile
 import time
 import os
-import tempfile
-from flask import Flask, request, jsonify
+import json
 import gradio as gr
+from fastapi import FastAPI, UploadFile, File, HTTPException
+import uvicorn
 
-# Initialize Flask app and Gradio interface
-app = Flask(__name__)
-
-# Global variable to store detection history
-detection_history = []
-
-# Emotion labels
-emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
-
-# Load models (cache in Hugging Face Space)
-def load_models():
-    # Face detection model
-    face_model = YOLO('yolov8n-face.pt')
-
-    # Emotion model (simplified version of your CNN)
-    class EmotionCNN(torch.nn.Module):
+# Initialize FastAPI
+app = FastAPI()
+
+# Global variable for face detections
+largest_face_detections = []
+
+# Load models
+yolo_model_path = "yolov8n-face.pt"
+emotion_model_path = "best_emotion_model.pth"
+
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+# Check if models exist
+if os.path.exists(yolo_model_path):
+    yolo_model = YOLO(yolo_model_path)
+else:
+    raise FileNotFoundError(f"YOLO model not found at {yolo_model_path}")
+
+if os.path.exists(emotion_model_path):
+    from torch import nn
+
+    class EmotionCNN(nn.Module):
         def __init__(self, num_classes=7):
-            super().__init__()
-            self.features = torch.nn.Sequential(
-                torch.nn.Conv2d(1, 64, 3, padding=1),
-                torch.nn.ReLU(),
-                torch.nn.MaxPool2d(2),
-                torch.nn.Conv2d(64, 128, 3, padding=1),
-                torch.nn.ReLU(),
-                torch.nn.MaxPool2d(2),
-                torch.nn.Conv2d(128, 256, 3, padding=1),
-                torch.nn.ReLU(),
-                torch.nn.MaxPool2d(2)
-            )
-            self.classifier = torch.nn.Sequential(
-                torch.nn.Dropout(0.5),
-                torch.nn.Linear(256*6*6, 1024),
-                torch.nn.ReLU(),
-                torch.nn.Dropout(0.5),
-                torch.nn.Linear(1024, num_classes)
-            )
-
+            super(EmotionCNN, self).__init__()
+            self.conv1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=3, padding=1),
+                                       nn.BatchNorm2d(64),
+                                       nn.ReLU(),
+                                       nn.MaxPool2d(kernel_size=2, stride=2))
+
+            self.fc = nn.Sequential(nn.Linear(64 * 24 * 24, 1024),
+                                    nn.ReLU(),
+                                    nn.Linear(1024, num_classes))
+
         def forward(self, x):
-            x = self.features(x)
-            x = torch.flatten(x, 1)
-            x = self.classifier(x)
+            x = self.conv1(x)
+            x = x.view(x.size(0), -1)
+            x = self.fc(x)
             return x
-
-    emotion_model = EmotionCNN()
-    # Load your pretrained weights here
-    # emotion_model.load_state_dict(torch.load('emotion_model.pth'))
+
+    emotion_model = EmotionCNN(num_classes=7)
+    checkpoint = torch.load(emotion_model_path, map_location=device)
+    emotion_model.load_state_dict(checkpoint['model_state_dict'])
+    emotion_model.to(device)
     emotion_model.eval()
-
-    return face_model, emotion_model
+else:
+    raise FileNotFoundError(f"Emotion model not found at {emotion_model_path}")
 
-face_model, emotion_model = load_models()
+# Emotion labels
+emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
 
-# Preprocessing function
 def preprocess_face(face_img):
+    """Preprocess face image for emotion detection"""
     transform = transforms.Compose([
         transforms.Resize((48, 48)),
-        transforms.Grayscale(),
         transforms.ToTensor(),
         transforms.Normalize(mean=[0.5], std=[0.5])
     ])
-    face_pil = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB))
-    return transform(face_pil).unsqueeze(0)
-
-# Process video function
-def process_video(video_path):
-    global detection_history
-    detection_history = []
+
+    face_img = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)).convert('L')
+    face_tensor = transform(face_img).unsqueeze(0)
+    return face_tensor
+
+def process_video(video_path: str):
+    """Process video and return emotion results"""
+    global largest_face_detections
+    largest_face_detections = []
+
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
-        return {"error": "Could not open video"}
-
-    frame_count = 0
-    fps = cap.get(cv2.CAP_PROP_FPS)
-    frame_skip = int(fps / 3)  # Process ~3 frames per second
-
+        return {"success": False, "message": "Could not open video file"}
+
     while True:
         ret, frame = cap.read()
         if not ret:
            break
-
-        frame_count += 1
-        if frame_count % frame_skip != 0:
-            continue
-
-        # Face detection
-        results = face_model(frame)
-
+
+        largest_face_area = 0
+        current_detection = None
+
+        results = yolo_model(frame, stream=True)
         for result in results:
             boxes = result.boxes
-            if len(boxes) == 0:
-                continue
-
             for box in boxes:
-                x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
+                x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
                face_img = frame[y1:y2, x1:x2]
 
                if face_img.size == 0:
                    continue
-
-                # Emotion prediction
-                face_tensor = preprocess_face(face_img)
+
+                face_tensor = preprocess_face(face_img).to(device)
+
                with torch.no_grad():
                    output = emotion_model(face_tensor)
-                    prob = torch.nn.functional.softmax(output, dim=1)[0]
-                    pred_idx = torch.argmax(output).item()
-                    confidence = prob[pred_idx].item()
-
-                detection_history.append({
-                    "frame": frame_count,
-                    "time": frame_count / fps,
-                    "emotion": emotions[pred_idx],
-                    "confidence": confidence,
-                    "box": [x1, y1, x2, y2]
-                })
-
+                    probabilities = torch.nn.functional.softmax(output, dim=1)
+                    emotion_idx = torch.argmax(output, dim=1).item()
+                    confidence = probabilities[0][emotion_idx].item()
+
+                emotion = emotions[emotion_idx]
+
+                if (x2 - x1) * (y2 - y1) > largest_face_area:
+                    largest_face_area = (x2 - x1) * (y2 - y1)
+                    current_detection = {"emotion": emotion, "confidence": confidence}
+
+        if current_detection:
+            largest_face_detections.append(current_detection)
+
     cap.release()
 
-    if not detection_history:
-        return {"error": "No faces detected"}
-
+    if not largest_face_detections:
+        return {"success": True, "message": "No faces detected", "results": []}
+
     return {
-        "detections": detection_history,
-        "summary": {
-            "total_frames": frame_count,
-            "fps": fps,
-            "duration": frame_count / fps
-        }
+        "success": True,
+        "message": "Video processed",
+        "results": largest_face_detections
    }
 
-# Flask API endpoint
-@app.route('/api/predict', methods=['POST'])
-def api_predict():
-    if 'file' not in request.files:
-        return jsonify({"error": "No file provided"}), 400
-
-    file = request.files['file']
-    if file.filename == '':
-        return jsonify({"error": "No selected file"}), 400
-
-    # Save to temp file
-    temp_path = os.path.join(tempfile.gettempdir(), file.filename)
-    file.save(temp_path)
-
-    # Process video
-    result = process_video(temp_path)
-
-    # Clean up
-    os.remove(temp_path)
-
-    return jsonify(result)
+@app.post("/api/video")
+async def handle_video(file: UploadFile = File(...)):
+    """API endpoint for video emotion detection"""
+    try:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
+            tmp.write(await file.read())
+            video_path = tmp.name
 
-# Gradio interface
-def gradio_predict(video):
-    temp_path = os.path.join(tempfile.gettempdir(), video.name)
-    with open(temp_path, 'wb') as f:
-        f.write(video.read())
-
-    result = process_video(temp_path)
-    os.remove(temp_path)
-
-    if "error" in result:
-        return result["error"]
-
-    # Create visualization
-    cap = cv2.VideoCapture(video.name)
-    ret, frame = cap.read()
-    cap.release()
+        result = process_video(video_path)
+        os.remove(video_path)
+        return result
+
+    except Exception as e:
+        return {"success": False, "message": "Error processing video", "error": str(e)}
+
+# Gradio UI
+def gradio_process(video):
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
+        tmp.write(video)
+        video_path = tmp.name
+
+    result = process_video(video_path)
+    os.remove(video_path)
+    return result
+
+with gr.Blocks() as demo:
+    gr.Markdown("# Video Emotion Analysis")
 
-    if ret:
-        # Draw last detection on frame
-        last_det = result["detections"][-1]
-        x1, y1, x2, y2 = last_det["box"]
-        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
-        cv2.putText(frame, f"{last_det['emotion']} ({last_det['confidence']:.2f})",
-                    (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
-
-        # Convert to RGB for Gradio
-        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        return frame, result
-    return result
+    with gr.Row():
+        with gr.Column():
+            video_input = gr.File(label="Upload a video", file_types=[".mp4"])
+            submit_btn = gr.Button("Analyze")
+
+        with gr.Column():
+            output = gr.JSON(label="Results")
+
+    submit_btn.click(fn=gradio_process, inputs=video_input, outputs=output)
 
-# Create Gradio interface
-demo = gr.Interface(
-    fn=gradio_predict,
-    inputs=gr.Video(label="Upload Video"),
-    outputs=[
-        gr.Image(label="Detection Preview"),
-        gr.JSON(label="Results")
-    ],
-    title="Video Emotion Detection",
-    description="Upload a video to detect emotions in faces"
-)
-
-# Mount Gradio app
 app = gr.mount_gradio_app(app, demo, path="/")
 
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860)
+    uvicorn.run(app, host="0.0.0.0", port=7860)
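
For reference, the input size of the new EmotionCNN's nn.Linear(64 * 24 * 24, 1024) follows from the 48x48 grayscale tensor produced by preprocess_face: the 3x3 convolution with padding 1 keeps the spatial size at 48x48, and the single 2x2 max-pool halves it to 24x24 over 64 channels. A minimal shape check, as a sketch only (the standalone copy of the class and the dummy tensor are illustrative, not part of the commit):

import torch
from torch import nn

# Standalone copy of the committed EmotionCNN, reproduced here only to check shapes.
class EmotionCNN(nn.Module):
    def __init__(self, num_classes=7):
        super(EmotionCNN, self).__init__()
        self.conv1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=3, padding=1),
                                   nn.BatchNorm2d(64),
                                   nn.ReLU(),
                                   nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc = nn.Sequential(nn.Linear(64 * 24 * 24, 1024),
                                nn.ReLU(),
                                nn.Linear(1024, num_classes))

    def forward(self, x):
        x = self.conv1(x)          # (1, 64, 24, 24) for a 48x48 input
        x = x.view(x.size(0), -1)  # (1, 36864) == (1, 64 * 24 * 24)
        return self.fc(x)          # (1, 7) emotion logits

dummy = torch.randn(1, 1, 48, 48)  # matches preprocess_face: 1 channel, 48x48
print(EmotionCNN()(dummy).shape)   # torch.Size([1, 7])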
models DELETED
File without changes
requirements.txt CHANGED
@@ -1,8 +1,8 @@
+ultralytics
 torch
 torchvision
-opencv-python
-ultralytics
 gradio
-flask
-numpy
-Pillow
+fastapi
+uvicorn
+opencv-python
+pillow
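
With these dependencies installed and the app running, the POST /api/video endpoint added in this commit can be exercised with a small client script. This is a sketch, not part of the commit: the localhost URL, the sample filename, and the use of the requests library on the client side are assumptions, while the endpoint path, the "file" form field, and the success/message/results response keys come from app.py above.

import requests

# Hypothetical local URL; a deployed Space would use its own host instead of localhost.
url = "http://localhost:7860/api/video"

# "clip.mp4" is a placeholder for any short video to analyze.
with open("clip.mp4", "rb") as f:
    response = requests.post(url, files={"file": ("clip.mp4", f, "video/mp4")})

data = response.json()
if data.get("success"):
    # Each entry describes the largest detected face in one processed frame.
    for detection in data.get("results", []):
        print(detection["emotion"], round(detection["confidence"], 3))
else:
    print("Request failed:", data.get("message"))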