Rivalcoder committed
Commit d044a6c · 1 Parent(s): 4168c5d

Add New Version

app.py CHANGED
@@ -1,173 +1,189 @@
+import os
 import cv2
 import torch
 import numpy as np
 from PIL import Image
 import torchvision.transforms as transforms
-from ultralytics import YOLO
-import tempfile
 import time
-import os
 import json
+from typing import Dict, Any
+from fastapi import FastAPI, HTTPException, File, UploadFile
+from pydantic import BaseModel
 import gradio as gr
-from fastapi import FastAPI, UploadFile, File, HTTPException
-import uvicorn
+import shutil
+import tempfile
 
-# Initialize FastAPI
 app = FastAPI()
 
-# Global variable for face detections
+# Global variable to store the history of largest face detections
 largest_face_detections = []
 
-# Load models
-yolo_model_path = "yolov8n-face.pt"
-emotion_model_path = "best_emotion_model.pth"
-
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-# Check if models exist
-if os.path.exists(yolo_model_path):
-    yolo_model = YOLO(yolo_model_path)
-else:
-    raise FileNotFoundError(f"YOLO model not found at {yolo_model_path}")
-
-if os.path.exists(emotion_model_path):
-    from torch import nn
-
-    class EmotionCNN(nn.Module):
-        def __init__(self, num_classes=7):
-            super(EmotionCNN, self).__init__()
-            self.conv1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=3, padding=1),
-                                       nn.BatchNorm2d(64),
-                                       nn.ReLU(),
-                                       nn.MaxPool2d(kernel_size=2, stride=2))
-
-            self.fc = nn.Sequential(nn.Linear(64 * 24 * 24, 1024),
-                                    nn.ReLU(),
-                                    nn.Linear(1024, num_classes))
-
-        def forward(self, x):
-            x = self.conv1(x)
-            x = x.view(x.size(0), -1)
-            x = self.fc(x)
-            return x
-
-    emotion_model = EmotionCNN(num_classes=7)
-    checkpoint = torch.load(emotion_model_path, map_location=device)
-    emotion_model.load_state_dict(checkpoint['model_state_dict'])
-    emotion_model.to(device)
-    emotion_model.eval()
-else:
-    raise FileNotFoundError(f"Emotion model not found at {emotion_model_path}")
-
-# Emotion labels
-emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
-
-def preprocess_face(face_img):
-    """Preprocess face image for emotion detection"""
-    transform = transforms.Compose([
-        transforms.Resize((48, 48)),
-        transforms.ToTensor(),
-        transforms.Normalize(mean=[0.5], std=[0.5])
-    ])
-
-    face_img = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)).convert('L')
-    face_tensor = transform(face_img).unsqueeze(0)
-    return face_tensor
-
-def process_video(video_path: str):
-    """Process video and return emotion results"""
+# EmotionCNN model definition (same as in your original code)
+class EmotionCNN(torch.nn.Module):
+    def __init__(self, num_classes=7):
+        super(EmotionCNN, self).__init__()
+
+        # Your convolutional layers and other definitions
+        # ...
+
+    def forward(self, x):
+        # Forward method as in your code
+        pass
+
+# Load emotion model
+def load_emotion_model(model_path, device='cuda' if torch.cuda.is_available() else 'cpu'):
+    checkpoint = torch.load(model_path, map_location=device)
+    model = EmotionCNN(num_classes=7)
+    model.load_state_dict(checkpoint['model_state_dict'])
+    model.to(device)
+    model.eval()
+    return model
+
+# Process the uploaded video (either MP4 or WebM)
+def process_video(video_file: UploadFile) -> Dict[str, Any]:
     global largest_face_detections
-    largest_face_detections = []
-
+    largest_face_detections = []  # Reset detections for new video
+
+    # Path to models and other setup
+    face_cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
+    emotion_model_path = "best_emotion_model.pth"
+
+    if not os.path.exists(face_cascade_path):
+        raise HTTPException(status_code=400, detail="Face cascade classifier not found")
+
+    if not os.path.exists(emotion_model_path):
+        raise HTTPException(status_code=400, detail="Emotion model not found")
+
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+    try:
+        face_cascade = cv2.CascadeClassifier(face_cascade_path)
+        emotion_model = load_emotion_model(emotion_model_path, device)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error loading models: {str(e)}")
+
+    emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
+
+    # Save the uploaded video file to a temporary directory
+    temp_dir = tempfile.mkdtemp()
+    video_path = os.path.join(temp_dir, "uploaded_video")
+    with open(video_path, "wb") as buffer:
+        shutil.copyfileobj(video_file.file, buffer)
+
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
-        return {"success": False, "message": "Could not open video file"}
-
+        raise HTTPException(status_code=400, detail=f"Could not open video file at {video_path}")
+
+    frame_count = 0
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
     while True:
         ret, frame = cap.read()
        if not ret:
             break
-
+
+        frame_count += 1
+
         largest_face_area = 0
         current_detection = None
-
-        results = yolo_model(frame, stream=True)
-        for result in results:
-            boxes = result.boxes
-            for box in boxes:
-                x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
-                face_img = frame[y1:y2, x1:x2]
-
-                if face_img.size == 0:
-                    continue
-
-                face_tensor = preprocess_face(face_img).to(device)
-
-                with torch.no_grad():
-                    output = emotion_model(face_tensor)
-                    probabilities = torch.nn.functional.softmax(output, dim=1)
-                    emotion_idx = torch.argmax(output, dim=1).item()
-                    confidence = probabilities[0][emotion_idx].item()
-
-                emotion = emotions[emotion_idx]
-
-                if (x2 - x1) * (y2 - y1) > largest_face_area:
-                    largest_face_area = (x2 - x1) * (y2 - y1)
-                    current_detection = {"emotion": emotion, "confidence": confidence}
-
+
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+
+        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
+
+        for (x, y, w, h) in faces:
+            face_area = w * h
+            margin = 20
+            x1 = max(0, x - margin)
+            y1 = max(0, y - margin)
+            x2 = min(frame.shape[1], x + w + margin)
+            y2 = min(frame.shape[0], y + h + margin)
+
+            face_img = frame[y1:y2, x1:x2]
+
+            if face_img.size == 0 or face_img.shape[0] < 20 or face_img.shape[1] < 20:
+                continue
+
+            face_tensor = preprocess_face(face_img)
+
+            with torch.no_grad():
+                face_tensor = face_tensor.to(device)
+                output = emotion_model(face_tensor)
+                probabilities = torch.nn.functional.softmax(output, dim=1)
+                emotion_idx = torch.argmax(output, dim=1).item()
+                confidence = probabilities[0][emotion_idx].item()
+
+            emotion = emotions[emotion_idx]
+
+            if face_area > largest_face_area:
+                largest_face_area = face_area
+                current_detection = {
+                    'emotion': emotion,
+                    'confidence': confidence,
+                    'timestamp': time.time(),
+                    'frame_number': frame_count
+                }
+
         if current_detection:
             largest_face_detections.append(current_detection)
-
+
     cap.release()
 
     if not largest_face_detections:
-        return {"success": True, "message": "No faces detected", "results": []}
-
+        return {
+            "success": True,
+            "message": "No faces detected in video",
+            "results": [],
+            "error": None
+        }
+
+    emotions_count = {}
+    for detection in largest_face_detections:
+        emotion = detection['emotion']
+        emotions_count[emotion] = emotions_count.get(emotion, 0) + 1
+
+    dominant_emotion = max(emotions_count.items(), key=lambda x: x[1])[0]
+
     return {
         "success": True,
-        "message": "Video processed",
-        "results": largest_face_detections
+        "message": "Video processed successfully",
+        "results": {
+            "detections": largest_face_detections,
+            "summary": {
+                "total_frames": total_frames,
+                "total_detections": len(largest_face_detections),
+                "emotions_count": emotions_count,
+                "dominant_emotion": dominant_emotion
+            }
+        },
+        "error": None
    }
 
-@app.post("/api/video")
-async def handle_video(file: UploadFile = File(...)):
-    """API endpoint for video emotion detection"""
-    try:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
-            tmp.write(await file.read())
-            video_path = tmp.name
-
-        result = process_video(video_path)
-        os.remove(video_path)
-        return result
-
+class VideoRequest(BaseModel):
+    path: str
+
+# FastAPI endpoint for processing the video file
+@app.post("/process_video/")
+async def process_video_request(file: UploadFile = File(...)):
+    try:
+        results = process_video(file)
+        return results
     except Exception as e:
-        return {"success": False, "message": "Error processing video", "error": str(e)}
-
-# Gradio UI
-def gradio_process(video):
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
-        tmp.write(video)
-        video_path = tmp.name
-
-    result = process_video(video_path)
-    os.remove(video_path)
-    return result
-
-with gr.Blocks() as demo:
-    gr.Markdown("# Video Emotion Analysis")
-
-    with gr.Row():
-        with gr.Column():
-            video_input = gr.File(label="Upload a video", file_types=[".mp4"])
-            submit_btn = gr.Button("Analyze")
-
-        with gr.Column():
-            output = gr.JSON(label="Results")
-
-    submit_btn.click(fn=gradio_process, inputs=video_input, outputs=output)
-
-app = gr.mount_gradio_app(app, demo, path="/")
-
-if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+        raise HTTPException(status_code=500, detail=str(e))
+
+# Gradio interface
+def gradio_interface():
+    def process_gradio_video(video_file):
+        # This function now accepts WebM files and other video formats.
+        return process_video(video_file)
+
+    interface = gr.Interface(
+        fn=process_gradio_video,
+        inputs=gr.inputs.Video(type="file"),  # 'file' ensures that Gradio handles all formats including WebM
+        outputs="json"
+    )
+
+    return interface
+
+# Launch Gradio Interface on FastAPI
+gradio_interface().launch(server_name="0.0.0.0", server_port=7860, share=True)
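Review note: the new version commits EmotionCNN as a stub (`__init__` defines no layers and `forward` just does `pass`) and drops `preprocess_face` entirely, yet `load_emotion_model` and the frame loop still call both, so `load_state_dict` would fail on the checkpoint's unexpected keys and the loop would hit a `NameError`. A minimal sketch that restores both from the removed code, assuming `best_emotion_model.pth` was trained against the old architecture:

    import cv2
    import torch
    from torch import nn
    import torchvision.transforms as transforms
    from PIL import Image

    class EmotionCNN(nn.Module):
        """Architecture copied from the removed version; the checkpoint's
        state_dict must match these layer names and shapes."""
        def __init__(self, num_classes=7):
            super(EmotionCNN, self).__init__()
            self.conv1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=3, padding=1),
                                       nn.BatchNorm2d(64),
                                       nn.ReLU(),
                                       nn.MaxPool2d(kernel_size=2, stride=2))
            self.fc = nn.Sequential(nn.Linear(64 * 24 * 24, 1024),
                                    nn.ReLU(),
                                    nn.Linear(1024, num_classes))

        def forward(self, x):
            x = self.conv1(x)
            x = x.view(x.size(0), -1)
            x = self.fc(x)
            return x

    def preprocess_face(face_img):
        """48x48 grayscale tensor, normalized as in the removed version."""
        transform = transforms.Compose([
            transforms.Resize((48, 48)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5])
        ])
        face_img = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)).convert('L')
        return transform(face_img).unsqueeze(0)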
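Review note: the serving code has three related problems. `gr.inputs.Video` is the legacy pre-3.0 Gradio namespace (deprecated in 3.x and removed in 4.x); in current Gradio the callback receives the upload as a local file path rather than a FastAPI `UploadFile`, so `process_video(video_file)` would fail at `video_file.file`; and calling `launch()` at import time means the FastAPI `app`, and with it the `/process_video/` route, is never actually served. A sketch of one way to reconcile them, keeping the removed version's `mount_gradio_app` approach; `process_video_from_path` is a hypothetical path-based variant of `process_video`:

    import gradio as gr
    import uvicorn
    from fastapi import FastAPI

    app = FastAPI()

    def process_video_from_path(video_path: str) -> dict:
        # Hypothetical helper: the same frame loop as process_video, minus
        # the UploadFile copy step, taking a local file path directly.
        return {"success": True, "results": []}

    def process_gradio_video(video_path):
        # Gradio 3+ hands the callback the uploaded file's local path.
        return process_video_from_path(video_path)

    demo = gr.Interface(
        fn=process_gradio_video,
        inputs=gr.Video(),   # current component API; accepts mp4/webm uploads
        outputs=gr.JSON(),
    )

    # Serve the UI and the API route from one process instead of calling
    # demo.launch() at import time.
    app = gr.mount_gradio_app(app, demo, path="/")

    if __name__ == "__main__":
        uvicorn.run(app, host="0.0.0.0", port=7860)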
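Review note: two smaller hazards in the switch from YOLO to Haar cascades. `cv2.CascadeClassifier` does not raise on a missing or unreadable XML file; it silently returns an empty classifier that detects nothing, so the `os.path.exists` guard alone is not enough. And `tempfile.mkdtemp()` leaves its directory behind, whereas the removed version deleted its temp file after processing. A sketch covering both, under the same file layout the diff assumes:

    import os
    import cv2
    import shutil
    import tempfile

    # CascadeClassifier yields an empty detector on failure; check explicitly.
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
    if face_cascade.empty():
        raise RuntimeError("Failed to load Haar cascade for face detection")

    # mkdtemp() is never cleaned up in the new code; try/finally restores the
    # old behaviour of removing temporary upload data.
    temp_dir = tempfile.mkdtemp()
    video_path = os.path.join(temp_dir, "uploaded_video")
    try:
        pass  # write the upload and run the frame loop here
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)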
best_emotion_model.pth → models/best_emotion_model.pth RENAMED
File without changes
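Review note: this rename is not reflected in the new app.py, which still sets `emotion_model_path = "best_emotion_model.pth"` at the repository root, so the 400 "Emotion model not found" branch would trigger on every request. The corresponding one-line fix:

    import os

    # Follow the checkpoint to its new location under models/.
    emotion_model_path = os.path.join("models", "best_emotion_model.pth")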
requirements.txt CHANGED
@@ -1,8 +1,7 @@
-ultralytics
-torch
-torchvision
-gradio
 fastapi
-uvicorn
+gradio
+torch
 opencv-python
 pillow
+torchvision
+uvicorn
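Review note: dropping `ultralytics` matches the removal of the YOLO detector, but two gaps remain. `numpy` is imported directly by app.py in both versions yet appears in neither list, so it is only installed transitively through `opencv-python`; and `uvicorn` is kept even though the new app.py never imports it. Assuming the Space is meant to serve FastAPI as sketched above, an explicit list might look like:

    fastapi
    gradio
    torch
    torchvision
    opencv-python
    pillow
    numpy
    uvicorn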
yolov8n-face.pt DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d17b38523a994b13ee604b67f02791ca0f43b9f446a32fd7bc44e17c56ead077
-size 6250099