Fix import issue
Changed files:
- README.md (+23 -34)
- __pycache__/video_accent_analyzer.cpython-310.pyc (+0 -0, binary)
- api.py (+101 -0)
- app.py (+13 -11)
- templates/index.html (+155 -0)
- video_accent_analyzer.py (+125 -164)
README.md CHANGED
@@ -1,46 +1,35 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Video Accent Analyzer
+emoji: 🎧
+colorFrom: green
+colorTo: blue
 sdk: gradio
-sdk_version:
+sdk_version: 4.0.0
 app_file: app.py
 pinned: false
-license: mit
-short_description: 'a tools to automate real hiring decisions. '
 ---

-# 🎧 Video Accent Analyzer
+# Video Accent Analyzer
+
+This application analyzes English accents in videos using machine learning. It supports:
+- YouTube videos
+- Loom recordings
+- Direct video links
+- Uploaded video files

 ## Features
-- Multiple English accent detection
-
-## Requirements
-- Python 3.8+
-- FFmpeg
-- PyTorch
-- Transformers
+- Accent detection with confidence scores
+- English proficiency assessment
+- Interactive visualizations
+- Support for multiple video sources

 ## Usage
-1. Enter a video URL or upload a file
-2.
-
-Enhanced Video Accent Analyzer
-Supports YouTube, Loom, direct MP4 links, and local video files with improved error handling and features.
-
-results = analyzer.analyze_local_video("/local/input/video.mp4", max_duration=30)
-analyzer.display_results(results)
-"""
+1. Enter a video URL or upload a video file
+2. Set the maximum duration to analyze
+3. Click "Analyze Video"
+4. View the detailed results and visualization
+
+## Tips
+- Keep videos under 2 minutes for best results
+- Ensure clear audio quality
+- Multiple speakers may affect accuracy
__pycache__/video_accent_analyzer.cpython-310.pyc ADDED
Binary file (24.1 kB)
api.py ADDED
@@ -0,0 +1,101 @@
+from flask import Flask, request, jsonify, render_template
+from video_accent_analyzer import VideoAccentAnalyzer
+import plotly
+import json
+import os
+
+app = Flask(__name__)
+analyzer = VideoAccentAnalyzer()
+
+@app.route('/')
+def home():
+    return render_template('index.html')
+
+@app.route('/api/analyze', methods=['POST'])
+def analyze():
+    try:
+        data = request.json
+        url = data.get('url')
+        duration = int(data.get('duration', 30))
+
+        if not url:
+            return jsonify({'error': 'No URL provided'}), 400
+
+        # Initialize analyzer with display=False to avoid IPython dependency
+        result = analyzer.analyze_video_url(url, max_duration=duration)
+
+        if 'error' in result:
+            return jsonify({'error': result['error']}), 400
+
+        # Create Plotly figure
+        probabilities = result['all_probabilities']
+        accents = [analyzer.accent_display_names.get(acc, acc.title())
+                   for acc in probabilities.keys()]
+        probs = list(probabilities.values())
+
+        # Format detailed results
+        accent = result['predicted_accent']
+        confidence = result['accent_confidence']
+        english_conf = result['english_confidence']
+
+        details = {
+            'primary_classification': {
+                'accent': analyzer.accent_display_names.get(accent, accent.title()),
+                'confidence': f"{confidence:.1f}%",
+                'english_confidence': f"{english_conf:.1f}%"
+            },
+            'audio_analysis': {
+                'duration': f"{result['audio_duration']:.1f}s",
+                'quality_score': result.get('audio_quality_score', 'N/A'),
+                'chunks_analyzed': result.get('chunks_analyzed', 1)
+            },
+            'assessment': {
+                'english_level': 'Strong' if english_conf >= 70 else 'Moderate' if english_conf >= 50 else 'Low',
+                'confidence_level': 'High' if confidence >= 70 else 'Moderate' if confidence >= 50 else 'Low'
+            }
+        }
+
+        # Add visualization data
+        plot_data = {
+            'data': [{
+                'type': 'bar',
+                'x': accents,
+                'y': probs,
+                'text': [f'{p:.1f}%' for p in probs],
+                'textposition': 'auto',
+                'marker': {
+                    'color': ['#4CAF50' if p == max(probs) else '#2196F3'
+                              if p >= 20 else '#FFC107' if p >= 10 else '#9E9E9E'
+                              for p in probs]
+                }
+            }],
+            'layout': {
+                'title': 'Accent Probability Distribution',
+                'xaxis': {'title': 'Accent Type'},
+                'yaxis': {'title': 'Probability (%)', 'range': [0, 100]},
+                'template': 'plotly_white'
+            }
+        }
+
+        # Combine all results
+        response = {
+            'details': details,
+            'plot': plot_data,
+            'raw_results': result
+        }
+
+        return jsonify(response)
+
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/api/cleanup', methods=['POST'])
+def cleanup():
+    try:
+        analyzer.cleanup()
+        return jsonify({'message': 'Cleanup successful'})
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+if __name__ == '__main__':
+    app.run(debug=True)
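For reference, a minimal sketch of how a client could exercise the new `/api/analyze` and `/api/cleanup` endpoints once this Flask app is running. The host and port assume Flask's defaults from `app.run(debug=True)`, and the video URL is a placeholder:

```python
import requests

# Placeholder video URL; substitute any YouTube, Loom, or direct MP4 link.
payload = {"url": "https://www.youtube.com/watch?v=EXAMPLE", "duration": 30}

resp = requests.post("http://127.0.0.1:5000/api/analyze", json=payload, timeout=600)
data = resp.json()

if "error" in data:
    print("Analysis failed:", data["error"])
else:
    # The endpoint nests a formatted summary under 'details'
    # and the raw analyzer dictionary under 'raw_results'.
    print(data["details"]["primary_classification"])
    print(data["raw_results"]["predicted_accent"])

# Optionally remove temporary downloads on the server.
requests.post("http://127.0.0.1:5000/api/cleanup")
```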
app.py CHANGED
@@ -155,17 +155,19 @@ with gr.Blocks(css=css) as interface:

 # Add requirements.txt
 requirements = """
+gradio>=4.0.0
+plotly>=5.0.0
+yt-dlp
+librosa
+soundfile
+transformers
+torch
+ffmpeg-python
+matplotlib
+seaborn
+pandas
+ipython
+"""

 with open("requirements.txt", "w") as f:
     f.write(requirements)
templates/index.html ADDED
@@ -0,0 +1,155 @@
+<!-- templates/index.html -->
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Video Accent Analyzer</title>
+    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
+    <style>
+        body {
+            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+            max-width: 1000px;
+            margin: 0 auto;
+            padding: 20px;
+            background-color: #f5f5f5;
+        }
+        .container {
+            background: white;
+            padding: 20px;
+            border-radius: 10px;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+        }
+        .input-group {
+            margin-bottom: 20px;
+        }
+        input, button {
+            padding: 10px;
+            margin: 5px 0;
+            border-radius: 5px;
+            border: 1px solid #ddd;
+        }
+        input[type="text"] {
+            width: 100%;
+            box-sizing: border-box;
+        }
+        button {
+            background: linear-gradient(45deg, #4CAF50, #2196F3);
+            color: white;
+            border: none;
+            cursor: pointer;
+            transition: transform 0.2s;
+        }
+        button:hover {
+            transform: scale(1.02);
+        }
+        #results {
+            margin-top: 20px;
+            padding: 20px;
+            border-radius: 5px;
+            background-color: #f8f9fa;
+        }
+        .error {
+            color: red;
+            padding: 10px;
+            background-color: #fee;
+            border-radius: 5px;
+        }
+        #plot {
+            margin-top: 20px;
+        }
+        .metric {
+            display: inline-block;
+            padding: 10px;
+            margin: 5px;
+            background: #fff;
+            border-radius: 5px;
+            box-shadow: 0 1px 3px rgba(0,0,0,0.1);
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>🎧 Video Accent Analyzer</h1>
+
+        <div class="input-group">
+            <input type="text" id="videoUrl" placeholder="Enter YouTube, Loom, or direct video URL">
+            <input type="number" id="duration" value="30" min="10" max="120" step="10">
+            <button onclick="analyzeVideo()">Analyze Video</button>
+        </div>
+
+        <div id="loading" style="display: none;">
+            Analyzing video... Please wait...
+        </div>
+
+        <div id="results" style="display: none;"></div>
+        <div id="plot"></div>
+    </div>

+    <script>
+        async function analyzeVideo() {
+            const url = document.getElementById('videoUrl').value;
+            const duration = document.getElementById('duration').value;
+            const loading = document.getElementById('loading');
+            const results = document.getElementById('results');
+            const plot = document.getElementById('plot');
+
+            if (!url) {
+                alert('Please enter a video URL');
+                return;
+            }
+
+            loading.style.display = 'block';
+            results.style.display = 'none';
+            plot.innerHTML = '';
+
+            try {
+                const response = await fetch('/api/analyze', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify({ url, duration }),
+                });
+
+                const data = await response.json();
+
+                if (data.error) {
+                    results.innerHTML = `<div class="error">Error: ${data.error}</div>`;
+                    results.style.display = 'block';
+                    return;
+                }
+
+                // The analyzer output is nested under raw_results in the API response
+                const raw = data.raw_results;
+
+                // Display results
+                results.innerHTML = `
+                    <h2>Analysis Results</h2>
+                    <div class="metric">
+                        <strong>Predicted Accent:</strong><br>
+                        ${raw.predicted_accent}
+                    </div>
+                    <div class="metric">
+                        <strong>Confidence:</strong><br>
+                        ${raw.accent_confidence.toFixed(1)}%
+                    </div>
+                    <div class="metric">
+                        <strong>English Confidence:</strong><br>
+                        ${raw.english_confidence.toFixed(1)}%
+                    </div>
+                    <div class="metric">
+                        <strong>Audio Duration:</strong><br>
+                        ${raw.audio_duration.toFixed(1)}s
+                    </div>
+                `;
+
+                // Create plot
+                Plotly.newPlot('plot', data.plot.data, data.plot.layout);
+                results.style.display = 'block';
+
+            } catch (error) {
+                results.innerHTML = `<div class="error">Error: ${error.message}</div>`;
+                results.style.display = 'block';
+            } finally {
+                loading.style.display = 'none';
+            }
+        }
+    </script>
+</body>
+</html>
video_accent_analyzer.py CHANGED
@@ -22,6 +22,7 @@ import seaborn as sns
 # Suppress warnings for cleaner output
 warnings.filterwarnings('ignore')

+
 def install_if_missing(packages):
     """Install packages if they're not already available in Kaggle"""
     for package in packages:
@@ -34,6 +35,7 @@ def install_if_missing(packages):
             print(f"Installing {package}...")
             subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--quiet"])

+
 # Required packages for Kaggle
 required_packages = [
     "yt-dlp",
@@ -55,6 +57,7 @@ import librosa
 import soundfile as sf
 import yt_dlp

+
 class VideoAccentAnalyzer:
     def __init__(self, model_name="dima806/multiple_accent_classification"):
         """Initialize the accent analyzer for Kaggle environment"""
@@ -103,20 +106,19 @@ class VideoAccentAnalyzer:

         return True, url

     def trim_video(self, input_path, output_path, duration):
+        try:
+            cmd = ['ffmpeg', '-i', input_path, '-t', str(duration), '-c', 'copy', output_path, '-y']
+            result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
+            if result.returncode == 0:
+                print(f"✂️ Trimmed video to {duration} seconds")
+                return output_path
+            else:
+                print(f"❌ Trimming failed: {result.stderr}")
+                return input_path  # fallback to original
+        except Exception as e:
+            print(f"❌ Trimming exception: {e}")
+            return input_path

     def download_video(self, url, max_duration=None):
         """Download video using yt-dlp with improved error handling"""
@@ -145,7 +147,7 @@ class VideoAccentAnalyzer:

         if max_duration:
             # More generous time limit for download
+            ydl_opts['match_filter'] = lambda info: None if info.get('duration', 0) <= 200000 else "Video too long"

         try:
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
@@ -166,9 +168,10 @@ class VideoAccentAnalyzer:
             # Find downloaded file (try multiple patterns)
             video_path = None
             for file in os.listdir(self.temp_dir):
-                if file.startswith("video.") and os.path.getsize(
+                if file.startswith("video.") and os.path.getsize(
+                        os.path.join(self.temp_dir, file)) > 1000:  # At least 1KB
                     potential_path = os.path.join(self.temp_dir, file)
-                    print(f"Found downloaded file: {file} ({os.path.getsize(potential_path)/1024:.1f}KB)")
+                    print(f"Found downloaded file: {file} ({os.path.getsize(potential_path) / 1024:.1f}KB)")

                     # Try basic validation - if ffprobe fails, still try to extract audio
                     if self._is_valid_video(potential_path):
@@ -254,7 +257,7 @@ class VideoAccentAnalyzer:
                         f.write(chunk)
                        file_size += len(chunk)

-            print(f"Downloaded {file_size / (1024*1024):.1f} MB")
+            print(f"Downloaded {file_size / (1024 * 1024):.1f} MB")

             if self._is_valid_video(video_path):
                 print("✅ Direct download successful")
@@ -288,7 +291,7 @@ class VideoAccentAnalyzer:
             extraction_time = time.time() - start_time

             if result.returncode == 0 and os.path.exists(audio_path) and os.path.getsize(audio_path) > 1000:
-                file_size = os.path.getsize(audio_path) / (1024*1024)
+                file_size = os.path.getsize(audio_path) / (1024 * 1024)
                 print(f"✅ Audio extracted successfully ({extraction_time:.1f}s, {file_size:.1f}MB)")
                 return audio_path
             else:
@@ -298,7 +301,7 @@ class VideoAccentAnalyzer:
                 # Try alternative extraction method
                 print("Trying alternative audio extraction...")
                 cmd_alt = ['ffmpeg', '-i', video_path, '-vn', '-acodec', 'libmp3lame',
+                           '-ar', '16000', '-ac', '1', '-y', '-v', 'warning']
                 if max_duration:
                     cmd_alt.extend(['-t', str(max_duration)])
@@ -310,11 +313,11 @@ class VideoAccentAnalyzer:
                 if result_alt.returncode == 0 and os.path.exists(audio_path_alt):
                     # Convert mp3 to wav
                     cmd_convert = ['ffmpeg', '-i', audio_path_alt, '-ar', '16000', '-ac', '1',
+                                   audio_path, '-y', '-v', 'quiet']
                     result_convert = subprocess.run(cmd_convert, capture_output=True, text=True, timeout=60)

                     if result_convert.returncode == 0 and os.path.exists(audio_path):
-                        file_size = os.path.getsize(audio_path) / (1024*1024)
+                        file_size = os.path.getsize(audio_path) / (1024 * 1024)
                         print(f"✅ Alternative extraction successful ({file_size:.1f}MB)")
                         return audio_path
@@ -463,7 +466,7 @@ class VideoAccentAnalyzer:
             return {"error": f"File not found: {file_path}"}

         # Check file size
-        file_size = os.path.getsize(file_path) / (1024*1024)  # MB
+        file_size = os.path.getsize(file_path) / (1024 * 1024)  # MB
         print(f"File size: {file_size:.1f} MB")

         video_filename = os.path.basename(file_path)
@@ -488,166 +491,121 @@ class VideoAccentAnalyzer:

         return results

-    def display_results(self, results):
-        """Enhanced results display with visualizations"""
-        if 'error' in results:
-            display(HTML(f"<div style='color: red; font-size: 16px; padding: 10px; border: 1px solid red; border-radius: 5px;'>❌ {results['error']}</div>"))
-            return
-
-        accent = results['predicted_accent']
-        confidence = results['accent_confidence']
-        english_conf = results['english_confidence']
-        duration = results['audio_duration']
-        processed_duration = results.get('processed_duration', duration)
-        quality_score = results.get('audio_quality_score', 50)
-
-        accent_display = self.accent_display_names.get(accent, accent.title())
-
-        # Enhanced HTML display
-        html = f"""
-        <div style='border: 2px solid #4CAF50; border-radius: 10px; padding: 20px; margin: 10px 0; background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);'>
-            <h2 style='color: #2E7D32; margin-top: 0; text-align: center;'>🎯 Accent Analysis Results</h2>
-
-            <div style='display: flex; flex-wrap: wrap; gap: 20px; margin-bottom: 20px;'>
-                <div style='flex: 1; min-width: 200px; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
-                    <h3 style='color: #1976D2; margin-top: 0;'>Primary Classification</h3>
-                    <p style='font-size: 20px; margin: 5px 0; font-weight: bold;'>{accent_display}</p>
-                    <p style='margin: 5px 0;'>Confidence: <strong style='color: {"#4CAF50" if confidence >= 70 else "#FF9800" if confidence >= 50 else "#F44336"};'>{confidence:.1f}%</strong></p>
-                </div>
-
-                <div style='flex: 1; min-width: 200px; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
-                    <h3 style='color: #1976D2; margin-top: 0;'>English Proficiency</h3>
-                    <p style='font-size: 18px; margin: 5px 0;'><strong style='color: {"#4CAF50" if english_conf >= 70 else "#FF9800" if english_conf >= 50 else "#F44336"};'>{english_conf:.1f}%</strong></p>
-                    <p style='margin: 5px 0;'>Audio Quality: <strong>{quality_score:.0f}/100</strong></p>
-                </div>
-
-                <div style='flex: 1; min-width: 200px; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
-                    <h3 style='color: #1976D2; margin-top: 0;'>⏱️ Processing Info</h3>
-                    <p style='margin: 5px 0;'>Duration: <strong>{duration:.1f}s</strong></p>
-                    <p style='margin: 5px 0;'>Processed: <strong>{processed_duration:.1f}s</strong></p>
-                    <p style='margin: 5px 0;'>Chunks: <strong>{results.get("chunks_analyzed", 1)}</strong></p>
-                </div>
-            </div>
-
-            <div style='background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
-                <h3 style='color: #1976D2; margin-top: 0;'>Assessment</h3>
-                <div style='display: flex; flex-wrap: wrap; gap: 10px;'>
-                    <span style='background: {"#4CAF50" if english_conf >= 70 else "#FF9800" if english_conf >= 50 else "#F44336"}; color: white; padding: 5px 10px; border-radius: 15px; font-size: 14px;'>
-                        {'✅ Strong English Speaker' if english_conf >= 70 else '⚠️ Moderate English Confidence' if english_conf >= 50 else '❌ Low English Confidence'}
-                    </span>
-                    <span style='background: {"#4CAF50" if confidence >= 70 else "#FF9800" if confidence >= 50 else "#F44336"}; color: white; padding: 5px 10px; border-radius: 15px; font-size: 14px;'>
-                        {'🎯 High Confidence' if confidence >= 70 else 'Moderate Confidence' if confidence >= 50 else '❌ Low Confidence'}
-                    </span>
-                    <span style='background: {"#4CAF50" if quality_score >= 70 else "#FF9800" if quality_score >= 40 else "#F44336"}; color: white; padding: 5px 10px; border-radius: 15px; font-size: 14px;'>
-                        {'Good Audio Quality' if quality_score >= 70 else 'Fair Audio Quality' if quality_score >= 40 else 'Poor Audio Quality'}
-                    </span>
-                </div>
-            </div>
-        </div>
-        """
-        display(HTML(html))
-
-        # Create probability breakdown visualization
-        self._plot_probabilities(results['all_probabilities'])
-
-        # Display detailed breakdown table
-        prob_df = pd.DataFrame([
-            {
-                'Accent': self.accent_display_names.get(accent, accent.title()),
-                'Probability': f"{prob:.1f}%",
-                'Confidence': 'High' if prob >= 70 else 'Medium' if prob >= 30 else 'Low'
-            }
-            for accent, prob in sorted(results['all_probabilities'].items(), key=lambda x: x[1], reverse=True)
-        ])
-    def cleanup(self):
-        """Clean up temporary files"""
-        try:
-            import shutil
-            if os.path.exists(self.temp_dir):
-                shutil.rmtree(self.temp_dir, ignore_errors=True)
-            print("🧹 Cleaned up temporary files")
-        except Exception as e:
-            print(f"⚠️ Cleanup warning: {e}")
+    def display_results(self, results):
+        """Display results in text format"""
+        if 'error' in results:
+            print(f"❌ {results['error']}")
+            return
+
+        accent = results['predicted_accent']
+        confidence = results['accent_confidence']
+        english_conf = results['english_confidence']
+        duration = results['audio_duration']
+        processed_duration = results.get('processed_duration', duration)
+        quality_score = results.get('audio_quality_score', 50)
+
+        accent_display = self.accent_display_names.get(accent, accent.title())
+
+        print(f"\n=== Accent Analysis Results ===")
+        print(f"Predicted Accent: {accent_display}")
+        print(f"Confidence: {confidence:.1f}%")
+        print(f"English Confidence: {english_conf:.1f}%")
+        print(f"Audio Duration: {duration:.1f}s")
+        print(f"Processed Duration: {processed_duration:.1f}s")
+        print(f"Audio Quality: {quality_score:.0f}/100")
+        print(f"Chunks Analyzed: {results.get('chunks_analyzed', 1)}")
+
+    def _plot_probabilities(self, probabilities):
+        """Create a visualization of accent probabilities"""
+        try:
+            plt.figure(figsize=(10, 6))
+
+            accents = [self.accent_display_names.get(acc, acc.title()) for acc in probabilities.keys()]
+            probs = list(probabilities.values())
+
+            # Create color map
+            colors = ['#4CAF50' if p == max(probs) else '#2196F3' if p >= 20 else '#FFC107' if p >= 10 else '#9E9E9E'
+                      for p in probs]
+
+            bars = plt.bar(accents, probs, color=colors, alpha=0.8, edgecolor='black', linewidth=0.5)
+
+            plt.title('Accent Classification Probabilities', fontsize=16, fontweight='bold', pad=20)
+            plt.xlabel('Accent Type', fontsize=12)
+            plt.ylabel('Probability (%)', fontsize=12)
+            plt.xticks(rotation=45, ha='right')
+            plt.grid(axis='y', alpha=0.3)
+
+            # Add value labels on bars
+            for bar, prob in zip(bars, probs):
+                height = bar.get_height()
+                plt.text(bar.get_x() + bar.get_width() / 2., height + 0.5,
+                         f'{prob:.1f}%', ha='center', va='bottom', fontweight='bold')
+
+            plt.tight_layout()
+            plt.show()
+
+        except Exception as e:
+            print(f"⚠️ Could not create visualization: {e}")
+
+    def batch_analyze(self, urls, max_duration=30):
+        """Analyze multiple videos with progress tracking"""
+        results = []
+        failed_count = 0
+
+        print(f"Starting batch analysis of {len(urls)} videos")
+
+        for i, url in enumerate(urls, 1):
+            print(f"\n{'=' * 60}")
+            print(f"Processing video {i}/{len(urls)}")
+
+            result = self.analyze_video_url(url, max_duration)
+            result['video_index'] = i
+
+            if 'error' in result:
+                failed_count += 1
+                print(f"❌ Failed: {result['error']}")
+            else:
+                print(f"✅ Success: {result['predicted_accent']} ({result['accent_confidence']:.1f}%)")
+
+            results.append(result)
+            self.display_results(result)
+
+            # Small delay to prevent overwhelming servers
+            if i < len(urls):
+                time.sleep(1)
+
+        # Summary
+        success_count = len(urls) - failed_count
+        print(f"\nBatch Analysis Summary:")
+        print(f"  ✅ Successful: {success_count}/{len(urls)}")
+        print(f"  ❌ Failed: {failed_count}/{len(urls)}")
+
+        return results
+
+    def export_results(self, results, filename="accent_analysis_results.json"):
+        """Export results to JSON file"""
+        try:
+            with open(filename, 'w') as f:
+                json.dump(results, f, indent=2, default=str)
+            print(f"Results exported to {filename}")
+        except Exception as e:
+            print(f"❌ Export failed: {e}")
+
+    def cleanup(self):
+        """Clean up temporary files"""
+        try:
+            import shutil
+            if os.path.exists(self.temp_dir):
+                shutil.rmtree(self.temp_dir, ignore_errors=True)
+            print("🧹 Cleaned up temporary files")
+        except Exception as e:
+            print(f"⚠️ Cleanup warning: {e}")

 # Helper Functions
 def show_examples():
@@ -669,6 +627,7 @@ def show_examples():
     print("  • Multiple speakers may affect accuracy")
     print("  • Model works best with sustained speech")

+
 def quick_test_url():
     """Interactive test for video URLs"""
     print("Quick Test Mode for Video URLs")
@@ -699,10 +658,11 @@ def quick_test_url():
     finally:
         analyzer.cleanup()

+
 def demo_analysis():
     """Demo function with example usage"""
     print("🎬 Video Accent Analyzer Demo")
-    print("="*50)
+    print("=" * 50)

     # Initialize analyzer
     analyzer = VideoAccentAnalyzer()
@@ -722,5 +682,6 @@ def demo_analysis():
     print("3. analyzer.display_results(results)")
     print("4. analyzer.cleanup()  # Clean up temporary files")

+
 # Show examples on import
-show_examples()
+show_examples()
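Taken together, the rewritten display_results plus the new batch_analyze and export_results methods allow a fully headless run without IPython. A minimal usage sketch, assuming the module is importable from the working directory and using placeholder video URLs:

```python
from video_accent_analyzer import VideoAccentAnalyzer

analyzer = VideoAccentAnalyzer()
try:
    # Placeholder URLs; any YouTube, Loom, or direct MP4 links should work.
    urls = [
        "https://www.youtube.com/watch?v=EXAMPLE_1",
        "https://www.youtube.com/watch?v=EXAMPLE_2",
    ]
    # Analyze up to 30 seconds of each video; a text summary is printed per result.
    results = analyzer.batch_analyze(urls, max_duration=30)
    # Persist the raw result dictionaries for later inspection.
    analyzer.export_results(results, filename="accent_analysis_results.json")
finally:
    analyzer.cleanup()  # remove temporary downloads
```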