ferhatbou committed on
Commit
0292df6
·
1 Parent(s): 5cdbd8d

Fix import issue

Browse files
README.md CHANGED
@@ -1,46 +1,35 @@
1
  ---
2
- title: Detect English Language Speaking
3
- emoji: 🔥
4
- colorFrom: red
5
- colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 5.31.0
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
- short_description: 'a tools to automate real hiring decisions. '
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
15
- # 🎧 Video Accent Analyzer
16
 
17
- Analyze accents in videos from YouTube, Loom, or uploaded files. Supports multiple English accents.
 
 
 
 
18
 
19
  ## Features
20
- - YouTube video analysis
21
- - Loom video analysis
22
- - Direct MP4 link support
23
- - Local file upload
24
- - Multiple English accent detection
25
-
26
- ## Requirements
27
- - Python 3.8+
28
- - FFmpeg
29
- - PyTorch
30
- - Transformers
31
 
32
  ## Usage
33
- 1. Enter a video URL or upload a file
34
- 2. Get instant accent analysis results
35
-
36
- """
37
- Enhanced Video Accent Analyzer
38
- Supports YouTube, Loom, direct MP4 links, and local video files with improved error handling and features.
39
 
40
- Usage:
41
- analyzer = VideoAccentAnalyzer()
42
- results = analyzer.analyze_video_url("https://example.com/video.mp4", max_duration=30)
43
- or
44
- results = analyzer.analyze_local_video("/local/input/video.mp4", max_duration=30)
45
- analyzer.display_results(results)
46
- """
 
1
  ---
2
+ title: Video Accent Analyzer
3
+ emoji: 🎧
4
+ colorFrom: green
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 4.0.0
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
+ # Video Accent Analyzer
 
13
 
14
+ This application analyzes English accents in videos using machine learning. It supports:
15
+ - YouTube videos
16
+ - Loom recordings
17
+ - Direct video links
18
+ - Uploaded video files
19
 
20
  ## Features
21
+ - Accent detection with confidence scores
22
+ - English proficiency assessment
23
+ - Interactive visualizations
24
+ - Support for multiple video sources
 
 
 
 
 
 
 
25
 
26
  ## Usage
27
+ 1. Enter a video URL or upload a video file
28
+ 2. Set the maximum duration to analyze
29
+ 3. Click "Analyze Video"
30
+ 4. View the detailed results and visualization
 
 
31
 
32
+ ## Tips
33
+ - Keep videos under 2 minutes for best results
34
+ - Ensure clear audio quality
35
+ - Multiple speakers may affect accuracy
 
 
 
__pycache__/video_accent_analyzer.cpython-310.pyc ADDED
Binary file (24.1 kB). View file
 
api.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, render_template
2
+ from video_accent_analyzer import VideoAccentAnalyzer
3
+ import plotly
4
+ import json
5
+ import os
6
+
7
+ app = Flask(__name__)
8
+ analyzer = VideoAccentAnalyzer()
9
+
10
+ @app.route('/')
11
+ def home():
12
+ return render_template('index.html')
13
+
14
+ @app.route('/api/analyze', methods=['POST'])
15
+ def analyze():
16
+ try:
17
+ data = request.json
18
+ url = data.get('url')
19
+ duration = int(data.get('duration', 30))
20
+
21
+ if not url:
22
+ return jsonify({'error': 'No URL provided'}), 400
23
+
24
+ # Initialize analyzer with display=False to avoid IPython dependency
25
+ result = analyzer.analyze_video_url(url, max_duration=duration)
26
+
27
+ if 'error' in result:
28
+ return jsonify({'error': result['error']}), 400
29
+
30
+ # Create Plotly figure
31
+ probabilities = result['all_probabilities']
32
+ accents = [analyzer.accent_display_names.get(acc, acc.title())
33
+ for acc in probabilities.keys()]
34
+ probs = list(probabilities.values())
35
+
36
+ # Format detailed results
37
+ accent = result['predicted_accent']
38
+ confidence = result['accent_confidence']
39
+ english_conf = result['english_confidence']
40
+
41
+ details = {
42
+ 'primary_classification': {
43
+ 'accent': analyzer.accent_display_names.get(accent, accent.title()),
44
+ 'confidence': f"{confidence:.1f}%",
45
+ 'english_confidence': f"{english_conf:.1f}%"
46
+ },
47
+ 'audio_analysis': {
48
+ 'duration': f"{result['audio_duration']:.1f}s",
49
+ 'quality_score': result.get('audio_quality_score', 'N/A'),
50
+ 'chunks_analyzed': result.get('chunks_analyzed', 1)
51
+ },
52
+ 'assessment': {
53
+ 'english_level': 'Strong' if english_conf >= 70 else 'Moderate' if english_conf >= 50 else 'Low',
54
+ 'confidence_level': 'High' if confidence >= 70 else 'Moderate' if confidence >= 50 else 'Low'
55
+ }
56
+ }
57
+
58
+ # Add visualization data
59
+ plot_data = {
60
+ 'data': [{
61
+ 'type': 'bar',
62
+ 'x': accents,
63
+ 'y': probs,
64
+ 'text': [f'{p:.1f}%' for p in probs],
65
+ 'textposition': 'auto',
66
+ 'marker': {
67
+ 'color': ['#4CAF50' if p == max(probs) else '#2196F3'
68
+ if p >= 20 else '#FFC107' if p >= 10 else '#9E9E9E'
69
+ for p in probs]
70
+ }
71
+ }],
72
+ 'layout': {
73
+ 'title': 'Accent Probability Distribution',
74
+ 'xaxis': {'title': 'Accent Type'},
75
+ 'yaxis': {'title': 'Probability (%)', 'range': [0, 100]},
76
+ 'template': 'plotly_white'
77
+ }
78
+ }
79
+
80
+ # Combine all results
81
+ response = {
82
+ 'details': details,
83
+ 'plot': plot_data,
84
+ 'raw_results': result
85
+ }
86
+
87
+ return jsonify(response)
88
+
89
+ except Exception as e:
90
+ return jsonify({'error': str(e)}), 500
91
+
92
+ @app.route('/api/cleanup', methods=['POST'])
93
+ def cleanup():
94
+ try:
95
+ analyzer.cleanup()
96
+ return jsonify({'message': 'Cleanup successful'})
97
+ except Exception as e:
98
+ return jsonify({'error': str(e)}), 500
99
+
100
+ if __name__ == '__main__':
101
+ app.run(debug=True)
app.py CHANGED
@@ -155,17 +155,19 @@ with gr.Blocks(css=css) as interface:
155
 
156
  # Add requirements.txt
157
  requirements = """
158
- gradio>=4.0.0
159
- plotly>=5.0.0
160
- yt-dlp
161
- librosa
162
- soundfile
163
- transformers
164
- torch
165
- ffmpeg-python
166
- matplotlib
167
- seaborn
168
- """
 
 
169
 
170
  with open("requirements.txt", "w") as f:
171
  f.write(requirements)
 
155
 
156
  # Add requirements.txt
157
  requirements = """
158
+ gradio>=4.0.0
159
+ plotly>=5.0.0
160
+ yt-dlp
161
+ librosa
162
+ soundfile
163
+ transformers
164
+ torch
165
+ ffmpeg-python
166
+ matplotlib
167
+ seaborn
168
+ pandas
169
+ ipython
170
+ """
171
 
172
  with open("requirements.txt", "w") as f:
173
  f.write(requirements)
templates/index.html ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- templates/index.html -->
2
+ <!DOCTYPE html>
3
+ <html>
4
+ <head>
5
+ <title>Video Accent Analyzer</title>
6
+ <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
7
+ <style>
8
+ body {
9
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
10
+ max-width: 1000px;
11
+ margin: 0 auto;
12
+ padding: 20px;
13
+ background-color: #f5f5f5;
14
+ }
15
+ .container {
16
+ background: white;
17
+ padding: 20px;
18
+ border-radius: 10px;
19
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
20
+ }
21
+ .input-group {
22
+ margin-bottom: 20px;
23
+ }
24
+ input, button {
25
+ padding: 10px;
26
+ margin: 5px 0;
27
+ border-radius: 5px;
28
+ border: 1px solid #ddd;
29
+ }
30
+ input[type="text"] {
31
+ width: 100%;
32
+ box-sizing: border-box;
33
+ }
34
+ button {
35
+ background: linear-gradient(45deg, #4CAF50, #2196F3);
36
+ color: white;
37
+ border: none;
38
+ cursor: pointer;
39
+ transition: transform 0.2s;
40
+ }
41
+ button:hover {
42
+ transform: scale(1.02);
43
+ }
44
+ #results {
45
+ margin-top: 20px;
46
+ padding: 20px;
47
+ border-radius: 5px;
48
+ background-color: #f8f9fa;
49
+ }
50
+ .error {
51
+ color: red;
52
+ padding: 10px;
53
+ background-color: #fee;
54
+ border-radius: 5px;
55
+ }
56
+ #plot {
57
+ margin-top: 20px;
58
+ }
59
+ .metric {
60
+ display: inline-block;
61
+ padding: 10px;
62
+ margin: 5px;
63
+ background: #fff;
64
+ border-radius: 5px;
65
+ box-shadow: 0 1px 3px rgba(0,0,0,0.1);
66
+ }
67
+ </style>
68
+ </head>
69
+ <body>
70
+ <div class="container">
71
+ <h1>🎧 Video Accent Analyzer</h1>
72
+
73
+ <div class="input-group">
74
+ <input type="text" id="videoUrl" placeholder="Enter YouTube, Loom, or direct video URL">
75
+ <input type="number" id="duration" value="30" min="10" max="120" step="10">
76
+ <button onclick="analyzeVideo()">Analyze Video</button>
77
+ </div>
78
+
79
+ <div id="loading" style="display: none;">
80
+ Analyzing video... Please wait...
81
+ </div>
82
+
83
+ <div id="results" style="display: none;"></div>
84
+ <div id="plot"></div>
85
+ </div>
86
+
87
+ <script>
88
+ async function analyzeVideo() {
89
+ const url = document.getElementById('videoUrl').value;
90
+ const duration = document.getElementById('duration').value;
91
+ const loading = document.getElementById('loading');
92
+ const results = document.getElementById('results');
93
+ const plot = document.getElementById('plot');
94
+
95
+ if (!url) {
96
+ alert('Please enter a video URL');
97
+ return;
98
+ }
99
+
100
+ loading.style.display = 'block';
101
+ results.style.display = 'none';
102
+ plot.innerHTML = '';
103
+
104
+ try {
105
+ const response = await fetch('/api/analyze', {
106
+ method: 'POST',
107
+ headers: {
108
+ 'Content-Type': 'application/json',
109
+ },
110
+ body: JSON.stringify({ url, duration }),
111
+ });
112
+
113
+ const data = await response.json();
114
+
115
+ if (data.error) {
116
+ results.innerHTML = `<div class="error">Error: ${data.error}</div>`;
117
+ results.style.display = 'block';
118
+ return;
119
+ }
120
+
121
+ // Display results
122
+ results.innerHTML = `
123
+ <h2>Analysis Results</h2>
124
+ <div class="metric">
125
+ <strong>Predicted Accent:</strong><br>
126
+ ${data.predicted_accent}
127
+ </div>
128
+ <div class="metric">
129
+ <strong>Confidence:</strong><br>
130
+ ${data.accent_confidence.toFixed(1)}%
131
+ </div>
132
+ <div class="metric">
133
+ <strong>English Confidence:</strong><br>
134
+ ${data.english_confidence.toFixed(1)}%
135
+ </div>
136
+ <div class="metric">
137
+ <strong>Audio Duration:</strong><br>
138
+ ${data.audio_duration.toFixed(1)}s
139
+ </div>
140
+ `;
141
+
142
+ // Create plot
143
+ Plotly.newPlot('plot', data.plot.data, data.plot.layout);
144
+ results.style.display = 'block';
145
+
146
+ } catch (error) {
147
+ results.innerHTML = `<div class="error">Error: ${error.message}</div>`;
148
+ results.style.display = 'block';
149
+ } finally {
150
+ loading.style.display = 'none';
151
+ }
152
+ }
153
+ </script>
154
+ </body>
155
+ </html>
video_accent_analyzer.py CHANGED
@@ -22,6 +22,7 @@ import seaborn as sns
22
  # Suppress warnings for cleaner output
23
  warnings.filterwarnings('ignore')
24
 
 
25
  def install_if_missing(packages):
26
  """Install packages if they're not already available in Kaggle"""
27
  for package in packages:
@@ -34,6 +35,7 @@ def install_if_missing(packages):
34
  print(f"Installing {package}...")
35
  subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--quiet"])
36
 
 
37
  # Required packages for Kaggle
38
  required_packages = [
39
  "yt-dlp",
@@ -55,6 +57,7 @@ import librosa
55
  import soundfile as sf
56
  import yt_dlp
57
 
 
58
  class VideoAccentAnalyzer:
59
  def __init__(self, model_name="dima806/multiple_accent_classification"):
60
  """Initialize the accent analyzer for Kaggle environment"""
@@ -103,20 +106,19 @@ class VideoAccentAnalyzer:
103
 
104
  return True, url
105
 
106
-
107
  def trim_video(self, input_path, output_path, duration):
108
- try:
109
- cmd = ['ffmpeg', '-i', input_path, '-t', str(duration), '-c', 'copy', output_path, '-y']
110
- result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
111
- if result.returncode == 0:
112
- print(f"βœ‚οΈ Trimmed video to {duration} seconds")
113
- return output_path
114
- else:
115
- print(f"❌ Trimming failed: {result.stderr}")
116
- return input_path # fallback to original
117
- except Exception as e:
118
- print(f"❌ Trimming exception: {e}")
119
- return input_path
120
 
121
  def download_video(self, url, max_duration=None):
122
  """Download video using yt-dlp with improved error handling"""
@@ -145,7 +147,7 @@ class VideoAccentAnalyzer:
145
 
146
  if max_duration:
147
  # More generous time limit for download
148
- ydl_opts['match_filter'] = lambda info: None if info.get('duration', 0) <= 200000 else "Video too long"
149
 
150
  try:
151
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
@@ -166,9 +168,10 @@ class VideoAccentAnalyzer:
166
  # Find downloaded file (try multiple patterns)
167
  video_path = None
168
  for file in os.listdir(self.temp_dir):
169
- if file.startswith("video.") and os.path.getsize(os.path.join(self.temp_dir, file)) > 1000: # At least 1KB
 
170
  potential_path = os.path.join(self.temp_dir, file)
171
- print(f"πŸ“ Found downloaded file: {file} ({os.path.getsize(potential_path)/1024:.1f}KB)")
172
 
173
  # Try basic validation - if ffprobe fails, still try to extract audio
174
  if self._is_valid_video(potential_path):
@@ -254,7 +257,7 @@ class VideoAccentAnalyzer:
254
  f.write(chunk)
255
  file_size += len(chunk)
256
 
257
- print(f"πŸ“ Downloaded {file_size / (1024*1024):.1f} MB")
258
 
259
  if self._is_valid_video(video_path):
260
  print("βœ… Direct download successful")
@@ -288,7 +291,7 @@ class VideoAccentAnalyzer:
288
  extraction_time = time.time() - start_time
289
 
290
  if result.returncode == 0 and os.path.exists(audio_path) and os.path.getsize(audio_path) > 1000:
291
- file_size = os.path.getsize(audio_path) / (1024*1024)
292
  print(f"βœ… Audio extracted successfully ({extraction_time:.1f}s, {file_size:.1f}MB)")
293
  return audio_path
294
  else:
@@ -298,7 +301,7 @@ class VideoAccentAnalyzer:
298
  # Try alternative extraction method
299
  print("πŸ”„ Trying alternative audio extraction...")
300
  cmd_alt = ['ffmpeg', '-i', video_path, '-vn', '-acodec', 'libmp3lame',
301
- '-ar', '16000', '-ac', '1', '-y', '-v', 'warning']
302
  if max_duration:
303
  cmd_alt.extend(['-t', str(max_duration)])
304
 
@@ -310,11 +313,11 @@ class VideoAccentAnalyzer:
310
  if result_alt.returncode == 0 and os.path.exists(audio_path_alt):
311
  # Convert mp3 to wav
312
  cmd_convert = ['ffmpeg', '-i', audio_path_alt, '-ar', '16000', '-ac', '1',
313
- audio_path, '-y', '-v', 'quiet']
314
  result_convert = subprocess.run(cmd_convert, capture_output=True, text=True, timeout=60)
315
 
316
  if result_convert.returncode == 0 and os.path.exists(audio_path):
317
- file_size = os.path.getsize(audio_path) / (1024*1024)
318
  print(f"βœ… Alternative extraction successful ({file_size:.1f}MB)")
319
  return audio_path
320
 
@@ -463,7 +466,7 @@ class VideoAccentAnalyzer:
463
  return {"error": f"File not found: {file_path}"}
464
 
465
  # Check file size
466
- file_size = os.path.getsize(file_path) / (1024*1024) # MB
467
  print(f"πŸ“ File size: {file_size:.1f} MB")
468
 
469
  video_filename = os.path.basename(file_path)
@@ -488,166 +491,121 @@ class VideoAccentAnalyzer:
488
 
489
  return results
490
 
491
- def display_results(self, results):
492
- """Enhanced results display with visualizations"""
493
- if 'error' in results:
494
- display(HTML(f"<div style='color: red; font-size: 16px; padding: 10px; border: 1px solid red; border-radius: 5px;'>❌ {results['error']}</div>"))
495
- return
496
-
497
- accent = results['predicted_accent']
498
- confidence = results['accent_confidence']
499
- english_conf = results['english_confidence']
500
- duration = results['audio_duration']
501
- processed_duration = results.get('processed_duration', duration)
502
- quality_score = results.get('audio_quality_score', 50)
503
-
504
- accent_display = self.accent_display_names.get(accent, accent.title())
505
-
506
- # Enhanced HTML display
507
- html = f"""
508
- <div style='border: 2px solid #4CAF50; border-radius: 10px; padding: 20px; margin: 10px 0; background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);'>
509
- <h2 style='color: #2E7D32; margin-top: 0; text-align: center;'>🎯 Accent Analysis Results</h2>
510
-
511
- <div style='display: flex; flex-wrap: wrap; gap: 20px; margin-bottom: 20px;'>
512
- <div style='flex: 1; min-width: 200px; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
513
- <h3 style='color: #1976D2; margin-top: 0;'>🎭 Primary Classification</h3>
514
- <p style='font-size: 20px; margin: 5px 0; font-weight: bold;'>{accent_display}</p>
515
- <p style='margin: 5px 0;'>Confidence: <strong style='color: {"#4CAF50" if confidence >= 70 else "#FF9800" if confidence >= 50 else "#F44336"};'>{confidence:.1f}%</strong></p>
516
- </div>
517
-
518
- <div style='flex: 1; min-width: 200px; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
519
- <h3 style='color: #1976D2; margin-top: 0;'>🌍 English Proficiency</h3>
520
- <p style='font-size: 18px; margin: 5px 0;'><strong style='color: {"#4CAF50" if english_conf >= 70 else "#FF9800" if english_conf >= 50 else "#F44336"};'>{english_conf:.1f}%</strong></p>
521
- <p style='margin: 5px 0;'>Audio Quality: <strong>{quality_score:.0f}/100</strong></p>
522
- </div>
523
-
524
- <div style='flex: 1; min-width: 200px; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
525
- <h3 style='color: #1976D2; margin-top: 0;'>⏱️ Processing Info</h3>
526
- <p style='margin: 5px 0;'>Duration: <strong>{duration:.1f}s</strong></p>
527
- <p style='margin: 5px 0;'>Processed: <strong>{processed_duration:.1f}s</strong></p>
528
- <p style='margin: 5px 0;'>Chunks: <strong>{results.get("chunks_analyzed", 1)}</strong></p>
529
- </div>
530
- </div>
531
-
532
- <div style='background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
533
- <h3 style='color: #1976D2; margin-top: 0;'>πŸ“Š Assessment</h3>
534
- <div style='display: flex; flex-wrap: wrap; gap: 10px;'>
535
- <span style='background: {"#4CAF50" if english_conf >= 70 else "#FF9800" if english_conf >= 50 else "#F44336"}; color: white; padding: 5px 10px; border-radius: 15px; font-size: 14px;'>
536
- {'βœ… Strong English Speaker' if english_conf >= 70 else '⚠️ Moderate English Confidence' if english_conf >= 50 else '❓ Low English Confidence'}
537
- </span>
538
- <span style='background: {"#4CAF50" if confidence >= 70 else "#FF9800" if confidence >= 50 else "#F44336"}; color: white; padding: 5px 10px; border-radius: 15px; font-size: 14px;'>
539
- {'🎯 High Confidence' if confidence >= 70 else 'πŸ€” Moderate Confidence' if confidence >= 50 else '❓ Low Confidence'}
540
- </span>
541
- <span style='background: {"#4CAF50" if quality_score >= 70 else "#FF9800" if quality_score >= 40 else "#F44336"}; color: white; padding: 5px 10px; border-radius: 15px; font-size: 14px;'>
542
- {'🎀 Good Audio Quality' if quality_score >= 70 else 'πŸ“’ Fair Audio Quality' if quality_score >= 40 else 'πŸ”‡ Poor Audio Quality'}
543
- </span>
544
- </div>
545
- </div>
546
- </div>
547
- """
548
- display(HTML(html))
549
-
550
- # Create probability breakdown visualization
551
- self._plot_probabilities(results['all_probabilities'])
552
-
553
- # Display detailed breakdown table
554
- prob_df = pd.DataFrame([
555
- {
556
- 'Accent': self.accent_display_names.get(accent, accent.title()),
557
- 'Probability': f"{prob:.1f}%",
558
- 'Confidence': '🟒 High' if prob >= 70 else '🟑 Medium' if prob >= 30 else 'πŸ”΄ Low'
559
- }
560
- for accent, prob in sorted(results['all_probabilities'].items(), key=lambda x: x[1], reverse=True)
561
- ])
562
 
563
- print("\nπŸ“Š Detailed Probability Breakdown:")
564
- display(prob_df)
 
 
 
565
 
566
- def _plot_probabilities(self, probabilities):
567
- """Create a visualization of accent probabilities"""
568
- try:
569
- plt.figure(figsize=(10, 6))
 
 
570
 
571
- accents = [self.accent_display_names.get(acc, acc.title()) for acc in probabilities.keys()]
572
- probs = list(probabilities.values())
573
 
574
- # Create color map
575
- colors = ['#4CAF50' if p == max(probs) else '#2196F3' if p >= 20 else '#FFC107' if p >= 10 else '#9E9E9E'
576
- for p in probs]
 
 
 
 
 
577
 
578
- bars = plt.bar(accents, probs, color=colors, alpha=0.8, edgecolor='black', linewidth=0.5)
579
 
580
- plt.title('Accent Classification Probabilities', fontsize=16, fontweight='bold', pad=20)
581
- plt.xlabel('Accent Type', fontsize=12)
582
- plt.ylabel('Probability (%)', fontsize=12)
583
- plt.xticks(rotation=45, ha='right')
584
- plt.grid(axis='y', alpha=0.3)
585
 
586
- # Add value labels on bars
587
- for bar, prob in zip(bars, probs):
588
- height = bar.get_height()
589
- plt.text(bar.get_x() + bar.get_width()/2., height + 0.5,
590
- f'{prob:.1f}%', ha='center', va='bottom', fontweight='bold')
591
 
592
- plt.tight_layout()
593
- plt.show()
 
594
 
595
- except Exception as e:
596
- print(f"⚠️ Could not create visualization: {e}")
597
 
598
- def batch_analyze(self, urls, max_duration=30):
599
- """Analyze multiple videos with progress tracking"""
600
- results = []
601
- failed_count = 0
 
602
 
603
- print(f"πŸš€ Starting batch analysis of {len(urls)} videos")
 
 
 
 
604
 
605
- for i, url in enumerate(urls, 1):
606
- print(f"\n{'='*60}")
607
- print(f"Processing video {i}/{len(urls)}")
608
 
609
- result = self.analyze_video_url(url, max_duration)
610
- result['video_index'] = i
611
 
612
- if 'error' in result:
613
- failed_count += 1
614
- print(f"❌ Failed: {result['error']}")
615
- else:
616
- print(f"βœ… Success: {result['predicted_accent']} ({result['accent_confidence']:.1f}%)")
617
 
618
- results.append(result)
619
- self.display_results(result)
 
 
620
 
621
- # Small delay to prevent overwhelming servers
622
- if i < len(urls):
623
- time.sleep(1)
624
 
625
- # Summary
626
- success_count = len(urls) - failed_count
627
- print(f"\nπŸ“ˆ Batch Analysis Summary:")
628
- print(f" βœ… Successful: {success_count}/{len(urls)}")
629
- print(f" ❌ Failed: {failed_count}/{len(urls)}")
630
 
631
- return results
 
632
 
633
- def export_results(self, results, filename="accent_analysis_results.json"):
634
- """Export results to JSON file"""
635
- try:
636
- with open(filename, 'w') as f:
637
- json.dump(results, f, indent=2, default=str)
638
- print(f"πŸ’Ύ Results exported to {filename}")
639
- except Exception as e:
640
- print(f"❌ Export failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
641
 
642
- def cleanup(self):
643
- """Clean up temporary files"""
644
- try:
645
- import shutil
646
- if os.path.exists(self.temp_dir):
647
- shutil.rmtree(self.temp_dir, ignore_errors=True)
648
- print("🧹 Cleaned up temporary files")
649
- except Exception as e:
650
- print(f"⚠️ Cleanup warning: {e}")
651
 
652
  # Helper Functions
653
  def show_examples():
@@ -669,6 +627,7 @@ def show_examples():
669
  print(" β€’ Multiple speakers may affect accuracy")
670
  print(" β€’ Model works best with sustained speech")
671
 
 
672
  def quick_test_url():
673
  """Interactive test for video URLs"""
674
  print("πŸ” Quick Test Mode for Video URLs")
@@ -699,10 +658,11 @@ def quick_test_url():
699
  finally:
700
  analyzer.cleanup()
701
 
 
702
  def demo_analysis():
703
  """Demo function with example usage"""
704
  print("🎬 Video Accent Analyzer Demo")
705
- print("="*50)
706
 
707
  # Initialize analyzer
708
  analyzer = VideoAccentAnalyzer()
@@ -722,5 +682,6 @@ def demo_analysis():
722
  print("3. analyzer.display_results(results)")
723
  print("4. analyzer.cleanup() # Clean up temporary files")
724
 
 
725
  # Show examples on import
726
- show_examples()
 
22
  # Suppress warnings for cleaner output
23
  warnings.filterwarnings('ignore')
24
 
25
+
26
  def install_if_missing(packages):
27
  """Install packages if they're not already available in Kaggle"""
28
  for package in packages:
 
35
  print(f"Installing {package}...")
36
  subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--quiet"])
37
 
38
+
39
  # Required packages for Kaggle
40
  required_packages = [
41
  "yt-dlp",
 
57
  import soundfile as sf
58
  import yt_dlp
59
 
60
+
61
  class VideoAccentAnalyzer:
62
  def __init__(self, model_name="dima806/multiple_accent_classification"):
63
  """Initialize the accent analyzer for Kaggle environment"""
 
106
 
107
  return True, url
108
 
 
109
  def trim_video(self, input_path, output_path, duration):
110
+ try:
111
+ cmd = ['ffmpeg', '-i', input_path, '-t', str(duration), '-c', 'copy', output_path, '-y']
112
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
113
+ if result.returncode == 0:
114
+ print(f"βœ‚οΈ Trimmed video to {duration} seconds")
115
+ return output_path
116
+ else:
117
+ print(f"❌ Trimming failed: {result.stderr}")
118
+ return input_path # fallback to original
119
+ except Exception as e:
120
+ print(f"❌ Trimming exception: {e}")
121
+ return input_path
122
 
123
  def download_video(self, url, max_duration=None):
124
  """Download video using yt-dlp with improved error handling"""
 
147
 
148
  if max_duration:
149
  # More generous time limit for download
150
+ ydl_opts['match_filter'] = lambda info: None if info.get('duration', 0) <= 200000 else "Video too long"
151
 
152
  try:
153
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
 
168
  # Find downloaded file (try multiple patterns)
169
  video_path = None
170
  for file in os.listdir(self.temp_dir):
171
+ if file.startswith("video.") and os.path.getsize(
172
+ os.path.join(self.temp_dir, file)) > 1000: # At least 1KB
173
  potential_path = os.path.join(self.temp_dir, file)
174
+ print(f"πŸ“ Found downloaded file: {file} ({os.path.getsize(potential_path) / 1024:.1f}KB)")
175
 
176
  # Try basic validation - if ffprobe fails, still try to extract audio
177
  if self._is_valid_video(potential_path):
 
257
  f.write(chunk)
258
  file_size += len(chunk)
259
 
260
+ print(f"πŸ“ Downloaded {file_size / (1024 * 1024):.1f} MB")
261
 
262
  if self._is_valid_video(video_path):
263
  print("βœ… Direct download successful")
 
291
  extraction_time = time.time() - start_time
292
 
293
  if result.returncode == 0 and os.path.exists(audio_path) and os.path.getsize(audio_path) > 1000:
294
+ file_size = os.path.getsize(audio_path) / (1024 * 1024)
295
  print(f"βœ… Audio extracted successfully ({extraction_time:.1f}s, {file_size:.1f}MB)")
296
  return audio_path
297
  else:
 
301
  # Try alternative extraction method
302
  print("πŸ”„ Trying alternative audio extraction...")
303
  cmd_alt = ['ffmpeg', '-i', video_path, '-vn', '-acodec', 'libmp3lame',
304
+ '-ar', '16000', '-ac', '1', '-y', '-v', 'warning']
305
  if max_duration:
306
  cmd_alt.extend(['-t', str(max_duration)])
307
 
 
313
  if result_alt.returncode == 0 and os.path.exists(audio_path_alt):
314
  # Convert mp3 to wav
315
  cmd_convert = ['ffmpeg', '-i', audio_path_alt, '-ar', '16000', '-ac', '1',
316
+ audio_path, '-y', '-v', 'quiet']
317
  result_convert = subprocess.run(cmd_convert, capture_output=True, text=True, timeout=60)
318
 
319
  if result_convert.returncode == 0 and os.path.exists(audio_path):
320
+ file_size = os.path.getsize(audio_path) / (1024 * 1024)
321
  print(f"βœ… Alternative extraction successful ({file_size:.1f}MB)")
322
  return audio_path
323
 
 
466
  return {"error": f"File not found: {file_path}"}
467
 
468
  # Check file size
469
+ file_size = os.path.getsize(file_path) / (1024 * 1024) # MB
470
  print(f"πŸ“ File size: {file_size:.1f} MB")
471
 
472
  video_filename = os.path.basename(file_path)
 
491
 
492
  return results
493
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
 
495
+ def display_results(self, results):
496
+ """Display results in text format"""
497
+ if 'error' in results:
498
+ print(f"❌ {results['error']}")
499
+ return
500
 
501
+ accent = results['predicted_accent']
502
+ confidence = results['accent_confidence']
503
+ english_conf = results['english_confidence']
504
+ duration = results['audio_duration']
505
+ processed_duration = results.get('processed_duration', duration)
506
+ quality_score = results.get('audio_quality_score', 50)
507
 
508
+ accent_display = self.accent_display_names.get(accent, accent.title())
 
509
 
510
+ print(f"\n=== Accent Analysis Results ===")
511
+ print(f"Predicted Accent: {accent_display}")
512
+ print(f"Confidence: {confidence:.1f}%")
513
+ print(f"English Confidence: {english_conf:.1f}%")
514
+ print(f"Audio Duration: {duration:.1f}s")
515
+ print(f"Processed Duration: {processed_duration:.1f}s")
516
+ print(f"Audio Quality: {quality_score:.0f}/100")
517
+ print(f"Chunks Analyzed: {results.get('chunks_analyzed', 1)}")
518
 
 
519
 
520
+ def _plot_probabilities(self, probabilities):
521
+ """Create a visualization of accent probabilities"""
522
+ try:
523
+ plt.figure(figsize=(10, 6))
 
524
 
525
+ accents = [self.accent_display_names.get(acc, acc.title()) for acc in probabilities.keys()]
526
+ probs = list(probabilities.values())
 
 
 
527
 
528
+ # Create color map
529
+ colors = ['#4CAF50' if p == max(probs) else '#2196F3' if p >= 20 else '#FFC107' if p >= 10 else '#9E9E9E'
530
+ for p in probs]
531
 
532
+ bars = plt.bar(accents, probs, color=colors, alpha=0.8, edgecolor='black', linewidth=0.5)
 
533
 
534
+ plt.title('Accent Classification Probabilities', fontsize=16, fontweight='bold', pad=20)
535
+ plt.xlabel('Accent Type', fontsize=12)
536
+ plt.ylabel('Probability (%)', fontsize=12)
537
+ plt.xticks(rotation=45, ha='right')
538
+ plt.grid(axis='y', alpha=0.3)
539
 
540
+ # Add value labels on bars
541
+ for bar, prob in zip(bars, probs):
542
+ height = bar.get_height()
543
+ plt.text(bar.get_x() + bar.get_width() / 2., height + 0.5,
544
+ f'{prob:.1f}%', ha='center', va='bottom', fontweight='bold')
545
 
546
+ plt.tight_layout()
547
+ plt.show()
 
548
 
549
+ except Exception as e:
550
+ print(f"⚠️ Could not create visualization: {e}")
551
 
 
 
 
 
 
552
 
553
+ def batch_analyze(self, urls, max_duration=30):
554
+ """Analyze multiple videos with progress tracking"""
555
+ results = []
556
+ failed_count = 0
557
 
558
+ print(f"πŸš€ Starting batch analysis of {len(urls)} videos")
 
 
559
 
560
+ for i, url in enumerate(urls, 1):
561
+ print(f"\n{'=' * 60}")
562
+ print(f"Processing video {i}/{len(urls)}")
 
 
563
 
564
+ result = self.analyze_video_url(url, max_duration)
565
+ result['video_index'] = i
566
 
567
+ if 'error' in result:
568
+ failed_count += 1
569
+ print(f"❌ Failed: {result['error']}")
570
+ else:
571
+ print(f"βœ… Success: {result['predicted_accent']} ({result['accent_confidence']:.1f}%)")
572
+
573
+ results.append(result)
574
+ self.display_results(result)
575
+
576
+ # Small delay to prevent overwhelming servers
577
+ if i < len(urls):
578
+ time.sleep(1)
579
+
580
+ # Summary
581
+ success_count = len(urls) - failed_count
582
+ print(f"\nπŸ“ˆ Batch Analysis Summary:")
583
+ print(f" βœ… Successful: {success_count}/{len(urls)}")
584
+ print(f" ❌ Failed: {failed_count}/{len(urls)}")
585
+
586
+ return results
587
+
588
+
589
+ def export_results(self, results, filename="accent_analysis_results.json"):
590
+ """Export results to JSON file"""
591
+ try:
592
+ with open(filename, 'w') as f:
593
+ json.dump(results, f, indent=2, default=str)
594
+ print(f"πŸ’Ύ Results exported to {filename}")
595
+ except Exception as e:
596
+ print(f"❌ Export failed: {e}")
597
+
598
+
599
+ def cleanup(self):
600
+ """Clean up temporary files"""
601
+ try:
602
+ import shutil
603
+ if os.path.exists(self.temp_dir):
604
+ shutil.rmtree(self.temp_dir, ignore_errors=True)
605
+ print("🧹 Cleaned up temporary files")
606
+ except Exception as e:
607
+ print(f"⚠️ Cleanup warning: {e}")
608
 
 
 
 
 
 
 
 
 
 
609
 
610
  # Helper Functions
611
  def show_examples():
 
627
  print(" β€’ Multiple speakers may affect accuracy")
628
  print(" β€’ Model works best with sustained speech")
629
 
630
+
631
  def quick_test_url():
632
  """Interactive test for video URLs"""
633
  print("πŸ” Quick Test Mode for Video URLs")
 
658
  finally:
659
  analyzer.cleanup()
660
 
661
+
662
  def demo_analysis():
663
  """Demo function with example usage"""
664
  print("🎬 Video Accent Analyzer Demo")
665
+ print("=" * 50)
666
 
667
  # Initialize analyzer
668
  analyzer = VideoAccentAnalyzer()
 
682
  print("3. analyzer.display_results(results)")
683
  print("4. analyzer.cleanup() # Clean up temporary files")
684
 
685
+
686
  # Show examples on import
687
+ show_examples()