Spaces:

ferhatbou
/

detect_English_language_speaking

Sleeping

App Files Files Community

ferhatbou committed on May 23

Commit

5cdbd8d

1 Parent(s): 9aa62c0

Fix import issue

Browse files

Files changed (3) hide show

app.py +159 -32
requirements.txt +10 -9
video_accent_analyzer.py +160 -62

app.py CHANGED Viewed

@@ -1,47 +1,174 @@
 import gradio as gr
 from video_accent_analyzer import VideoAccentAnalyzer
 analyzer = VideoAccentAnalyzer()
-def analyze_video(url=None, video_file=None):
-    if url:
-        result = analyzer.analyze_video_url(url)
-    elif video_file:
-        result = analyzer.analyze_local_video(video_file)
-    else:
-        return "❌ Please provide a video URL or upload a file"
-    if 'error' in result:
-        return f"❌ Error: {result['error']}"
-    markdown = f"## 🎯 Results\n"
-    markdown += f"**Predicted Accent:** {result['predicted_accent']}\n"
-    markdown += f"**Confidence:** {result['confidence']:.1f}%\n"
-    markdown += f"**English Confidence:** {result['english_confidence']:.1f}%\n\n"
-    markdown += "### 📊 Probability Breakdown:\n"
-    for accent, prob in sorted(result['probabilities'].items(), key=lambda x: x[1], reverse=True):
-        markdown += f"- {accent}: {prob:.1f}%\n"
-    return markdown
 # Create Gradio interface
-interface = gr.Interface(
-    fn=analyze_video,
-    inputs=[
-        gr.Textbox(label="Video URL (YouTube/Loom/Direct MP4)"),
-        gr.File(label="Or Upload Video File", type="filepath")
-    ],
-    outputs=gr.Markdown(label="Analysis Results"),
-    examples=[
-        ["https://www.youtube.com/watch?v=abc123 ", None],
-        [None, "example_video.mp4"]
-    ],
-    title="🎧 Video Accent Analyzer",
-    description="Detect English accents in videos from YouTube, Loom, or uploaded files"
-)
 if __name__ == "__main__":
-    interface.launch()

 import gradio as gr
 from video_accent_analyzer import VideoAccentAnalyzer
+import plotly.graph_objects as go
+import pandas as pd
 analyzer = VideoAccentAnalyzer()
+def create_plotly_chart(probabilities):
+    """Create an interactive Plotly bar chart for accent probabilities"""
+    accents = [analyzer.accent_display_names.get(acc, acc.title()) for acc in probabilities.keys()]
+    probs = list(probabilities.values())
+    colors = ['#4CAF50' if p == max(probs) else '#2196F3' if p >= 20
+    else '#FFC107' if p >= 10 else '#9E9E9E' for p in probs]
+    fig = go.Figure(data=[
+        go.Bar(
+            x=accents,
+            y=probs,
+            marker_color=colors,
+            text=[f'{p:.1f}%' for p in probs],
+            textposition='auto',
+        )
+    ])
+    fig.update_layout(
+        title='Accent Probability Distribution',
+        xaxis_title='Accent Type',
+        yaxis_title='Probability (%)',
+        template='plotly_white',
+        yaxis_range=[0, 100],
+    )
+    return fig
+def analyze_video(url=None, video_file=None, duration=30):
+    """Analyze video from URL or file with enhanced output"""
+    try:
+        if not url and not video_file:
+            return (
+                "### ❌ Error\nPlease provide either a video URL or upload a video file.",
+                None
+            )
+        if url:
+            result = analyzer.analyze_video_url(url, max_duration=duration)
+        else:
+            result = analyzer.analyze_local_video(video_file, max_duration=duration)
+        if 'error' in result:
+            return (
+                f"### ❌ Error\n{result['error']}",
+                None
+            )
+        # Create markdown output
+        markdown = f"""
+    ### 🎯 Analysis Results
+    **Primary Classification:**
+    - 🗣️ Predicted Accent: {analyzer.accent_display_names.get(result['predicted_accent'])}
+    - 📊 Confidence: {result['accent_confidence']:.1f}%
+    - 🌍 English Confidence: {result['english_confidence']:.1f}%
+    **Audio Analysis:**
+    - ⏱️ Duration: {result['audio_duration']:.1f} seconds
+    - 📊 Quality Score: {result.get('audio_quality_score', 'N/A')}
+    - 🎵 Chunks Analyzed: {result.get('chunks_analyzed', 1)}
+    **Assessment:**
+    - {'✅ Strong English Speaker' if result['english_confidence'] >= 70 else '⚠️ Moderate English Confidence' if result['english_confidence'] >= 50 else '❓ Low English Confidence'}
+    - {'🎯 High Accent Confidence' if result['accent_confidence'] >= 70 else '🤔 Moderate Accent Confidence' if result['accent_confidence'] >= 50 else '❓ Low Accent Confidence'}
+    """
+        # Create visualization
+        fig = create_plotly_chart(result['all_probabilities'])
+        return markdown, fig
+    except Exception as e:
+        return f"### ❌ Error\nAn unexpected error occurred: {str(e)}", None
 # Create Gradio interface
+# Custom stylesheet handed to gr.Blocks below: sets the container font and
+# gives buttons a gradient background with a hover effect.
+css = """
+    .gradio-container {
+        font-family: 'IBM Plex Sans', sans-serif;
+    }
+    .gr-button {
+        background: linear-gradient(45deg, #4CAF50, #2196F3);
+        border: none;
+    }
+    .gr-button:hover {
+        background: linear-gradient(45deg, #2196F3, #4CAF50);
+        transform: scale(1.02);
+    }
+    """
+# Build the page: intro text, an input column, an output column, the button
+# wiring, and two clickable example rows.
+with gr.Blocks(css=css) as interface:
+    gr.Markdown("""
+        # 🎧 Video Accent Analyzer
+        Analyze English accents in videos from various sources:
+        - YouTube videos
+        - Loom recordings
+        - Direct video links
+        - Uploaded video files
+        ### 💡 Tips
+        - Keep videos under 2 minutes for best results
+        - Ensure clear audio quality
+        - Multiple speakers may affect accuracy
+        """)
+    with gr.Row():
+        # Left column: the three inputs of analyze_video plus the trigger button.
+        with gr.Column():
+            url_input = gr.Textbox(
+                label="Video URL",
+                placeholder="Enter YouTube, Loom, or direct video URL"
+            )
+            video_input = gr.File(
+                label="Or Upload Video",
+                file_types=["video"]
+            )
+            # Slider value is passed to analyze_video as `duration` (10-120 in steps of 10).
+            duration = gr.Slider(
+                minimum=10,
+                maximum=120,
+                value=30,
+                step=10,
+                label="Maximum Duration (seconds)"
+            )
+            analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
+        # Right column: the (markdown, figure) pair returned by analyze_video.
+        with gr.Column():
+            output_text = gr.Markdown(label="Analysis Results")
+            output_plot = gr.Plot(label="Accent Distribution")
+    # Input/output order here must match analyze_video's parameters and return tuple.
+    analyze_btn.click(
+        fn=analyze_video,
+        inputs=[url_input, video_input, duration],
+        outputs=[output_text, output_plot]
+    )
+    # Each example row is (url, file, duration).
+    # NOTE(review): `outputs` is given without `fn`; presumably the examples only
+    # pre-fill the inputs -- confirm against the installed Gradio version.
+    gr.Examples(
+        examples=[
+            ["https://www.youtube.com/watch?v=NO5SbsvIjHE", None, 30],
+            ["https://www.youtube.com/watch?v=YQHsXMglC9A", None, 30],
+        ],
+        inputs=[url_input, video_input, duration],
+        outputs=[output_text, output_plot],
+        label="Example Videos"
+    )
+# Add requirements.txt
+# NOTE(review): this runs at import time, so every start of the app overwrites
+# "requirements.txt" in the current working directory with the text below.
+# Each package line is written with its leading indentation and the file
+# starts with a blank line -- confirm the installer tolerates that, or move
+# this list into the checked-in requirements.txt instead.
+requirements = """
+    gradio>=4.0.0
+    plotly>=5.0.0
+    yt-dlp
+    librosa
+    soundfile
+    transformers
+    torch
+    ffmpeg-python
+    matplotlib
+    seaborn
+    """
+with open("requirements.txt", "w") as f:
+    f.write(requirements)
+# Launch the Gradio app only when the module is run as a script.
 if __name__ == "__main__":
+    interface.launch()

requirements.txt CHANGED Viewed

@@ -1,9 +1,10 @@
-yt-dlp
-librosa
-soundfile
-transformers
-torch
-gradio
-ffmpeg-python
-matplotlib
-seaborn

+ gradio>=4.0.0
+ plotly>=5.0.0
+ yt-dlp
+ librosa
+ soundfile
+ transformers
+ torch
+ ffmpeg-python
+ matplotlib
+ seaborn

video_accent_analyzer.py CHANGED Viewed

@@ -1,3 +1,8 @@
 import os
 import sys
@@ -17,7 +22,6 @@ import seaborn as sns
 # Suppress warnings for cleaner output
 warnings.filterwarnings('ignore')
 def install_if_missing(packages):
     """Install packages if they're not already available in Kaggle"""
     for package in packages:
@@ -30,7 +34,6 @@ def install_if_missing(packages):
             print(f"Installing {package}...")
             subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--quiet"])
 # Required packages for Kaggle
 required_packages = [
     "yt-dlp",
@@ -52,7 +55,6 @@ import librosa
 import soundfile as sf
 import yt_dlp
 class VideoAccentAnalyzer:
     def __init__(self, model_name="dima806/multiple_accent_classification"):
         """Initialize the accent analyzer for Kaggle environment"""
@@ -101,6 +103,21 @@ class VideoAccentAnalyzer:
         return True, url
     def download_video(self, url, max_duration=None):
         """Download video using yt-dlp with improved error handling"""
         is_valid, result = self._validate_url(url)
@@ -111,55 +128,106 @@ class VideoAccentAnalyzer:
         url = result
         output_path = os.path.join(self.temp_dir, "video.%(ext)s")
         ydl_opts = {
             'outtmpl': output_path,
-            'format': 'best[height<=720]/best',  # Limit quality for faster download
-            'quiet': True,
-            'no_warnings': True,
-            'socket_timeout': 30,
-            'retries': 3,
         }
         if max_duration:
-            ydl_opts['match_filter'] = lambda info: None if info.get('duration',
-                                                                     0) <= max_duration * 2 else "Video too long"
         try:
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                 print(f"📥 Downloading video from: {url}")
                 start_time = time.time()
                 ydl.download([url])
                 download_time = time.time() - start_time
-                # Find downloaded file
                 for file in os.listdir(self.temp_dir):
-                    if file.startswith("video."):
-                        video_path = os.path.join(self.temp_dir, file)
-                        if self._is_valid_video(video_path):
-                            print(f"✅ Downloaded valid video: {file} ({download_time:.1f}s)")
-                            return video_path
                         else:
-                            print("❌ Downloaded file is not a valid video")
-                            return None
         except Exception as e:
             print(f"⚠️ yt-dlp failed: {e}")
             return self._try_direct_download(url)
     def _is_valid_video(self, file_path):
-        """Verify video file has valid structure"""
         try:
             result = subprocess.run(
-                ['ffprobe', '-v', 'error', '-show_format', '-show_streams', file_path],
-                capture_output=True, text=True, timeout=10
             )
-            return result.returncode == 0
         except subprocess.TimeoutExpired:
-            print("⚠️ Video validation timed out")
-            return False
         except Exception as e:
-            print(f"⚠️ Video validation error: {e}")
-            return False
     def _try_direct_download(self, url):
         """Enhanced fallback for direct video URLs"""
@@ -186,7 +254,7 @@ class VideoAccentAnalyzer:
                         f.write(chunk)
                         file_size += len(chunk)
-            print(f"📁 Downloaded {file_size / (1024 * 1024):.1f} MB")
             if self._is_valid_video(video_path):
                 print("✅ Direct download successful")
@@ -203,8 +271,9 @@ class VideoAccentAnalyzer:
         """Extract audio with improved error handling and progress"""
         audio_path = os.path.join(self.temp_dir, "audio.wav")
         cmd = ['ffmpeg', '-i', video_path, '-vn', '-acodec', 'pcm_s16le',
-               '-ar', '16000', '-ac', '1', '-y', '-loglevel', 'error']
         if max_duration:
             cmd.extend(['-t', str(max_duration)])
@@ -213,15 +282,43 @@ class VideoAccentAnalyzer:
         try:
             print(f"🎵 Extracting audio (max {max_duration}s)...")
             start_time = time.time()
-            result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
             extraction_time = time.time() - start_time
-            if result.returncode == 0 and os.path.exists(audio_path):
-                file_size = os.path.getsize(audio_path) / (1024 * 1024)
                 print(f"✅ Audio extracted successfully ({extraction_time:.1f}s, {file_size:.1f}MB)")
                 return audio_path
             else:
-                raise Exception(f"FFmpeg error: {result.stderr}")
         except subprocess.TimeoutExpired:
             print("❌ Audio extraction timed out")
@@ -366,7 +463,7 @@ class VideoAccentAnalyzer:
             return {"error": f"File not found: {file_path}"}
         # Check file size
-        file_size = os.path.getsize(file_path) / (1024 * 1024)  # MB
         print(f"📁 File size: {file_size:.1f} MB")
         video_filename = os.path.basename(file_path)
@@ -394,8 +491,7 @@ class VideoAccentAnalyzer:
     def display_results(self, results):
         """Enhanced results display with visualizations"""
         if 'error' in results:
-            display(HTML(
-                f"<div style='color: red; font-size: 16px; padding: 10px; border: 1px solid red; border-radius: 5px;'>❌ {results['error']}</div>"))
             return
         accent = results['predicted_accent']
@@ -411,20 +507,20 @@ class VideoAccentAnalyzer:
         html = f"""
         <div style='border: 2px solid #4CAF50; border-radius: 10px; padding: 20px; margin: 10px 0; background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);'>
             <h2 style='color: #2E7D32; margin-top: 0; text-align: center;'>🎯 Accent Analysis Results</h2>
             <div style='display: flex; flex-wrap: wrap; gap: 20px; margin-bottom: 20px;'>
                 <div style='flex: 1; min-width: 200px; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
                     <h3 style='color: #1976D2; margin-top: 0;'>🎭 Primary Classification</h3>
                     <p style='font-size: 20px; margin: 5px 0; font-weight: bold;'>{accent_display}</p>
                     <p style='margin: 5px 0;'>Confidence: <strong style='color: {"#4CAF50" if confidence >= 70 else "#FF9800" if confidence >= 50 else "#F44336"};'>{confidence:.1f}%</strong></p>
                 </div>
                 <div style='flex: 1; min-width: 200px; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
                     <h3 style='color: #1976D2; margin-top: 0;'>🌍 English Proficiency</h3>
                     <p style='font-size: 18px; margin: 5px 0;'><strong style='color: {"#4CAF50" if english_conf >= 70 else "#FF9800" if english_conf >= 50 else "#F44336"};'>{english_conf:.1f}%</strong></p>
                     <p style='margin: 5px 0;'>Audio Quality: <strong>{quality_score:.0f}/100</strong></p>
                 </div>
                 <div style='flex: 1; min-width: 200px; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
                     <h3 style='color: #1976D2; margin-top: 0;'>⏱️ Processing Info</h3>
                     <p style='margin: 5px 0;'>Duration: <strong>{duration:.1f}s</strong></p>
@@ -432,7 +528,7 @@ class VideoAccentAnalyzer:
                     <p style='margin: 5px 0;'>Chunks: <strong>{results.get("chunks_analyzed", 1)}</strong></p>
                 </div>
             </div>
             <div style='background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
                 <h3 style='color: #1976D2; margin-top: 0;'>📊 Assessment</h3>
                 <div style='display: flex; flex-wrap: wrap; gap: 10px;'>
@@ -477,7 +573,7 @@ class VideoAccentAnalyzer:
             # Create color map
             colors = ['#4CAF50' if p == max(probs) else '#2196F3' if p >= 20 else '#FFC107' if p >= 10 else '#9E9E9E'
-                      for p in probs]
             bars = plt.bar(accents, probs, color=colors, alpha=0.8, edgecolor='black', linewidth=0.5)
@@ -490,8 +586,8 @@ class VideoAccentAnalyzer:
             # Add value labels on bars
             for bar, prob in zip(bars, probs):
                 height = bar.get_height()
-                plt.text(bar.get_x() + bar.get_width() / 2., height + 0.5,
-                         f'{prob:.1f}%', ha='center', va='bottom', fontweight='bold')
             plt.tight_layout()
             plt.show()
@@ -507,7 +603,7 @@ class VideoAccentAnalyzer:
         print(f"🚀 Starting batch analysis of {len(urls)} videos")
         for i, url in enumerate(urls, 1):
-            print(f"\n{'=' * 60}")
             print(f"Processing video {i}/{len(urls)}")
             result = self.analyze_video_url(url, max_duration)
@@ -553,7 +649,6 @@ class VideoAccentAnalyzer:
         except Exception as e:
             print(f"⚠️ Cleanup warning: {e}")
 # Helper Functions
 def show_examples():
     """Show usage examples"""
@@ -574,36 +669,40 @@ def show_examples():
     print("  • Multiple speakers may affect accuracy")
     print("  • Model works best with sustained speech")
-def quick_test_local():
-    """Interactive test for local files"""
-    print("🔍 Quick Test Mode for Local Files")
-    print("📁 Common Kaggle input paths:")
-    print("   /kaggle/input/your-dataset/video.mp4")
-    print("   /kaggle/input/video-files/sample.mp4")
-    file_path = input("\n📎 Enter full path to your local video: ").strip()
-    if not file_path:
-        print("❌ No path provided.")
         return None
-    if not os.path.exists(file_path):
-        print(f"❌ File not found: {file_path}")
-        return None
     analyzer = VideoAccentAnalyzer()
     try:
-        results = analyzer.analyze_local_video(file_path)
         analyzer.display_results(results)
         return results
     finally:
         analyzer.cleanup()
 def demo_analysis():
     """Demo function with example usage"""
     print("🎬 Video Accent Analyzer Demo")
-    print("=" * 50)
     # Initialize analyzer
     analyzer = VideoAccentAnalyzer()
@@ -623,6 +722,5 @@ def demo_analysis():
     print("3. analyzer.display_results(results)")
     print("4. analyzer.cleanup()  # Clean up temporary files")
 # Show examples on import
 show_examples()

+"""
+Enhanced Video Accent Analyzer
+Supports YouTube, Loom, direct MP4 links, and local video files with improved error handling and features.
+"""
 import os
 import sys
 # Suppress warnings for cleaner output
 warnings.filterwarnings('ignore')
 def install_if_missing(packages):
     """Install packages if they're not already available in Kaggle"""
     for package in packages:
             print(f"Installing {package}...")
             subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--quiet"])
 # Required packages for Kaggle
 required_packages = [
     "yt-dlp",
 import soundfile as sf
 import yt_dlp
 class VideoAccentAnalyzer:
     def __init__(self, model_name="dima806/multiple_accent_classification"):
         """Initialize the accent analyzer for Kaggle environment"""
         return True, url
+    def trim_video(self, input_path, output_path, duration):
+       try:
+        cmd = ['ffmpeg', '-i', input_path, '-t', str(duration), '-c', 'copy', output_path, '-y']
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
+        if result.returncode == 0:
+            print(f"✂️ Trimmed video to {duration} seconds")
+            return output_path
+        else:
+            print(f"❌ Trimming failed: {result.stderr}")
+            return input_path  # fallback to original
+       except Exception as e:
+        print(f"❌ Trimming exception: {e}")
+        return input_path
     def download_video(self, url, max_duration=None):
         """Download video using yt-dlp with improved error handling"""
         is_valid, result = self._validate_url(url)
         url = result
         output_path = os.path.join(self.temp_dir, "video.%(ext)s")
+        # Enhanced yt-dlp options for better compatibility
         ydl_opts = {
             'outtmpl': output_path,
+            'format': 'worst[ext=mp4]/worst',  # Use worst quality for faster download and better compatibility
+            'quiet': False,  # Show some output for debugging
+            'no_warnings': False,
+            'socket_timeout': 60,
+            'retries': 5,
+            'fragment_retries': 5,
+            'extract_flat': False,
+            'writeinfojson': False,
+            'writethumbnail': False,
+            'writesubtitles': False,
         }
         if max_duration:
+            # More generous time limit for download
+         ydl_opts['match_filter'] = lambda info: None if info.get('duration', 0) <= 200000 else "Video too long"
         try:
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                 print(f"📥 Downloading video from: {url}")
                 start_time = time.time()
+                # Get video info first
+                try:
+                    info = ydl.extract_info(url, download=False)
+                    print(f"📺 Found video: {info.get('title', 'Unknown')} ({info.get('duration', 0)}s)")
+                except Exception as e:
+                    print(f"⚠️ Could not extract video info: {e}")
+                # Download the video
                 ydl.download([url])
                 download_time = time.time() - start_time
+                # Find downloaded file (try multiple patterns)
+                video_path = None
                 for file in os.listdir(self.temp_dir):
+                    if file.startswith("video.") and os.path.getsize(os.path.join(self.temp_dir, file)) > 1000:  # At least 1KB
+                        potential_path = os.path.join(self.temp_dir, file)
+                        print(f"📁 Found downloaded file: {file} ({os.path.getsize(potential_path)/1024:.1f}KB)")
+                        # Try basic validation - if ffprobe fails, still try to extract audio
+                        if self._is_valid_video(potential_path):
+                            print(f"✅ Video validation passed: {file}")
+                            video_path = potential_path
+                            break
                         else:
+                            print(f"⚠️ Video validation failed, but continuing with: {file}")
+                            video_path = potential_path  # Still try to use it
+                            break
+                if video_path:
+                    print(f"✅ Downloaded video: {os.path.basename(video_path)} ({download_time:.1f}s)")
+                    return video_path
+                else:
+                    print("❌ No video files found after download")
+                    return None
         except Exception as e:
             print(f"⚠️ yt-dlp failed: {e}")
             return self._try_direct_download(url)
     def _is_valid_video(self, file_path):
+        """Leniently check whether ``file_path`` looks like a readable media file.
+
+        Returns False when the file is missing or smaller than 1000 bytes.
+        Returns True when ffprobe exits with status 0 and its JSON output has a
+        ``format`` section containing ``duration``. Otherwise the result is
+        whether ffmpeg can read the first second of the file (exit status 0).
+        A timeout or any other exception is treated as valid (returns True).
+
+        NOTE(review): uses ``json``, ``os`` and ``subprocess``; the module-level
+        imports are not visible in this hunk -- confirm ``json`` is imported.
+        """
         try:
+            # First check if file exists and has reasonable size
+            if not os.path.exists(file_path) or os.path.getsize(file_path) < 1000:
+                return False
+            # Try ffprobe with more lenient settings
             result = subprocess.run(
+                ['ffprobe', '-v', 'quiet', '-print_format', 'json', '-show_format', file_path],
+                capture_output=True, text=True, timeout=15
+            )
+            if result.returncode == 0:
+                try:
+                    # Try to parse the JSON output
+                    info = json.loads(result.stdout)
+                    # Check if we have format information
+                    if 'format' in info and 'duration' in info['format']:
+                        return True
+                except json.JSONDecodeError:
+                    # Unparseable ffprobe output: fall through to the ffmpeg check.
+                    pass
+            # If ffprobe fails, try a simpler check - just see if ffmpeg can read it
+            # ('-t 1 -f null -' reads one second and discards the output).
+            result2 = subprocess.run(
+                ['ffmpeg', '-i', file_path, '-t', '1', '-f', 'null', '-', '-v', 'quiet'],
+                capture_output=True, text=True, timeout=15
             )
+            return result2.returncode == 0
         except subprocess.TimeoutExpired:
+            print("⚠️ Video validation timed out, assuming valid")
+            return True  # If validation times out, assume it's valid
         except Exception as e:
+            print(f"⚠️ Video validation error: {e}, assuming valid")
+            return True  # If validation fails, assume it's valid and let audio extraction handle it
     def _try_direct_download(self, url):
         """Enhanced fallback for direct video URLs"""
                         f.write(chunk)
                         file_size += len(chunk)
+            print(f"📁 Downloaded {file_size / (1024*1024):.1f} MB")
             if self._is_valid_video(video_path):
                 print("✅ Direct download successful")
         """Extract audio with improved error handling and progress"""
         audio_path = os.path.join(self.temp_dir, "audio.wav")
+        # Enhanced ffmpeg command with better error handling
         cmd = ['ffmpeg', '-i', video_path, '-vn', '-acodec', 'pcm_s16le',
+               '-ar', '16000', '-ac', '1', '-y', '-v', 'warning']
         if max_duration:
             cmd.extend(['-t', str(max_duration)])
         try:
             print(f"🎵 Extracting audio (max {max_duration}s)...")
             start_time = time.time()
+            # Run ffmpeg with more detailed output for debugging
+            result = subprocess.run(cmd, capture_output=True, text=True, timeout=180)
             extraction_time = time.time() - start_time
+            if result.returncode == 0 and os.path.exists(audio_path) and os.path.getsize(audio_path) > 1000:
+                file_size = os.path.getsize(audio_path) / (1024*1024)
                 print(f"✅ Audio extracted successfully ({extraction_time:.1f}s, {file_size:.1f}MB)")
                 return audio_path
             else:
+                print(f"❌ FFmpeg stderr: {result.stderr}")
+                print(f"❌ FFmpeg stdout: {result.stdout}")
+                # Try alternative extraction method
+                print("🔄 Trying alternative audio extraction...")
+                cmd_alt = ['ffmpeg', '-i', video_path, '-vn', '-acodec', 'libmp3lame',
+                          '-ar', '16000', '-ac', '1', '-y', '-v', 'warning']
+                if max_duration:
+                    cmd_alt.extend(['-t', str(max_duration)])
+                audio_path_alt = os.path.join(self.temp_dir, "audio.mp3")
+                cmd_alt.append(audio_path_alt)
+                result_alt = subprocess.run(cmd_alt, capture_output=True, text=True, timeout=180)
+                if result_alt.returncode == 0 and os.path.exists(audio_path_alt):
+                    # Convert mp3 to wav
+                    cmd_convert = ['ffmpeg', '-i', audio_path_alt, '-ar', '16000', '-ac', '1',
+                                  audio_path, '-y', '-v', 'quiet']
+                    result_convert = subprocess.run(cmd_convert, capture_output=True, text=True, timeout=60)
+                    if result_convert.returncode == 0 and os.path.exists(audio_path):
+                        file_size = os.path.getsize(audio_path) / (1024*1024)
+                        print(f"✅ Alternative extraction successful ({file_size:.1f}MB)")
+                        return audio_path
+                raise Exception(f"Both extraction methods failed. FFmpeg error: {result.stderr}")
         except subprocess.TimeoutExpired:
             print("❌ Audio extraction timed out")
             return {"error": f"File not found: {file_path}"}
         # Check file size
+        file_size = os.path.getsize(file_path) / (1024*1024)  # MB
         print(f"📁 File size: {file_size:.1f} MB")
         video_filename = os.path.basename(file_path)
     def display_results(self, results):
         """Enhanced results display with visualizations"""
         if 'error' in results:
+            display(HTML(f"<div style='color: red; font-size: 16px; padding: 10px; border: 1px solid red; border-radius: 5px;'>❌ {results['error']}</div>"))
             return
         accent = results['predicted_accent']
         html = f"""
         <div style='border: 2px solid #4CAF50; border-radius: 10px; padding: 20px; margin: 10px 0; background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);'>
             <h2 style='color: #2E7D32; margin-top: 0; text-align: center;'>🎯 Accent Analysis Results</h2>
             <div style='display: flex; flex-wrap: wrap; gap: 20px; margin-bottom: 20px;'>
                 <div style='flex: 1; min-width: 200px; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
                     <h3 style='color: #1976D2; margin-top: 0;'>🎭 Primary Classification</h3>
                     <p style='font-size: 20px; margin: 5px 0; font-weight: bold;'>{accent_display}</p>
                     <p style='margin: 5px 0;'>Confidence: <strong style='color: {"#4CAF50" if confidence >= 70 else "#FF9800" if confidence >= 50 else "#F44336"};'>{confidence:.1f}%</strong></p>
                 </div>
                 <div style='flex: 1; min-width: 200px; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
                     <h3 style='color: #1976D2; margin-top: 0;'>🌍 English Proficiency</h3>
                     <p style='font-size: 18px; margin: 5px 0;'><strong style='color: {"#4CAF50" if english_conf >= 70 else "#FF9800" if english_conf >= 50 else "#F44336"};'>{english_conf:.1f}%</strong></p>
                     <p style='margin: 5px 0;'>Audio Quality: <strong>{quality_score:.0f}/100</strong></p>
                 </div>
                 <div style='flex: 1; min-width: 200px; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
                     <h3 style='color: #1976D2; margin-top: 0;'>⏱️ Processing Info</h3>
                     <p style='margin: 5px 0;'>Duration: <strong>{duration:.1f}s</strong></p>
                     <p style='margin: 5px 0;'>Chunks: <strong>{results.get("chunks_analyzed", 1)}</strong></p>
                 </div>
             </div>
             <div style='background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
                 <h3 style='color: #1976D2; margin-top: 0;'>📊 Assessment</h3>
                 <div style='display: flex; flex-wrap: wrap; gap: 10px;'>
             # Create color map
             colors = ['#4CAF50' if p == max(probs) else '#2196F3' if p >= 20 else '#FFC107' if p >= 10 else '#9E9E9E'
+                     for p in probs]
             bars = plt.bar(accents, probs, color=colors, alpha=0.8, edgecolor='black', linewidth=0.5)
             # Add value labels on bars
             for bar, prob in zip(bars, probs):
                 height = bar.get_height()
+                plt.text(bar.get_x() + bar.get_width()/2., height + 0.5,
+                        f'{prob:.1f}%', ha='center', va='bottom', fontweight='bold')
             plt.tight_layout()
             plt.show()
         print(f"🚀 Starting batch analysis of {len(urls)} videos")
         for i, url in enumerate(urls, 1):
+            print(f"\n{'='*60}")
             print(f"Processing video {i}/{len(urls)}")
             result = self.analyze_video_url(url, max_duration)
         except Exception as e:
             print(f"⚠️ Cleanup warning: {e}")
 # Helper Functions
 def show_examples():
     """Show usage examples"""
     print("  • Multiple speakers may affect accuracy")
     print("  • Model works best with sustained speech")
+def quick_test_url():
+    """Interactively analyze one video URL read from standard input.
+
+    Prompts for a URL and an optional maximum duration (defaults to 20 when
+    the answer is empty or not an integer), runs the analysis, displays the
+    results and always cleans up the analyzer.
+
+    Returns:
+        The results object from ``analyze_video_url``, or ``None`` when no
+        URL was entered.
+    """
+    print("🔍 Quick Test Mode for Video URLs")
+    print("🎯 Supported: YouTube, Loom, Direct MP4 links")
+    print("💡 Examples:")
+    print("   YouTube: https://youtube.com/watch?v=VIDEO_ID")
+    print("   Loom: https://www.loom.com/share/VIDEO_ID")
+    print("   Direct: https://example.com/video.mp4")
+    url = input("\n📎 Enter your video URL (Loom, YouTube, etc.): ").strip()
+    if not url:
+        print("❌ No URL provided.")
         return None
+    max_duration = input("⏱️ Max duration in seconds (default 20): ").strip()
+    # Empty input and non-integer input both end up as 20 seconds.
+    try:
+        max_duration = int(max_duration) if max_duration else 20
+    except ValueError:
+        max_duration = 20
+        print(f"⚠️ Invalid duration, using {max_duration} seconds")
     analyzer = VideoAccentAnalyzer()
     try:
+        print(f"\n🚀 Starting analysis...")
+        results = analyzer.analyze_video_url(url, max_duration=max_duration)
         analyzer.display_results(results)
         return results
     finally:
+        # Runs whether or not the analysis raised.
         analyzer.cleanup()
 def demo_analysis():
     """Demo function with example usage"""
     print("🎬 Video Accent Analyzer Demo")
+    print("="*50)
     # Initialize analyzer
     analyzer = VideoAccentAnalyzer()
     print("3. analyzer.display_results(results)")
     print("4. analyzer.cleanup()  # Clean up temporary files")
 # Show examples on import
 show_examples()