Spaces:

ferhatbou
/

detect_English_language_speaking

Sleeping

App Files Files Community

ferhatbou commited on May 23

Commit

6cf7dc2

1 Parent(s): 1d0abcd

Fix import issue

Browse files

Files changed (6) hide show

README.md +32 -0
VideoAccentAnalyzer.py +0 -0
app.py +1 -1
requirements.txt +7 -0
setup.sh +3 -0
video_accent_analyzer.py +628 -0

README.md CHANGED Viewed

@@ -12,3 +12,35 @@ short_description: 'a  tools to automate real hiring decisions. '
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# 🎧 Video Accent Analyzer
+Analyze accents in videos from YouTube, Loom, or uploaded files. Supports multiple English accents.
+## Features
+- YouTube video analysis
+- Loom video analysis
+- Direct MP4 link support
+- Local file upload
+- Multiple English accent detection
+## Requirements
+- Python 3.8+
+- FFmpeg
+- PyTorch
+- Transformers
+## Usage
+1. Enter a video URL or upload a file
+2. Get instant accent analysis results
+"""
+Enhanced  Video Accent Analyzer
+Supports YouTube, Loom, direct MP4 links, and local video files with improved error handling and features.
+Usage:
+    analyzer = VideoAccentAnalyzer()
+    results = analyzer.analyze_video_url("https://example.com/video.mp4", max_duration=30)
+    or
+    results = analyzer.analyze_local_video("/local/input/video.mp4", max_duration=30)
+    analyzer.display_results(results)
+"""

VideoAccentAnalyzer.py DELETED Viewed

File without changes

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import gradio as gr
-from VideoAccentAnalyzer import VideoAccentAnalyzer
 import ffmpeg
 import os

 import gradio as gr
+from video_accent_analyzer import VideoAccentAnalyzer
 import ffmpeg
 import os

requirements.txt CHANGED Viewed

	@@ -0,0 +1,7 @@

+yt-dlp
+librosa
+soundfile
+transformers
+torch
+gradio
+ffmpeg-python

setup.sh CHANGED Viewed

	@@ -0,0 +1,3 @@

+#!/bin/bash
+apt-get update && apt-get install -y ffmpeg
+pip install -r requirements.txt

video_accent_analyzer.py ADDED Viewed

	@@ -0,0 +1,628 @@

+import os
+import sys
+import tempfile
+import subprocess
+import requests
+import json
+import warnings
+import time
+from pathlib import Path
+from urllib.parse import urlparse
+from IPython.display import display, HTML, Audio
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+# Suppress warnings for cleaner output.
+# NOTE: this filter is installed at import time and applies to the whole process,
+# so warnings raised by any other code running alongside this module are hidden too.
+warnings.filterwarnings('ignore')
+def install_if_missing(packages):
+    """Install packages if they're not already available in Kaggle"""
+    for package in packages:
+        try:
+            package_name = package.split('==')[0].replace('-', '_')
+            if package_name == 'yt_dlp':
+                package_name = 'yt_dlp'
+            __import__(package_name)
+        except ImportError:
+            print(f"Installing {package}...")
+            subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--quiet"])
+# Required packages for Kaggle
+required_packages = [
+    "yt-dlp",
+    "librosa",
+    "soundfile",
+    "transformers",
+    "torch",
+    "matplotlib",
+    "seaborn"
+]
+print("🔧 Setting up environment...")
+install_if_missing(required_packages)
+# Now import the packages
+import torch
+from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification
+import librosa
+import soundfile as sf
+import yt_dlp
+class VideoAccentAnalyzer:
+    def __init__(self, model_name="dima806/multiple_accent_classification"):
+        """Initialize the accent analyzer for Kaggle environment"""
+        self.model_name = model_name
+        # Enhanced accent labels with better mapping
+        self.accent_labels = [
+            "british", "canadian", "us", "indian", "australian", "neutral"
+        ]
+        self.accent_display_names = {
+            'british': '🇬🇧 British English',
+            'us': '🇺🇸 American English',
+            'australian': '🇦🇺 Australian English',
+            'canadian': '🇨🇦 Canadian English',
+            'indian': '🇮🇳 Indian English',
+            'neutral': '🌐 Neutral English'
+        }
+        self.temp_dir = "/tmp/accent_analyzer"
+        os.makedirs(self.temp_dir, exist_ok=True)
+        self.model_loaded = False
+        self._load_model()
+    def _load_model(self):
+        """Load the accent classification model with error handling"""
+        print("🤖 Loading accent classification model...")
+        try:
+            self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(self.model_name)
+            self.model = Wav2Vec2ForSequenceClassification.from_pretrained(self.model_name)
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+            self.model.to(self.device)
+            self.model.eval()  # Set to evaluation mode
+            self.model_loaded = True
+            print(f"✅ Model loaded successfully on {self.device}")
+        except Exception as e:
+            print(f"❌ Error loading model: {e}")
+            print("💡 Tip: Check your internet connection and Kaggle environment setup")
+            raise
+    def _validate_url(self, url):
+        """Validate and normalize URL"""
+        if not url or not isinstance(url, str):
+            return False, "Invalid URL format"
+        url = url.strip()
+        if not url.startswith(('http://', 'https://')):
+            return False, "URL must start with http:// or https://"
+        return True, url
+    def download_video(self, url, max_duration=None):
+        """Download video using yt-dlp with improved error handling"""
+        is_valid, result = self._validate_url(url)
+        if not is_valid:
+            print(f"❌ {result}")
+            return None
+        url = result
+        output_path = os.path.join(self.temp_dir, "video.%(ext)s")
+        ydl_opts = {
+            'outtmpl': output_path,
+            'format': 'best[height<=720]/best',  # Limit quality for faster download
+            'quiet': True,
+            'no_warnings': True,
+            'socket_timeout': 30,
+            'retries': 3,
+        }
+        if max_duration:
+            ydl_opts['match_filter'] = lambda info: None if info.get('duration',
+                                                                     0) <= max_duration * 2 else "Video too long"
+        try:
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                print(f"📥 Downloading video from: {url}")
+                start_time = time.time()
+                ydl.download([url])
+                download_time = time.time() - start_time
+                # Find downloaded file
+                for file in os.listdir(self.temp_dir):
+                    if file.startswith("video."):
+                        video_path = os.path.join(self.temp_dir, file)
+                        if self._is_valid_video(video_path):
+                            print(f"✅ Downloaded valid video: {file} ({download_time:.1f}s)")
+                            return video_path
+                        else:
+                            print("❌ Downloaded file is not a valid video")
+                            return None
+        except Exception as e:
+            print(f"⚠️ yt-dlp failed: {e}")
+            return self._try_direct_download(url)
+    def _is_valid_video(self, file_path):
+        """Verify video file has valid structure"""
+        try:
+            result = subprocess.run(
+                ['ffprobe', '-v', 'error', '-show_format', '-show_streams', file_path],
+                capture_output=True, text=True, timeout=10
+            )
+            return result.returncode == 0
+        except subprocess.TimeoutExpired:
+            print("⚠️ Video validation timed out")
+            return False
+        except Exception as e:
+            print(f"⚠️ Video validation error: {e}")
+            return False
+    def _try_direct_download(self, url):
+        """Enhanced fallback for direct video URLs"""
+        try:
+            print("🔄 Trying direct download...")
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+            }
+            response = requests.get(url, stream=True, timeout=60, headers=headers)
+            response.raise_for_status()
+            content_type = response.headers.get("Content-Type", "")
+            if "text/html" in content_type:
+                print("⚠️ Received HTML instead of video - check URL access")
+                return None
+            video_path = os.path.join(self.temp_dir, "video.mp4")
+            file_size = 0
+            with open(video_path, 'wb') as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        f.write(chunk)
+                        file_size += len(chunk)
+            print(f"📁 Downloaded {file_size / (1024 * 1024):.1f} MB")
+            if self._is_valid_video(video_path):
+                print("✅ Direct download successful")
+                return video_path
+            else:
+                print("❌ Downloaded file is not a valid video")
+                return None
+        except Exception as e:
+            print(f"❌ Direct download failed: {e}")
+            return None
+    def extract_audio(self, video_path, max_duration=None):
+        """Extract audio with improved error handling and progress"""
+        audio_path = os.path.join(self.temp_dir, "audio.wav")
+        cmd = ['ffmpeg', '-i', video_path, '-vn', '-acodec', 'pcm_s16le',
+               '-ar', '16000', '-ac', '1', '-y', '-loglevel', 'error']
+        if max_duration:
+            cmd.extend(['-t', str(max_duration)])
+        cmd.append(audio_path)
+        try:
+            print(f"🎵 Extracting audio (max {max_duration}s)...")
+            start_time = time.time()
+            result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
+            extraction_time = time.time() - start_time
+            if result.returncode == 0 and os.path.exists(audio_path):
+                file_size = os.path.getsize(audio_path) / (1024 * 1024)
+                print(f"✅ Audio extracted successfully ({extraction_time:.1f}s, {file_size:.1f}MB)")
+                return audio_path
+            else:
+                raise Exception(f"FFmpeg error: {result.stderr}")
+        except subprocess.TimeoutExpired:
+            print("❌ Audio extraction timed out")
+            return None
+        except Exception as e:
+            print(f"❌ Audio extraction failed: {e}")
+            return None
+    def classify_accent(self, audio_path):
+        """Enhanced accent classification with better preprocessing"""
+        if not self.model_loaded:
+            print("❌ Model not loaded properly")
+            return None
+        try:
+            print("🔍 Loading and preprocessing audio...")
+            audio, sr = librosa.load(audio_path, sr=16000)
+            # Enhanced preprocessing
+            if len(audio) == 0:
+                print("❌ Empty audio file")
+                return None
+            # Remove silence from beginning and end
+            audio_trimmed, _ = librosa.effects.trim(audio, top_db=20)
+            # Use multiple chunks for better accuracy if audio is long
+            chunk_size = 16000 * 20  # 20 seconds chunks
+            chunks = []
+            if len(audio_trimmed) > chunk_size:
+                # Split into overlapping chunks
+                step_size = chunk_size // 2
+                for i in range(0, len(audio_trimmed) - chunk_size + 1, step_size):
+                    chunks.append(audio_trimmed[i:i + chunk_size])
+                if len(audio_trimmed) % step_size != 0:
+                    chunks.append(audio_trimmed[-chunk_size:])
+            else:
+                chunks = [audio_trimmed]
+            print(f"🎯 Analyzing {len(chunks)} audio chunk(s)...")
+            all_predictions = []
+            for i, chunk in enumerate(chunks[:3]):  # Limit to 3 chunks for efficiency
+                inputs = self.feature_extractor(
+                    chunk,
+                    sampling_rate=16000,
+                    return_tensors="pt",
+                    padding=True,
+                    max_length=16000 * 20,
+                    truncation=True
+                )
+                inputs = {k: v.to(self.device) for k, v in inputs.items()}
+                with torch.no_grad():
+                    outputs = self.model(**inputs)
+                    logits = outputs.logits
+                    probabilities = torch.nn.functional.softmax(logits, dim=-1)
+                    all_predictions.append(probabilities[0].cpu().numpy())
+            # Average predictions across chunks
+            avg_probabilities = sum(all_predictions) / len(all_predictions)
+            predicted_idx = avg_probabilities.argmax()
+            predicted_idx = min(predicted_idx, len(self.accent_labels) - 1)
+            # Calculate English confidence (exclude 'neutral' for this calculation)
+            english_accents = ["british", "canadian", "us", "australian", "indian"]
+            english_confidence = sum(
+                avg_probabilities[i] * 100
+                for i, label in enumerate(self.accent_labels)
+                if label in english_accents
+            )
+            results = {
+                'predicted_accent': self.accent_labels[predicted_idx],
+                'accent_confidence': avg_probabilities[predicted_idx] * 100,
+                'english_confidence': english_confidence,
+                'audio_duration': len(audio) / 16000,
+                'processed_duration': len(audio_trimmed) / 16000,
+                'chunks_analyzed': len(all_predictions),
+                'all_probabilities': {
+                    self.accent_labels[i]: avg_probabilities[i] * 100
+                    for i in range(len(self.accent_labels))
+                },
+                'is_english_likely': english_confidence > 60,
+                'audio_quality_score': self._assess_audio_quality(audio_trimmed)
+            }
+            print(f"✅ Classification complete ({results['chunks_analyzed']} chunks)")
+            return results
+        except Exception as e:
+            print(f"❌ Classification failed: {e}")
+            return None
+    def _assess_audio_quality(self, audio):
+        """Assess audio quality for better result interpretation"""
+        try:
+            # Simple quality metrics
+            rms_energy = librosa.feature.rms(y=audio)[0].mean()
+            zero_crossing_rate = librosa.feature.zero_crossing_rate(audio)[0].mean()
+            # Normalize to 0-100 scale
+            quality_score = min(100, (rms_energy * 1000 + (1 - zero_crossing_rate) * 50))
+            return max(0, quality_score)
+        except:
+            return 50  # Default moderate quality
+    def analyze_video_url(self, url, max_duration=30):
+        """Complete pipeline with enhanced error handling"""
+        print(f"🎬 Starting analysis of: {url}")
+        print(f"⏱️ Max duration: {max_duration} seconds")
+        video_path = self.download_video(url, max_duration)
+        if not video_path:
+            return {"error": "Failed to download video", "url": url}
+        audio_path = self.extract_audio(video_path, max_duration)
+        if not audio_path:
+            return {"error": "Failed to extract audio", "url": url}
+        results = self.classify_accent(audio_path)
+        if not results:
+            return {"error": "Failed to classify accent", "url": url}
+        results.update({
+            'source_url': url,
+            'video_file': os.path.basename(video_path),
+            'audio_file': os.path.basename(audio_path),
+            'analysis_timestamp': time.strftime('%Y-%m-%d %H:%M:%S')
+        })
+        return results
+    def analyze_local_video(self, file_path, max_duration=30):
+        """Enhanced local video analysis"""
+        print(f"🎬 Starting analysis of local file: {file_path}")
+        print(f"⏱️ Max duration: {max_duration} seconds")
+        if not os.path.isfile(file_path):
+            return {"error": f"File not found: {file_path}"}
+        # Check file size
+        file_size = os.path.getsize(file_path) / (1024 * 1024)  # MB
+        print(f"📁 File size: {file_size:.1f} MB")
+        video_filename = os.path.basename(file_path)
+        print(f"✅ Using local video: {video_filename}")
+        audio_path = self.extract_audio(file_path, max_duration)
+        if not audio_path:
+            return {"error": "Failed to extract audio"}
+        results = self.classify_accent(audio_path)
+        if not results:
+            return {"error": "Failed to classify accent"}
+        results.update({
+            'source_file': file_path,
+            'video_file': video_filename,
+            'audio_file': os.path.basename(audio_path),
+            'file_size_mb': file_size,
+            'is_local': True,
+            'analysis_timestamp': time.strftime('%Y-%m-%d %H:%M:%S')
+        })
+        return results
+    def display_results(self, results):
+        """Enhanced results display with visualizations"""
+        if 'error' in results:
+            display(HTML(
+                f"<div style='color: red; font-size: 16px; padding: 10px; border: 1px solid red; border-radius: 5px;'>❌ {results['error']}</div>"))
+            return
+        accent = results['predicted_accent']
+        confidence = results['accent_confidence']
+        english_conf = results['english_confidence']
+        duration = results['audio_duration']
+        processed_duration = results.get('processed_duration', duration)
+        quality_score = results.get('audio_quality_score', 50)
+        accent_display = self.accent_display_names.get(accent, accent.title())
+        # Enhanced HTML display
+        html = f"""
+        <div style='border: 2px solid #4CAF50; border-radius: 10px; padding: 20px; margin: 10px 0; background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);'>
+            <h2 style='color: #2E7D32; margin-top: 0; text-align: center;'>🎯 Accent Analysis Results</h2>
+            <div style='display: flex; flex-wrap: wrap; gap: 20px; margin-bottom: 20px;'>
+                <div style='flex: 1; min-width: 200px; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
+                    <h3 style='color: #1976D2; margin-top: 0;'>🎭 Primary Classification</h3>
+                    <p style='font-size: 20px; margin: 5px 0; font-weight: bold;'>{accent_display}</p>
+                    <p style='margin: 5px 0;'>Confidence: <strong style='color: {"#4CAF50" if confidence >= 70 else "#FF9800" if confidence >= 50 else "#F44336"};'>{confidence:.1f}%</strong></p>
+                </div>
+                <div style='flex: 1; min-width: 200px; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
+                    <h3 style='color: #1976D2; margin-top: 0;'>🌍 English Proficiency</h3>
+                    <p style='font-size: 18px; margin: 5px 0;'><strong style='color: {"#4CAF50" if english_conf >= 70 else "#FF9800" if english_conf >= 50 else "#F44336"};'>{english_conf:.1f}%</strong></p>
+                    <p style='margin: 5px 0;'>Audio Quality: <strong>{quality_score:.0f}/100</strong></p>
+                </div>
+                <div style='flex: 1; min-width: 200px; background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
+                    <h3 style='color: #1976D2; margin-top: 0;'>⏱️ Processing Info</h3>
+                    <p style='margin: 5px 0;'>Duration: <strong>{duration:.1f}s</strong></p>
+                    <p style='margin: 5px 0;'>Processed: <strong>{processed_duration:.1f}s</strong></p>
+                    <p style='margin: 5px 0;'>Chunks: <strong>{results.get("chunks_analyzed", 1)}</strong></p>
+                </div>
+            </div>
+            <div style='background: white; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>
+                <h3 style='color: #1976D2; margin-top: 0;'>📊 Assessment</h3>
+                <div style='display: flex; flex-wrap: wrap; gap: 10px;'>
+                    <span style='background: {"#4CAF50" if english_conf >= 70 else "#FF9800" if english_conf >= 50 else "#F44336"}; color: white; padding: 5px 10px; border-radius: 15px; font-size: 14px;'>
+                        {'✅ Strong English Speaker' if english_conf >= 70 else '⚠️ Moderate English Confidence' if english_conf >= 50 else '❓ Low English Confidence'}
+                    </span>
+                    <span style='background: {"#4CAF50" if confidence >= 70 else "#FF9800" if confidence >= 50 else "#F44336"}; color: white; padding: 5px 10px; border-radius: 15px; font-size: 14px;'>
+                        {'🎯 High Confidence' if confidence >= 70 else '🤔 Moderate Confidence' if confidence >= 50 else '❓ Low Confidence'}
+                    </span>
+                    <span style='background: {"#4CAF50" if quality_score >= 70 else "#FF9800" if quality_score >= 40 else "#F44336"}; color: white; padding: 5px 10px; border-radius: 15px; font-size: 14px;'>
+                        {'🎤 Good Audio Quality' if quality_score >= 70 else '📢 Fair Audio Quality' if quality_score >= 40 else '🔇 Poor Audio Quality'}
+                    </span>
+                </div>
+            </div>
+        </div>
+        """
+        display(HTML(html))
+        # Create probability breakdown visualization
+        self._plot_probabilities(results['all_probabilities'])
+        # Display detailed breakdown table
+        prob_df = pd.DataFrame([
+            {
+                'Accent': self.accent_display_names.get(accent, accent.title()),
+                'Probability': f"{prob:.1f}%",
+                'Confidence': '🟢 High' if prob >= 70 else '🟡 Medium' if prob >= 30 else '🔴 Low'
+            }
+            for accent, prob in sorted(results['all_probabilities'].items(), key=lambda x: x[1], reverse=True)
+        ])
+        print("\n📊 Detailed Probability Breakdown:")
+        display(prob_df)
+    def _plot_probabilities(self, probabilities):
+        """Create a visualization of accent probabilities"""
+        try:
+            plt.figure(figsize=(10, 6))
+            accents = [self.accent_display_names.get(acc, acc.title()) for acc in probabilities.keys()]
+            probs = list(probabilities.values())
+            # Create color map
+            colors = ['#4CAF50' if p == max(probs) else '#2196F3' if p >= 20 else '#FFC107' if p >= 10 else '#9E9E9E'
+                      for p in probs]
+            bars = plt.bar(accents, probs, color=colors, alpha=0.8, edgecolor='black', linewidth=0.5)
+            plt.title('Accent Classification Probabilities', fontsize=16, fontweight='bold', pad=20)
+            plt.xlabel('Accent Type', fontsize=12)
+            plt.ylabel('Probability (%)', fontsize=12)
+            plt.xticks(rotation=45, ha='right')
+            plt.grid(axis='y', alpha=0.3)
+            # Add value labels on bars
+            for bar, prob in zip(bars, probs):
+                height = bar.get_height()
+                plt.text(bar.get_x() + bar.get_width() / 2., height + 0.5,
+                         f'{prob:.1f}%', ha='center', va='bottom', fontweight='bold')
+            plt.tight_layout()
+            plt.show()
+        except Exception as e:
+            print(f"⚠️ Could not create visualization: {e}")
+    def batch_analyze(self, urls, max_duration=30):
+        """Analyze multiple videos with progress tracking"""
+        results = []
+        failed_count = 0
+        print(f"🚀 Starting batch analysis of {len(urls)} videos")
+        for i, url in enumerate(urls, 1):
+            print(f"\n{'=' * 60}")
+            print(f"Processing video {i}/{len(urls)}")
+            result = self.analyze_video_url(url, max_duration)
+            result['video_index'] = i
+            if 'error' in result:
+                failed_count += 1
+                print(f"❌ Failed: {result['error']}")
+            else:
+                print(f"✅ Success: {result['predicted_accent']} ({result['accent_confidence']:.1f}%)")
+            results.append(result)
+            self.display_results(result)
+            # Small delay to prevent overwhelming servers
+            if i < len(urls):
+                time.sleep(1)
+        # Summary
+        success_count = len(urls) - failed_count
+        print(f"\n📈 Batch Analysis Summary:")
+        print(f"   ✅ Successful: {success_count}/{len(urls)}")
+        print(f"   ❌ Failed: {failed_count}/{len(urls)}")
+        return results
+    def export_results(self, results, filename="accent_analysis_results.json"):
+        """Export results to JSON file"""
+        try:
+            with open(filename, 'w') as f:
+                json.dump(results, f, indent=2, default=str)
+            print(f"💾 Results exported to {filename}")
+        except Exception as e:
+            print(f"❌ Export failed: {e}")
+    def cleanup(self):
+        """Clean up temporary files"""
+        try:
+            import shutil
+            if os.path.exists(self.temp_dir):
+                shutil.rmtree(self.temp_dir, ignore_errors=True)
+                print("🧹 Cleaned up temporary files")
+        except Exception as e:
+            print(f"⚠️ Cleanup warning: {e}")
+# Helper Functions
+def show_examples():
+    """Show usage examples"""
+    examples = {
+        "YouTube": "https://youtube.com/watch?v=abc123",
+        "Loom": "https://www.loom.com/share/abc123def456",
+        "Direct MP4": "https://example.com/video.mp4",
+        "Local File": "/kaggle/input/dataset/video.mp4"
+    }
+    print("\n🎯 Supported Video Formats:")
+    for platform, example in examples.items():
+        print(f"  {platform:12}: {example}")
+    print("\n💡 Usage Tips:")
+    print("  • Keep videos under 2 minutes for best results")
+    print("  • Ensure clear audio quality")
+    print("  • Multiple speakers may affect accuracy")
+    print("  • Model works best with sustained speech")
+def quick_test_local():
+    """Interactive test for local files"""
+    print("🔍 Quick Test Mode for Local Files")
+    print("📁 Common Kaggle input paths:")
+    print("   /kaggle/input/your-dataset/video.mp4")
+    print("   /kaggle/input/video-files/sample.mp4")
+    file_path = input("\n📎 Enter full path to your local video: ").strip()
+    if not file_path:
+        print("❌ No path provided.")
+        return None
+    if not os.path.exists(file_path):
+        print(f"❌ File not found: {file_path}")
+        return None
+    analyzer = VideoAccentAnalyzer()
+    try:
+        results = analyzer.analyze_local_video(file_path)
+        analyzer.display_results(results)
+        return results
+    finally:
+        analyzer.cleanup()
+def demo_analysis():
+    """Demo function with example usage"""
+    print("🎬 Video Accent Analyzer Demo")
+    print("=" * 50)
+    # Initialize analyzer
+    analyzer = VideoAccentAnalyzer()
+    # Example analysis (replace with actual video URL)
+    example_url = "https://example.com/video.mp4"  # Replace with real URL
+    print(f"\n🎯 Example: Analyzing {example_url}")
+    # Uncomment to run actual analysis
+    # results = analyzer.analyze_video_url(example_url, max_duration=30)
+    # analyzer.display_results(results)
+    # analyzer.cleanup()
+    print("\n📚 To use the analyzer:")
+    print("1. analyzer = VideoAccentAnalyzer()")
+    print("2. results = analyzer.analyze_video_url('your-url', max_duration=30)")
+    print("3. analyzer.display_results(results)")
+    print("4. analyzer.cleanup()  # Clean up temporary files")
+# Show examples on import.
+# NOTE: this call sits at module level, so it runs (and prints) every time the
+# module is first imported, not only when the file is executed as a script.
+show_examples()