Spaces:
Running
Running
File size: 5,703 Bytes
465b605 4c7365e 465b605 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
import os
import uuid
import logging
import requests
import traceback
import streamlit as st
from moviepy.video.io.VideoFileClip import VideoFileClip
from speechbrain.pretrained.interfaces import foreign_class
# Root logger setup: INFO-and-above records are appended (never truncated)
# to /tmp/app.log, each prefixed with a timestamp and its severity level.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filename="/tmp/app.log",
    filemode="a",
)
def download_file(video_url, timeout=30):
    """
    Download a file from a URL and save it in the current working directory.

    The response is streamed to disk in 8 KiB chunks under a random,
    UUID-based ``*_video.mp4`` name. If anything goes wrong, any partially
    written file is removed before the error is reported, so a failed
    download leaves nothing behind for the caller to clean up.

    Args:
        video_url (str): The URL to download from.
        timeout (float | tuple): Timeout in seconds passed to
            ``requests.get``. Without it a stalled server would block the
            app indefinitely. Defaults to 30.
    Returns:
        str: Path to the downloaded file.
    Raises:
        RuntimeError: If the download fails for any reason. The original
            exception is attached as ``__cause__``.
    """
    video_filename = None
    try:
        video_id = str(uuid.uuid4())
        video_filename = os.path.join(os.getcwd(), f"{video_id}_video.mp4")
        with requests.get(video_url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(video_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    # Skip empty chunks so nothing is written for them.
                    if chunk:
                        f.write(chunk)
        logging.info(f"Downloaded video to {video_filename}")
        return video_filename
    except Exception as e:
        logging.error(f"Error downloading video: {e}\n{traceback.format_exc()}")
        # The caller never receives the path on failure, so a partial
        # download must be deleted here or it would be orphaned on disk.
        if video_filename and os.path.exists(video_filename):
            try:
                os.remove(video_filename)
            except OSError as cleanup_error:
                logging.warning(
                    f"Failed to remove partial download {video_filename}: {cleanup_error}"
                )
        raise RuntimeError("Failed to download the video. Please try another video.") from e
def extract_audio(video_path, max_duration=60):
    """
    Extract the leading portion of a video's audio track into a .wav file.

    The audio is trimmed to at most ``max_duration`` seconds and written as
    16-bit PCM into the current working directory under a random,
    UUID-based ``*_audio.wav`` name. The video clip is always closed, and a
    partially written audio file is removed if extraction fails.

    Args:
        video_path (str): Path to the input video file.
        max_duration (float): Maximum number of seconds of audio to keep.
            Defaults to 60.
    Returns:
        str: Path to the extracted audio file.
    Raises:
        RuntimeError: If the video cannot be opened, has no audio track, or
            the audio cannot be written. The original exception is attached
            as ``__cause__``.
    """
    audio_filename = None
    try:
        # Using the clip as a context manager closes it on every exit path.
        with VideoFileClip(video_path) as video:
            if video.audio is None:
                # Fail with a clear reason instead of an AttributeError on None.
                raise ValueError("The video has no audio track")
            audio_duration = min(video.audio.duration, max_duration)
            trimmed_audio = video.audio.subclipped(0, audio_duration)
            audio_id = str(uuid.uuid4())
            audio_filename = os.path.join(os.getcwd(), f"{audio_id}_audio.wav")
            trimmed_audio.write_audiofile(audio_filename, codec='pcm_s16le', logger=None)
        logging.info(f"Extracted audio to {audio_filename}")
        return audio_filename
    except Exception as e:
        logging.error(f"Error extracting audio: {e}\n{traceback.format_exc()}")
        # The caller never receives the path on failure, so remove any
        # partially written audio file here.
        if audio_filename and os.path.exists(audio_filename):
            try:
                os.remove(audio_filename)
            except OSError as cleanup_error:
                logging.warning(
                    f"Failed to remove partial audio file {audio_filename}: {cleanup_error}"
                )
        raise RuntimeError(
            "Sorry, we could not extract audio from the video. Please try another video."
        ) from e
@st.cache_resource(show_spinner=False)
def load_classifier():
    """
    Build the SpeechBrain accent-identification classifier.

    The function is decorated with ``st.cache_resource`` (spinner disabled)
    so Streamlit can reuse the returned object rather than rebuilding it.

    Returns:
        The object produced by ``foreign_class`` for the
        ``CustomEncoderWav2vec2Classifier`` interface.
    Raises:
        RuntimeError: If the classifier cannot be constructed.
    """
    model_spec = {
        "source": "Jzuluaga/accent-id-commonaccent_xlsr-en-english",
        "pymodule_file": "custom_interface.py",
        "classname": "CustomEncoderWav2vec2Classifier",
    }
    try:
        accent_model = foreign_class(**model_spec)
        logging.info("Loaded SpeechBrain accent classifier")
        return accent_model
    except Exception as load_error:
        logging.error(f"Error loading SpeechBrain classifier: {load_error}\n{traceback.format_exc()}")
        raise RuntimeError("Failed to load the Classifier. Please try again later.")
def classify_accent(classifier, audio_path):
    """
    Classify the English accent in an audio file with the given classifier.

    Args:
        classifier: Loaded classifier object exposing
            ``classify_file(audio_path)``, which must return a 4-tuple of
            (probabilities, score, index, label).
        audio_path (str): Path to the audio file.
    Returns:
        tuple: ``(label, confidence)`` where ``label`` is passed through
        exactly as the classifier returned it (process_video_url indexes
        its first element, so presumably a sequence of labels — confirm
        against the model interface) and ``confidence`` is the classifier's
        score multiplied by 100, in the score's own numeric type.
    Raises:
        RuntimeError: If classification fails. The original exception is
            attached as ``__cause__``.
    """
    try:
        # Only the score and the text label are used; the probability
        # vector and class index are discarded.
        _out_prob, score, _index, text_lab = classifier.classify_file(audio_path)
        logging.info(f"Classified accent: {text_lab} with confidence {float(score)*100:.2f}%")
        return text_lab, score * 100
    except Exception as e:
        logging.error(f"Error classifying accent: {e}\n{traceback.format_exc()}")
        # The model is already loaded at this point, so report a
        # classification failure rather than a loading failure.
        raise RuntimeError("Failed to classify the accent. Please try again later.") from e
def explain_accent(accent, confidence):
    """
    Build the markdown explanation shown for a detected accent.

    Args:
        accent (str): Detected accent label.
        confidence: Confidence score as a percentage; converted with
            ``float()`` and rendered with two decimal places.
    Returns:
        str: Three-sentence markdown text, starting and ending with a newline.
    """
    confidence_text = f"{float(confidence):.2f}"
    paragraphs = (
        f"The system detected a **{accent}** English accent with **{confidence_text}% confidence**.",
        f"This score reflects how closely your voice matches typical speech patterns of native {accent} English speakers based on pronunciation, rhythm, and intonation.",
        "The model analyzes vocal features using a neural network trained on speakers with known accents. While it can differentiate between major English accents, its accuracy may vary with noisy audio, strong regional variation, or non-native speakers.",
    )
    # Empty first and last items reproduce the leading and trailing newlines.
    return "\n".join(("", *paragraphs, ""))
def process_video_url(video_url):
    """
    Run the full accent-detection pipeline for a video URL.

    Steps, in order:
    - download the video file
    - extract its audio (up to 60 seconds)
    - load the classifier model
    - classify the accent
    - delete the temporary audio and video files, whether or not the
      earlier steps succeeded

    Args:
        video_url (str): URL of the public video file.
    Returns:
        tuple: (upper-cased first accent label (str), confidence score)
    """
    downloaded_video = None
    extracted_audio = None
    try:
        downloaded_video = download_file(video_url)
        extracted_audio = extract_audio(downloaded_video)
        model = load_classifier()
        labels, score = classify_accent(model, extracted_audio)
        return labels[0].upper(), score
    finally:
        # Best-effort cleanup: audio first, then video. A failed removal is
        # logged and never masks the pipeline's own result or exception.
        for temp_path in (extracted_audio, downloaded_video):
            if not temp_path or not os.path.exists(temp_path):
                continue
            try:
                os.remove(temp_path)
                logging.info(f"Removed temporary file: {temp_path}")
            except Exception as cleanup_error:
                logging.warning(f"Failed to remove temp file {temp_path}: {cleanup_error}")