Spaces:
Running
Running
import os | |
import uuid | |
import logging | |
import requests | |
import traceback | |
import streamlit as st | |
from moviepy.video.io.VideoFileClip import VideoFileClip | |
from speechbrain.pretrained.interfaces import foreign_class | |
# Application-wide logging: every record at INFO or above is appended
# (never truncated) to /tmp/app.log with a timestamp and severity prefix.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filename="/tmp/app.log",
    filemode="a",
)
def download_file(video_url, timeout=(10, 60)):
    """
    Download a file from a URL and save it as a uniquely named .mp4 file.

    The response body is streamed to disk in 8 KiB chunks. The file is
    created in the current working directory as ``<uuid4>_video.mp4``;
    the caller is responsible for deleting it once it is no longer needed.

    Args:
        video_url (str): The URL to download from.
        timeout (float | tuple): Passed to ``requests.get``. A
            ``(connect, read)`` tuple or a single number of seconds.

    Returns:
        str: Path to the downloaded file.

    Raises:
        RuntimeError: If the download fails for any reason. The underlying
            error is logged and attached as ``__cause__``.
    """
    video_filename = None
    try:
        video_id = str(uuid.uuid4())
        video_filename = os.path.join(os.getcwd(), f"{video_id}_video.mp4")
        # Without a timeout, requests waits indefinitely on a server that
        # accepts the connection but never sends data.
        with requests.get(video_url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(video_filename, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        logging.info(f"Downloaded video to {video_filename}")
        return video_filename
    except Exception as e:
        logging.error(f"Error downloading video: {e}\n{traceback.format_exc()}")
        # The caller never receives the path on failure, so a partially
        # written file has to be removed here or it is leaked.
        if video_filename and os.path.exists(video_filename):
            try:
                os.remove(video_filename)
            except OSError as cleanup_error:
                logging.warning(
                    f"Failed to remove partial download {video_filename}: {cleanup_error}"
                )
        raise RuntimeError("Failed to download the video. Please try another video.") from e
def extract_audio(video_path):
    """
    Extract up to 60 seconds of audio from the input video file.

    The audio is written as 16-bit PCM to ``<uuid4>_audio.wav`` in the
    current working directory; the caller is responsible for deleting it.

    Args:
        video_path (str): Path to the input video file.

    Returns:
        str: Path to the extracted audio file.

    Raises:
        RuntimeError: If the video cannot be opened, has no audio track, or
            the audio cannot be written. The underlying error is logged and
            attached as ``__cause__``.
    """
    audio_filename = None
    try:
        # The context manager closes the clip (and its ffmpeg readers) even
        # when extraction fails; otherwise the handles stay open.
        with VideoFileClip(video_path) as video:
            if video.audio is None:
                # Fail with a clear reason instead of an AttributeError on None.
                raise ValueError("The video has no audio track.")
            audio_duration = min(video.audio.duration, 60)
            trimmed_audio = video.audio.subclipped(0, audio_duration)
            audio_id = str(uuid.uuid4())
            audio_filename = os.path.join(os.getcwd(), f"{audio_id}_audio.wav")
            trimmed_audio.write_audiofile(audio_filename, codec="pcm_s16le", logger=None)
        logging.info(f"Extracted audio to {audio_filename}")
        return audio_filename
    except Exception as e:
        logging.error(f"Error extracting audio: {e}\n{traceback.format_exc()}")
        # The caller never receives the path on failure, so a partially
        # written .wav has to be removed here or it is leaked.
        if audio_filename and os.path.exists(audio_filename):
            try:
                os.remove(audio_filename)
            except OSError as cleanup_error:
                logging.warning(
                    f"Failed to remove partial audio file {audio_filename}: {cleanup_error}"
                )
        raise RuntimeError(
            "Sorry, we could not extract audio from the video. Please try another video."
        ) from e
def load_classifier():
    """
    Build the SpeechBrain accent-identification classifier.

    The model is fetched through ``foreign_class`` using the custom
    interface module and class named in the model specification below.

    Returns:
        The object returned by ``foreign_class`` for the accent model.

    Raises:
        RuntimeError: If loading fails; the original error is logged.
    """
    model_spec = {
        "source": "Jzuluaga/accent-id-commonaccent_xlsr-en-english",
        "pymodule_file": "custom_interface.py",
        "classname": "CustomEncoderWav2vec2Classifier",
    }
    try:
        accent_model = foreign_class(**model_spec)
        logging.info("Loaded SpeechBrain accent classifier")
        return accent_model
    except Exception as load_error:
        logging.error(
            f"Error loading SpeechBrain classifier: {load_error}\n{traceback.format_exc()}"
        )
        raise RuntimeError("Failed to load the Classifier. Please try again later.")
def classify_accent(classifier, audio_path):
    """
    Classify the English accent in an audio file using a loaded classifier.

    Args:
        classifier: Object exposing ``classify_file(path)`` that returns a
            4-tuple ``(out_prob, score, index, text_lab)``.
        audio_path (str): Path to the audio file.

    Returns:
        tuple: ``(text_lab, score * 100)``. ``text_lab`` is returned exactly
        as the classifier produced it (the caller in this file indexes it
        with ``[0]``, so it is presumably a sequence of labels). The
        confidence is the classifier's score scaled to a percentage and
        keeps whatever numeric type the classifier returned.

    Raises:
        RuntimeError: If classification fails. The underlying error is
            logged and attached as ``__cause__``.
    """
    try:
        # Only the score and the label are used; the probabilities and the
        # class index are discarded.
        _out_prob, score, _index, text_lab = classifier.classify_file(audio_path)
        logging.info(f"Classified accent: {text_lab} with confidence {float(score)*100:.2f}%")
        return text_lab, score * 100
    except Exception as e:
        logging.error(f"Error classifying accent: {e}\n{traceback.format_exc()}")
        # This is a classification failure, not a model-loading failure, so
        # the user-facing message says so.
        raise RuntimeError("Failed to classify the accent. Please try again later.") from e
def explain_accent(accent, confidence):
    """
    Build the markdown explanation shown for a detected accent.

    Args:
        accent (str): Detected accent label.
        confidence: Confidence as a percentage; converted with ``float()``
            and rendered with two decimal places.

    Returns:
        str: Three lines of markdown text, preceded and followed by a
        newline.
    """
    confidence_text = f"{float(confidence):.2f}"
    summary = (
        f"The system detected a **{accent}** English accent "
        f"with **{confidence_text}% confidence**."
    )
    meaning = (
        "This score reflects how closely your voice matches typical speech patterns "
        f"of native {accent} English speakers based on pronunciation, rhythm, and intonation."
    )
    caveats = (
        "The model analyzes vocal features using a neural network trained on speakers "
        "with known accents. While it can differentiate between major English accents, "
        "its accuracy may vary with noisy audio, strong regional variation, or non-native speakers."
    )
    # Empty first and last items reproduce the leading and trailing newline
    # of the original triple-quoted literal.
    return "\n".join(["", summary, meaning, caveats, ""])
def process_video_url(video_url):
    """
    Run the full accent-detection pipeline for one video URL.

    Steps, in order: download the video, extract its audio, load the
    classifier, classify the audio. Whatever happens, any intermediate
    files that were created are removed before returning or re-raising.

    Args:
        video_url (str): URL of the public video file.

    Returns:
        tuple: ``(label, confidence)`` where ``label`` is the first element
        of the classifier's label output, upper-cased, and ``confidence``
        is the value returned by ``classify_accent``.
    """
    downloaded_video = None
    extracted_audio = None
    try:
        downloaded_video = download_file(video_url)
        extracted_audio = extract_audio(downloaded_video)
        label, confidence_pct = classify_accent(load_classifier(), extracted_audio)
        return label[0].upper(), confidence_pct
    finally:
        # Best-effort cleanup: a file that cannot be deleted is only logged
        # so it never masks the pipeline's own result or error.
        for leftover in (extracted_audio, downloaded_video):
            if not leftover or not os.path.exists(leftover):
                continue
            try:
                os.remove(leftover)
                logging.info(f"Removed temporary file: {leftover}")
            except Exception as cleanup_error:
                logging.warning(f"Failed to remove temp file {leftover}: {cleanup_error}")