"""Helpers for detecting the English accent spoken in a video.

The pipeline downloads a video from a URL, extracts (at most) the first
minute of its audio track, runs a SpeechBrain accent classifier on that
audio, and removes the intermediate files afterwards.
"""

import logging
import os
import traceback
import uuid

import requests
import streamlit as st
from moviepy.video.io.VideoFileClip import VideoFileClip
from speechbrain.pretrained.interfaces import foreign_class

logging.basicConfig(
    filename="/tmp/app.log",
    filemode="a",
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Longest audio excerpt (in seconds) handed to the classifier.
MAX_AUDIO_SECONDS = 60
# Size of each chunk streamed from the HTTP response to disk.
DOWNLOAD_CHUNK_BYTES = 8192
# (connect, read) timeouts in seconds; without them a stalled server would
# block the request forever.
DOWNLOAD_TIMEOUT_SECONDS = (10, 60)


def _remove_temp_file(path):
    """Delete *path* if it exists; log instead of raising when that fails."""
    if not path or not os.path.exists(path):
        return
    try:
        os.remove(path)
        logging.info(f"Removed temporary file: {path}")
    except OSError as e:
        # Best-effort cleanup: a leftover file must not mask the real result
        # (or the real error) of the surrounding operation.
        logging.warning(f"Failed to remove temp file {path}: {e}")


def download_file(video_url: str) -> str:
    """
    Download a file from a URL into the current working directory.

    The file is stored under a random ``<uuid>_video.mp4`` name. The caller
    owns the returned file and is responsible for deleting it.

    Args:
        video_url (str): The URL to download from.

    Returns:
        str: Path to the downloaded file.

    Raises:
        RuntimeError: If the download fails for any reason. The underlying
            error is logged and attached as ``__cause__``; any partially
            written file is removed first.
    """
    video_filename = None
    try:
        video_filename = os.path.join(os.getcwd(), f"{uuid.uuid4()}_video.mp4")
        with requests.get(
            video_url, stream=True, timeout=DOWNLOAD_TIMEOUT_SECONDS
        ) as response:
            response.raise_for_status()
            with open(video_filename, "wb") as f:
                for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_BYTES):
                    if chunk:
                        f.write(chunk)
        logging.info(f"Downloaded video to {video_filename}")
        return video_filename
    except Exception as e:
        logging.error(f"Error downloading video: {e}\n{traceback.format_exc()}")
        # The caller never learns the path on failure, so a partial download
        # has to be cleaned up here.
        _remove_temp_file(video_filename)
        raise RuntimeError(
            "Failed to download the video. Please try another video."
        ) from e


def extract_audio(video_path: str) -> str:
    """
    Extract up to ``MAX_AUDIO_SECONDS`` seconds of audio from a video file.

    The audio is written as 16-bit PCM to a random ``<uuid>_audio.wav`` file
    in the current working directory. The caller owns the returned file and
    is responsible for deleting it.

    Args:
        video_path (str): Path to the input video file.

    Returns:
        str: Path to the extracted audio file.

    Raises:
        RuntimeError: If the video cannot be opened, has no audio track, or
            the audio cannot be written. The underlying error is logged and
            attached as ``__cause__``; any partially written file is removed.
    """
    audio_filename = None
    try:
        # The context manager closes the clip (and its reader) even when
        # extraction fails part-way through.
        with VideoFileClip(video_path) as video:
            if video.audio is None:
                raise ValueError("video has no audio track")
            audio_duration = min(video.audio.duration, MAX_AUDIO_SECONDS)
            trimmed_audio = video.audio.subclipped(0, audio_duration)
            audio_filename = os.path.join(os.getcwd(), f"{uuid.uuid4()}_audio.wav")
            # Must happen while the clip is still open: the audio is read
            # from the video file during the write.
            trimmed_audio.write_audiofile(
                audio_filename, codec="pcm_s16le", logger=None
            )
        logging.info(f"Extracted audio to {audio_filename}")
        return audio_filename
    except Exception as e:
        logging.error(f"Error extracting audio: {e}\n{traceback.format_exc()}")
        _remove_temp_file(audio_filename)
        raise RuntimeError(
            "Sorry, we could not extract audio from the video. "
            "Please try another video."
        ) from e


@st.cache_resource(show_spinner=False)
def load_classifier():
    """
    Load the SpeechBrain accent classification model.

    Wrapped in ``st.cache_resource`` so the loaded classifier object is
    reused instead of being rebuilt on every call.

    Returns:
        foreign_class instance: Loaded classifier object.

    Raises:
        RuntimeError: If the model cannot be loaded. The underlying error is
            logged and attached as ``__cause__``.
    """
    try:
        classifier = foreign_class(
            source="Jzuluaga/accent-id-commonaccent_xlsr-en-english",
            pymodule_file="custom_interface.py",
            classname="CustomEncoderWav2vec2Classifier",
        )
        logging.info("Loaded SpeechBrain accent classifier")
        return classifier
    except Exception as e:
        logging.error(
            f"Error loading SpeechBrain classifier: {e}\n{traceback.format_exc()}"
        )
        raise RuntimeError(
            "Failed to load the Classifier. Please try again later."
        ) from e


def classify_accent(classifier, audio_path: str):
    """
    Classify the English accent in an audio file using the loaded classifier.

    Args:
        classifier (foreign_class): The loaded SpeechBrain classifier.
        audio_path (str): Path to the audio file.

    Returns:
        tuple: ``(label, confidence)``. ``label`` is the classifier's
            ``text_lab`` output, passed through unchanged; ``confidence`` is
            the classifier's ``score`` multiplied by 100.
            NOTE(review): ``score`` is presumably a one-element tensor rather
            than a plain float, so callers should wrap it in ``float()``
            before formatting - confirm against the SpeechBrain interface.

    Raises:
        RuntimeError: If classification fails. The underlying error is logged
            and attached as ``__cause__``.
    """
    try:
        out_prob, score, index, text_lab = classifier.classify_file(audio_path)
        logging.info(
            f"Classified accent: {text_lab} with confidence {float(score)*100:.2f}%"
        )
        return text_lab, score * 100
    except Exception as e:
        logging.error(f"Error classifying accent: {e}\n{traceback.format_exc()}")
        # The model is already loaded at this point; report the step that
        # actually failed instead of a misleading "failed to load" message.
        raise RuntimeError(
            "Failed to classify the accent. Please try again later."
        ) from e


def explain_accent(accent, confidence):
    """
    Build a human-readable explanation of a detected accent and its score.

    Args:
        accent (str): Detected accent label.
        confidence (float): Confidence score as a percentage; anything
            accepted by ``float()`` works.

    Returns:
        str: Explanation as a Markdown string.
    """
    # Assembled from flush-left pieces so that no line of the result carries
    # leading indentation (which Markdown would render as a code block).
    return (
        "\n"
        f"The system detected a **{accent}** English accent with "
        f"**{float(confidence):.2f}% confidence**.\n"
        "This score reflects how closely your voice matches typical speech "
        f"patterns of native {accent} English speakers based on "
        "pronunciation, rhythm, and intonation.\n"
        "The model analyzes vocal features using a neural network trained on "
        "speakers with known accents.\n"
        "While it can differentiate between major English accents, its "
        "accuracy may vary with noisy audio, strong regional variation, or "
        "non-native speakers.\n"
    )


def process_video_url(video_url: str):
    """
    Run the whole pipeline for one video URL.

    Steps:
        - Download the video file
        - Extract audio (up to ``MAX_AUDIO_SECONDS`` seconds)
        - Load the classifier model
        - Classify the accent
        - Remove the intermediate video and audio files

    Args:
        video_url (str): URL of the public video file.

    Returns:
        tuple: ``(accent, confidence)`` where ``accent`` is the first element
            of the classifier's label, upper-cased, and ``confidence`` is the
            value returned by ``classify_accent``.

    Raises:
        RuntimeError: Propagated from whichever pipeline step failed.
    """
    video_path = None
    audio_path = None
    try:
        video_path = download_file(video_url)
        audio_path = extract_audio(video_path)
        classifier = load_classifier()
        accent, confidence = classify_accent(classifier, audio_path)
        return accent[0].upper(), confidence
    finally:
        # Runs on success and on failure; paths that were never produced are
        # still None and are skipped by the helper.
        for path in (audio_path, video_path):
            _remove_temp_file(path)