accent_classifier / utils.py
kedar432's picture
update permission
4c7365e
import os
import uuid
import logging
import requests
import traceback
import streamlit as st
from moviepy.video.io.VideoFileClip import VideoFileClip
from speechbrain.pretrained.interfaces import foreign_class
# Route root-logger records at INFO and above to a log file under /tmp,
# appending to it (mode "a") instead of truncating it on each start.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filename="/tmp/app.log",
    filemode="a",
)
def download_file(video_url, timeout=(10, 60)):
    """
    Download a video from a URL and save it to a uniquely named file.

    The response body is streamed in 8 KiB chunks to
    ``<current working directory>/<uuid4>_video.mp4``. If anything fails,
    any partially written file is removed before the error is raised, so a
    failed download does not leave files behind.

    Args:
        video_url (str): The URL to download from.
        timeout (float | tuple): Timeout passed to ``requests.get``; a
            ``(connect, read)`` tuple or a single number of seconds.

    Returns:
        str: Path to the downloaded file.

    Raises:
        RuntimeError: If the request, the HTTP status check, or the file
            write fails. The original exception is attached as ``__cause__``.
    """
    video_filename = None
    try:
        video_id = str(uuid.uuid4())
        video_filename = os.path.join(os.getcwd(), f"{video_id}_video.mp4")
        # Without a timeout, requests waits indefinitely on a server that
        # accepts the connection but never answers.
        with requests.get(video_url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(video_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        logging.info(f"Downloaded video to {video_filename}")
        return video_filename
    except Exception as e:
        logging.error(f"Error downloading video: {e}\n{traceback.format_exc()}")
        # The caller never receives the path when we raise, so a partial
        # file has to be cleaned up here.
        if video_filename and os.path.exists(video_filename):
            try:
                os.remove(video_filename)
            except OSError as cleanup_error:
                logging.warning(f"Failed to remove temp file {video_filename}: {cleanup_error}")
        raise RuntimeError("Failed to download the video. Please try another video.") from e
def extract_audio(video_path):
    """
    Extract up to 60 seconds of audio from the input video file.

    The audio is written as 16-bit PCM to
    ``<current working directory>/<uuid4>_audio.wav``. The video clip is
    closed once the audio has been written (or on failure), and a partially
    written audio file is removed if extraction fails.

    Args:
        video_path (str): Path to the input video file.

    Returns:
        str: Path to the extracted audio file.

    Raises:
        RuntimeError: If the video cannot be opened, has no audio track, or
            the audio cannot be written. The original exception is attached
            as ``__cause__``.
    """
    audio_filename = None
    try:
        # The context manager closes the clip even when extraction fails,
        # so its underlying readers are not leaked.
        with VideoFileClip(video_path) as video:
            if video.audio is None:
                # Fail with an explicit reason instead of an AttributeError
                # on None in the log.
                raise ValueError("Video has no audio track")
            audio_duration = min(video.audio.duration, 60)
            trimmed_audio = video.audio.subclipped(0, audio_duration)
            audio_id = str(uuid.uuid4())
            audio_filename = os.path.join(os.getcwd(), f"{audio_id}_audio.wav")
            trimmed_audio.write_audiofile(audio_filename, codec='pcm_s16le', logger=None)
        logging.info(f"Extracted audio to {audio_filename}")
        return audio_filename
    except Exception as e:
        logging.error(f"Error extracting audio: {e}\n{traceback.format_exc()}")
        # The caller never receives the path when we raise, so a partial
        # .wav has to be cleaned up here.
        if audio_filename and os.path.exists(audio_filename):
            try:
                os.remove(audio_filename)
            except OSError as cleanup_error:
                logging.warning(f"Failed to remove temp file {audio_filename}: {cleanup_error}")
        raise RuntimeError("Sorry, we could not extract audio from the video. Please try another video.") from e
@st.cache_resource(show_spinner=False)
def load_classifier():
    """
    Build the SpeechBrain accent classifier through ``foreign_class``.

    The function is decorated with ``st.cache_resource`` (spinner disabled),
    so Streamlit can reuse the loaded object instead of rebuilding it.

    Returns:
        The object returned by ``foreign_class`` for the
        ``CustomEncoderWav2vec2Classifier`` interface.

    Raises:
        RuntimeError: If the classifier cannot be loaded.
    """
    model_spec = {
        "source": "Jzuluaga/accent-id-commonaccent_xlsr-en-english",
        "pymodule_file": "custom_interface.py",
        "classname": "CustomEncoderWav2vec2Classifier",
    }
    try:
        accent_model = foreign_class(**model_spec)
        logging.info("Loaded SpeechBrain accent classifier")
        return accent_model
    except Exception as load_error:
        logging.error(f"Error loading SpeechBrain classifier: {load_error}\n{traceback.format_exc()}")
        raise RuntimeError("Failed to load the Classifier. Please try again later.")
def classify_accent(classifier, audio_path):
    """
    Classify the English accent of an audio file with the given classifier.

    Args:
        classifier: Object exposing ``classify_file(path)`` that returns a
            4-tuple ``(out_prob, score, index, text_lab)``.
        audio_path (str): Path to the audio file.

    Returns:
        tuple: ``(text_lab, score * 100)``. The label is returned exactly as
        the classifier produced it, and the score is scaled to a percentage
        without changing its type.

    Raises:
        RuntimeError: If classification fails. The original exception is
            attached as ``__cause__``.
    """
    try:
        out_prob, score, index, text_lab = classifier.classify_file(audio_path)
        logging.info(f"Classified accent: {text_lab} with confidence {float(score)*100:.2f}%")
        return text_lab, score * 100
    except Exception as e:
        logging.error(f"Error classifying accent: {e}\n{traceback.format_exc()}")
        # This step runs inference; model loading failures are reported
        # separately, so the message must not blame loading.
        raise RuntimeError("Failed to classify the accent. Please try again later.") from e
def explain_accent(accent, confidence):
    """
    Build the markdown explanation shown for a detected accent.

    Args:
        accent (str): Detected accent label.
        confidence (float): Confidence score as a percentage; it is passed
            through ``float()`` and rendered with two decimals.

    Returns:
        str: Three lines of markdown text, preceded and followed by a newline.
    """
    confidence_text = f"{float(confidence):.2f}"
    paragraphs = (
        f"The system detected a **{accent}** English accent with **{confidence_text}% confidence**.",
        f"This score reflects how closely your voice matches typical speech patterns of native {accent} English speakers based on pronunciation, rhythm, and intonation.",
        "The model analyzes vocal features using a neural network trained on speakers with known accents. While it can differentiate between major English accents, its accuracy may vary with noisy audio, strong regional variation, or non-native speakers.",
    )
    return "\n" + "\n".join(paragraphs) + "\n"
def process_video_url(video_url):
    """
    Run the full pipeline for one video URL.

    Steps, in order:
    - download the video with ``download_file``
    - extract its audio with ``extract_audio``
    - obtain the classifier from ``load_classifier``
    - classify the audio with ``classify_accent``
    - delete the temporary audio and video files, whether or not the
      earlier steps succeeded

    Args:
        video_url (str): URL of the public video file.

    Returns:
        tuple: (first element of the classifier's label, upper-cased;
        confidence as returned by ``classify_accent``)
    """
    video_path = None
    audio_path = None
    try:
        video_path = download_file(video_url)
        audio_path = extract_audio(video_path)
        labels, confidence = classify_accent(load_classifier(), audio_path)
        return labels[0].upper(), confidence
    finally:
        # Best-effort cleanup: a file that cannot be removed is logged and
        # does not mask the result or the original error.
        for temp_file in (audio_path, video_path):
            if not temp_file or not os.path.exists(temp_file):
                continue
            try:
                os.remove(temp_file)
                logging.info(f"Removed temporary file: {temp_file}")
            except Exception as cleanup_error:
                logging.warning(f"Failed to remove temp file {temp_file}: {cleanup_error}")