import streamlit as st
import os
from transformers import pipeline
from transformers.utils import logging
import numpy as np
import pandas as pd
import yt_dlp
import torchaudio
import ffmpeg
logging.set_verbosity_info()
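# Segmentation settings: audio is resampled to RATE_HZ and cut into MAX_SECONDS-long
# chunks; at most MAX_SEGMENTS chunks are analysed, which bounds memory and
# inference time for long videos.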
RATE_HZ = 16000
MAX_SECONDS = 1
MAX_LENGTH = RATE_HZ * MAX_SECONDS
MAX_SEGMENTS = 250
def download_video(url, output_path="video.mp4"):
ydl_opts = {
'format': 'worstvideo[ext=mp4]+bestaudio[ext=m4a]/bestaudio',
'outtmpl': output_path,
'merge_output_format': 'mp4',
'quiet': True,
'noplaylist': True,
'nocheckcertificate': True,
'retries': 3,
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
ydl.download([url])
return output_path
def extract_audio(input_path, output_path="audio.mp3"):
(
ffmpeg
.input(input_path)
.output(output_path, format='mp3', acodec='libmp3lame', audio_bitrate='192k')
.overwrite_output()
.run(quiet=True)
)
return output_path
def split_audio(file):
    """Load audio, resample to RATE_HZ, and return up to MAX_SEGMENTS one-second chunks as a single array."""
    segmented_audio = []
    try:
        audio, rate = torchaudio.load(str(file))
        # Resample the whole first channel up front so every chunk is exactly MAX_SECONDS long at RATE_HZ.
        transform = torchaudio.transforms.Resample(rate, RATE_HZ)
        audio = transform(audio[0])
        num_segments = len(audio) // MAX_LENGTH  # floor division drops the trailing partial chunk
        for i in range(min(num_segments, MAX_SEGMENTS)):
            start = i * MAX_LENGTH
            end = (i + 1) * MAX_LENGTH
            segmented_audio.append(audio[start:end].numpy().reshape(-1))
    except Exception as e:
        print(f"Error processing file: {e}")
        return np.array([], dtype=np.float32)
    if not segmented_audio:
        return np.array([], dtype=np.float32)
    return np.concatenate(segmented_audio)
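# Map the classifier's raw label ids to human-readable accent names for display.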
accent_mapping = {
    'us': 'American',
    'canada': 'Canadian',
    'england': 'British',
    'indian': 'Indian',
    'australia': 'Australian',
}
st.set_page_config(page_title="Accent Classifier", layout="centered")
st.title("🎙️ English Accent Classifier")
st.markdown("Paste a public video link and get the predicted English accent with a confidence score.")
video_url = st.text_input("Paste a public video URL (Loom, or MP4):")
if st.button("Analyze"):
if not video_url.strip():
st.warning("Please enter a valid URL.")
else:
with st.spinner("Downloading video..."):
video_path = download_video(video_url)
with st.spinner("Extracting audio..."):
audio_path = extract_audio(video_path)
with st.spinner("Extracting Waves..."):
waves = split_audio(audio_path)
with st.spinner("Classifying accent..."):
model_name = "dima806/english_accents_classification"
pipe = pipeline('audio-classification', model=model_name, device=0)
# accent_data = accent_classify(pipe, audio_path)
accent_data = pipe(waves)[0]
accent = accent_mapping.get(accent_data.get("label", "us"))
confidence = accent_data.get("score", 0)
st.success("Analysis Complete!")
st.markdown(f"**Accent:** {accent}")
st.markdown(f"**Confidence Score:** {confidence:.2f}%")
# Cleanup
os.remove(video_path)
os.remove(audio_path) |