File size: 3,245 Bytes
1d33b47
0381907
 
61bf2df
 
 
0381907
 
 
c48241c
 
0381907
 
 
 
b25a75c
0381907
61bf2df
0381907
61bf2df
 
 
 
 
 
 
 
0381907
 
 
61bf2df
0381907
61bf2df
0381907
61bf2df
 
 
 
 
 
0381907
 
 
b25a75c
0381907
 
b25a75c
0381907
 
b25a75c
 
0381907
 
 
 
 
 
 
b25a75c
 
 
0381907
61bf2df
 
 
 
 
 
 
0381907
61bf2df
0381907
 
8265a8e
0381907
 
61bf2df
 
0381907
61bf2df
 
0381907
 
 
61bf2df
b25a75c
 
 
61bf2df
2dd9c4b
1fd173a
4aab7b8
 
61bf2df
 
0381907
 
 
 
61bf2df
0381907
 
61bf2df
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import streamlit as st
import os
from transformers import pipeline
from transformers.utils import logging
import numpy as np 
import pandas as pd
import yt_dlp
import torchaudio
import ffmpeg

# Emit transformers progress/info messages (e.g. model download status).
logging.set_verbosity_info()

# Target sample rate (Hz) the audio is resampled to for the classifier.
RATE_HZ = 16000
# Duration of each analysis window, in seconds.
MAX_SECONDS = 1
# Window length in samples (RATE_HZ * MAX_SECONDS); used as the slice size in split_audio.
MAX_LENGTH = RATE_HZ * MAX_SECONDS
# Hard cap on the number of windows processed per clip.
MAX_SEGMENTS = 250

def download_video(url, output_path="video.mp4"):
    """Download the video at *url* with yt-dlp and save it to *output_path*.

    Returns the path the file was written to.
    """
    # Smallest mp4 video stream plus best m4a audio keeps downloads light;
    # falls back to best audio-only when no mp4 video stream exists.
    options = {
        'format': 'worstvideo[ext=mp4]+bestaudio[ext=m4a]/bestaudio',
        'outtmpl': output_path,
        'merge_output_format': 'mp4',
        'quiet': True,
        'noplaylist': True,
        'nocheckcertificate': True,
        'retries': 3,
    }
    downloader = yt_dlp.YoutubeDL(options)
    with downloader:
        downloader.download([url])
    return output_path

def extract_audio(input_path, output_path="audio.mp3"):
    """Extract the audio track from *input_path* into an mp3 at *output_path*.

    Returns the path the mp3 was written to.
    """
    stream = ffmpeg.input(input_path)
    stream = stream.output(output_path, format='mp3', acodec='libmp3lame', audio_bitrate='192k')
    # Overwrite any leftover file from a previous run; suppress ffmpeg chatter.
    stream.overwrite_output().run(quiet=True)
    return output_path

def split_audio(file):
    """Slice an audio file into fixed-size windows and return them as one 1-D array.

    Loads *file* with torchaudio, cuts the first channel into up to
    MAX_SEGMENTS windows of MAX_LENGTH samples each, resamples every window
    to RATE_HZ, and concatenates the results into a single float array.

    Returns an empty float32 ndarray when the clip is shorter than one
    window or when loading/processing fails, so callers always receive an
    ndarray (the original returned a bare list on error and raised
    ValueError from np.concatenate([]) on short clips).
    """
    segments = []
    try:
        audio, rate = torchaudio.load(str(file))
        # NOTE(review): windows are MAX_LENGTH samples at the *source* rate,
        # so each covers RATE_HZ/rate seconds of audio before resampling —
        # confirm this is intended rather than resampling the whole clip first.
        transform = torchaudio.transforms.Resample(rate, RATE_HZ)
        channel = audio[0]
        num_segments = min(len(channel) // MAX_LENGTH, MAX_SEGMENTS)
        for i in range(num_segments):
            start = i * MAX_LENGTH
            segment = channel[start:start + MAX_LENGTH]
            segments.append(transform(segment).squeeze(0).numpy().reshape(-1))
    except Exception as e:
        # Best-effort: report and fall through to the empty-array return.
        print(f"Error processing file: {e}")
    if not segments:
        return np.array([], dtype=np.float32)
    return np.concatenate(segments)

# Map the classifier's raw label strings to human-readable accent names.
accent_mapping = dict(
    us='American',
    canada='Canadian',
    england='British',
    indian='Indian',
    australia='Australian',
)

# --- Streamlit UI: URL in, accent label + confidence out -------------------
st.set_page_config(page_title="Accent Classifier", layout="centered")
# Bug fix: title emoji was mojibake ("๐ŸŽ™๏ธ") from a bad encoding round-trip.
st.title("🎙️ English Accent Classifier")
st.markdown("Upload a video link and get the English accent with confidence.")
video_url = st.text_input("Paste a public video URL (Loom, or MP4):")

if st.button("Analyze"):
    if not video_url.strip():
        st.warning("Please enter a valid URL.")
    else:
        video_path = None
        audio_path = None
        try:
            with st.spinner("Downloading video..."):
                video_path = download_video(video_url)

            with st.spinner("Extracting audio..."):
                audio_path = extract_audio(video_path)

            with st.spinner("Extracting Waves..."):
                waves = split_audio(audio_path)

            with st.spinner("Classifying accent..."):
                model_name = "dima806/english_accents_classification"
                # device=0 assumes a CUDA device is available — TODO confirm,
                # or fall back to CPU (device=-1) on GPU-less hosts.
                pipe = pipeline('audio-classification', model=model_name, device=0)
                accent_data = pipe(waves)[0]
                # Bug fix: an unmapped label previously produced accent=None;
                # fall back to showing the raw label instead.
                label = accent_data.get("label", "us")
                accent = accent_mapping.get(label, label)
                confidence = accent_data.get("score", 0)

            st.success("Analysis Complete!")
            st.markdown(f"**Accent:** {accent}")
            # Bug fix: score is in [0, 1]; scale to percent before adding '%'.
            st.markdown(f"**Confidence Score:** {confidence * 100:.2f}%")
        finally:
            # Bug fix: cleanup previously ran only on full success, leaving
            # temp files behind whenever any step raised.
            for path in (video_path, audio_path):
                if path and os.path.exists(path):
                    os.remove(path)