7H4M3R committed
Commit 61bf2df · verified · 1 parent: 7cf2ff9

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +46 -97
src/streamlit_app.py CHANGED
@@ -1,165 +1,114 @@
 import streamlit as st
 import os
-import numpy as np # linear algebra
-import pandas as pd # data processing
 # from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
 # from utils import download_video, extract_audio, accent_classify
-import whisper
+# import whisper
 from transformers import pipeline
+from transformers.utils import logging
+import numpy as np
+import pandas as pd
 import yt_dlp
 import torchaudio
-import yt_dlp
 import ffmpeg
-from transformers.utils import logging
 
 logging.set_verbosity_info()
 
-# Define the resampling rate in Hertz (Hz) for audio data
 RATE_HZ = 16000
-# Define the maximum audio interval length to consider in seconds
 MAX_SECONDS = 1
-# Calculate the maximum audio interval length in samples by multiplying the rate and seconds
 MAX_LENGTH = RATE_HZ * MAX_SECONDS
 
 
-def download_video(url, output_dir="/app/tmp"):
-    os.makedirs(output_dir, exist_ok=True)
+def download_video(url, output_path="video.mp4"):
     ydl_opts = {
-        'format': 'worstvideo[ext=mp4]+bestaudio[ext=m4a]/bestaudio',
-        "outtmpl": os.path.join(output_dir, "video.%(ext)s"),
-        "quiet": True,
-        'merge_output_format': 'mp4',
-        'quiet': True,
-        'noplaylist': True,
-        'nocheckcertificate': True,
-        'retries': 3,
-    }
+        'format': 'worstvideo[ext=mp4]+bestaudio[ext=m4a]/bestaudio',
+        'outtmpl': output_path,
+        'merge_output_format': 'mp4',
+        'quiet': True,
+        'noplaylist': True,
+        'nocheckcertificate': True,
+        'retries': 3,
+    }
 
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         ydl.download([url])
-    return os.path.join(output_dir, "video.mp4")
+    return output_path
 
-def extract_audio(input_path, output_dir="/app/tmp"):
-    os.makedirs(output_dir, exist_ok=True)
-    output_path = os.path.join(output_dir, "audio.mp3")
+def extract_audio(input_path, output_path="audio.mp3"):
     (
-        ffmpeg
-        .input(input_path)
-        .output(output_path, format='mp3', acodec='libmp3lame', audio_bitrate='192k')
-        .overwrite_output()
-        .run(quiet=True)
-    )
+        ffmpeg
+        .input(input_path)
+        .output(output_path, format='mp3', acodec='libmp3lame', audio_bitrate='192k')
+        .overwrite_output()
+        .run(quiet=True)
+    )
     return output_path
 
-# Split files by chunks with == MAX_LENGTH size
 def split_audio(file):
     try:
-        # Load the audio file using torchaudio and get its sample rate.
         audio, rate = torchaudio.load(str(file))
-
-        # Calculate the number of segments based on the MAX_LENGTH
         num_segments = (len(audio[0]) // MAX_LENGTH) # Floor division to get segments
-
-        # Create an empty list to store segmented audio data
         segmented_audio = []
-
-        # Split the audio into segments
         for i in range(num_segments):
            start = i * MAX_LENGTH
            end = min((i + 1) * MAX_LENGTH, len(audio[0]))
            segment = audio[0][start:end]
-
-            # Create a transformation to resample the audio to a specified sample rate (RATE_HZ).
            transform = torchaudio.transforms.Resample(rate, RATE_HZ)
            segment = transform(segment).squeeze(0).numpy().reshape(-1)
-
            segmented_audio.append(segment)
-
-        # Create a DataFrame from the segmented audio
        df_segments = pd.DataFrame({'audio': segmented_audio})
-
        return df_segments
 
     except Exception as e:
-        # If an exception occurs (e.g., file not found), return nothing
         print(f"Error processing file: {e}")
         return None
 
 def accent_classify(pipe, audio_path):
     audio_df = split_audio(audio_path)
-    return pipe(np.concatenate(audio_df["audio"][:50].to_list()))[0]
+    return pipe(np.concatenate(audio_df["audio"][:250].to_list()))[0]
 
-st.set_page_config(page_title="Accent Classifier", layout="centered")
+accent_mapping = {
+    'us': 'American',
+    'canada': 'Canadian',
+    'england': 'British',
+    'indian': 'Indian',
+    'australia': 'Australian',
+}
 
+st.set_page_config(page_title="Accent Classifier", layout="centered")
 st.title("🎙️ English Accent Classifier")
 st.markdown("Upload a video link and get the English accent with confidence.")
-
-st.subheader("1. Upload a Video File")
-uploaded_file = st.file_uploader("Choose a video file", type=["mp4", "mov", "avi"])
-
-st.subheader("2. Or Enter a Video URL")
 video_url = st.text_input("Paste a public video URL (Loom, or MP4):")
 
 if st.button("Analyze"):
-    video_path = None
-    output_dir="/app/tmp"
-    os.makedirs(output_dir, exist_ok=True)
-
-    if uploaded_file:
-        video_path = os.path.join(output_dir, "video.mp4")
-        with open(video_path, "wb") as f:
-            f.write(uploaded_file.read())
-        st.success("✅ Video uploaded successfully.")
-    elif video_url.strip():
-        with st.spinner("Downloading video from URL..."):
-            try:
-                video_path = download_video(video_url)
-            except Exception as e:
-                st.error(f"❌ Failed to download video: {e}")
-            else:
-                st.success(f"✅ Video downloaded: {video_path}")
-
+    if not video_url.strip():
+        st.warning("Please enter a valid URL.")
     else:
-        st.warning("⚠️ Please upload a video file or enter a valid URL.")
+        with st.spinner("Downloading video..."):
+            video_path = download_video(video_url)
 
-    if video_path and os.path.exists(video_path):
-        st.write("Exists:", os.path.exists(video_path))
         with st.spinner("Extracting audio..."):
             audio_path = extract_audio(video_path)
-        st.write("Audio saved at:", audio_path)
-        st.write("Exists:", os.path.exists(audio_path))
-
-        # with st.spinner("Transcribing with Whisper..."):
-        #     whisper_model = whisper.load_model("base")
-        #     result = whisper_model.transcribe(audio_path)
-        #     transcription = result['text']
-        #     transcription = "Hello There"
-        #     pass
-
-        with st.spinner("Extracting waves..."):
-            audio_df = split_audio(audio_path)
-            # print(np.concatenate(audio_df["audio"][:50].to_list()))
-            waves = f"{np.concatenate(audio_df["audio"][:5].to_list())}"
-            st.markdown("**Audio waves:**")
-            st.text_area("Audio waves", waves, height=200)
-
-
-        with st.spinner("Classifying accent..."):
+
+        # with st.spinner("Transcribing with Whisper..."):
+        #     whisper_model = whisper.load_model("base")
+        #     result = whisper_model.transcribe(audio_path)
+        #     transcription = result['text']
+        #     # pass
+
+        with st.spinner("Classifying accent..."):
             model_name = "dima806/english_accents_classification"
             pipe = pipeline('audio-classification', model=model_name, device=0)
             accent_data = accent_classify(pipe, audio_path)
-
-            # accent_data = {"label": "American", "score": 0.9}
-            accent = accent_data.get("label", "American")
-            confidence = accent_data.get("score", 0.0)
-            # pass
+            accent = accent_mapping.get(accent_data.get("label", "us"))
+            confidence = accent_data.get("score", 0)
 
         st.success("Analysis Complete!")
         st.markdown(f"**Accent:** {accent}")
         st.markdown(f"**Confidence Score:** {confidence:.2f}%")
+
         # st.markdown("**Transcription:**")
         # st.text_area("Transcript", transcription, height=200)
 
         # Cleanup
         os.remove(video_path)
-    os.remove(audio_path)
+        os.remove(audio_path)
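
For context, the sketch below shows how the refactored helpers compose outside the Streamlit UI. It is illustrative only, not part of the commit: it assumes src/ is on PYTHONPATH, the URL is a placeholder, and device=0 presumes a GPU as in the commit (device=-1 would select the CPU). Note that the pipeline's score is a probability in [0, 1]; the sketch scales it by 100 before printing it as a percentage, whereas the app formats the raw value with a % sign.

# Illustrative sketch, not part of the commit: run the helpers from
# src/streamlit_app.py end to end without the Streamlit UI.
# Importing the module executes its top-level st.* calls, which are
# inert outside `streamlit run`.
from transformers import pipeline
from streamlit_app import download_video, extract_audio, accent_classify, accent_mapping

video_path = download_video("https://example.com/clip.mp4")  # placeholder URL
audio_path = extract_audio(video_path)

# device=0 assumes a GPU, matching the commit; use device=-1 for CPU.
pipe = pipeline('audio-classification',
                model="dima806/english_accents_classification", device=0)

# accent_classify concatenates up to 250 one-second chunks
# (MAX_LENGTH = 16000 samples at 16 kHz), i.e. at most ~250 s of audio.
accent_data = accent_classify(pipe, audio_path)  # e.g. {'label': 'us', 'score': 0.93}
accent = accent_mapping.get(accent_data["label"], accent_data["label"])
print(f"{accent}: {accent_data['score'] * 100:.2f}%")  # score is a 0-1 probability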