7H4M3R committed
Commit b25a75c · verified · 1 Parent(s): 4aab7b8

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +11 -26
src/streamlit_app.py CHANGED
@@ -1,8 +1,5 @@
 import streamlit as st
 import os
-# from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
-# from utils import download_video, extract_audio, accent_classify
-# import whisper
 from transformers import pipeline
 from transformers.utils import logging
 import numpy as np
@@ -16,7 +13,7 @@ logging.set_verbosity_info()
 RATE_HZ = 16000
 MAX_SECONDS = 1
 MAX_LENGTH = RATE_HZ * MAX_SECONDS
-
+MAX_SEGMENTS = 250
 
 def download_video(url, output_path="video.mp4"):
     ydl_opts = {
@@ -44,27 +41,24 @@ def extract_audio(input_path, output_path="audio.mp3"):
     return output_path
 
 def split_audio(file):
+    segmented_audio = []
     try:
         audio, rate = torchaudio.load(str(file))
+        transform = torchaudio.transforms.Resample(rate, RATE_HZ)
         num_segments = (len(audio[0]) // MAX_LENGTH)  # Floor division to get segments
-        segmented_audio = []
         for i in range(num_segments):
+            if i >= MAX_SEGMENTS:
+                break
             start = i * MAX_LENGTH
             end = min((i + 1) * MAX_LENGTH, len(audio[0]))
             segment = audio[0][start:end]
-            transform = torchaudio.transforms.Resample(rate, RATE_HZ)
            segment = transform(segment).squeeze(0).numpy().reshape(-1)
            segmented_audio.append(segment)
-        df_segments = pd.DataFrame({'audio': segmented_audio})
-        return df_segments
-
     except Exception as e:
         print(f"Error processing file: {e}")
-        return None
-
-# def accent_classify(pipe, audio_path):
-#     audio_df = split_audio(audio_path)
-#     return pipe(np.concatenate(audio_df["audio"][:250].to_list()))[0]
+        return segmented_audio
+    else:
+        return np.concatenate(segmented_audio)
 
 accent_mapping = {
     'us': 'American',
@@ -89,15 +83,9 @@ if st.button("Analyze"):
     with st.spinner("Extracting audio..."):
         audio_path = extract_audio(video_path)
 
-    # with st.spinner("Transcribing with Whisper..."):
-    #     whisper_model = whisper.load_model("base")
-    #     result = whisper_model.transcribe(audio_path)
-    #     transcription = result['text']
-    #     # pass
-    with st.spinner("Extracting waves..."):
-        audio_df = split_audio(audio_path)
-        waves = np.concatenate(audio_df["audio"][:250].to_list())
-
+    with st.spinner("Extracting Waves..."):
+        waves = split_audio(audio_path)
+
     with st.spinner("Classifying accent..."):
         model_name = "dima806/english_accents_classification"
         pipe = pipeline('audio-classification', model=model_name, device=0)
@@ -110,9 +98,6 @@ if st.button("Analyze"):
     st.markdown(f"**Accent:** {accent}")
     st.markdown(f"**Confidence Score:** {confidence:.2f}%")
 
-    # st.markdown("**Transcription:**")
-    # st.text_area("Transcript", transcription, height=200)
-
     # Cleanup
     os.remove(video_path)
     os.remove(audio_path)
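Note on the reworked split_audio: it returns a concatenated np.ndarray of 16 kHz samples on success but the bare segmented_audio list when torchaudio fails to decode, and np.concatenate raises ValueError if no full one-second segment was read. A minimal caller sketch, assuming split_audio, accent_mapping, and pipe as defined in the file above; the isinstance guard is an illustration, not part of the commit:

    import numpy as np

    waves = split_audio("audio.mp3")  # np.ndarray on success, list on failure
    if isinstance(waves, np.ndarray) and waves.size > 0:
        result = pipe(waves)[0]  # top prediction, e.g. {'label': 'us', 'score': 0.93}
        accent = accent_mapping.get(result['label'], result['label'])
        print(accent, f"{result['score'] * 100:.2f}%")
    else:
        print("No usable audio segments were decoded.")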
 
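One caveat: pipeline('audio-classification', ..., device=0) pins inference to the first CUDA device and fails on CPU-only hosts. A hedged fallback sketch (the torch availability check is an assumption, not in the commit):

    import torch
    from transformers import pipeline

    device = 0 if torch.cuda.is_available() else -1  # -1 selects CPU
    pipe = pipeline('audio-classification',
                    model='dima806/english_accents_classification',
                    device=device)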