Update src/streamlit_app.py
Browse files — src/streamlit_app.py +15 -12
src/streamlit_app.py
CHANGED
@@ -133,16 +133,16 @@ def accent_classify(pipe, audio_path):
|
|
133 |
audio_df = split_audio(audio_path)
|
134 |
return pipe(np.concatenate(audio_df["audio"][:50].to_list()))[0]
|
135 |
|
136 |
-
# Load HF pipeline model (audio classification)
|
137 |
-
@st.cache_resource
|
138 |
-
def load_audio_classifier():
|
139 |
-
|
140 |
-
|
141 |
|
142 |
-
# Load Whisper model
|
143 |
-
@st.cache_resource
|
144 |
-
def load_whisper_model():
|
145 |
-
|
146 |
|
147 |
# Load models once
|
148 |
pipe = load_audio_classifier()
|
@@ -161,22 +161,25 @@ if st.button("Analyze"):
|
|
161 |
else:
|
162 |
with st.spinner("Downloading video..."):
|
163 |
video_path = download_video(video_url)
|
164 |
-
pass
|
165 |
|
166 |
with st.spinner("Extracting audio..."):
|
167 |
audio_path = extract_audio(video_path)
|
168 |
-
pass
|
169 |
|
170 |
with st.spinner("Transcribing with Whisper..."):
|
|
|
171 |
result = whisper_model.transcribe(audio_path)
|
172 |
transcription = result['text']
|
173 |
# pass
|
174 |
|
175 |
with st.spinner("Classifying accent..."):
|
|
|
|
|
176 |
accent_data = accent_classify(pipe, audio_path)
|
177 |
accent = accent_data.get("label", "us")
|
178 |
confidence = accent_data.get("score", 0)
|
179 |
-
pass
|
180 |
|
181 |
|
182 |
# accent = "Englsh"
|
|
|
133 |
audio_df = split_audio(audio_path)
|
134 |
return pipe(np.concatenate(audio_df["audio"][:50].to_list()))[0]
|
135 |
|
136 |
+
# # Load HF pipeline model (audio classification)
|
137 |
+
# @st.cache_resource
|
138 |
+
# def load_audio_classifier():
|
139 |
+
# model_name = "dima806/english_accents_classification"
|
140 |
+
# return pipeline('audio-classification', model=model_name, device=0) # GPU (device=0) or CPU (device=-1)
|
141 |
|
142 |
+
# # Load Whisper model
|
143 |
+
# @st.cache_resource
|
144 |
+
# def load_whisper_model():
|
145 |
+
# return whisper.load_model("base")
|
146 |
|
147 |
# Load models once
|
148 |
pipe = load_audio_classifier()
|
|
|
161 |
else:
|
162 |
with st.spinner("Downloading video..."):
|
163 |
video_path = download_video(video_url)
|
164 |
+
# pass
|
165 |
|
166 |
with st.spinner("Extracting audio..."):
|
167 |
audio_path = extract_audio(video_path)
|
168 |
+
# pass
|
169 |
|
170 |
with st.spinner("Transcribing with Whisper..."):
|
171 |
+
whisper_model = whisper.load_model("base")
|
172 |
result = whisper_model.transcribe(audio_path)
|
173 |
transcription = result['text']
|
174 |
# pass
|
175 |
|
176 |
with st.spinner("Classifying accent..."):
|
177 |
+
model_name = "dima806/english_accents_classification"
|
178 |
+
pipe = pipeline('audio-classification', model=model_name, device=0) # GPU (device=0) or CPU (device=-1)
|
179 |
accent_data = accent_classify(pipe, audio_path)
|
180 |
accent = accent_data.get("label", "us")
|
181 |
confidence = accent_data.get("score", 0)
|
182 |
+
# pass
|
183 |
|
184 |
|
185 |
# accent = "Englsh"
|