Spaces:

7H4M3R
/

Audio

Sleeping

7H4M3R committed on May 21

Commit

2d9b8e2

verified ·

1 Parent(s): c48241c

Update src/streamlit_app.py

Files changed (1) hide show

src/streamlit_app.py CHANGED Viewed

@@ -133,16 +133,16 @@ def accent_classify(pipe, audio_path):
     audio_df = split_audio(audio_path)
     return pipe(np.concatenate(audio_df["audio"][:50].to_list()))[0]
-# Load HF pipeline model (audio classification)
-@st.cache_resource
-def load_audio_classifier():
-    model_name = "dima806/english_accents_classification"
-    return pipeline('audio-classification', model=model_name, device=0)  # GPU (device=0) or CPU (device=-1)
-# Load Whisper model
-@st.cache_resource
-def load_whisper_model():
-    return whisper.load_model("base")
 # Load models once
 pipe = load_audio_classifier()
@@ -161,22 +161,25 @@ if st.button("Analyze"):
     else:
         with st.spinner("Downloading video..."):
             video_path = download_video(video_url)
-            pass
         with st.spinner("Extracting audio..."):
             audio_path = extract_audio(video_path)
-            pass
         with st.spinner("Transcribing with Whisper..."):
             result = whisper_model.transcribe(audio_path)
             transcription = result['text']
             # pass
         with st.spinner("Classifying accent..."):
             accent_data = accent_classify(pipe, audio_path)
             accent = accent_data.get("label", "us")
             confidence = accent_data.get("score", 0)
-            pass
         # accent = "Englsh"

     audio_df = split_audio(audio_path)
     return pipe(np.concatenate(audio_df["audio"][:50].to_list()))[0]
+# # Load HF pipeline model (audio classification)
+# @st.cache_resource
+# def load_audio_classifier():
+#     model_name = "dima806/english_accents_classification"
+#     return pipeline('audio-classification', model=model_name, device=0)  # GPU (device=0) or CPU (device=-1)
+# # Load Whisper model
+# @st.cache_resource
+# def load_whisper_model():
+#     return whisper.load_model("base")
 # Load models once
 pipe = load_audio_classifier()
     else:
         with st.spinner("Downloading video..."):
             video_path = download_video(video_url)
+            # pass
         with st.spinner("Extracting audio..."):
             audio_path = extract_audio(video_path)
+            # pass
         with st.spinner("Transcribing with Whisper..."):
+            whisper_model = whisper.load_model("base")
             result = whisper_model.transcribe(audio_path)
             transcription = result['text']
             # pass
         with st.spinner("Classifying accent..."):
+            model_name = "dima806/english_accents_classification"
+            pipe = pipeline('audio-classification', model=model_name, device=0)  # GPU (device=0) or CPU (device=-1)
             accent_data = accent_classify(pipe, audio_path)
             accent = accent_data.get("label", "us")
             confidence = accent_data.get("score", 0)
+            # pass
         # accent = "Englsh"