Spaces:

JCrimson1
/

accent_demo_id2

Sleeping

JCrimson1 committed on May 30

Commit

f93f37c

verified ·

1 Parent(s): 6923d3e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,6 +3,8 @@ import librosa
 from speechbrain.inference.classifiers import EncoderClassifier
 from pydub import AudioSegment
 import gradio as gr
 # Load model once
 classifier = EncoderClassifier.from_hparams(
@@ -11,15 +13,24 @@ classifier = EncoderClassifier.from_hparams(
 )
 def classify_accent(video):
     audio = AudioSegment.from_file(video, format="mp4")
-    audio.export("output.wav", format="wav")
-    waveform, sr = librosa.load("output.wav", sr=16000, mono=True)
     waveform_tensor = torch.tensor(waveform).unsqueeze(0)
     prediction = classifier.classify_batch(waveform_tensor)
     _, score, _, text_lab = prediction
     return f"Accent: {text_lab[0]} (Confidence: {score.item():.2f})"
 app = gr.Interface(
@@ -32,3 +43,4 @@ app = gr.Interface(
 if __name__ == "__main__":
     app.launch()

 from speechbrain.inference.classifiers import EncoderClassifier
 from pydub import AudioSegment
 import gradio as gr
+import uuid
+import os
 # Load model once
 classifier = EncoderClassifier.from_hparams(
 )
 def classify_accent(video):
     """Classify the audio track of an uploaded video and report the label.

     The input is decoded with pydub as an MP4, written to a uniquely named
     temporary WAV file, reloaded with librosa as 16 kHz mono, and passed to
     the module-level ``classifier``.

     Args:
         video: Path to the uploaded video file (presumably supplied by the
             Gradio interface; confirm against the ``gr.Interface`` inputs).

     Returns:
         A string of the form ``"Accent: <label> (Confidence: <score>)"``,
         or a short notice when no video was supplied.
     """
     # Nothing to decode; answer with a message instead of failing inside pydub.
     if not video:
         return "No video provided."

     # A unique name keeps concurrent requests from overwriting each other.
     temp_wav = f"/tmp/{uuid.uuid4().hex}.wav"
     try:
         # Convert to .wav
         audio = AudioSegment.from_file(video, format="mp4")
         audio.export(temp_wav, format="wav")
         # Load waveform (resampled to 16 kHz, down-mixed to mono)
         waveform, _ = librosa.load(temp_wav, sr=16000, mono=True)
     finally:
         # Always clean up, even when decoding or loading fails; the file may
         # not exist yet if the failure happened before the export.
         try:
             os.remove(temp_wav)
         except FileNotFoundError:
             pass

     # Add a leading batch dimension: classify_batch is given a 2-D tensor.
     waveform_tensor = torch.tensor(waveform).unsqueeze(0)
     # Predict
     prediction = classifier.classify_batch(waveform_tensor)
     _, score, _, text_lab = prediction
     return f"Accent: {text_lab[0]} (Confidence: {score.item():.2f})"
 app = gr.Interface(
 if __name__ == "__main__":
     app.launch()