JCrimson1 committed on
Commit
f93f37c
·
verified ·
1 Parent(s): 6923d3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -2
app.py CHANGED
@@ -3,6 +3,8 @@ import librosa
3
  from speechbrain.inference.classifiers import EncoderClassifier
4
  from pydub import AudioSegment
5
  import gradio as gr
 
 
6
 
7
  # Load model once
8
  classifier = EncoderClassifier.from_hparams(
@@ -11,15 +13,24 @@ classifier = EncoderClassifier.from_hparams(
11
  )
12
 
13
  def classify_accent(video):
 
 
 
 
14
  audio = AudioSegment.from_file(video, format="mp4")
15
- audio.export("output.wav", format="wav")
16
 
17
- waveform, sr = librosa.load("output.wav", sr=16000, mono=True)
 
18
  waveform_tensor = torch.tensor(waveform).unsqueeze(0)
19
 
 
20
  prediction = classifier.classify_batch(waveform_tensor)
21
  _, score, _, text_lab = prediction
22
 
 
 
 
23
  return f"Accent: {text_lab[0]} (Confidence: {score.item():.2f})"
24
 
25
  app = gr.Interface(
@@ -32,3 +43,4 @@ app = gr.Interface(
32
 
33
  if __name__ == "__main__":
34
  app.launch()
 
 
3
  from speechbrain.inference.classifiers import EncoderClassifier
4
  from pydub import AudioSegment
5
  import gradio as gr
6
+ import uuid
7
+ import os
8
 
9
  # Load model once
10
  classifier = EncoderClassifier.from_hparams(
 
13
  )
14
 
15
  def classify_accent(video):
16
+ # Generate unique filename
17
+ temp_wav = f"/tmp/{uuid.uuid4().hex}.wav"
18
+
19
+ # Convert to .wav
20
  audio = AudioSegment.from_file(video, format="mp4")
21
+ audio.export(temp_wav, format="wav")
22
 
23
+ # Load waveform
24
+ waveform, sr = librosa.load(temp_wav, sr=16000, mono=True)
25
  waveform_tensor = torch.tensor(waveform).unsqueeze(0)
26
 
27
+ # Predict
28
  prediction = classifier.classify_batch(waveform_tensor)
29
  _, score, _, text_lab = prediction
30
 
31
+ # Cleanup
32
+ os.remove(temp_wav)
33
+
34
  return f"Accent: {text_lab[0]} (Confidence: {score.item():.2f})"
35
 
36
  app = gr.Interface(
 
43
 
44
  if __name__ == "__main__":
45
  app.launch()
46
+