Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,8 @@ import librosa
|
|
3 |
from speechbrain.inference.classifiers import EncoderClassifier
|
4 |
from pydub import AudioSegment
|
5 |
import gradio as gr
|
|
|
|
|
6 |
|
7 |
# Load model once
|
8 |
classifier = EncoderClassifier.from_hparams(
|
@@ -11,15 +13,24 @@ classifier = EncoderClassifier.from_hparams(
|
|
11 |
)
|
12 |
|
13 |
def classify_accent(video):
|
|
|
|
|
|
|
|
|
14 |
audio = AudioSegment.from_file(video, format="mp4")
|
15 |
-
audio.export(
|
16 |
|
17 |
-
|
|
|
18 |
waveform_tensor = torch.tensor(waveform).unsqueeze(0)
|
19 |
|
|
|
20 |
prediction = classifier.classify_batch(waveform_tensor)
|
21 |
_, score, _, text_lab = prediction
|
22 |
|
|
|
|
|
|
|
23 |
return f"Accent: {text_lab[0]} (Confidence: {score.item():.2f})"
|
24 |
|
25 |
app = gr.Interface(
|
@@ -32,3 +43,4 @@ app = gr.Interface(
|
|
32 |
|
33 |
if __name__ == "__main__":
|
34 |
app.launch()
|
|
|
|
3 |
from speechbrain.inference.classifiers import EncoderClassifier
|
4 |
from pydub import AudioSegment
|
5 |
import gradio as gr
|
6 |
+
import uuid
|
7 |
+
import os
|
8 |
|
9 |
# Load model once
|
10 |
classifier = EncoderClassifier.from_hparams(
|
|
|
13 |
)
|
14 |
|
15 |
def classify_accent(video):
    """Classify the speaker's accent from the audio track of a video.

    The input is decoded as MP4 with pydub, exported to a temporary WAV
    file, reloaded with librosa as 16 kHz mono, and passed as a batch of
    one to the module-level ``classifier``.

    Args:
        video: The video to analyse, handed straight to
            ``AudioSegment.from_file`` (presumably a file path supplied by
            the Gradio video input -- confirm against the interface).

    Returns:
        A string of the form ``"Accent: <label> (Confidence: <score>)"``,
        with the score formatted to two decimal places.
    """
    import tempfile

    # A private temporary directory gives a collision-free path on any
    # platform and is deleted when the ``with`` block exits, so the WAV file
    # no longer leaks when decoding or loading raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_wav = os.path.join(tmp_dir, "audio.wav")

        # Convert to .wav
        audio = AudioSegment.from_file(video, format="mp4")
        # export() returns the open output file handle; close it explicitly
        # so the file is flushed and released before it is read and removed.
        audio.export(temp_wav, format="wav").close()

        # Load waveform (the sample rate is fixed by ``sr=16000``, so the
        # returned rate is not needed).
        waveform, _ = librosa.load(temp_wav, sr=16000, mono=True)

    # Add a leading batch dimension: the classifier is called on a batch.
    waveform_tensor = torch.tensor(waveform).unsqueeze(0)

    # Predict
    prediction = classifier.classify_batch(waveform_tensor)
    _, score, _, text_lab = prediction

    return f"Accent: {text_lab[0]} (Confidence: {score.item():.2f})"
|
35 |
|
36 |
app = gr.Interface(
|
|
|
43 |
|
44 |
if __name__ == "__main__":
|
45 |
app.launch()
|
46 |
+
|