Yilin0601 committed
Commit 528d66e · verified · 1 Parent(s): c2c9817

Create app.py

Files changed (1)
  1. app.py +66 -0
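
Note: to run as a Hugging Face Space, the repo would also need a requirements.txt covering the packages app.py imports below. A minimal sketch (this file is an assumption, not part of this commit):

    gradio
    torch
    numpy
    librosa
    transformers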
app.py ADDED
@@ -0,0 +1,66 @@
# app.py
import gradio as gr
import torch
import numpy as np
import librosa
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification

# 1. Load your model & feature extractor
model_name = "path_or_hub_id_of_your_finetuned_model"
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)

model.eval()

def classify_accuracy(audio):
    """
    audio: a (sample_rate, audio_data) tuple, which is what Gradio's
    microphone or file upload delivers with type="numpy". Convert it
    to the mono, float32, 16 kHz format the model expects.
    """
    if audio is None:
        return "No audio received."

    sample_rate, data = audio

    # Gradio delivers int16 PCM by default; convert to float32 in [-1, 1]
    if not isinstance(data, np.ndarray):
        data = np.array(data)
    if np.issubdtype(data.dtype, np.integer):
        data = data.astype(np.float32) / np.iinfo(data.dtype).max
    else:
        data = data.astype(np.float32)

    # Downmix stereo (samples, channels) to mono
    if data.ndim > 1:
        data = data.mean(axis=1)

    # If sample_rate != 16000, resample to the rate the model was trained on.
    # For small demos, librosa is sufficient.
    if sample_rate != 16000:
        data = librosa.resample(data, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    # Extract features
    inputs = feature_extractor(
        data,
        sampling_rate=sample_rate,
        return_tensors="pt",
        padding=True
    )

    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        predicted_id = torch.argmax(logits, dim=-1).item()

    # Map the class index to the final accuracy level
    accuracy_level = predicted_id + 3  # adjust if your 0..7 → 3..10 mapping differs
    return f"Accuracy Level: {accuracy_level}"

# 2. Build Gradio interface
title = "Speech Accuracy Classifier"
description = "Upload an audio file (or record) to see the predicted accuracy level."

# Use source="microphone" in gr.Audio for mic input instead of upload.
# (Gradio 4+ renamed this parameter to sources=["upload", "microphone"];
# this code targets the 3.x API.)
# type="numpy" makes Gradio pass (sample_rate, data) to the function.
demo = gr.Interface(
    fn=classify_accuracy,
    inputs=gr.Audio(source="upload", type="numpy"),
    outputs="text",
    title=title,
    description=description,
    allow_flagging="never"  # optional
)

# 3. Launch the Gradio app
if __name__ == "__main__":
    demo.launch()
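
To sanity-check the handler before wiring it to the UI, classify_accuracy can be called directly with a synthetic tone shaped like Gradio's (sample_rate, data) tuple. A minimal sketch, assuming model_name above points at a real fine-tuned checkpoint:

    # smoke_test.py — exercises the resampling branch with a non-16 kHz input
    import numpy as np
    from app import classify_accuracy

    sr = 22050  # deliberately not 16 kHz, so librosa.resample runs
    t = np.linspace(0, 1, sr, endpoint=False)
    tone = (0.5 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)

    print(classify_accuracy((sr, tone)))  # prints "Accuracy Level: <n>"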