Yilin0601 committed on
Commit
4bcefb9
·
verified ·
1 Parent(s): 1e21e2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -68
app.py CHANGED
@@ -1,82 +1,46 @@
1
  # app.py
2
 
3
  import gradio as gr
4
- import torch
5
  import numpy as np
6
- import librosa
7
- from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification
8
 
9
- # ------------------------------------------------
10
- # 1. Load base Wav2Vec2 model + classification head
11
- # ------------------------------------------------
12
# Checkpoint used for both the classifier backbone and the feature extractor.
model_name = "facebook/wav2vec2-base-960h"

# Passing num_labels=8 puts a randomly initialised 8-way classification head
# on top of the base model; that head is not fine-tuned.
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name, num_labels=8)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)

# Inference only: put the model in evaluation mode.
model.eval()
22
-
23
- # ------------------------------------------------
24
- # 2. Define inference function
25
- # ------------------------------------------------
26
def _to_mono_float32(data):
    """Return *data* as a 1-D float32 waveform suitable for resampling."""
    samples = np.asarray(data)
    if np.issubdtype(samples.dtype, np.integer):
        # Integer PCM (Gradio's numpy audio is int16): scale by the dtype's
        # full-scale value so the waveform lands in [-1.0, 1.0).
        full_scale = float(np.iinfo(samples.dtype).max) + 1.0
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)
    if samples.ndim > 1:
        # Multi-channel clips arrive as (samples, channels); average the
        # channels so the time axis is the only axis left.
        samples = samples.mean(axis=1)
    return samples


def classify_accuracy(audio):
    """
    Classify a clip into an accuracy level with the Wav2Vec2 classifier.

    Parameters
    ----------
    audio : tuple or None
        The ``(sample_rate, data)`` pair Gradio passes when ``type='numpy'``,
        or ``None`` when nothing was recorded.

    Returns
    -------
    str
        ``"Predicted Accuracy Level: N"`` where N is the predicted class id
        (0..7) shifted to 3..10, or ``"No audio provided."`` when there is
        no audio to classify.
    """
    if audio is None:
        return "No audio provided."

    sample_rate, data = audio

    # librosa.resample only accepts floating-point input and works along the
    # last axis, so convert to a mono float waveform before touching it.
    waveform = _to_mono_float32(data)
    if waveform.size == 0:
        # An empty recording cannot be resampled or classified.
        return "No audio provided."

    # Resample to 16 kHz if needed.
    target_sr = 16000
    if sample_rate != target_sr:
        waveform = librosa.resample(waveform, orig_sr=sample_rate, target_sr=target_sr)
        sample_rate = target_sr

    # Extract features from the audio data.
    inputs = feature_extractor(
        waveform,
        sampling_rate=sample_rate,
        return_tensors="pt",
        padding=True,
    )

    # Run model inference without tracking gradients.
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_id = torch.argmax(logits, dim=-1).item()

    # Map predicted id (0..7) to the final level (3..10).
    accuracy_level = predicted_id + 3
    return f"Predicted Accuracy Level: {accuracy_level}"
63
-
64
- # ------------------------------------------------
65
- # 3. Build Gradio interface
66
- # ------------------------------------------------
67
# Heading and explanatory text displayed with the interface.
title = "Speech Accuracy Classifier (Base Wav2Vec2)"
description = (
    "Record audio using your microphone or upload an audio file (left). The model "
    "(not fine-tuned) will classify the audio into an accuracy level (right)."
)

# The audio input is configured with source="microphone" and hands the
# callback a (sample_rate, data) tuple because type="numpy".
# NOTE(review): whether file upload is also offered with this setting depends
# on the installed Gradio version -- confirm against the pinned release.
demo = gr.Interface(
    title=title,
    description=description,
    fn=classify_accuracy,
    inputs=gr.Audio(source="microphone", type="numpy", label="Record/Upload Audio"),
    outputs=gr.Textbox(label="Classification Result"),
    allow_flagging="never",
)
82
 
 
1
  # app.py
2
 
3
  import gradio as gr
 
4
  import numpy as np
 
 
5
 
6
def reverse_audio(audio):
    """
    Reverse an audio clip in time.

    Parameters
    ----------
    audio : tuple or None
        The ``(sample_rate, data)`` pair Gradio passes when ``type='numpy'``,
        or ``None`` when nothing was recorded.

    Returns
    -------
    tuple or None
        ``(sample_rate, reversed_data)`` so Gradio can play the clip back,
        or ``None`` when no audio was provided.
    """
    if audio is None:
        return None  # No audio provided

    sample_rate, data = audio

    # asarray avoids copying when Gradio already handed us an ndarray.
    samples = np.asarray(data)

    # Reverse along the first axis only, so the sample order flips while any
    # channel columns stay in place. A reversed slice is a negative-stride
    # view; materialise it as a contiguous array so downstream consumers get
    # a plain buffer and the caller's array is left untouched.
    reversed_data = np.ascontiguousarray(samples[::-1])

    return (sample_rate, reversed_data)
23
+
24
# Build the Gradio interface: one audio input wired to reverse_audio, with the
# reversed clip returned through an audio output for playback.
# NOTE(review): `source=` is the Gradio 3.x keyword; Gradio 4 renamed it to
# `sources=[...]` -- confirm which release this app is pinned to.
demo = gr.Interface(
    fn=reverse_audio,
    inputs=gr.Audio(
        source="microphone",    # record directly from the microphone
        type="numpy",           # the callback receives (sample_rate, data)
        label="Record/Upload Audio",
        show_label=False,       # hide the label
        elem_id="audio_input",  # element ID for custom styling
        # `bg_color` was removed: gr.Audio does not define that parameter, so
        # it was at best ignored with a warning and at worst a TypeError.
        # Additional arguments you can customize:
        # interactive=True,
        # show_share_button=False,
    ),
    outputs="audio",  # return reversed audio for playback
    title="Microphone Audio Reverse Demo",
    description=(
        "Records audio from your microphone or lets you upload an audio file. "
        "Then the audio is reversed and played back."
    ),
    allow_flagging="never",
)
46