PatienceIzere committed on
Commit
e0a3394
·
verified ·
1 Parent(s): 466da64

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -284
app.py CHANGED
@@ -3,30 +3,36 @@ import streamlit as st
3
  import tempfile
4
  import base64
5
  import numpy as np
 
6
  from datetime import datetime
7
  import soundfile as sf
8
  import io
9
- import glob
10
- import shutil
11
- import time
12
  from hf_transcriber import HFTranscriber
13
  from huggingface_hub import login
 
14
 
15
- # Load environment variables from .env file if it exists
16
- from dotenv import load_dotenv
17
- load_dotenv()
 
 
 
18
 
19
- # Authenticate with Hugging Face
20
- HUGGINGFACE_TOKEN = os.environ.get('HUGGINGFACE_TOKEN')
21
- if HUGGINGFACE_TOKEN:
22
- try:
 
 
 
 
 
23
  login(token=HUGGINGFACE_TOKEN)
24
- st.success("Successfully authenticated with Hugging Face.")
25
- except Exception as e:
26
- st.warning(f"Could not authenticate with Hugging Face: {str(e)}")
27
- else:
28
- st.warning("Hugging Face token not found. Some models may not work without authentication.")
29
- st.info("Create a .env file with HUGGINGFACE_TOKEN=your_token_here or set it in your environment variables.")
30
 
31
  # Configuration dictionary to store app settings
32
  app_config = {
@@ -87,293 +93,78 @@ def get_binary_file_downloader_html(bin_file, file_label='File'):
87
  def save_uploaded_file(uploaded_file):
88
  """Save uploaded file to a temporary file and return the path."""
89
  try:
90
- # Create temp directory if it doesn't exist
91
- os.makedirs("temp_uploads", exist_ok=True)
92
-
93
- # Create a temporary file with a proper extension
94
- file_ext = os.path.splitext(uploaded_file.name)[1]
95
- with tempfile.NamedTemporaryFile(delete=False, dir="temp_uploads", suffix=file_ext) as tmp_file:
96
  tmp_file.write(uploaded_file.getvalue())
97
  return tmp_file.name
98
  except Exception as e:
99
  st.error(f"Error saving file: {str(e)}")
100
  return None
101
 
 
 
 
 
 
 
 
 
 
 
 
102
  def main():
103
- st.set_page_config(page_title="Audio to Sheet Music Transcriber", layout="wide")
104
-
105
  st.title("🎵 Audio to Sheet Music Transcriber")
106
  st.markdown("### Convert monophonic audio to sheet music")
107
 
108
- # Show warning if no audio devices are available
109
- if not app_config['RECORDING_ENABLED']:
110
- st.warning("""
111
- ⚠️ **No audio recording devices detected**
112
- You can still use this app by uploading audio files for transcription.
113
- """, icon="⚠️")
114
-
115
- # Initialize session state for recording if enabled
116
- if app_config['RECORDING_ENABLED']:
117
- if 'recorder' not in st.session_state:
118
- try:
119
- st.session_state.recorder = app_config.get('AudioRecorder')()
120
- st.session_state.recording = False
121
- except Exception as e:
122
- st.error(f"Failed to initialize audio recorder: {str(e)}")
123
- app_config['RECORDING_ENABLED'] = False
124
-
125
- # Sidebar settings
126
- st.sidebar.header("🔧 Transcription Settings")
127
-
128
- # Model selection
129
- use_hf = st.sidebar.checkbox("Use Hugging Face Model", value=True,
130
- help="Use pre-trained models from Hugging Face for better accuracy")
131
-
132
- # Initialize model_name with a default value
133
- model_name = "openai/whisper-small" # Default to whisper for better accuracy
134
- if use_hf:
135
  model_options = {
136
  "Whisper Small (Recommended)": "openai/whisper-small",
137
  "Whisper Base": "openai/whisper-base",
138
- "Wav2Vec2 Base": "facebook/wav2vec2-base-960h",
139
- "SpeechT5": "microsoft/speecht5_asr"
140
  }
141
 
142
- model_display = st.sidebar.selectbox(
143
  "Select Model",
144
  options=list(model_options.keys()),
145
- index=0
 
146
  )
147
- model_name = model_options[model_display]
148
 
149
- # Audio Input Section
150
- st.sidebar.header("🎤 Audio Input")
 
151
 
152
- # Input method selection
153
- input_methods = ["Upload Audio File"]
154
- if app_config['RECORDING_ENABLED']:
155
- input_methods.append("Record Live Audio")
156
-
157
- input_method = st.sidebar.radio(
158
- "Choose input method:",
159
- input_methods,
160
- help="Select how you want to provide the audio for transcription"
161
  )
162
 
163
- audio_file = None
164
-
165
- if input_method == "Upload Audio File":
166
- st.header("🎵 Upload Audio File")
167
- uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg"])
168
-
169
- if uploaded_file is not None:
170
- with st.spinner("Processing audio file..."):
171
- try:
172
- # Read the file content directly from memory
173
- file_content = uploaded_file.getvalue()
174
-
175
- # Create a temporary file with the correct extension
176
- file_ext = os.path.splitext(uploaded_file.name)[1].lower()
177
- os.makedirs("temp_uploads", exist_ok=True)
178
-
179
- # Create a temporary file path
180
- temp_file_path = os.path.join("temp_uploads", f"upload_{int(time.time())}{file_ext}")
181
-
182
- # Save the file
183
- with open(temp_file_path, "wb") as f:
184
- f.write(file_content)
185
-
186
- # Store the file path in session state
187
- st.session_state.last_uploaded_file = temp_file_path
188
-
189
- # Display the audio player
190
- st.audio(temp_file_path, format=f'audio/{file_ext[1:]}' if file_ext else 'audio/wav')
191
-
192
- except Exception as e:
193
- st.error(f"Error processing uploaded file: {str(e)}")
194
- if 'temp_file_path' in locals() and os.path.exists(temp_file_path):
195
- try:
196
- os.remove(temp_file_path)
197
- except:
198
- pass
199
-
200
- elif input_method == "Record Live Audio" and app_config['RECORDING_ENABLED']:
201
- st.header("🎤 Live Audio Recording")
202
-
203
- # Show available audio devices
204
- # Initialize with default values
205
- selected_device = None
206
-
207
- try:
208
- if 'list_audio_devices' not in app_config:
209
- st.warning("⚠️ Audio device listing not available. Using default settings.")
210
- app_config['RECORDING_ENABLED'] = True # Keep recording enabled but with fallback
211
- else:
212
- devices = app_config['list_audio_devices']()
213
- if not devices:
214
- st.warning("⚠️ No audio input devices found. Using fallback mode.")
215
- app_config['RECORDING_ENABLED'] = True # Keep recording enabled but with fallback
216
- else:
217
- # Filter out devices with no input channels
218
- input_devices = [d for d in devices if d.get('max_input_channels', 0) > 0]
219
-
220
- if not input_devices:
221
- st.warning("⚠️ No input devices with recording capability found. Using fallback mode.")
222
- app_config['RECORDING_ENABLED'] = True # Keep recording enabled but with fallback
223
- else:
224
- # Create a list of display strings for the dropdown
225
- device_options = [f"{i}: {d['name']} (Channels: {d.get('input_channels', 1)})"
226
- for i, d in enumerate(input_devices)]
227
-
228
- # Add a default option
229
- device_options.insert(0, "Default: Use system default device")
230
-
231
- selected_device_str = st.selectbox(
232
- "Select audio device:",
233
- options=device_options,
234
- index=0
235
- )
236
-
237
- # If default is selected, use None to let sounddevice choose
238
- if selected_device_str == "Default: Use system default device":
239
- selected_device = None
240
- else:
241
- # Get the device index from the selected string
242
- selected_device = device_options.index(selected_device_str) - 1 # Adjust for default option
243
-
244
- # Ensure the index is within bounds
245
- if selected_device >= len(input_devices):
246
- selected_device = None
247
- except Exception as e:
248
- st.warning(f"⚠️ Warning: Could not load audio devices: {str(e)}. Using fallback mode.")
249
- app_config['RECORDING_ENABLED'] = True # Keep recording enabled but with fallback
250
-
251
- col1, col2 = st.columns(2)
252
-
253
- with col1:
254
- # Check if recording is enabled and we have a valid recorder
255
- if not app_config.get('RECORDING_ENABLED', False):
256
- st.warning("⚠️ Recording is not available in the current environment.")
257
- st.button("🎤 Start Recording", disabled=True)
258
- else:
259
- if st.button("🎤 Start Recording",
260
- disabled=st.session_state.get('recording', False),
261
- key='start_recording_btn'):
262
  try:
263
- # Create a new recorder instance if needed
264
- if 'recorder' not in st.session_state or st.session_state.recorder is None:
265
- print("Creating new AudioRecorder instance...")
266
- st.session_state.recorder = AudioRecorder(device_index=selected_device)
267
-
268
- print("Starting recording...")
269
- # Show appropriate message based on device availability
270
- if selected_device is None:
271
- st.info("ℹ️ Using system default audio device. If no device is found, silent audio will be generated.")
272
-
273
- if st.session_state.recorder.start_recording():
274
- st.session_state.recording = True
275
- st.session_state.recording_started = True
276
- st.session_state.recording_error = None
277
- print("Recording started successfully")
278
- st.rerun()
279
- else:
280
- error_msg = "Failed to start recording. Please try again."
281
- print(error_msg)
282
- st.error(error_msg)
283
- st.session_state.recording_error = error_msg
284
- st.session_state.recording = False
285
- st.session_state.recording_started = False
286
- except Exception as e:
287
- error_msg = f"Error starting recording: {str(e)}"
288
- print(error_msg)
289
- st.error(error_msg)
290
- st.session_state.recording_error = error_msg
291
- st.session_state.recording = False
292
- st.session_state.recording_started = False
293
-
294
- with col2:
295
- if st.button("⏹️ Stop Recording",
296
- disabled=not st.session_state.get('recording', False),
297
- key='stop_recording_btn'):
298
- try:
299
- if 'recorder' in st.session_state and st.session_state.recorder is not None:
300
- print("Stopping recording...")
301
-
302
- # Stop the recording
303
- audio_data = st.session_state.recorder.stop_recording()
304
-
305
- if audio_data is None:
306
- st.warning("No audio data was recorded")
307
- st.session_state.recording = False
308
- st.session_state.recording_started = False
309
- return
310
-
311
- # Ensure recordings directory exists
312
- recordings_dir = os.path.abspath("recordings")
313
- os.makedirs(recordings_dir, exist_ok=True)
314
-
315
- # Generate filename with full path
316
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
317
- filename = f"recording_{timestamp}.wav"
318
- audio_file = os.path.join(recordings_dir, filename)
319
-
320
- print(f"Saving recording to {audio_file}...")
321
- try:
322
- # Save with the full absolute path
323
- saved_file = st.session_state.recorder.save_recording(audio_file)
324
- if saved_file and os.path.exists(saved_file):
325
- print(f"Successfully saved recording to {saved_file}")
326
- st.session_state.last_recording = saved_file
327
- st.session_state.last_recorded_audio = saved_file
328
-
329
- # Clean up old recordings
330
- clean_up_recordings(keep_last=5)
331
-
332
- # Display success and audio player
333
- st.success(f"Recording saved successfully!")
334
- st.audio(saved_file)
335
-
336
- # Rerun to update the UI
337
- st.rerun()
338
- else:
339
- error_msg = "Failed to save recording. No audio data was captured."
340
- print(error_msg)
341
- st.error(error_msg)
342
- st.session_state.recording_error = error_msg
343
-
344
- except Exception as save_error:
345
- error_msg = f"Error saving recording: {str(save_error)}"
346
- print(f"Save error details: {error_msg}")
347
- st.error(error_msg)
348
- st.session_state.recording_error = error_msg
349
-
350
- # Reset recording state
351
- st.session_state.recording = False
352
- st.session_state.recording_started = False
353
-
354
- except Exception as e:
355
- error_msg = f"Error stopping recording: {str(e)}"
356
- print(error_msg)
357
- st.error(error_msg)
358
- st.session_state.recording_error = error_msg
359
-
360
- # Ensure we reset the recording state
361
- st.session_state.recording = False
362
- st.session_state.recording_started = False
363
- finally:
364
- # Always clean up the recorder
365
- if 'recorder' in st.session_state:
366
- try:
367
- st.session_state.recorder = None
368
- except:
369
- pass
370
-
371
- # Don't use rerun() in finally as it can cause infinite loops
372
- # The UI will update automatically due to Streamlit's reactivity
373
 
374
  # Transcription Section
375
- if 'last_recorded_audio' in st.session_state and st.session_state.last_recorded_audio:
376
- audio_file = st.session_state.last_recorded_audio
377
 
378
  # Add model selection
379
  model_options = {
@@ -489,12 +280,18 @@ def clean_up_recordings(keep_last=5):
489
 
490
  if __name__ == "__main__":
491
  # Create necessary directories
492
- os.makedirs("recordings", exist_ok=True)
493
  os.makedirs("outputs", exist_ok=True)
494
- os.makedirs("temp_uploads", exist_ok=True)
495
-
496
- # Clean up old files on startup
497
- clean_up_recordings(keep_last=5)
498
 
499
- # Run the main app
500
  main()
 
 
 
 
 
 
 
 
 
 
 
 
3
  import tempfile
4
  import base64
5
  import numpy as np
6
+ import time
7
  from datetime import datetime
8
  import soundfile as sf
9
  import io
 
 
 
10
  from hf_transcriber import HFTranscriber
11
  from huggingface_hub import login
12
+ from dotenv import load_dotenv, find_dotenv
13
 
14
+ # Set page config first
15
+ st.set_page_config(
16
+ page_title="🎵 Audio to Sheet Music Transcriber",
17
+ page_icon="🎵",
18
+ layout="wide"
19
+ )
20
 
21
+ # Load environment variables
22
+ env_path = find_dotenv()
23
+ if env_path:
24
+ load_dotenv(env_path)
25
+
26
+ # Initialize Hugging Face authentication with better error handling
27
+ try:
28
+ HUGGINGFACE_TOKEN = os.environ.get('HUGGINGFACE_TOKEN') or os.environ.get('HF_TOKEN')
29
+ if HUGGINGFACE_TOKEN and HUGGINGFACE_TOKEN.startswith('hf_'):
30
  login(token=HUGGINGFACE_TOKEN)
31
+ st.sidebar.success(" Authenticated with Hugging Face")
32
+ else:
33
+ st.sidebar.warning("⚠️ Using public models (rate limited)")
34
+ except Exception as e:
35
+ st.sidebar.warning(f"⚠️ Using public access: {str(e)}")
 
36
 
37
  # Configuration dictionary to store app settings
38
  app_config = {
 
93
  def save_uploaded_file(uploaded_file):
94
  """Save uploaded file to a temporary file and return the path."""
95
  try:
96
+ with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file:
 
 
 
 
 
97
  tmp_file.write(uploaded_file.getvalue())
98
  return tmp_file.name
99
  except Exception as e:
100
  st.error(f"Error saving file: {str(e)}")
101
  return None
102
 
103
+ def transcribe_audio(file_path, model_name):
104
+ """Transcribe audio using the specified model."""
105
+ try:
106
+ transcriber = HFTranscriber(model_name=model_name)
107
+ result = transcriber.transcribe_audio(file_path, 16000) # 16kHz sample rate
108
+ return result
109
+ except Exception as e:
110
+ st.error(f"❌ Transcription failed: {str(e)}")
111
+ st.exception(e) # Show full error in debug mode
112
+ return None
113
+
114
  def main():
 
 
115
  st.title("🎵 Audio to Sheet Music Transcriber")
116
  st.markdown("### Convert monophonic audio to sheet music")
117
 
118
+ # Model selection in sidebar
119
+ with st.sidebar:
120
+ st.header("🔧 Settings")
121
+
122
+ # Model selection
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  model_options = {
124
  "Whisper Small (Recommended)": "openai/whisper-small",
125
  "Whisper Base": "openai/whisper-base",
126
+ "Wav2Vec2 Base": "facebook/wav2vec2-base-960h"
 
127
  }
128
 
129
+ selected_model = st.selectbox(
130
  "Select Model",
131
  options=list(model_options.keys()),
132
+ index=0,
133
+ help="Choose the transcription model. Whisper models generally provide better accuracy."
134
  )
135
+ model_name = model_options[selected_model]
136
 
137
+ # Main content area
138
+ st.header("🎤 Upload Audio File")
139
+ st.info("ℹ️ Please upload an audio file for transcription (WAV, MP3, or OGG format)")
140
 
141
+ uploaded_file = st.file_uploader(
142
+ "Choose an audio file",
143
+ type=["wav", "mp3", "ogg"],
144
+ accept_multiple_files=False,
145
+ help="Select an audio file to transcribe (max 30MB)"
 
 
 
 
146
  )
147
 
148
+ if uploaded_file is not None:
149
+ with st.spinner("Processing audio..."):
150
+ try:
151
+ # Save the uploaded file temporarily
152
+ temp_file_path = save_uploaded_file(uploaded_file)
153
+
154
+ # Display the audio player
155
+ st.audio(temp_file_path, format=f'audio/{os.path.splitext(uploaded_file.name)[1][1:]}')
156
+
157
+ except Exception as e:
158
+ st.error(f"Error processing uploaded file: {str(e)}")
159
+ if 'temp_file_path' in locals() and os.path.exists(temp_file_path):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  try:
161
+ os.remove(temp_file_path)
162
+ except:
163
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
  # Transcription Section
166
+ if uploaded_file is not None:
167
+ audio_file = temp_file_path
168
 
169
  # Add model selection
170
  model_options = {
 
280
 
281
  if __name__ == "__main__":
282
  # Create necessary directories
 
283
  os.makedirs("outputs", exist_ok=True)
 
 
 
 
284
 
285
+ # Run the app
286
  main()
287
+
288
+ # Add footer
289
+ st.markdown("---")
290
+ st.markdown("### About")
291
+ st.markdown("""
292
+ This app uses Hugging Face's Transformers library for speech-to-text transcription.
293
+ Models are loaded on-demand and require an internet connection.
294
+
295
+ **Note:** This is a web-based version that only supports file uploads.
296
+ For local use with microphone support, run the main app.py instead.
297
+ """)