Spaces:

PatienceIzere
/

AudioTranscriber

Running

App Files Community

PatienceIzere committed 11 days ago

Commit

e0a3394

verified ·

1 Parent(s): 466da64

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -284

app.py CHANGED Viewed

@@ -3,30 +3,36 @@ import streamlit as st
 import tempfile
 import base64
 import numpy as np
 from datetime import datetime
 import soundfile as sf
 import io
-import glob
-import shutil
-import time
 from hf_transcriber import HFTranscriber
 from huggingface_hub import login
-# Load environment variables from .env file if it exists
-from dotenv import load_dotenv
-load_dotenv()
-# Authenticate with Hugging Face
-HUGGINGFACE_TOKEN = os.environ.get('HUGGINGFACE_TOKEN')
-if HUGGINGFACE_TOKEN:
-    try:
         login(token=HUGGINGFACE_TOKEN)
-        st.success("Successfully authenticated with Hugging Face.")
-    except Exception as e:
-        st.warning(f"Could not authenticate with Hugging Face: {str(e)}")
-else:
-    st.warning("Hugging Face token not found. Some models may not work without authentication.")
-    st.info("Create a .env file with HUGGINGFACE_TOKEN=your_token_here or set it in your environment variables.")
 # Configuration dictionary to store app settings
 app_config = {
@@ -87,293 +93,78 @@ def get_binary_file_downloader_html(bin_file, file_label='File'):
 def save_uploaded_file(uploaded_file):
     """Save uploaded file to a temporary file and return the path."""
     try:
-        # Create temp directory if it doesn't exist
-        os.makedirs("temp_uploads", exist_ok=True)
-        # Create a temporary file with a proper extension
-        file_ext = os.path.splitext(uploaded_file.name)[1]
-        with tempfile.NamedTemporaryFile(delete=False, dir="temp_uploads", suffix=file_ext) as tmp_file:
             tmp_file.write(uploaded_file.getvalue())
             return tmp_file.name
     except Exception as e:
         st.error(f"Error saving file: {str(e)}")
         return None
 def main():
-    st.set_page_config(page_title="Audio to Sheet Music Transcriber", layout="wide")
     st.title("🎵 Audio to Sheet Music Transcriber")
     st.markdown("### Convert monophonic audio to sheet music")
-    # Show warning if no audio devices are available
-    if not app_config['RECORDING_ENABLED']:
-        st.warning("""
-        ⚠️ **No audio recording devices detected**
-        You can still use this app by uploading audio files for transcription.
-        """, icon="⚠️")
-    # Initialize session state for recording if enabled
-    if app_config['RECORDING_ENABLED']:
-        if 'recorder' not in st.session_state:
-            try:
-                st.session_state.recorder = app_config.get('AudioRecorder')()
-                st.session_state.recording = False
-            except Exception as e:
-                st.error(f"Failed to initialize audio recorder: {str(e)}")
-                app_config['RECORDING_ENABLED'] = False
-    # Sidebar settings
-    st.sidebar.header("🔧 Transcription Settings")
-    # Model selection
-    use_hf = st.sidebar.checkbox("Use Hugging Face Model", value=True,
-                               help="Use pre-trained models from Hugging Face for better accuracy")
-    # Initialize model_name with a default value
-    model_name = "openai/whisper-small"  # Default to whisper for better accuracy
-    if use_hf:
         model_options = {
             "Whisper Small (Recommended)": "openai/whisper-small",
             "Whisper Base": "openai/whisper-base",
-            "Wav2Vec2 Base": "facebook/wav2vec2-base-960h",
-            "SpeechT5": "microsoft/speecht5_asr"
         }
-        model_display = st.sidebar.selectbox(
             "Select Model",
             options=list(model_options.keys()),
-            index=0
         )
-        model_name = model_options[model_display]
-    # Audio Input Section
-    st.sidebar.header("🎤 Audio Input")
-    # Input method selection
-    input_methods = ["Upload Audio File"]
-    if app_config['RECORDING_ENABLED']:
-        input_methods.append("Record Live Audio")
-    input_method = st.sidebar.radio(
-        "Choose input method:",
-        input_methods,
-        help="Select how you want to provide the audio for transcription"
     )
-    audio_file = None
-    if input_method == "Upload Audio File":
-        st.header("🎵 Upload Audio File")
-        uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg"])
-        if uploaded_file is not None:
-            with st.spinner("Processing audio file..."):
-                try:
-                    # Read the file content directly from memory
-                    file_content = uploaded_file.getvalue()
-                    # Create a temporary file with the correct extension
-                    file_ext = os.path.splitext(uploaded_file.name)[1].lower()
-                    os.makedirs("temp_uploads", exist_ok=True)
-                    # Create a temporary file path
-                    temp_file_path = os.path.join("temp_uploads", f"upload_{int(time.time())}{file_ext}")
-                    # Save the file
-                    with open(temp_file_path, "wb") as f:
-                        f.write(file_content)
-                    # Store the file path in session state
-                    st.session_state.last_uploaded_file = temp_file_path
-                    # Display the audio player
-                    st.audio(temp_file_path, format=f'audio/{file_ext[1:]}' if file_ext else 'audio/wav')
-                except Exception as e:
-                    st.error(f"Error processing uploaded file: {str(e)}")
-                    if 'temp_file_path' in locals() and os.path.exists(temp_file_path):
-                        try:
-                            os.remove(temp_file_path)
-                        except:
-                            pass
-    elif input_method == "Record Live Audio" and app_config['RECORDING_ENABLED']:
-        st.header("🎤 Live Audio Recording")
-        # Show available audio devices
-        # Initialize with default values
-        selected_device = None
-        try:
-            if 'list_audio_devices' not in app_config:
-                st.warning("⚠️ Audio device listing not available. Using default settings.")
-                app_config['RECORDING_ENABLED'] = True  # Keep recording enabled but with fallback
-            else:
-                devices = app_config['list_audio_devices']()
-                if not devices:
-                    st.warning("⚠️ No audio input devices found. Using fallback mode.")
-                    app_config['RECORDING_ENABLED'] = True  # Keep recording enabled but with fallback
-                else:
-                    # Filter out devices with no input channels
-                    input_devices = [d for d in devices if d.get('max_input_channels', 0) > 0]
-                    if not input_devices:
-                        st.warning("⚠️ No input devices with recording capability found. Using fallback mode.")
-                        app_config['RECORDING_ENABLED'] = True  # Keep recording enabled but with fallback
-                    else:
-                        # Create a list of display strings for the dropdown
-                        device_options = [f"{i}: {d['name']} (Channels: {d.get('input_channels', 1)})"
-                                       for i, d in enumerate(input_devices)]
-                        # Add a default option
-                        device_options.insert(0, "Default: Use system default device")
-                        selected_device_str = st.selectbox(
-                            "Select audio device:",
-                            options=device_options,
-                            index=0
-                        )
-                        # If default is selected, use None to let sounddevice choose
-                        if selected_device_str == "Default: Use system default device":
-                            selected_device = None
-                        else:
-                            # Get the device index from the selected string
-                            selected_device = device_options.index(selected_device_str) - 1  # Adjust for default option
-                            # Ensure the index is within bounds
-                            if selected_device >= len(input_devices):
-                                selected_device = None
-        except Exception as e:
-            st.warning(f"⚠️ Warning: Could not load audio devices: {str(e)}. Using fallback mode.")
-            app_config['RECORDING_ENABLED'] = True  # Keep recording enabled but with fallback
-        col1, col2 = st.columns(2)
-        with col1:
-            # Check if recording is enabled and we have a valid recorder
-            if not app_config.get('RECORDING_ENABLED', False):
-                st.warning("⚠️ Recording is not available in the current environment.")
-                st.button("🎤 Start Recording", disabled=True)
-            else:
-                if st.button("🎤 Start Recording",
-                            disabled=st.session_state.get('recording', False),
-                            key='start_recording_btn'):
                     try:
-                        # Create a new recorder instance if needed
-                        if 'recorder' not in st.session_state or st.session_state.recorder is None:
-                            print("Creating new AudioRecorder instance...")
-                            st.session_state.recorder = AudioRecorder(device_index=selected_device)
-                        print("Starting recording...")
-                        # Show appropriate message based on device availability
-                        if selected_device is None:
-                            st.info("ℹ️ Using system default audio device. If no device is found, silent audio will be generated.")
-                        if st.session_state.recorder.start_recording():
-                            st.session_state.recording = True
-                            st.session_state.recording_started = True
-                            st.session_state.recording_error = None
-                            print("Recording started successfully")
-                            st.rerun()
-                        else:
-                            error_msg = "Failed to start recording. Please try again."
-                            print(error_msg)
-                            st.error(error_msg)
-                            st.session_state.recording_error = error_msg
-                            st.session_state.recording = False
-                            st.session_state.recording_started = False
-                    except Exception as e:
-                        error_msg = f"Error starting recording: {str(e)}"
-                        print(error_msg)
-                        st.error(error_msg)
-                        st.session_state.recording_error = error_msg
-                        st.session_state.recording = False
-                        st.session_state.recording_started = False
-        with col2:
-            if st.button("⏹️ Stop Recording",
-                        disabled=not st.session_state.get('recording', False),
-                        key='stop_recording_btn'):
-                try:
-                    if 'recorder' in st.session_state and st.session_state.recorder is not None:
-                        print("Stopping recording...")
-                        # Stop the recording
-                        audio_data = st.session_state.recorder.stop_recording()
-                        if audio_data is None:
-                            st.warning("No audio data was recorded")
-                            st.session_state.recording = False
-                            st.session_state.recording_started = False
-                            return
-                        # Ensure recordings directory exists
-                        recordings_dir = os.path.abspath("recordings")
-                        os.makedirs(recordings_dir, exist_ok=True)
-                        # Generate filename with full path
-                        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-                        filename = f"recording_{timestamp}.wav"
-                        audio_file = os.path.join(recordings_dir, filename)
-                        print(f"Saving recording to {audio_file}...")
-                        try:
-                            # Save with the full absolute path
-                            saved_file = st.session_state.recorder.save_recording(audio_file)
-                            if saved_file and os.path.exists(saved_file):
-                                print(f"Successfully saved recording to {saved_file}")
-                                st.session_state.last_recording = saved_file
-                                st.session_state.last_recorded_audio = saved_file
-                                # Clean up old recordings
-                                clean_up_recordings(keep_last=5)
-                                # Display success and audio player
-                                st.success(f"Recording saved successfully!")
-                                st.audio(saved_file)
-                                # Rerun to update the UI
-                                st.rerun()
-                            else:
-                                error_msg = "Failed to save recording. No audio data was captured."
-                                print(error_msg)
-                                st.error(error_msg)
-                                st.session_state.recording_error = error_msg
-                        except Exception as save_error:
-                            error_msg = f"Error saving recording: {str(save_error)}"
-                            print(f"Save error details: {error_msg}")
-                            st.error(error_msg)
-                            st.session_state.recording_error = error_msg
-                    # Reset recording state
-                    st.session_state.recording = False
-                    st.session_state.recording_started = False
-                except Exception as e:
-                    error_msg = f"Error stopping recording: {str(e)}"
-                    print(error_msg)
-                    st.error(error_msg)
-                    st.session_state.recording_error = error_msg
-                    # Ensure we reset the recording state
-                    st.session_state.recording = False
-                    st.session_state.recording_started = False
-                finally:
-                    # Always clean up the recorder
-                    if 'recorder' in st.session_state:
-                        try:
-                            st.session_state.recorder = None
-                        except:
-                            pass
-                    # Don't use rerun() in finally as it can cause infinite loops
-                    # The UI will update automatically due to Streamlit's reactivity
     # Transcription Section
-    if 'last_recorded_audio' in st.session_state and st.session_state.last_recorded_audio:
-        audio_file = st.session_state.last_recorded_audio
         # Add model selection
         model_options = {
@@ -489,12 +280,18 @@ def clean_up_recordings(keep_last=5):
 if __name__ == "__main__":
     # Create necessary directories
-    os.makedirs("recordings", exist_ok=True)
     os.makedirs("outputs", exist_ok=True)
-    os.makedirs("temp_uploads", exist_ok=True)
-    # Clean up old files on startup
-    clean_up_recordings(keep_last=5)
-    # Run the main app
     main()

 import tempfile
 import base64
 import numpy as np
+import time
 from datetime import datetime
 import soundfile as sf
 import io
 from hf_transcriber import HFTranscriber
 from huggingface_hub import login
+from dotenv import load_dotenv, find_dotenv
+# Set page config first
+st.set_page_config(
+    page_title="🎵 Audio to Sheet Music Transcriber",
+    page_icon="🎵",
+    layout="wide"
+)
+# Load environment variables
+env_path = find_dotenv()
+if env_path:
+    load_dotenv(env_path)
+# Initialize Hugging Face authentication with better error handling
+try:
+    HUGGINGFACE_TOKEN = os.environ.get('HUGGINGFACE_TOKEN') or os.environ.get('HF_TOKEN')
+    if HUGGINGFACE_TOKEN and HUGGINGFACE_TOKEN.startswith('hf_'):
         login(token=HUGGINGFACE_TOKEN)
+        st.sidebar.success("✅ Authenticated with Hugging Face")
+    else:
+        st.sidebar.warning("⚠️ Using public models (rate limited)")
+except Exception as e:
+    st.sidebar.warning(f"⚠️ Using public access: {str(e)}")
 # Configuration dictionary to store app settings
 app_config = {
 def save_uploaded_file(uploaded_file):
     """Save uploaded file to a temporary file and return the path."""
     try:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file:
             tmp_file.write(uploaded_file.getvalue())
             return tmp_file.name
     except Exception as e:
         st.error(f"Error saving file: {str(e)}")
         return None
+def transcribe_audio(file_path, model_name):
+    """Transcribe audio using the specified model."""
+    try:
+        transcriber = HFTranscriber(model_name=model_name)
+        result = transcriber.transcribe_audio(file_path, 16000)  # 16kHz sample rate
+        return result
+    except Exception as e:
+        st.error(f"❌ Transcription failed: {str(e)}")
+        st.exception(e)  # Show full error in debug mode
+        return None
 def main():
     st.title("🎵 Audio to Sheet Music Transcriber")
     st.markdown("### Convert monophonic audio to sheet music")
+    # Model selection in sidebar
+    with st.sidebar:
+        st.header("🔧 Settings")
+        # Model selection
         model_options = {
             "Whisper Small (Recommended)": "openai/whisper-small",
             "Whisper Base": "openai/whisper-base",
+            "Wav2Vec2 Base": "facebook/wav2vec2-base-960h"
         }
+        selected_model = st.selectbox(
             "Select Model",
             options=list(model_options.keys()),
+            index=0,
+            help="Choose the transcription model. Whisper models generally provide better accuracy."
         )
+        model_name = model_options[selected_model]
+    # Main content area
+    st.header("🎤 Upload Audio File")
+    st.info("ℹ️ Please upload an audio file for transcription (WAV, MP3, or OGG format)")
+    uploaded_file = st.file_uploader(
+        "Choose an audio file",
+        type=["wav", "mp3", "ogg"],
+        accept_multiple_files=False,
+        help="Select an audio file to transcribe (max 30MB)"
     )
+    if uploaded_file is not None:
+        with st.spinner("Processing audio..."):
+            try:
+                # Save the uploaded file temporarily
+                temp_file_path = save_uploaded_file(uploaded_file)
+                # Display the audio player
+                st.audio(temp_file_path, format=f'audio/{os.path.splitext(uploaded_file.name)[1][1:]}')
+            except Exception as e:
+                st.error(f"Error processing uploaded file: {str(e)}")
+                if 'temp_file_path' in locals() and os.path.exists(temp_file_path):
                     try:
+                        os.remove(temp_file_path)
+                    except:
+                        pass
     # Transcription Section
+    if uploaded_file is not None:
+        audio_file = temp_file_path
         # Add model selection
         model_options = {
 if __name__ == "__main__":
     # Create necessary directories
     os.makedirs("outputs", exist_ok=True)
+    # Run the app
     main()
+    # Add footer
+    st.markdown("---")
+    st.markdown("### About")
+    st.markdown("""
+    This app uses Hugging Face's Transformers library for speech-to-text transcription.
+    Models are loaded on-demand and require an internet connection.
+    **Note:** This is a web-based version that only supports file uploads.
+    For local use with microphone support, run the main app.py instead.
+    """)