Spaces:

PatienceIzere
/

AudioTranscriber

Running

App Files Community

PatienceIzere committed 11 days ago

Commit

5c1dd7e

verified ·

1 Parent(s): 71f837d

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -23

app.py CHANGED Viewed

@@ -2,11 +2,8 @@ import os
 import streamlit as st
 import tempfile
 import base64
-import numpy as np
 import time
 from datetime import datetime
-import soundfile as sf
-import io
 from hf_transcriber import HFTranscriber
 from huggingface_hub import login
 from dotenv import load_dotenv, find_dotenv
@@ -53,8 +50,7 @@ app_config = {
 def init_recording():
     """Initialize recording capability and return status."""
     try:
-        # Import required modules
-        from hf_transcriber import HFTranscriber
         from recorder import AudioRecorder, list_audio_devices
         # Update config with recording components
@@ -67,24 +63,24 @@ def init_recording():
             app_config['AUDIO_DEVICES'] = devices
             if not devices or not any(d.get('max_input_channels', 0) > 0 for d in devices):
-                st.warning("⚠️ No input devices with recording capability found. Using fallback mode.")
                 app_config['RECORDING_ENABLED'] = False
             else:
                 app_config['RECORDING_ENABLED'] = True
         except Exception as e:
-            st.warning(f"⚠️ Could not detect audio devices: {str(e)}. Using fallback mode.")
             app_config['RECORDING_ENABLED'] = False
             app_config['AUDIO_DEVICES'] = []
         return True
     except ImportError as e:
-        st.warning(f"⚠️ Some features may be limited: {str(e)}")
         app_config['RECORDING_ENABLED'] = False
         return False
     except Exception as e:
-        st.warning(f"⚠️ Audio initialization failed: {str(e)}. Using fallback mode.")
         app_config['RECORDING_ENABLED'] = False
         return False
@@ -120,9 +116,50 @@ def transcribe_audio(file_path, model_name):
         st.exception(e)  # Show full error in debug mode
         return None
 def main():
     st.title("🎵 Audio to Sheet Music Transcriber")
-    st.markdown("### Convert monophonic audio to sheet music")
     # Model selection in sidebar
     with st.sidebar:
@@ -143,22 +180,32 @@ def main():
         )
         model_name = model_options[selected_model]
-    # Main content area
-    st.header("🎤 Upload Audio File")
-    st.info("ℹ️ Please upload an audio file for transcription (WAV, MP3, or OGG format)")
-    uploaded_file = st.file_uploader(
-        "Choose an audio file",
-        type=["wav", "mp3", "ogg"],
-        accept_multiple_files=False,
-        help="Select an audio file to transcribe (max 30MB)"
-    )
     if uploaded_file is not None:
         with st.spinner("Processing audio..."):
             try:
-                # Save the uploaded file temporarily
-                temp_file_path = save_uploaded_file(uploaded_file)
                 # Display the audio player
                 st.audio(temp_file_path, format=f'audio/{os.path.splitext(uploaded_file.name)[1][1:]}')
@@ -301,6 +348,5 @@ if __name__ == "__main__":
     This app uses Hugging Face's Transformers library for speech-to-text transcription.
     Models are loaded on-demand and require an internet connection.
-    **Note:** This is a web-based version that only supports file uploads.
-    For local use with microphone support, run the main app.py instead.
     """)

 import streamlit as st
 import tempfile
 import base64
 import time
 from datetime import datetime
 from hf_transcriber import HFTranscriber
 from huggingface_hub import login
 from dotenv import load_dotenv, find_dotenv
 def init_recording():
     """Initialize recording capability and return status."""
     try:
+        # Try to import recording-related modules
         from recorder import AudioRecorder, list_audio_devices
         # Update config with recording components
             app_config['AUDIO_DEVICES'] = devices
             if not devices or not any(d.get('max_input_channels', 0) > 0 for d in devices):
+                st.warning("⚠️ No input devices with recording capability found. Using file upload only.")
                 app_config['RECORDING_ENABLED'] = False
             else:
                 app_config['RECORDING_ENABLED'] = True
         except Exception as e:
+            st.warning(f"⚠️ Could not detect audio devices: {str(e)}. Using file upload only.")
             app_config['RECORDING_ENABLED'] = False
             app_config['AUDIO_DEVICES'] = []
         return True
     except ImportError as e:
+        st.warning(f"⚠️ Some features may be limited: {str(e)}. Using file upload only.")
         app_config['RECORDING_ENABLED'] = False
         return False
     except Exception as e:
+        st.warning(f"⚠️ Audio initialization failed: {str(e)}. Using file upload only.")
         app_config['RECORDING_ENABLED'] = False
         return False
         st.exception(e)  # Show full error in debug mode
         return None
+def record_audio():
+    """Handle audio recording functionality."""
+    st.header("🎤 Record Audio")
+    if not app_config['RECORDING_ENABLED']:
+        st.warning("Audio recording is not available on this device.")
+        return
+    AudioRecorder = app_config['AudioRecorder']
+    if 'recorder' not in st.session_state:
+        st.session_state.recorder = AudioRecorder()
+    col1, col2 = st.columns(2)
+    with col1:
+        if st.button("🎤 Start Recording"):
+            st.session_state.recorder.start()
+            st.session_state.recording = True
+            st.experimental_rerun()
+    with col2:
+        if st.button("⏹️ Stop Recording") and st.session_state.get('recording', False):
+            audio_data = st.session_state.recorder.stop()
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            output_file = os.path.join("outputs", f"recording_{timestamp}.wav")
+            os.makedirs("outputs", exist_ok=True)
+            audio_data.export(output_file, format="wav")
+            st.session_state.recorded_file = output_file
+            st.session_state.recording = False
+            st.experimental_rerun()
+    if st.session_state.get('recording', False):
+        st.warning("Recording in progress... Click 'Stop Recording' when finished.")
+    if 'recorded_file' in st.session_state and os.path.exists(st.session_state.recorded_file):
+        st.audio(st.session_state.recorded_file)
+        return st.session_state.recorded_file
+    return None
 def main():
     st.title("🎵 Audio to Sheet Music Transcriber")
+    st.markdown("### Record or upload audio for transcription")
     # Model selection in sidebar
     with st.sidebar:
         )
         model_name = model_options[selected_model]
+    # Main content area - Tabs for different input methods
+    tab1, tab2 = st.tabs(["🎤 Record Audio", "📁 Upload File"])
+    recorded_file = None
+    uploaded_file = None
+    with tab1:
+        recorded_file = record_audio()
+    with tab2:
+        st.info("ℹ️ Please upload an audio file for transcription (WAV, MP3, or OGG format)")
+        uploaded_file = st.file_uploader(
+            "Choose an audio file",
+            type=["wav", "mp3", "ogg"],
+            help="Select an audio file to transcribe (max 30MB)",
+            key="file_uploader"
+        )
     if uploaded_file is not None:
         with st.spinner("Processing audio..."):
             try:
+                # Get the file path (either recorded or uploaded)
+                if recorded_file:
+                    temp_file_path = recorded_file
+                else:
+                    temp_file_path = save_uploaded_file(uploaded_file)
                 # Display the audio player
                 st.audio(temp_file_path, format=f'audio/{os.path.splitext(uploaded_file.name)[1][1:]}')
     This app uses Hugging Face's Transformers library for speech-to-text transcription.
     Models are loaded on-demand and require an internet connection.
+    **Note:** This version supports both file uploads and live recording (if your device supports it).
     """)