# Spaces: invincible-jha/vc-copilot (status: Sleeping)
# File size: 11,621 Bytes

# Add these imports at the top
import soundfile as sf
import librosa
from pathlib import Path
import humanize
from datetime import timedelta

# Add these constants
# Upload limits applied when an audio file is validated.
MAX_FILE_SIZE = 25 * 1024 ** 2  # bytes (25 * 1024 * 1024)
MAX_AUDIO_DURATION = 10 * 60  # seconds (10 minutes)

# Accepted file extensions (lower-case, leading dot) mapped to the label
# shown to the user when an unsupported format is rejected.
SUPPORTED_FORMATS = {
    ".wav": "WAV audio",
    ".mp3": "MP3 audio",
    ".m4a": "M4A audio",
}

class AudioValidator:
    """Validate uploaded audio files and explain any rejection to the user."""

    @staticmethod
    def validate_audio_file(file) -> tuple[bool, str]:
        """Check an uploaded audio file against the module's upload limits.

        The checks run in this order: presence, size (``MAX_FILE_SIZE``),
        extension (``SUPPORTED_FORMATS``), duration (``MAX_AUDIO_DURATION``)
        and finally the file's native sample rate (at least 16 kHz).

        Args:
            file: Uploaded file object exposing ``getvalue()`` (the raw bytes)
                and ``name`` (the original filename), or ``None`` when nothing
                was uploaded.

        Returns:
            ``(True, "Audio file is valid")`` when every check passes,
            otherwise ``(False, reason)`` where ``reason`` is a user-facing
            message. Unexpected errors are logged and reported as a failed
            validation instead of being raised.
        """
        try:
            # Check if file is provided
            if file is None:
                return False, "No file was uploaded."

            # Read the upload once; the same bytes feed the size check and
            # the temporary copy below.
            payload = file.getvalue()

            # Check file size
            file_size = len(payload)
            if file_size > MAX_FILE_SIZE:
                readable_size = humanize.naturalsize(file_size)
                max_size = humanize.naturalsize(MAX_FILE_SIZE)
                return False, f"File size ({readable_size}) exceeds maximum allowed size ({max_size})"

            # Check file extension
            file_extension = Path(file.name).suffix.lower()
            if file_extension not in SUPPORTED_FORMATS:
                return False, f"Unsupported file format. Please upload {', '.join(SUPPORTED_FORMATS.values())}"

            # librosa needs a real path, so spill the bytes to a temp file.
            # delete=False keeps it alive after the ``with``; the ``finally``
            # below removes it.
            with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
                tmp_file.write(payload)
                tmp_file_path = tmp_file.name

            try:
                # Check audio duration
                duration = librosa.get_duration(path=tmp_file_path)
                if duration > MAX_AUDIO_DURATION:
                    formatted_duration = str(timedelta(seconds=int(duration)))
                    max_duration = str(timedelta(seconds=MAX_AUDIO_DURATION))
                    return False, f"Audio duration ({formatted_duration}) exceeds maximum allowed length ({max_duration})"

                # Check audio quality using the file's *native* sample rate.
                # librosa.load() resamples to 22050 Hz unless sr=None is
                # passed, which made this check impossible to fail; reading
                # the rate directly also avoids decoding the whole file.
                sr = librosa.get_samplerate(tmp_file_path)
                if sr < 16000:
                    return False, f"Audio quality too low. Sample rate ({sr}Hz) should be at least 16kHz"

                return True, "Audio file is valid"

            finally:
                os.unlink(tmp_file_path)

        except Exception as e:
            # Boundary handler: any decoding or I/O failure becomes a
            # rejection message rather than crashing the caller.
            logger.error(f"Audio validation error: {str(e)}")
            return False, f"Error validating audio file: {str(e)}"

class AudioProcessor:
    """Transcribe uploaded audio and collect per-file processing statistics."""

    def __init__(self, model):
        """Store the transcription model and create the file validator.

        Args:
            model: Object whose ``transcribe(path, language=..., task=...,
                fp16=...)`` call returns a mapping with a ``"text"`` entry.
        """
        self.model = model
        self.validator = AudioValidator()

    def process_audio_chunk(self, audio_file) -> tuple[Optional[str], Dict[str, Any]]:
        """Validate and transcribe one uploaded audio file.

        Args:
            audio_file: Uploaded file object exposing ``getvalue()`` and
                ``name``; it is passed to the validator first.

        Returns:
            ``(text, stats)``. ``text`` is the transcription, or ``None`` when
            validation or processing failed. ``stats`` always contains the
            keys ``duration``, ``sample_rate``, ``file_size``,
            ``processing_time`` and ``status`` (``'pending'``, ``'success'``
            or ``'failed'``); values not reached stay ``None``. On failure an
            ``error`` key holds the message. Exceptions are logged and
            reported through ``stats`` rather than raised.
        """
        processing_stats = {
            'duration': None,
            'sample_rate': None,
            'file_size': None,
            'processing_time': None,
            'status': 'pending'
        }

        try:
            start_time = datetime.now()

            # Validate file
            is_valid, validation_message = self.validator.validate_audio_file(audio_file)
            if not is_valid:
                processing_stats['status'] = 'failed'
                processing_stats['error'] = validation_message
                return None, processing_stats

            # Read the upload once; the same bytes feed the size stat and the
            # temporary copy handed to the model.
            payload = audio_file.getvalue()
            processing_stats['file_size'] = humanize.naturalsize(len(payload))

            # The model and librosa both need a real path. delete=False keeps
            # the file alive after the ``with``; the ``finally`` removes it.
            file_extension = Path(audio_file.name).suffix.lower()
            with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
                tmp_file.write(payload)
                tmp_file_path = tmp_file.name

            try:
                # Report the file's native sample rate. librosa.load()
                # resamples to 22050 Hz by default, so the old code always
                # displayed that value regardless of the upload; reading
                # duration and rate from the file also avoids decoding the
                # entire clip into memory just for statistics.
                duration = librosa.get_duration(path=tmp_file_path)
                sr = librosa.get_samplerate(tmp_file_path)
                processing_stats.update({
                    'duration': str(timedelta(seconds=int(duration))),
                    'sample_rate': f"{sr/1000:.1f}kHz"
                })

                # Transcribe audio; half precision only when CUDA is available.
                result = self.model.transcribe(
                    tmp_file_path,
                    language="en",
                    task="transcribe",
                    fp16=torch.cuda.is_available()
                )

                # Elapsed time covers validation as well as transcription.
                processing_time = (datetime.now() - start_time).total_seconds()
                processing_stats.update({
                    'processing_time': f"{processing_time:.1f}s",
                    'status': 'success'
                })

                return result["text"], processing_stats

            finally:
                if os.path.exists(tmp_file_path):
                    os.unlink(tmp_file_path)

        except Exception as e:
            # Boundary handler: surface the failure through the stats dict so
            # the UI can display it instead of crashing.
            error_message = str(e)
            logger.error(f"Audio processing error: {error_message}")
            processing_stats.update({
                'status': 'failed',
                'error': error_message
            })
            return None, processing_stats
        finally:
            # Release memory held by decoded audio / model activations.
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

class UIManager:
    """Streamlit helpers for page setup, file upload and statistics display."""

    @staticmethod
    def setup_page():
        """Configure the Streamlit page (title, icon, wide layout, open sidebar)."""
        st.set_page_config(
            page_title="VC Call Assistant",
            page_icon="🎙️",
            layout="wide",
            initial_sidebar_state="expanded"
        )

    @staticmethod
    def show_file_uploader() -> Optional[Any]:
        """Render the upload instructions and the audio file uploader.

        Returns:
            Whatever ``st.file_uploader`` returns for the wav/mp3/m4a widget.
        """
        st.markdown("""
        ### 📁 Upload Audio File
        
        **Supported formats:**
        - WAV (recommended)
        - MP3
        - M4A
        
        **Limitations:**
        - Maximum file size: 25MB
        - Maximum duration: 10 minutes
        - Minimum sample rate: 16kHz
        """)

        return st.file_uploader(
            "Choose an audio file",
            type=['wav', 'mp3', 'm4a']
        )

    @staticmethod
    def show_processing_stats(stats: Dict[str, Any]):
        """Display processing statistics as metrics plus a status banner.

        Args:
            stats: Mapping that may contain ``duration``, ``file_size``,
                ``sample_rate``, ``processing_time``, ``status`` and
                ``error``. Nothing is rendered when it is empty or ``None``.
        """
        if not stats:
            return

        def shown(key: str) -> Any:
            # The stats dict is pre-populated with None placeholders, so a
            # plain ``stats.get(key, 'N/A')`` never falls back; map both a
            # missing key and a None value to 'N/A'.
            value = stats.get(key)
            return 'N/A' if value is None else value

        st.markdown("### 📊 Processing Statistics")

        col1, col2, col3 = st.columns(3)

        with col1:
            st.metric("Duration", shown('duration'))
            st.metric("File Size", shown('file_size'))

        with col2:
            st.metric("Sample Rate", shown('sample_rate'))
            st.metric("Processing Time", shown('processing_time'))

        with col3:
            status = stats.get('status', 'unknown')
            if status == 'success':
                st.success("Processing Completed")
            elif status == 'failed':
                st.error(f"Processing Failed: {stats.get('error', 'Unknown error')}")
            else:
                st.info("Processing Pending")

def main():
    """Run the app: gather settings, load models, then transcribe and analyse an upload.

    The flow stops early (after showing a message) when no name has been
    entered, when a model fails to load, when no file is uploaded, or when
    no transcription is produced. Any exception is logged and shown to the
    user; memory is reclaimed on every exit path.
    """
    try:
        UIManager.setup_page()

        model_name = "GPT2"
        with st.sidebar:
            st.title("VC Assistant Settings")

            model_info = MODEL_CONFIGS[model_name]
            st.info(f"""Using {model_name}
            Memory Usage: {model_info['memory_required']}
            Description: {model_info['description']}""")

            analyst_name = st.text_input("Your Name")
            style_choice = st.selectbox(
                "Note Style",
                ["Bullet Points", "Paragraphs", "Q&A"]
            )
            focus_areas = st.multiselect(
                "Focus Areas",
                ["Product", "Market", "Team", "Financials", "Technology"],
                default=["Product", "Market"]
            )

        st.title("🎙️ VC Call Assistant")

        # Nothing else is rendered until the user has identified themselves.
        if not analyst_name:
            st.warning("Please enter your name in the sidebar.")
            return

        # Load both models behind a progress bar that is always cleared.
        loading_bar = st.progress(0, text="Loading models...")
        try:
            loading_bar.progress(25, text="Loading Whisper model...")
            speech_model = ModelManager.load_whisper()

            loading_bar.progress(50, text="Loading language model...")
            language_model = ModelManager.load_llm(model_name)

            if not (speech_model and language_model):
                st.error("Failed to initialize models. Please refresh the page.")
                return

            loading_bar.progress(75, text="Initializing processors...")
            audio_processor = AudioProcessor(speech_model)
            analyzer = ContentAnalyzer(language_model)

            loading_bar.progress(100, text="Ready!")
        finally:
            loading_bar.empty()

        # Wait for an upload before doing any processing.
        uploaded_audio = UIManager.show_file_uploader()
        if not uploaded_audio:
            return

        with st.spinner("Processing audio..."):
            transcript, run_stats = audio_processor.process_audio_chunk(uploaded_audio)

            # Statistics are shown for failed runs too.
            UIManager.show_processing_stats(run_stats)
            if not transcript:
                return

            transcript_col, analysis_col = st.columns(2)

            with transcript_col:
                st.subheader("📝 Transcript")
                st.write(transcript)

            with analysis_col:
                st.subheader("🔍 Analysis")
                with st.spinner("Analyzing transcript..."):
                    vc_style = VCStyle(
                        name=analyst_name,
                        note_format={"style": style_choice},
                        key_interests=focus_areas,
                        custom_sections=[],
                        insight_preferences={}
                    )

                    analysis = analyzer.analyze_text(transcript, vc_style)
                    if analysis:
                        st.write(analysis)

                        # Bundle everything produced in this run for export.
                        export_payload = json.dumps({
                            "timestamp": datetime.now().isoformat(),
                            "transcription": transcript,
                            "analysis": analysis,
                            "processing_stats": run_stats
                        }, indent=2)
                        export_name = f"vc_analysis_{datetime.now():%Y%m%d_%H%M%S}.json"

                        st.download_button(
                            "📥 Export Analysis",
                            data=export_payload,
                            file_name=export_name,
                            mime="application/json"
                        )

    except Exception as e:
        # Top-level boundary: log the failure and show recovery hints.
        details = str(e)
        logger.error(f"Application error: {details}")
        st.error(f"""
        An unexpected error occurred: {details}
        
        Please try:
        1. Refreshing the page
        2. Using a different audio file
        3. Checking your internet connection
        """)

    finally:
        # Reclaim Python and (when present) CUDA memory after every run.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()