invincible-jha committed on
Commit
fb59a4d
·
verified ·
1 Parent(s): 258e204

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +232 -165
  2. requirements.txt +1 -4
app.py CHANGED
@@ -1,219 +1,308 @@
1
- # Add these imports at the top
2
- import soundfile as sf
3
- import librosa
 
 
 
4
  from pathlib import Path
 
 
 
 
 
 
 
5
  import humanize
6
- from datetime import timedelta
7
 
8
- # Add these constants
 
 
 
 
 
 
 
9
  MAX_FILE_SIZE = 25 * 1024 * 1024 # 25MB
10
- MAX_AUDIO_DURATION = 600 # 10 minutes in seconds
11
- SUPPORTED_FORMATS = {
12
- '.wav': 'WAV audio',
13
- '.mp3': 'MP3 audio',
14
- '.m4a': 'M4A audio'
 
 
 
 
15
  }
16
 
 
 
 
 
 
 
 
 
17
  class AudioValidator:
18
- """Handles audio file validation and provides detailed feedback"""
19
-
20
  @staticmethod
21
- def validate_audio_file(file) -> tuple[bool, str]:
 
 
 
 
 
 
 
22
  try:
23
- # Check if file is provided
24
  if file is None:
25
- return False, "No file was uploaded."
26
 
27
  # Check file size
28
  file_size = len(file.getvalue())
 
 
29
  if file_size > MAX_FILE_SIZE:
30
- readable_size = humanize.naturalsize(file_size)
31
- max_size = humanize.naturalsize(MAX_FILE_SIZE)
32
- return False, f"File size ({readable_size}) exceeds maximum allowed size ({max_size})"
33
 
34
  # Check file extension
35
  file_extension = Path(file.name).suffix.lower()
 
 
36
  if file_extension not in SUPPORTED_FORMATS:
37
- return False, f"Unsupported file format. Please upload {', '.join(SUPPORTED_FORMATS.values())}"
38
 
39
- # Save file temporarily for duration check
40
  with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
41
  tmp_file.write(file.getvalue())
42
  tmp_file_path = tmp_file.name
43
 
44
  try:
45
- # Check audio duration
46
- duration = librosa.get_duration(path=tmp_file_path)
 
 
 
 
 
 
 
47
  if duration > MAX_AUDIO_DURATION:
48
- formatted_duration = str(timedelta(seconds=int(duration)))
49
- max_duration = str(timedelta(seconds=MAX_AUDIO_DURATION))
50
- return False, f"Audio duration ({formatted_duration}) exceeds maximum allowed length ({max_duration})"
51
 
52
- # Check audio quality
53
- y, sr = librosa.load(tmp_file_path)
54
- if sr < 16000:
55
- return False, f"Audio quality too low. Sample rate ({sr}Hz) should be at least 16kHz"
56
 
57
- return True, "Audio file is valid"
58
 
59
  finally:
60
  os.unlink(tmp_file_path)
61
 
62
  except Exception as e:
63
- logger.error(f"Audio validation error: {str(e)}")
64
- return False, f"Error validating audio file: {str(e)}"
65
 
66
  class AudioProcessor:
67
- """Enhanced audio processor with better feedback and error handling"""
68
-
69
  def __init__(self, model):
70
  self.model = model
71
  self.validator = AudioValidator()
72
-
73
- def process_audio_chunk(self, audio_file) -> tuple[Optional[str], Dict[str, Any]]:
74
- processing_stats = {
75
- 'duration': None,
76
- 'sample_rate': None,
77
- 'file_size': None,
78
  'processing_time': None,
79
- 'status': 'pending'
80
  }
81
 
82
  try:
83
- start_time = datetime.now()
84
-
85
  # Validate file
86
- is_valid, validation_message = self.validator.validate_audio_file(audio_file)
 
 
87
  if not is_valid:
88
- processing_stats['status'] = 'failed'
89
- processing_stats['error'] = validation_message
90
- return None, processing_stats
91
-
92
- # Get file stats
93
- file_size = len(audio_file.getvalue())
94
- processing_stats['file_size'] = humanize.naturalsize(file_size)
95
 
96
  # Process audio
97
- file_extension = Path(audio_file.name).suffix.lower()
98
- with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
99
- audio_file.seek(0)
100
  tmp_file.write(audio_file.getvalue())
101
  tmp_file_path = tmp_file.name
102
 
103
  try:
104
- # Get audio info
105
- y, sr = librosa.load(tmp_file_path)
106
- duration = librosa.get_duration(y=y, sr=sr)
107
- processing_stats.update({
108
- 'duration': str(timedelta(seconds=int(duration))),
109
- 'sample_rate': f"{sr/1000:.1f}kHz"
110
- })
111
-
112
- # Transcribe audio
113
  result = self.model.transcribe(
114
  tmp_file_path,
115
  language="en",
116
  task="transcribe",
117
- fp16=True if torch.cuda.is_available() else False
118
  )
119
-
120
- # Update stats
121
- processing_time = (datetime.now() - start_time).total_seconds()
122
- processing_stats.update({
123
- 'processing_time': f"{processing_time:.1f}s",
124
- 'status': 'success'
125
- })
126
-
127
- return result["text"], processing_stats
128
 
129
  finally:
130
- if os.path.exists(tmp_file_path):
131
- os.unlink(tmp_file_path)
132
 
133
  except Exception as e:
134
- error_message = str(e)
135
- logger.error(f"Audio processing error: {error_message}")
136
- processing_stats.update({
137
- 'status': 'failed',
138
- 'error': error_message
139
- })
140
- return None, processing_stats
141
  finally:
142
- gc.collect()
143
  if torch.cuda.is_available():
144
  torch.cuda.empty_cache()
 
145
 
146
- class UIManager:
147
- """Enhanced UI manager with better feedback and progress indicators"""
148
-
149
- @staticmethod
150
- def setup_page():
151
- st.set_page_config(
152
- page_title="VC Call Assistant",
153
- page_icon="πŸŽ™οΈ",
154
- layout="wide",
155
- initial_sidebar_state="expanded"
 
 
 
 
156
  )
157
-
158
- @staticmethod
159
- def show_file_uploader() -> Optional[Any]:
160
- st.markdown("""
161
- ### πŸ“ Upload Audio File
162
-
163
- **Supported formats:**
164
- - WAV (recommended)
165
- - MP3
166
- - M4A
167
 
168
- **Limitations:**
169
- - Maximum file size: 25MB
170
- - Maximum duration: 10 minutes
171
- - Minimum sample rate: 16kHz
172
- """)
 
173
 
174
- return st.file_uploader(
175
- "Choose an audio file",
176
- type=['wav', 'mp3', 'm4a']
 
 
 
 
 
 
177
  )
178
-
179
- @staticmethod
180
- def show_processing_stats(stats: Dict[str, Any]):
181
- """Display processing statistics in a nice format"""
182
- if not stats:
183
- return
 
 
184
 
185
- st.markdown("### πŸ“Š Processing Statistics")
 
 
 
 
 
 
 
 
 
 
 
186
 
187
- col1, col2, col3 = st.columns(3)
188
 
189
- with col1:
190
- st.metric("Duration", stats.get('duration', 'N/A'))
191
- st.metric("File Size", stats.get('file_size', 'N/A'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
- with col2:
194
- st.metric("Sample Rate", stats.get('sample_rate', 'N/A'))
195
- st.metric("Processing Time", stats.get('processing_time', 'N/A'))
196
 
197
- with col3:
198
  status = stats.get('status', 'unknown')
199
  if status == 'success':
200
- st.success("Processing Completed")
201
  elif status == 'failed':
202
- st.error(f"Processing Failed: {stats.get('error', 'Unknown error')}")
203
  else:
204
- st.info("Processing Pending")
205
 
206
  def main():
207
  try:
208
- UIManager.setup_page()
209
 
210
  with st.sidebar:
211
  st.title("VC Assistant Settings")
212
- model_name = "GPT2"
213
 
214
- st.info(f"""Using {model_name}
215
- Memory Usage: {MODEL_CONFIGS[model_name]['memory_required']}
216
- Description: {MODEL_CONFIGS[model_name]['description']}""")
217
 
218
  vc_name = st.text_input("Your Name")
219
  note_style = st.selectbox(
@@ -233,40 +322,25 @@ def main():
233
  st.warning("Please enter your name in the sidebar.")
234
  return
235
 
236
- # Initialize models with progress tracking
237
- progress_text = "Loading models..."
238
- progress_bar = st.progress(0, text=progress_text)
239
-
240
- try:
241
- progress_bar.progress(25, text="Loading Whisper model...")
242
- whisper_model = ModelManager.load_whisper()
243
-
244
- progress_bar.progress(50, text="Loading language model...")
245
- llm = ModelManager.load_llm(model_name)
246
 
247
  if not whisper_model or not llm:
248
  st.error("Failed to initialize models. Please refresh the page.")
249
  return
250
 
251
- progress_bar.progress(75, text="Initializing processors...")
252
  audio_processor = AudioProcessor(whisper_model)
253
  analyzer = ContentAnalyzer(llm)
254
-
255
- progress_bar.progress(100, text="Ready!")
256
- finally:
257
- progress_bar.empty()
258
 
259
- # File upload and processing
260
- audio_file = UIManager.show_file_uploader()
261
 
262
  if audio_file:
263
  with st.spinner("Processing audio..."):
264
- transcription, processing_stats = audio_processor.process_audio_chunk(audio_file)
 
265
 
266
- # Show processing statistics
267
- UIManager.show_processing_stats(processing_stats)
268
-
269
- if transcription:
270
  col1, col2 = st.columns(2)
271
 
272
  with col1:
@@ -294,7 +368,7 @@ def main():
294
  "timestamp": datetime.now().isoformat(),
295
  "transcription": transcription,
296
  "analysis": analysis,
297
- "processing_stats": processing_stats
298
  }, indent=2),
299
  file_name=f"vc_analysis_{datetime.now():%Y%m%d_%H%M%S}.json",
300
  mime="application/json"
@@ -302,14 +376,7 @@ def main():
302
 
303
  except Exception as e:
304
  logger.error(f"Application error: {str(e)}")
305
- st.error(f"""
306
- An unexpected error occurred: {str(e)}
307
-
308
- Please try:
309
- 1. Refreshing the page
310
- 2. Using a different audio file
311
- 3. Checking your internet connection
312
- """)
313
 
314
  finally:
315
  gc.collect()
 
1
+ import os
2
+ import gc
3
+ import json
4
+ import logging
5
+ import tempfile
6
+ from datetime import datetime, timedelta
7
  from pathlib import Path
8
+ from dataclasses import dataclass
9
+ import streamlit as st
10
+ import whisper
11
+ import torch
12
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
13
+ import numpy as np
14
+ import librosa
15
  import humanize
 
16
 
17
+ # Configure logging
18
+ logging.basicConfig(
19
+ level=logging.INFO,
20
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
21
+ )
22
+ logger = logging.getLogger(__name__)
23
+
24
+ # Constants
25
  MAX_FILE_SIZE = 25 * 1024 * 1024 # 25MB
26
+ MAX_AUDIO_DURATION = 600 # 10 minutes
27
+ MIN_SAMPLE_RATE = 16000 # 16kHz
28
+ SUPPORTED_FORMATS = {'.wav', '.mp3', '.m4a'}
29
+
30
+ # Model configuration
31
+ MODEL_CONFIG = {
32
+ "path": "gpt2",
33
+ "description": "Efficient open-source model for analysis",
34
+ "memory_required": "8GB"
35
  }
36
 
37
+ @dataclass
38
+ class VCStyle:
39
+ name: str
40
+ note_format: dict
41
+ key_interests: list
42
+ custom_sections: list
43
+ insight_preferences: dict
44
+
45
  class AudioValidator:
 
 
46
  @staticmethod
47
+ def validate_audio_file(file):
48
+ stats = {
49
+ 'file_size': None,
50
+ 'duration': None,
51
+ 'sample_rate': None,
52
+ 'format': None
53
+ }
54
+
55
  try:
 
56
  if file is None:
57
+ return False, "No file was uploaded.", stats
58
 
59
  # Check file size
60
  file_size = len(file.getvalue())
61
+ stats['file_size'] = humanize.naturalsize(file_size)
62
+
63
  if file_size > MAX_FILE_SIZE:
64
+ return False, f"File size ({stats['file_size']}) exceeds limit", stats
 
 
65
 
66
  # Check file extension
67
  file_extension = Path(file.name).suffix.lower()
68
+ stats['format'] = file_extension
69
+
70
  if file_extension not in SUPPORTED_FORMATS:
71
+ return False, f"Unsupported format {file_extension}", stats
72
 
73
+ # Create temporary file
74
  with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
75
  tmp_file.write(file.getvalue())
76
  tmp_file_path = tmp_file.name
77
 
78
  try:
79
+ # Check audio properties
80
+ y, sr = librosa.load(tmp_file_path, sr=None)
81
+ duration = librosa.get_duration(y=y, sr=sr)
82
+
83
+ stats.update({
84
+ 'duration': str(timedelta(seconds=int(duration))),
85
+ 'sample_rate': f"{sr/1000:.1f}kHz"
86
+ })
87
+
88
  if duration > MAX_AUDIO_DURATION:
89
+ return False, f"Duration ({stats['duration']}) exceeds limit", stats
 
 
90
 
91
+ if sr < MIN_SAMPLE_RATE:
92
+ return False, f"Sample rate too low ({stats['sample_rate']})", stats
 
 
93
 
94
+ return True, "Audio file is valid", stats
95
 
96
  finally:
97
  os.unlink(tmp_file_path)
98
 
99
  except Exception as e:
100
+ logger.error(f"Validation error: {str(e)}")
101
+ return False, str(e), stats
102
 
103
class AudioProcessor:
    """Validates and transcribes uploaded audio with a Whisper model."""

    def __init__(self, model):
        self.model = model                  # Whisper model exposing .transcribe()
        self.validator = AudioValidator()

    def process_audio(self, audio_file):
        """Validate, transcribe and time an uploaded audio file.

        Args:
            audio_file: uploaded file object exposing ``.name`` and
                ``.getvalue()``.

        Returns:
            ``(transcription_text, stats)`` on success or ``(None, stats)``
            on failure. All stats values are JSON-serializable, because
            main() feeds this dict straight into json.dumps() for the
            download button.
        """
        started = datetime.now()
        stats = {
            'status': 'processing',
            # ISO string rather than a datetime object: a raw datetime
            # would make json.dumps(stats) raise TypeError downstream.
            'start_time': started.isoformat(),
            'file_info': None,
            'processing_time': None,
            'error': None
        }

        try:
            # Validate file
            is_valid, message, file_stats = self.validator.validate_audio_file(audio_file)
            stats['file_info'] = file_stats

            if not is_valid:
                stats['status'] = 'failed'
                stats['error'] = message
                return None, stats

            # Copy the upload to disk: Whisper's transcribe() takes a path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=file_stats['format']) as tmp_file:
                tmp_file.write(audio_file.getvalue())
                tmp_file_path = tmp_file.name

            try:
                result = self.model.transcribe(
                    tmp_file_path,
                    language="en",
                    task="transcribe",
                    fp16=torch.cuda.is_available()
                )

                stats['status'] = 'success'
                stats['processing_time'] = str(datetime.now() - started)
                return result["text"], stats

            finally:
                os.unlink(tmp_file_path)

        except Exception as e:
            logger.error(f"Processing error: {str(e)}")
            stats['status'] = 'failed'
            stats['error'] = str(e)
            return None, stats

        finally:
            # Release GPU/host memory between runs.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()
157
 
158
@st.cache_resource
def load_whisper():
    """Load the base Whisper speech-to-text model (cached by Streamlit).

    Returns:
        The loaded model, or None when loading fails (error is logged).
    """
    model = None
    try:
        model = whisper.load_model("base")
    except Exception as e:
        logger.error(f"Whisper model loading error: {str(e)}")
    return model
165
+
166
@st.cache_resource
def load_llm():
    """Load and cache the GPT-2 text-generation pipeline.

    Returns:
        A transformers text-generation pipeline, or None if loading fails
        (error is logged).
    """
    try:
        # NOTE(review): trust_remote_code=True is unnecessary for the stock
        # "gpt2" checkpoint and widens the attack surface if MODEL_CONFIG
        # is ever pointed at an untrusted repo — consider dropping it.
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_CONFIG["path"],
            trust_remote_code=True
        )

        model = AutoModelForCausalLM.from_pretrained(
            MODEL_CONFIG["path"],
            device_map="auto",
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True
        )

        return pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=512,
            # do_sample must be enabled explicitly; without it generation is
            # greedy and temperature/top_p are silently ignored.
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            repetition_penalty=1.15,
            batch_size=1
        )

    except Exception as e:
        logger.error(f"LLM loading error: {str(e)}")
        return None
195
+
196
class ContentAnalyzer:
    """Runs the LLM over a transcription and structures its output."""

    def __init__(self, generator):
        self.generator = generator  # callable text-generation pipeline

    def analyze_text(self, text, vc_style):
        """Analyze a pitch transcription according to a VC's preferences.

        Args:
            text: transcription text to analyze.
            vc_style: object exposing ``.key_interests`` (list of topics).

        Returns:
            Dict mapping section name -> list of paragraphs, or None on error.
        """
        try:
            prompt = self._create_analysis_prompt(text, vc_style)
            response = self._generate_response(prompt)
            return self._parse_response(response)
        except Exception as e:
            logger.error(f"Analysis error: {str(e)}")
            return None

    def _create_analysis_prompt(self, text, vc_style):
        """Build the generation prompt from the transcription and interests."""
        interests = ', '.join(vc_style.key_interests)
        return f"""Analyze this startup pitch focusing on {interests}:

{text}

Provide structured insights for:
1. Key Points
2. Metrics
3. Risks
4. Questions"""

    def _generate_response(self, prompt):
        """Run the generator; returns "" on failure so parsing still works."""
        try:
            response = self.generator(prompt)
            return response[0]['generated_text']
        except Exception as e:
            logger.error(f"Generation error: {str(e)}")
            return ""

    def _parse_response(self, response):
        """Split generated text into sections keyed by trailing-colon headers.

        Paragraphs are separated by blank lines; a paragraph ending with
        ':' starts a new (lowercased) section, anything else is appended to
        the current section ('general' until a header is seen).
        """
        try:
            parsed = {}
            current_section = "general"

            for section in response.split('\n\n'):
                section = section.strip()
                if not section:
                    # Skip runs of blank lines instead of recording empty
                    # paragraphs (previous version appended '' entries).
                    continue
                if section.endswith(':'):
                    current_section = section[:-1].lower()
                    parsed[current_section] = []
                else:
                    parsed.setdefault(current_section, []).append(section)

            return parsed
        except Exception as e:
            logger.error(f"Parsing error: {str(e)}")
            return {"error": "Failed to parse response"}
249
+
250
def setup_page():
    """Configure the Streamlit page (title, icon, wide layout)."""
    st.set_page_config(
        page_title="VC Call Assistant",
        page_icon="🎙️",  # fixed: was mojibake ("πŸŽ™οΈ") from a bad encoding round-trip
        layout="wide",
    )
256
+
257
def show_file_uploader():
    """Render upload instructions and return the uploaded file (or None)."""
    st.markdown("""
    ### 📁 Upload Audio File

    **Supported formats:** WAV, MP3, M4A
    **Limits:** 25MB, 10 minutes, 16kHz min quality
    """)

    # Accepted extensions mirror SUPPORTED_FORMATS.
    return st.file_uploader(
        "Choose an audio file",
        type=['wav', 'mp3', 'm4a']
    )
269
+
270
def show_processing_stats(stats):
    """Render file metadata and processing status for a transcription run.

    Args:
        stats: dict produced by AudioProcessor.process_audio; expects an
            optional 'file_info' sub-dict plus 'status' / 'error' /
            'processing_time' keys.
    """
    if not stats:
        return

    st.markdown("### 📊 Processing Information")

    cols = st.columns(3)

    if stats.get('file_info'):
        with cols[0]:
            st.metric("File Size", stats['file_info'].get('file_size', 'N/A'))
            st.metric("Format", stats['file_info'].get('format', 'N/A'))

        with cols[1]:
            st.metric("Duration", stats['file_info'].get('duration', 'N/A'))
            st.metric("Sample Rate", stats['file_info'].get('sample_rate', 'N/A'))

    # Status is shown even when file_info is missing (e.g. an early failure
    # before validation could populate it).
    with cols[2]:
        status = stats.get('status', 'unknown')
        if status == 'success':
            st.success(f"Processed in {stats.get('processing_time', 'N/A')}")
        elif status == 'failed':
            st.error(f"Failed: {stats.get('error', 'Unknown error')}")
        else:
            st.info("Processing...")
295
 
296
  def main():
297
  try:
298
+ setup_page()
299
 
300
  with st.sidebar:
301
  st.title("VC Assistant Settings")
 
302
 
303
+ st.info(f"""Using GPT2
304
+ Memory: {MODEL_CONFIG['memory_required']}
305
+ Info: {MODEL_CONFIG['description']}""")
306
 
307
  vc_name = st.text_input("Your Name")
308
  note_style = st.selectbox(
 
322
  st.warning("Please enter your name in the sidebar.")
323
  return
324
 
325
+ with st.spinner("Loading models..."):
326
+ whisper_model = load_whisper()
327
+ llm = load_llm()
 
 
 
 
 
 
 
328
 
329
  if not whisper_model or not llm:
330
  st.error("Failed to initialize models. Please refresh the page.")
331
  return
332
 
 
333
  audio_processor = AudioProcessor(whisper_model)
334
  analyzer = ContentAnalyzer(llm)
 
 
 
 
335
 
336
+ audio_file = show_file_uploader()
 
337
 
338
  if audio_file:
339
  with st.spinner("Processing audio..."):
340
+ transcription, stats = audio_processor.process_audio(audio_file)
341
+ show_processing_stats(stats)
342
 
343
+ if transcription and stats['status'] == 'success':
 
 
 
344
  col1, col2 = st.columns(2)
345
 
346
  with col1:
 
368
  "timestamp": datetime.now().isoformat(),
369
  "transcription": transcription,
370
  "analysis": analysis,
371
+ "processing_stats": stats
372
  }, indent=2),
373
  file_name=f"vc_analysis_{datetime.now():%Y%m%d_%H%M%S}.json",
374
  mime="application/json"
 
376
 
377
  except Exception as e:
378
  logger.error(f"Application error: {str(e)}")
379
+ st.error("An error occurred. Please refresh the page and try again.")
 
 
 
 
 
 
 
380
 
381
  finally:
382
  gc.collect()
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  streamlit==1.24.0
2
- whisper-openai==1.0.0
3
  pandas==1.5.3
4
  numpy==1.23.5
5
  torch==2.0.1
@@ -9,8 +9,5 @@ bitsandbytes==0.41.1
9
  scipy==1.11.3
10
  sentencepiece==0.1.99
11
  huggingface-hub==0.19.4
12
- python-dotenv==1.0.0
13
- dataclasses-json==0.5.7
14
  librosa==0.10.1
15
- soundfile==0.12.1
16
  humanize==4.7.0
 
1
  streamlit==1.24.0
2
+ openai-whisper==20231117
3
  pandas==1.5.3
4
  numpy==1.23.5
5
  torch==2.0.1
 
9
  scipy==1.11.3
10
  sentencepiece==0.1.99
11
  huggingface-hub==0.19.4
 
 
12
  librosa==0.10.1
 
13
  humanize==4.7.0