File size: 11,621 Bytes
57966f8
 
 
 
 
 
e8513b3
57966f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c834a4
57966f8
 
 
 
 
 
3c834a4
57966f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
788d26b
57966f8
 
 
 
 
 
3c834a4
2b82738
57966f8
2b82738
 
 
57966f8
 
 
 
 
 
 
 
 
 
2b82738
 
57966f8
2b82738
57966f8
 
 
 
 
 
2db9a09
57966f8
 
 
e8513b3
57966f8
 
 
 
 
 
2db9a09
57966f8
 
 
 
 
 
 
 
3c834a4
57966f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c834a4
 
57966f8
 
 
 
 
 
 
 
 
 
 
3c834a4
 
57966f8
3c834a4
 
 
 
 
 
 
 
 
 
 
 
57966f8
 
 
 
 
 
 
 
 
 
 
 
 
 
3c834a4
57966f8
 
3c834a4
57966f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c834a4
788d26b
 
 
e8513b3
788d26b
 
57966f8
788d26b
 
 
 
 
3c834a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57966f8
 
 
 
 
 
3c834a4
57966f8
 
3c834a4
 
 
 
 
 
57966f8
3c834a4
 
57966f8
 
 
 
3c834a4
57966f8
3c834a4
 
 
 
57966f8
 
 
 
3c834a4
 
 
 
 
 
 
 
 
 
57966f8
 
 
 
 
 
 
3c834a4
57966f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c834a4
 
57966f8
 
 
 
 
 
 
 
 
3c834a4
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
# Add these imports at the top
import soundfile as sf
import librosa
from pathlib import Path
import humanize
from datetime import timedelta

# Add these constants
# Largest accepted upload, in bytes (25 * 1024 * 1024).
MAX_FILE_SIZE = 25 * 1024 ** 2
# Longest accepted recording, in seconds (ten minutes).
MAX_AUDIO_DURATION = 10 * 60
# Accepted lower-case file suffixes mapped to the label shown in error messages.
SUPPORTED_FORMATS = {
    ".wav": "WAV audio",
    ".mp3": "MP3 audio",
    ".m4a": "M4A audio",
}

class AudioValidator:
    """Validate uploaded audio files and explain why a file was rejected."""

    @staticmethod
    def validate_audio_file(file) -> tuple[bool, str]:
        """Check an upload against the size, format, duration and sample-rate limits.

        Args:
            file: Upload object exposing ``getvalue()`` (the full content as
                bytes) and ``name`` (the original file name), or ``None``.

        Returns:
            ``(True, "Audio file is valid")`` when every check passes,
            otherwise ``(False, reason)`` with a human-readable reason.
            Unexpected errors are logged and reported as a failed validation
            instead of being raised.
        """
        try:
            # Check if file is provided
            if file is None:
                return False, "No file was uploaded."

            # Read the payload once; it is reused for the size check and the
            # temporary copy below.
            payload = file.getvalue()

            # Check file size
            file_size = len(payload)
            if file_size > MAX_FILE_SIZE:
                readable_size = humanize.naturalsize(file_size)
                max_size = humanize.naturalsize(MAX_FILE_SIZE)
                return False, f"File size ({readable_size}) exceeds maximum allowed size ({max_size})"

            # Check file extension
            file_extension = Path(file.name).suffix.lower()
            if file_extension not in SUPPORTED_FORMATS:
                return False, f"Unsupported file format. Please upload {', '.join(SUPPORTED_FORMATS.values())}"

            tmp_file_path = None
            try:
                # librosa needs a path on disk, so write a temporary copy.
                # The path is recorded before writing so that the cleanup in
                # ``finally`` also runs when the write itself fails.
                with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
                    tmp_file_path = tmp_file.name
                    tmp_file.write(payload)

                # Check audio duration
                duration = librosa.get_duration(path=tmp_file_path)
                if duration > MAX_AUDIO_DURATION:
                    formatted_duration = str(timedelta(seconds=int(duration)))
                    max_duration = str(timedelta(seconds=MAX_AUDIO_DURATION))
                    return False, f"Audio duration ({formatted_duration}) exceeds maximum allowed length ({max_duration})"

                # Check audio quality using the file's native sample rate.
                # librosa.load() resamples to 22050 Hz unless sr=None is
                # passed, which made the previous "< 16000" test unreachable
                # and decoded the whole file just to read one number.
                sr = librosa.get_samplerate(tmp_file_path)
                if sr < 16000:
                    return False, f"Audio quality too low. Sample rate ({sr}Hz) should be at least 16kHz"

                return True, "Audio file is valid"

            finally:
                if tmp_file_path is not None and os.path.exists(tmp_file_path):
                    os.unlink(tmp_file_path)

        except Exception as e:
            # Validation never raises: the failure is logged and surfaced to
            # the caller as a rejection message.
            logger.error(f"Audio validation error: {str(e)}")
            return False, f"Error validating audio file: {str(e)}"

class AudioProcessor:
    """Transcribe uploaded audio and report statistics about the processing run."""

    def __init__(self, model):
        """Keep the transcription model and create the file validator.

        Args:
            model: Object exposing ``transcribe(path, language=..., task=...,
                fp16=...)`` whose result supports ``result["text"]``.
        """
        self.model = model
        self.validator = AudioValidator()

    def process_audio_chunk(self, audio_file) -> tuple[Optional[str], Dict[str, Any]]:
        """Validate, transcribe and describe one uploaded audio file.

        Args:
            audio_file: Upload object exposing ``getvalue()``, ``seek()`` and
                ``name``.

        Returns:
            ``(text, stats)``. ``text`` is the transcription, or ``None`` when
            validation or processing failed. ``stats`` always contains the
            keys ``duration``, ``sample_rate``, ``file_size``,
            ``processing_time`` and ``status`` (``'pending'``, ``'success'``
            or ``'failed'``); on failure it also contains ``error``.
            Exceptions are logged and reported through ``stats`` rather than
            raised.
        """
        processing_stats = {
            'duration': None,
            'sample_rate': None,
            'file_size': None,
            'processing_time': None,
            'status': 'pending'
        }

        try:
            start_time = datetime.now()

            # Validate file
            is_valid, validation_message = self.validator.validate_audio_file(audio_file)
            if not is_valid:
                processing_stats['status'] = 'failed'
                processing_stats['error'] = validation_message
                return None, processing_stats

            # Get file stats; the payload is read once and reused for the
            # temporary copy.
            payload = audio_file.getvalue()
            processing_stats['file_size'] = humanize.naturalsize(len(payload))

            # Process audio
            file_extension = Path(audio_file.name).suffix.lower()
            tmp_file_path = None
            try:
                # Record the path before writing so the cleanup below also
                # covers a failed write.
                with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
                    tmp_file_path = tmp_file.name
                    # Rewind kept from the original; getvalue() on
                    # BytesIO-like objects does not depend on the position.
                    audio_file.seek(0)
                    tmp_file.write(payload)

                # Get audio info from the file itself. librosa.load() would
                # decode the whole recording and resample it to 22050 Hz by
                # default, so the reported rate would always be "22.1kHz".
                duration = librosa.get_duration(path=tmp_file_path)
                sr = librosa.get_samplerate(tmp_file_path)
                processing_stats.update({
                    'duration': str(timedelta(seconds=int(duration))),
                    'sample_rate': f"{sr/1000:.1f}kHz"
                })

                # Transcribe audio; half precision is requested only when CUDA
                # is available.
                result = self.model.transcribe(
                    tmp_file_path,
                    language="en",
                    task="transcribe",
                    fp16=torch.cuda.is_available()
                )

                # Update stats
                processing_time = (datetime.now() - start_time).total_seconds()
                processing_stats.update({
                    'processing_time': f"{processing_time:.1f}s",
                    'status': 'success'
                })

                return result["text"], processing_stats

            finally:
                if tmp_file_path is not None and os.path.exists(tmp_file_path):
                    os.unlink(tmp_file_path)

        except Exception as e:
            error_message = str(e)
            logger.error(f"Audio processing error: {error_message}")
            processing_stats.update({
                'status': 'failed',
                'error': error_message
            })
            return None, processing_stats
        finally:
            # Release memory after every run, successful or not.
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

class UIManager:
    """Streamlit helpers for page setup, the upload widget and the statistics panel."""

    @staticmethod
    def setup_page():
        """Apply the page title, icon, wide layout and expanded sidebar."""
        st.set_page_config(
            page_title="VC Call Assistant",
            # Repaired from a mis-decoded UTF-8 sequence.
            page_icon="🎙️",
            layout="wide",
            initial_sidebar_state="expanded"
        )

    @staticmethod
    def show_file_uploader() -> Optional[Any]:
        """Render the upload instructions and the file uploader.

        Returns:
            Whatever ``st.file_uploader`` returns for the current run.
        """
        st.markdown("""
        ### 📁 Upload Audio File

        **Supported formats:**
        - WAV (recommended)
        - MP3
        - M4A

        **Limitations:**
        - Maximum file size: 25MB
        - Maximum duration: 10 minutes
        - Minimum sample rate: 16kHz
        """)

        return st.file_uploader(
            "Choose an audio file",
            type=['wav', 'mp3', 'm4a']
        )

    @staticmethod
    def show_processing_stats(stats: Dict[str, Any]):
        """Display processing statistics as metrics plus a status message.

        Args:
            stats: Mapping that may contain ``duration``, ``file_size``,
                ``sample_rate``, ``processing_time``, ``status`` and
                ``error``. Nothing is rendered when it is empty or ``None``.
        """
        if not stats:
            return

        def shown(key):
            # A key that is present but still None should also read "N/A";
            # dict.get's default only covers a missing key.
            value = stats.get(key)
            return 'N/A' if value is None else value

        st.markdown("### 📊 Processing Statistics")

        col1, col2, col3 = st.columns(3)

        with col1:
            st.metric("Duration", shown('duration'))
            st.metric("File Size", shown('file_size'))

        with col2:
            st.metric("Sample Rate", shown('sample_rate'))
            st.metric("Processing Time", shown('processing_time'))

        with col3:
            status = stats.get('status', 'unknown')
            if status == 'success':
                st.success("Processing Completed")
            elif status == 'failed':
                st.error(f"Processing Failed: {stats.get('error', 'Unknown error')}")
            else:
                st.info("Processing Pending")

def main():
    """Run the Streamlit app.

    Configures the page, collects the sidebar settings, loads the Whisper and
    language models through ``ModelManager``, transcribes the uploaded audio
    and shows the transcript, the analysis and an export button. Any
    exception is logged and shown to the user; memory is released at the end
    of every run.
    """
    try:
        UIManager.setup_page()

        with st.sidebar:
            st.title("VC Assistant Settings")
            model_name = "GPT2"

            st.info(f"""Using {model_name}
            Memory Usage: {MODEL_CONFIGS[model_name]['memory_required']}
            Description: {MODEL_CONFIGS[model_name]['description']}""")

            vc_name = st.text_input("Your Name")
            note_style = st.selectbox(
                "Note Style",
                ["Bullet Points", "Paragraphs", "Q&A"]
            )

            interests = st.multiselect(
                "Focus Areas",
                ["Product", "Market", "Team", "Financials", "Technology"],
                default=["Product", "Market"]
            )

        # Emoji below were repaired from mis-decoded UTF-8 sequences.
        st.title("🎙️ VC Call Assistant")

        # Nothing else is rendered until the user has identified themselves.
        if not vc_name:
            st.warning("Please enter your name in the sidebar.")
            return

        # Initialize models with progress tracking
        progress_text = "Loading models..."
        progress_bar = st.progress(0, text=progress_text)

        try:
            progress_bar.progress(25, text="Loading Whisper model...")
            whisper_model = ModelManager.load_whisper()

            progress_bar.progress(50, text="Loading language model...")
            llm = ModelManager.load_llm(model_name)

            if not whisper_model or not llm:
                st.error("Failed to initialize models. Please refresh the page.")
                return

            progress_bar.progress(75, text="Initializing processors...")
            audio_processor = AudioProcessor(whisper_model)
            analyzer = ContentAnalyzer(llm)

            progress_bar.progress(100, text="Ready!")
        finally:
            # Remove the progress bar whether loading succeeded or not.
            progress_bar.empty()

        # File upload and processing
        audio_file = UIManager.show_file_uploader()

        if audio_file:
            with st.spinner("Processing audio..."):
                transcription, processing_stats = audio_processor.process_audio_chunk(audio_file)

                # Show processing statistics
                UIManager.show_processing_stats(processing_stats)

                if transcription:
                    col1, col2 = st.columns(2)

                    with col1:
                        st.subheader("📝 Transcript")
                        st.write(transcription)

                    with col2:
                        st.subheader("🔍 Analysis")
                        with st.spinner("Analyzing transcript..."):
                            vc_style = VCStyle(
                                name=vc_name,
                                note_format={"style": note_style},
                                key_interests=interests,
                                custom_sections=[],
                                insight_preferences={}
                            )

                            analysis = analyzer.analyze_text(transcription, vc_style)
                            if analysis:
                                st.write(analysis)

                                # Export the transcript, the analysis and the
                                # run statistics as one timestamped JSON file.
                                st.download_button(
                                    "📥 Export Analysis",
                                    data=json.dumps({
                                        "timestamp": datetime.now().isoformat(),
                                        "transcription": transcription,
                                        "analysis": analysis,
                                        "processing_stats": processing_stats
                                    }, indent=2),
                                    file_name=f"vc_analysis_{datetime.now():%Y%m%d_%H%M%S}.json",
                                    mime="application/json"
                                )

    except Exception as e:
        # Top-level boundary: log the failure and show it instead of crashing.
        logger.error(f"Application error: {str(e)}")
        st.error(f"""
        An unexpected error occurred: {str(e)}

        Please try:
        1. Refreshing the page
        2. Using a different audio file
        3. Checking your internet connection
        """)

    finally:
        # Release memory at the end of every run.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()