Spaces:

SreekarB
/

SLPAnalysis

Running

App Files Files Community

SreekarB committed on Apr 24

Commit

9c403ff

verified ·

1 Parent(s): 8e70044

Upload simple_app.py

Browse files

Files changed (1) hide show

simple_app.py +367 -54

simple_app.py CHANGED Viewed

@@ -6,6 +6,8 @@ import logging
 import os
 import tempfile
 import shutil
 from datetime import datetime
 # Try to import ReportLab (needed for PDF generation)
@@ -28,10 +30,14 @@ AWS_ACCESS_KEY = os.getenv("AWS_ACCESS_KEY", "")
 AWS_SECRET_KEY = os.getenv("AWS_SECRET_KEY", "")
 AWS_REGION = os.getenv("AWS_REGION", "us-east-1")
-# Initialize Bedrock client if credentials are available
 bedrock_client = None
 if AWS_ACCESS_KEY and AWS_SECRET_KEY:
     try:
         bedrock_client = boto3.client(
             'bedrock-runtime',
             aws_access_key_id=AWS_ACCESS_KEY,
@@ -39,26 +45,52 @@ if AWS_ACCESS_KEY and AWS_SECRET_KEY:
             region_name=AWS_REGION
         )
         logger.info("Bedrock client initialized successfully")
     except Exception as e:
-        logger.error(f"Failed to initialize Bedrock client: {str(e)}")
 # Create data directories if they don't exist
 DATA_DIR = os.environ.get("DATA_DIR", "patient_data")
 DOWNLOADS_DIR = os.path.join(DATA_DIR, "downloads")
 def ensure_data_dirs():
     """Ensure data directories exist"""
     try:
         os.makedirs(DATA_DIR, exist_ok=True)
         os.makedirs(DOWNLOADS_DIR, exist_ok=True)
-        logger.info(f"Data directories created: {DATA_DIR}, {DOWNLOADS_DIR}")
     except Exception as e:
         logger.warning(f"Could not create data directories: {str(e)}")
         # Fallback to tmp directory on HF Spaces
-        global DOWNLOADS_DIR
         DOWNLOADS_DIR = os.path.join(tempfile.gettempdir(), "casl_downloads")
         os.makedirs(DOWNLOADS_DIR, exist_ok=True)
-        logger.info(f"Using fallback directory: {DOWNLOADS_DIR}")
 # Initialize data directories
 ensure_data_dirs()
@@ -148,11 +180,216 @@ def call_bedrock(prompt, max_tokens=4096):
         logger.error(f"Error in call_bedrock: {str(e)}")
         return f"Error: {str(e)}"
 def generate_demo_response(prompt):
-    """Generate a simulated response for demo purposes"""
-    # This function generates a realistic but fake response for demo purposes
-    # In a real deployment, you would call an actual LLM API
     return """<SPEECH_FACTORS_START>
 Difficulty producing fluent speech: 8, 65
 Examples:
@@ -508,6 +745,8 @@ def analyze_transcript(transcript, age, gender):
 def export_pdf(results, patient_name="", record_id="", age="", gender="", assessment_date="", clinician=""):
     """Export analysis results to a PDF report"""
     # Check if ReportLab is available
     if not REPORTLAB_AVAILABLE:
         return "ERROR: PDF export is not available - ReportLab library is not installed. Please run 'pip install reportlab'."
@@ -525,7 +764,6 @@ def export_pdf(results, patient_name="", record_id="", age="", gender="", assess
         except Exception as e:
             logger.warning(f"Could not access downloads directory: {str(e)}")
             # Fallback to temp directory
-            global DOWNLOADS_DIR
             DOWNLOADS_DIR = os.path.join(tempfile.gettempdir(), "casl_downloads")
             os.makedirs(DOWNLOADS_DIR, exist_ok=True)
@@ -703,51 +941,88 @@ def create_interface():
     with gr.Blocks(title="Simple CASL Analysis Tool", theme=theme) as app:
         gr.Markdown("# CASL Analysis Tool")
-        gr.Markdown("A simplified tool for analyzing speech transcripts using CASL framework")
-        with gr.Row():
-            with gr.Column(scale=1):
-                # Patient info
-                gr.Markdown("### Patient Information")
-                patient_name = gr.Textbox(label="Patient Name", placeholder="Enter patient name")
-                record_id = gr.Textbox(label="Record ID", placeholder="Enter record ID")
                 with gr.Row():
-                    age = gr.Number(label="Age", value=8, minimum=1, maximum=120)
-                    gender = gr.Radio(["male", "female", "other"], label="Gender", value="male")
-                assessment_date = gr.Textbox(
-                    label="Assessment Date",
-                    placeholder="MM/DD/YYYY",
-                    value=datetime.now().strftime('%m/%d/%Y')
-                )
-                clinician_name = gr.Textbox(label="Clinician", placeholder="Enter clinician name")
-                # Transcript input
-                gr.Markdown("### Transcript")
-                sample_btn = gr.Button("Load Sample Transcript")
-                file_upload = gr.File(label="Upload transcript file (.txt or .cha)")
-                transcript = gr.Textbox(
-                    label="Speech transcript (CHAT format preferred)",
-                    placeholder="Enter transcript text or upload a file...",
-                    lines=10
-                )
-                # Analysis button
-                analyze_btn = gr.Button("Analyze Transcript", variant="primary")
-            with gr.Column(scale=1):
-                # Results display
-                gr.Markdown("### Analysis Results")
-                analysis_output = gr.Markdown(label="Full Analysis")
-                # PDF export (only shown if ReportLab is available)
-                export_status = gr.Markdown("")
-                if REPORTLAB_AVAILABLE:
-                    export_btn = gr.Button("Export as PDF", variant="secondary")
-                else:
-                    gr.Markdown("⚠️ PDF export is disabled - ReportLab library is not installed")
         # Load sample transcript button
         def load_sample():
@@ -865,6 +1140,43 @@ def create_interface():
                 ],
                 outputs=[export_status]
             )
     return app
@@ -876,7 +1188,8 @@ def create_requirements_file():
         "numpy",
         "Pillow",
         "reportlab>=3.6.0",  # Required for PDF exports
-        "boto3"
     ]
     with open("requirements.txt", "w") as f:

 import os
 import tempfile
 import shutil
+import time
+import uuid
 from datetime import datetime
 # Try to import ReportLab (needed for PDF generation)
 AWS_SECRET_KEY = os.getenv("AWS_SECRET_KEY", "")
 AWS_REGION = os.getenv("AWS_REGION", "us-east-1")
+# Initialize AWS clients if credentials are available
 bedrock_client = None
+transcribe_client = None
+s3_client = None
 if AWS_ACCESS_KEY and AWS_SECRET_KEY:
     try:
+        # Initialize Bedrock client for AI analysis
         bedrock_client = boto3.client(
             'bedrock-runtime',
             aws_access_key_id=AWS_ACCESS_KEY,
             region_name=AWS_REGION
         )
         logger.info("Bedrock client initialized successfully")
+        # Initialize Transcribe client for speech-to-text
+        transcribe_client = boto3.client(
+            'transcribe',
+            aws_access_key_id=AWS_ACCESS_KEY,
+            aws_secret_access_key=AWS_SECRET_KEY,
+            region_name=AWS_REGION
+        )
+        logger.info("Transcribe client initialized successfully")
+        # Initialize S3 client for storing audio files
+        s3_client = boto3.client(
+            's3',
+            aws_access_key_id=AWS_ACCESS_KEY,
+            aws_secret_access_key=AWS_SECRET_KEY,
+            region_name=AWS_REGION
+        )
+        logger.info("S3 client initialized successfully")
     except Exception as e:
+        logger.error(f"Failed to initialize AWS clients: {str(e)}")
+# S3 bucket for storing audio files
+S3_BUCKET = os.environ.get("S3_BUCKET", "casl-audio-files")
+S3_PREFIX = "transcribe-audio/"
 # Create data directories if they don't exist
 DATA_DIR = os.environ.get("DATA_DIR", "patient_data")
 DOWNLOADS_DIR = os.path.join(DATA_DIR, "downloads")
+AUDIO_DIR = os.path.join(DATA_DIR, "audio")
 def ensure_data_dirs():
     """Ensure data directories exist"""
+    # Declared global because the fallback branch below rebinds both module-level paths.
+    global DOWNLOADS_DIR, AUDIO_DIR
     try:
         os.makedirs(DATA_DIR, exist_ok=True)
         os.makedirs(DOWNLOADS_DIR, exist_ok=True)
+        os.makedirs(AUDIO_DIR, exist_ok=True)
+        logger.info(f"Data directories created: {DATA_DIR}, {DOWNLOADS_DIR}, {AUDIO_DIR}")
     except Exception as e:
         logger.warning(f"Could not create data directories: {str(e)}")
         # Fallback to tmp directory on HF Spaces
+        # NOTE: DATA_DIR itself is left unchanged; only the downloads and audio paths
+        # are redirected to the system temp directory.
         DOWNLOADS_DIR = os.path.join(tempfile.gettempdir(), "casl_downloads")
+        AUDIO_DIR = os.path.join(tempfile.gettempdir(), "casl_audio")
+        # These makedirs calls are not guarded, so a failure here propagates to the caller.
         os.makedirs(DOWNLOADS_DIR, exist_ok=True)
+        os.makedirs(AUDIO_DIR, exist_ok=True)
+        logger.info(f"Using fallback directories: {DOWNLOADS_DIR}, {AUDIO_DIR}")
 # Initialize data directories
 ensure_data_dirs()
         logger.error(f"Error in call_bedrock: {str(e)}")
         return f"Error: {str(e)}"
+def transcribe_audio(audio_path, patient_age=8):
+    """Transcribe an audio recording using Amazon Transcribe and format in CHAT format"""
+    if not os.path.exists(audio_path):
+        logger.error(f"Audio file not found: {audio_path}")
+        return "Error: Audio file not found."
+    if not transcribe_client or not s3_client:
+        logger.warning("AWS clients not initialized, using demo transcription")
+        return generate_demo_transcription()
+    try:
+        # Get file info
+        file_name = os.path.basename(audio_path)
+        file_size = os.path.getsize(audio_path)
+        _, file_extension = os.path.splitext(file_name)
+        # Check file format
+        supported_formats = ['.mp3', '.mp4', '.wav', '.flac', '.ogg', '.amr', '.webm']
+        if file_extension.lower() not in supported_formats:
+            logger.error(f"Unsupported audio format: {file_extension}")
+            return f"Error: Unsupported audio format. Please use one of: {', '.join(supported_formats)}"
+        # Generate a unique job name
+        timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
+        job_name = f"casl-transcription-{timestamp}"
+        s3_key = f"{S3_PREFIX}{job_name}{file_extension}"
+        # Upload to S3
+        logger.info(f"Uploading {file_name} to S3 bucket {S3_BUCKET}")
+        try:
+            with open(audio_path, 'rb') as audio_file:
+                s3_client.upload_fileobj(audio_file, S3_BUCKET, s3_key)
+        except Exception as e:
+            logger.error(f"Failed to upload to S3: {str(e)}")
+            # If upload fails, try to create the bucket
+            try:
+                s3_client.create_bucket(Bucket=S3_BUCKET)
+                logger.info(f"Created S3 bucket: {S3_BUCKET}")
+                # Try upload again
+                with open(audio_path, 'rb') as audio_file:
+                    s3_client.upload_fileobj(audio_file, S3_BUCKET, s3_key)
+            except Exception as bucket_error:
+                logger.error(f"Failed to create bucket and upload: {str(bucket_error)}")
+                return "Error: Failed to upload audio file. Please check your AWS permissions."
+        # Start transcription job
+        logger.info(f"Starting transcription job: {job_name}")
+        media_format = file_extension.lower()[1:]  # Remove the dot
+        if media_format == 'webm':
+            media_format = 'webm'  # Amazon Transcribe expects this
+        # Determine language settings based on patient age
+        if patient_age < 10:
+            # For younger children, enabling child language model is helpful
+            language_options = {
+                'LanguageCode': 'en-US',
+                'Settings': {
+                    'LanguageModelName': 'ChildLanguage'
+                }
+            }
+        else:
+            language_options = {
+                'LanguageCode': 'en-US'
+            }
+        transcribe_client.start_transcription_job(
+            TranscriptionJobName=job_name,
+            Media={
+                'MediaFileUri': f"s3://{S3_BUCKET}/{s3_key}"
+            },
+            MediaFormat=media_format,
+            **language_options,
+            Settings={
+                'ShowSpeakerLabels': True,
+                'MaxSpeakerLabels': 2  # Typically patient + clinician
+            }
+        )
+        # Wait for the job to complete (with timeout)
+        logger.info("Waiting for transcription to complete...")
+        max_tries = 30  # 5 minutes max wait
+        tries = 0
+        while tries < max_tries:
+            try:
+                job = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
+                status = job['TranscriptionJob']['TranscriptionJobStatus']
+                if status == 'COMPLETED':
+                    # Get the transcript
+                    transcript_uri = job['TranscriptionJob']['Transcript']['TranscriptFileUri']
+                    # Download the transcript
+                    import urllib.request
+                    import json
+                    with urllib.request.urlopen(transcript_uri) as response:
+                        transcript_json = json.loads(response.read().decode('utf-8'))
+                    # Convert to CHAT format
+                    chat_transcript = format_as_chat(transcript_json)
+                    return chat_transcript
+                elif status == 'FAILED':
+                    reason = job['TranscriptionJob'].get('FailureReason', 'Unknown failure')
+                    logger.error(f"Transcription job failed: {reason}")
+                    return f"Error: Transcription failed - {reason}"
+                # Still in progress, wait and try again
+                tries += 1
+                time.sleep(10)  # Check every 10 seconds
+            except Exception as e:
+                logger.error(f"Error checking transcription job: {str(e)}")
+                return f"Error getting transcription: {str(e)}"
+        # If we got here, we timed out
+        return "Error: Transcription timed out. The process is taking longer than expected."
+    except Exception as e:
+        logger.exception("Error in audio transcription")
+        return f"Error transcribing audio: {str(e)}"
+def format_as_chat(transcript_json):
+    """Format the Amazon Transcribe JSON result as CHAT format"""
+    try:
+        # Get transcript items
+        items = transcript_json['results']['items']
+        # Get speaker labels if available
+        speakers = {}
+        if 'speaker_labels' in transcript_json['results']:
+            speaker_segments = transcript_json['results']['speaker_labels']['segments']
+            # Map each item to its speaker
+            for segment in speaker_segments:
+                for item in segment['items']:
+                    start_time = item['start_time']
+                    speakers[start_time] = segment['speaker_label']
+        # Build transcript by combining words into utterances by speaker
+        current_speaker = None
+        current_utterance = []
+        utterances = []
+        for item in items:
+            # Skip non-pronunciation items (like punctuation)
+            if item['type'] != 'pronunciation':
+                continue
+            word = item['alternatives'][0]['content']
+            start_time = item.get('start_time')
+            # Determine speaker if available
+            speaker = speakers.get(start_time, 'spk_0')
+            # If speaker changed, start a new utterance
+            if speaker != current_speaker and current_utterance:
+                utterances.append((current_speaker, ' '.join(current_utterance)))
+                current_utterance = []
+            current_speaker = speaker
+            current_utterance.append(word)
+        # Add the last utterance
+        if current_utterance:
+            utterances.append((current_speaker, ' '.join(current_utterance)))
+        # Format as CHAT
+        chat_lines = []
+        for speaker, text in utterances:
+            # Map speakers to CHAT format
+            # Assuming spk_0 is the patient (PAR) and spk_1 is the clinician (INV)
+            chat_speaker = "*PAR:" if speaker == "spk_0" else "*INV:"
+            chat_lines.append(f"{chat_speaker} {text}.")
+        return '\n'.join(chat_lines)
+    except Exception as e:
+        logger.exception("Error formatting transcript")
+        return "*PAR: (Error formatting transcript)"
+def generate_demo_transcription():
+    """Generate a simulated transcription response.
+
+    Returns a fixed, hard-coded CHAT-style sample (*PAR: / *INV: lines); no audio is
+    processed. transcribe_audio returns this text when the Transcribe or S3 client is
+    not initialized.
+    """
+    return """*PAR: today I want to tell you about my favorite toy.
+*PAR: it's a &-um teddy bear that I got for my birthday.
+*PAR: he has &-um brown fur and a red bow.
+*PAR: I like to sleep with him every night.
+*PAR: sometimes I take him to school in my backpack.
+*INV: what's your teddy bear's name?
+*PAR: his name is &-um Brownie because he's brown."""
 def generate_demo_response(prompt):
+    """Generate a response using Bedrock if available, otherwise return a demo response"""
+    # This function will attempt to call Bedrock, and only fall back to the demo response
+    # if Bedrock is not available or fails
+    # Try to call Bedrock first if client is available
+    if bedrock_client:
+        try:
+            return call_bedrock(prompt)
+        except Exception as e:
+            logger.error(f"Error calling Bedrock: {str(e)}")
+            logger.info("Falling back to demo response")
+            # Continue to fallback response if Bedrock call fails
+    # Fallback demo response
+    logger.warning("Using demo response - Bedrock client not available or call failed")
     return """<SPEECH_FACTORS_START>
 Difficulty producing fluent speech: 8, 65
 Examples:
 def export_pdf(results, patient_name="", record_id="", age="", gender="", assessment_date="", clinician=""):
     """Export analysis results to a PDF report"""
+    global DOWNLOADS_DIR
     # Check if ReportLab is available
     if not REPORTLAB_AVAILABLE:
         return "ERROR: PDF export is not available - ReportLab library is not installed. Please run 'pip install reportlab'."
         except Exception as e:
             logger.warning(f"Could not access downloads directory: {str(e)}")
             # Fallback to temp directory
             DOWNLOADS_DIR = os.path.join(tempfile.gettempdir(), "casl_downloads")
             os.makedirs(DOWNLOADS_DIR, exist_ok=True)
     with gr.Blocks(title="Simple CASL Analysis Tool", theme=theme) as app:
         gr.Markdown("# CASL Analysis Tool")
+        gr.Markdown("A simplified tool for analyzing speech transcripts and audio using CASL framework")
+        with gr.Tabs() as main_tabs:
+            # Analysis Tab
+            with gr.TabItem("Analysis", id=0):
                 with gr.Row():
+                    with gr.Column(scale=1):
+                        # Patient info
+                        gr.Markdown("### Patient Information")
+                        patient_name = gr.Textbox(label="Patient Name", placeholder="Enter patient name")
+                        record_id = gr.Textbox(label="Record ID", placeholder="Enter record ID")
+                        with gr.Row():
+                            age = gr.Number(label="Age", value=8, minimum=1, maximum=120)
+                            gender = gr.Radio(["male", "female", "other"], label="Gender", value="male")
+                        assessment_date = gr.Textbox(
+                            label="Assessment Date",
+                            placeholder="MM/DD/YYYY",
+                            value=datetime.now().strftime('%m/%d/%Y')
+                        )
+                        clinician_name = gr.Textbox(label="Clinician", placeholder="Enter clinician name")
+                        # Transcript input
+                        gr.Markdown("### Transcript")
+                        sample_btn = gr.Button("Load Sample Transcript")
+                        file_upload = gr.File(label="Upload transcript file (.txt or .cha)")
+                        transcript = gr.Textbox(
+                            label="Speech transcript (CHAT format preferred)",
+                            placeholder="Enter transcript text or upload a file...",
+                            lines=10
+                        )
+                        # Analysis button
+                        analyze_btn = gr.Button("Analyze Transcript", variant="primary")
+                    with gr.Column(scale=1):
+                        # Results display
+                        gr.Markdown("### Analysis Results")
+                        analysis_output = gr.Markdown(label="Full Analysis")
+                        # PDF export (only shown if ReportLab is available)
+                        export_status = gr.Markdown("")
+                        if REPORTLAB_AVAILABLE:
+                            export_btn = gr.Button("Export as PDF", variant="secondary")
+                        else:
+                            gr.Markdown("⚠️ PDF export is disabled - ReportLab library is not installed")
+            # Transcription Tab
+            with gr.TabItem("Transcription", id=1):
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        gr.Markdown("### Audio Transcription")
+                        gr.Markdown("Upload an audio recording to automatically transcribe it in CHAT format")
+                        # Patient's age helps with transcription accuracy
+                        transcription_age = gr.Number(label="Patient Age", value=8, minimum=1, maximum=120,
+                                                     info="For children under 10, special language models may be used")
+                        # Audio input
+                        audio_input = gr.Audio(type="filepath", label="Upload Audio Recording",
+                                              format="mp3,wav,ogg,webm",
+                                              elem_id="audio-input")
+                        # Transcribe button
+                        transcribe_btn = gr.Button("Transcribe Audio", variant="primary")
+                    with gr.Column(scale=1):
+                        # Transcription output
+                        transcription_output = gr.Textbox(
+                            label="Transcription Result",
+                            placeholder="Transcription will appear here...",
+                            lines=12
+                        )
+                        with gr.Row():
+                            # Button to use transcription in analysis
+                            copy_to_analysis_btn = gr.Button("Use for Analysis", variant="secondary")
+                        # Status/info message
+                        transcription_status = gr.Markdown("")
         # Load sample transcript button
         def load_sample():
                 ],
                 outputs=[export_status]
             )
+        # Transcription button handler
+        def on_transcribe_audio(audio_path, age_val):
+            try:
+                if not audio_path:
+                    return "Please upload an audio file to transcribe.", "Error: No audio file provided."
+                # Process the audio file with Amazon Transcribe
+                transcription = transcribe_audio(audio_path, age_val)
+                # Return status message based on whether it's a demo or real transcription
+                if not transcribe_client:
+                    status_msg = "⚠️ Demo mode: Using example transcription (AWS credentials not configured)"
+                else:
+                    status_msg = "✅ Transcription completed successfully"
+                return transcription, status_msg
+            except Exception as e:
+                logger.exception("Error transcribing audio")
+                return f"Error: {str(e)}", f"❌ Transcription failed: {str(e)}"
+        # Connect the transcribe button to its handler
+        transcribe_btn.click(
+            on_transcribe_audio,
+            inputs=[audio_input, transcription_age],
+            outputs=[transcription_output, transcription_status]
+        )
+        # Copy transcription to analysis tab
+        def copy_to_analysis(transcription):
+            """Pass the transcription text through and select the tab with id 0 (Analysis)."""
+            return transcription, gr.update(selected=0)  # Switch to Analysis tab
+        copy_to_analysis_btn.click(
+            copy_to_analysis,
+            inputs=[transcription_output],
+            outputs=[transcript, main_tabs]
+        )
     return app
         "numpy",
         "Pillow",
         "reportlab>=3.6.0",  # Required for PDF exports
+        "boto3>=1.28.0",     # Required for AWS services
+        "botocore>=1.31.0"   # Required for AWS services
     ]
     with open("requirements.txt", "w") as f: