Upload 5 files

- app.py  +86 -0
- brain_of_doctor.py  +45 -0
- requirements.txt  +0 -0
- voice_of_the_doctor.py  +115 -0
- voice_of_the_patient.py  +59 -0

app.py
ADDED
@@ -0,0 +1,86 @@
from dotenv import load_dotenv
import os
import gradio as gr
from brain_of_doctor import load_image, analyze_image_with_query
from voice_of_the_patient import record_audio, transcription_with_groq
from voice_of_the_doctor import (
    text_to_speech_with_gtts_new_autoplay,
    text_to_speech_with_elevenlabs_new_autoplay
)
from deep_translator import GoogleTranslator
from gtts import gTTS

load_dotenv()

system_prompt = """You have to act as a professional doctor, I know you are not, but this is for learning purposes.
What's in this image? Do you find anything wrong with it medically?
If you make a differential, suggest some remedies for them. Do not add any numbers or special characters in
your response. Your response should be in one long paragraph. Also, always answer as if you are answering a real person.
Do not say 'In the image I see' but say 'With what I see, I think you have ....'
Do not respond as an AI model in markdown; your answer should mimic an actual doctor, not an AI bot.
Keep your answer concise (max 2 sentences). No preamble, start your answer right away please."""


# ✅ Helper for Urdu TTS (using gTTS)
def tts_with_language_support(text, output_file="final.wav", lang="en"):
    try:
        tts = gTTS(text=text, lang=lang)
        tts.save(output_file)
        return output_file
    except Exception as e:
        print("TTS generation failed:", e)
        return None


def process_inputs(audio_filePath, image_filePath, language):
    # Speech -> text
    speech_to_text_output = transcription_with_groq(
        GROQ_API_KEY=os.getenv("GROQ_API_KEY"),
        audio_filePath=audio_filePath,
        sst_model="whisper-large-v3"
    )

    # Image analysis
    if image_filePath:
        doctor_response_en = analyze_image_with_query(
            query=system_prompt + " " + speech_to_text_output,
            encoded_image=load_image(image_filePath),
            model="meta-llama/llama-4-scout-17b-16e-instruct"
        )
    else:
        doctor_response_en = "No image provided for me to analyze"

    # Translation + voice selection
    if language == "Urdu":
        # Translate to Urdu, then generate the Urdu voice with gTTS
        doctor_response = GoogleTranslator(source='en', target='ur').translate(doctor_response_en)
        output_audio = tts_with_language_support(text=doctor_response, output_file="final.wav", lang="ur")
    else:
        # English voice via gTTS (returns the path of the generated wav file)
        doctor_response = doctor_response_en
        output_audio = text_to_speech_with_gtts_new_autoplay(
            input_text=doctor_response,
            output_file="final.wav"
        )

    return speech_to_text_output, doctor_response, output_audio


# ✅ Gradio Interface
iface = gr.Interface(
    fn=process_inputs,
    inputs=[
        gr.Audio(sources=["microphone"], type="filepath", label="Patient's Voice"),
        gr.Image(type="filepath", label="Upload Medical Image"),
        gr.Radio(choices=["English", "Urdu"], value="English", label="Select Language")
    ],
    outputs=[
        gr.Textbox(label="Speech to Text"),
        gr.Textbox(label="Doctor's Response"),
        gr.Audio(label="Doctor's Voice (Auto Play)", autoplay=True, type="filepath")
    ],
    title="AI Doctor — Developed by Danish Khan"
)

iface.launch(debug=True)
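All of the modules in this commit read their API keys from the environment via load_dotenv(). As a minimal pre-flight sketch (not part of the commit; GROQ_API_KEY and ELEVENLABS_API_KEY are the names the code above actually reads), the .env setup could be verified before launching:

import os
from dotenv import load_dotenv

# Hypothetical check, not in the commit: warn when a key the modules
# above expect is missing from the environment / .env file.
load_dotenv()
for key in ("GROQ_API_KEY", "ELEVENLABS_API_KEY"):
    if not os.getenv(key):
        print(f"Warning: {key} is not set; the matching API calls will fail.")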
    	
brain_of_doctor.py
ADDED
@@ -0,0 +1,45 @@
from dotenv import load_dotenv
import base64
from groq import Groq


load_dotenv()


def load_image(image_path):
    # Read the image and return it as a base64-encoded string
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def analyze_image_with_query(model, query, encoded_image):
    client = Groq()
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": query
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{encoded_image}"
                    },
                },
            ],
        }
    ]
    chat_completion = client.chat.completions.create(
        model=model,
        messages=messages,
    )
    return chat_completion.choices[0].message.content


# Demo call; guarded so it does not run when app.py imports this module
if __name__ == "__main__":
    query = "Describe the condition of the face briefly"
    model = "meta-llama/llama-4-scout-17b-16e-instruct"
    print(analyze_image_with_query(model, query, load_image("acne.jpg")))
    	
requirements.txt
ADDED

File without changes
    	
voice_of_the_doctor.py
ADDED
@@ -0,0 +1,115 @@
import os
import subprocess
import platform

from gtts import gTTS
from dotenv import load_dotenv
from pydub import AudioSegment
import elevenlabs
from elevenlabs.client import ElevenLabs

load_dotenv()

ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")


def text_to_speech_with_gtts_old_no_autoplay_functionality(input_text, output_file):
    language = 'en'
    gttsObj = gTTS(
        text=input_text,
        lang=language,
        slow=False
    )
    gttsObj.save(output_file)


def text_to_speech_with_elevenlabs_old_no_autoplay_functionality(input_text, output_file):
    client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
    audio = client.generate(
        text=input_text,
        voice="Aria",
        model="eleven_turbo_v2",
        output_format="mp3_22050_32"
    )
    elevenlabs.save(audio, output_file)


def play_audio(wav_file):
    # Play the wav file with the native player for the current operating system
    os_name = platform.system()
    try:
        if os_name == "Darwin":  # macOS
            subprocess.run(['afplay', wav_file])
        elif os_name == "Windows":
            subprocess.run(['powershell', '-c', f'(New-Object Media.SoundPlayer "{wav_file}").PlaySync();'])
        elif os_name == "Linux":
            subprocess.run(['aplay', wav_file])
        else:
            raise OSError("Unsupported operating system")
    except Exception as e:
        print(f"An error occurred while trying to play the audio: {e}")


def text_to_speech_with_gtts_new_autoplay(input_text, output_file):
    language = 'en'

    # Create the gTTS object and save the audio (gTTS always writes MP3 data)
    gttsObj = gTTS(
        text=input_text,
        lang=language,
        slow=False
    )
    gttsObj.save(output_file)

    # Convert from mp3 to wav so the native players can handle it
    wav_file = output_file.replace('.mp3', '.wav')
    AudioSegment.from_mp3(output_file).export(wav_file, format="wav")
    # Optionally remove the original mp3 file
    # os.remove(output_file)

    play_audio(wav_file)
    return wav_file


def text_to_speech_with_elevenlabs_new_autoplay(input_text, output_file):
    client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
    audio = client.generate(
        text=input_text,
        voice="Aria",
        model="eleven_turbo_v2",
        output_format="mp3_22050_32"
    )
    elevenlabs.save(audio, output_file)

    # Convert from mp3 to wav, then play it
    wav_file = output_file.replace('.mp3', '.wav')
    AudioSegment.from_mp3(output_file).export(wav_file, format="wav")
    # os.remove(output_file)

    play_audio(wav_file)
    return wav_file


# Demo calls; guarded so they do not run when app.py imports this module
if __name__ == "__main__":
    text = "hello this is me danish with ai voice"
    # text_to_speech_with_gtts_old_no_autoplay_functionality(input_text=text, output_file="output_gtts.mp3")
    # text_to_speech_with_elevenlabs_old_no_autoplay_functionality(input_text=text, output_file="output_elevenlabs.mp3")
    # text_to_speech_with_gtts_new_autoplay(input_text=text, output_file="gtts_testing_autoplay.mp3")
    text_to_speech_with_elevenlabs_new_autoplay(input_text=text, output_file="output_elevenlabs_autoplay.mp3")
    	
voice_of_the_patient.py
ADDED
@@ -0,0 +1,59 @@
import logging
import os
from io import BytesIO

import speech_recognition as sr
from pydub import AudioSegment
from groq import Groq
from dotenv import load_dotenv

load_dotenv()

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def record_audio(file_path, duration=20, phrase_time_limit=None):
    recognizer = sr.Recognizer()

    try:
        with sr.Microphone() as source:
            logging.info("Adjusting for ambient noise...")
            recognizer.adjust_for_ambient_noise(source)
            logging.info("Recording audio...")

            # Record the audio
            audio_data = recognizer.listen(source, timeout=duration, phrase_time_limit=phrase_time_limit)
            logging.info("Recording complete.")

            # Convert the recorded audio to an MP3 file
            audio_waves = audio_data.get_wav_data()
            audio_segments = AudioSegment.from_wav(BytesIO(audio_waves))
            audio_segments.export(file_path, format="mp3", bitrate="128k")
            logging.info(f"Audio saved to {file_path}.")

    except Exception as e:
        logging.error(f"An error occurred while recording audio: {e}")


def transcription_with_groq(audio_filePath, sst_model, GROQ_API_KEY):
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
        with open(audio_filePath, "rb") as audio_file:
            transcription = groq_client.audio.transcriptions.create(
                model=sst_model,
                file=audio_file,
                language="en"
            )
        logging.info("Transcription complete.")
        return transcription.text
    except Exception as e:
        logging.error(f"An error occurred during transcription: {e}")


# Demo run; guarded so recording does not start when app.py imports this module
if __name__ == "__main__":
    audio_filePath = "patient_voice.mp3"
    record_audio(file_path=audio_filePath)

    GROQ_API_KEY = os.getenv("GROQ_API_KEY")
    sst_model = "whisper-large-v3"
    print(transcription_with_groq(audio_filePath=audio_filePath, sst_model=sst_model, GROQ_API_KEY=GROQ_API_KEY))