DanishICUP committed
Commit 17a5b4a (verified) · Parent: cd2c5e9

Upload 5 files
app.py ADDED
@@ -0,0 +1,86 @@
+ from dotenv import load_dotenv
+ import os
+ import gradio as gr
+ from brain_of_doctor import load_image, analyze_image_with_query
+ from voice_of_the_patient import record_audio, transcription_with_groq
+ from voice_of_the_doctor import (
+     text_to_speech_with_gtts_new_autoplay,
+     text_to_speech_with_elevenlabs_new_autoplay
+ )
+ from deep_translator import GoogleTranslator
+ from gtts import gTTS
+
+ load_dotenv()
+
+ system_prompt = """You have to act as a professional doctor. I know you are not one, but this is for learning purposes.
+ What is in this image? Do you find anything medically wrong with it?
+ If you make a differential, suggest some remedies for it. Do not add any numbers or special characters in
+ your response. Your response should be one long paragraph, and always answer as if you are speaking to a real person.
+ Do not say 'In the image I see' but say 'With what I see, I think you have ...'.
+ Do not respond as an AI model or in markdown; your answer should mimic an actual doctor, not an AI bot.
+ Keep your answer concise (max 2 sentences). No preamble, start your answer right away please"""
+
+ # ✅ Helper for Urdu TTS (using gTTS)
+ def tts_with_language_support(text, output_file="final.wav", lang="en"):
+     # Note: gTTS always emits MP3 data, regardless of the file extension
+     try:
+         tts = gTTS(text=text, lang=lang)
+         tts.save(output_file)
+         return output_file
+     except Exception as e:
+         print("TTS generation failed:", e)
+         return None
+
+
+ def process_inputs(audio_filePath, image_filePath, language):
+     # Speech → text
+     speech_to_text_output = transcription_with_groq(
+         GROQ_API_KEY=os.getenv("GROQ_API_KEY"),
+         audio_filePath=audio_filePath,
+         sst_model="whisper-large-v3"
+     )
+
+     # Image analysis
+     if image_filePath:
+         doctor_response_en = analyze_image_with_query(
+             query=system_prompt + speech_to_text_output,
+             encoded_image=load_image(image_filePath),
+             model="meta-llama/llama-4-scout-17b-16e-instruct"
+         )
+     else:
+         doctor_response_en = "No image provided for me to analyze"
+
+     # Translation + voice selection
+     if language == "Urdu":
+         # Translate to Urdu
+         doctor_response = GoogleTranslator(source='en', target='ur').translate(doctor_response_en)
+         # Generate Urdu voice with gTTS
+         output_audio = tts_with_language_support(text=doctor_response, output_file="final.wav", lang="ur")
+     else:
+         # English voice via the gTTS autoplay helper, which returns the wav path
+         doctor_response = doctor_response_en
+         output_audio = text_to_speech_with_gtts_new_autoplay(
+             input_text=doctor_response,
+             output_file="final.wav"
+         )
+
+     return speech_to_text_output, doctor_response, output_audio
+
+
+ # ✅ Gradio Interface
+ iface = gr.Interface(
+     fn=process_inputs,
+     inputs=[
+         gr.Audio(sources=["microphone"], type="filepath", label="Patient's Voice"),
+         gr.Image(type="filepath", label="Upload Medical Image"),
+         gr.Radio(choices=["English", "Urdu"], value="English", label="Select Language")
+     ],
+     outputs=[
+         gr.Textbox(label="Speech to Text"),
+         gr.Textbox(label="Doctor's Response"),
+         gr.Audio(label="Doctor's Voice (Auto Play)", autoplay=True, type="filepath")
+     ],
+     title="AI Doctor — Developed by Danish Khan"
+ )
+
+ iface.launch(debug=True)
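
Both the Groq and ElevenLabs clients above resolve their credentials through load_dotenv(), so running this app locally (or as a Space) needs a .env file alongside app.py. A minimal sketch with placeholder values; the key names are exactly the ones read via os.getenv / os.environ.get in these files:

GROQ_API_KEY=your_groq_api_key
ELEVENLABS_API_KEY=your_elevenlabs_api_key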
brain_of_doctor.py ADDED
@@ -0,0 +1,45 @@
+ from dotenv import load_dotenv
+ import base64
+ from groq import Groq
+
+
+ load_dotenv()
+
+
+ def load_image(image_path):
+     # Read the image from disk and return it as a base64-encoded string
+     with open(image_path, "rb") as image_file:
+         return base64.b64encode(image_file.read()).decode("utf-8")
+
+ query = "Describe the condition of the face briefly"
+ model = "meta-llama/llama-4-scout-17b-16e-instruct"
+
+
+ def analyze_image_with_query(model, query, encoded_image):
+     client = Groq()
+     # One multimodal user message: the text query plus the image as a data URL
+     messages = [
+         {
+             "role": "user",
+             "content": [
+                 {
+                     "type": "text",
+                     "text": query
+                 },
+                 {
+                     "type": "image_url",
+                     "image_url": {
+                         "url": f"data:image/jpeg;base64,{encoded_image}"
+                     },
+                 },
+             ],
+         }
+     ]
+     chat_completion = client.chat.completions.create(
+         model=model,
+         messages=messages,
+     )
+     return chat_completion.choices[0].message.content
+
+
+ # Guarded so the test call does not run when app.py imports this module
+ if __name__ == "__main__":
+     print(analyze_image_with_query(model, query, load_image("acne.jpg")))
requirements.txt ADDED
File without changes
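
Since requirements.txt was uploaded empty, here is a plausible starting point inferred from the imports across the other four files. The PyPI names are my assumption from the import names; note that pydub also needs ffmpeg installed on the system, and sr.Microphone needs PyAudio:

gradio
groq
gTTS
python-dotenv
deep-translator
elevenlabs
pydub
SpeechRecognition
PyAudio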
voice_of_the_doctor.py ADDED
@@ -0,0 +1,115 @@
+ import os
+ import subprocess
+ import platform
+ import elevenlabs
+ from elevenlabs.client import ElevenLabs
+ from gtts import gTTS
+ from pydub import AudioSegment
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
+
+
+ def text_to_speech_with_gtts_old_no_autoplay_functionality(input_text, output_file):
+     language = 'en'
+
+     gttsObj = gTTS(
+         text=input_text,
+         lang=language,
+         slow=False
+     )
+     gttsObj.save(output_file)
+
+ text = "hello this is me danish with ai voice"
+ # text_to_speech_with_gtts_old_no_autoplay_functionality(input_text=text, output_file="output_gtts.mp3")
+
+
+ def text_to_speech_with_elevenlabs_old_no_autoplay_functionality(input_text, output_file):
+     client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
+
+     audio = client.generate(
+         text=input_text,
+         voice="Aria",
+         model="eleven_turbo_v2",
+         output_format="mp3_22050_32"
+     )
+     elevenlabs.save(audio, output_file)
+
+ # text_to_speech_with_elevenlabs_old_no_autoplay_functionality(input_text=text, output_file="output_elevenlabs.mp3")
+
+
+ def text_to_speech_with_gtts_new_autoplay(input_text, output_file):
+     language = 'en'
+
+     # create the gTTS object
+     gttsObj = gTTS(
+         text=input_text,
+         lang=language,
+         slow=False
+     )
+     # save the audio file
+     gttsObj.save(output_file)
+
+     # conversion from mp3 to wav
+     wav_file = output_file.replace('.mp3', '.wav')
+     AudioSegment.from_mp3(output_file).export(wav_file, format="wav")
+     # Optionally remove the original mp3 file
+     # os.remove(output_file)
+     # Play the wav file based on the operating system
+     os_name = platform.system()
+
+     try:
+         if os_name == "Darwin":
+             subprocess.run(['afplay', wav_file])
+         elif os_name == "Windows":
+             subprocess.run(['powershell', '-c', f'(New-Object Media.SoundPlayer "{wav_file}").PlaySync();'])
+         elif os_name == "Linux":
+             subprocess.run(['aplay', wav_file])
+         else:
+             raise OSError("Unsupported operating system")
+     except Exception as e:
+         print(f"An error occurred while trying to play the audio: {e}")
+
+     # Return the wav path so callers such as app.py can hand it to Gradio
+     return wav_file
+
+ # text_to_speech_with_gtts_new_autoplay(input_text=text, output_file="gtts_testing_autoplay.mp3")
+
+
+ def text_to_speech_with_elevenlabs_new_autoplay(input_text, output_file):
+     client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
+
+     audio = client.generate(
+         text=input_text,
+         voice="Aria",
+         model="eleven_turbo_v2",
+         output_format="mp3_22050_32"
+     )
+     elevenlabs.save(audio, output_file)
+
+     wav_file = output_file.replace(".mp3", ".wav")
+     AudioSegment.from_mp3(output_file).export(wav_file, format="wav")
+
+     # os.remove(output_file)
+
+     os_name = platform.system()
+     try:
+         if os_name == "Darwin":
+             subprocess.run(['afplay', wav_file])
+         elif os_name == "Windows":
+             subprocess.run(['powershell', '-c', f'(New-Object Media.SoundPlayer "{wav_file}").PlaySync();'])
+         elif os_name == "Linux":
+             subprocess.run(['aplay', wav_file])
+         else:
+             raise OSError("Unsupported operating system")
+     except Exception as e:
+         print(f"An error occurred while trying to play the audio: {e}")
+
+     return wav_file
+
+
+ # Guarded so this test call does not fire when app.py imports the module
+ if __name__ == "__main__":
+     text_to_speech_with_elevenlabs_new_autoplay(input_text=text, output_file="output_elevenlabs_autoplay.mp3")
voice_of_the_patient.py ADDED
@@ -0,0 +1,59 @@
+ import os
+ import logging
+ import speech_recognition as sr
+ from pydub import AudioSegment
+ from io import BytesIO
+ from groq import Groq
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+
+ def record_audio(file_path, duration=20, phrase_time_limit=None):
+     recognizer = sr.Recognizer()
+
+     try:
+         with sr.Microphone() as source:
+             logging.info("Adjusting for ambient noise...")
+             recognizer.adjust_for_ambient_noise(source)
+             logging.info("Recording audio...")
+
+             # Record the audio (duration acts as the wait-for-speech timeout)
+             audio_data = recognizer.listen(source, timeout=duration, phrase_time_limit=phrase_time_limit)
+             logging.info("Recording complete.")
+
+             # Convert the recorded audio to an MP3 file
+             audio_waves = audio_data.get_wav_data()
+             audio_segments = AudioSegment.from_wav(BytesIO(audio_waves))
+             audio_segments.export(file_path, format="mp3", bitrate="128k")
+             logging.info(f"Audio saved to {file_path}.")
+
+     except Exception as e:
+         logging.error(f"An error occurred while recording audio: {e}")
+
+
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+ sst_model = "whisper-large-v3"
+
+
+ def transcription_with_groq(audio_filePath, sst_model, GROQ_API_KEY):
+     try:
+         groq_client = Groq(api_key=GROQ_API_KEY)
+         with open(audio_filePath, "rb") as audio_file:
+             transcription = groq_client.audio.transcriptions.create(
+                 model=sst_model,
+                 file=audio_file,
+                 language="en"
+             )
+         logging.info("Transcription complete.")
+         return transcription.text
+     except Exception as e:
+         logging.error(f"An error occurred during transcription: {e}")
+
+
+ # Record and transcribe only when run as a script; app.py imports these functions
+ if __name__ == "__main__":
+     audio_filePath = "patient_voice.mp3"
+     record_audio(file_path=audio_filePath)
+     print(transcription_with_groq(audio_filePath=audio_filePath, sst_model=sst_model, GROQ_API_KEY=GROQ_API_KEY))