Spaces:
Running
Running
| from dotenv import load_dotenv | |
| import os | |
| import gradio as gr | |
| from brain_of_doctor import load_image, analyze_image_with_query | |
| from voice_of_the_patient import record_audio, transcription_with_groq | |
| from voice_of_the_doctor import ( | |
| text_to_speech_with_gtts_new_autoplay, | |
| ) | |
| from deep_translator import GoogleTranslator | |
| from gtts import gTTS | |
# Load variables from a local .env file into the process environment;
# process_inputs later reads GROQ_API_KEY from it via os.getenv.
| load_dotenv() | |
# Instruction text that process_inputs prepends to the patient's transcribed
# speech before sending the combined query to the image-analysis model.
# It asks for a short, plain-text, doctor-style answer (no markdown, no numbers).
| system_prompt = """You have to act as a professional doctor, i know you are not but this is for learning purpose. | |
| What's in this image?. Do you find anything wrong with it medically? | |
| If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in | |
| your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person. | |
| Donot say 'In the image I see' but say 'With what I see, I think you have ....' | |
| Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot, | |
| Keep your answer concise (max 2 sentences). No preamble, start your answer right away please""" | |
# Helper for Urdu TTS (using gTTS)
def tts_with_language_support(text, output_file="final.wav", lang="en"):
    """Synthesize *text* with gTTS and write the audio to *output_file*.

    Args:
        text: The text to speak.
        output_file: Destination path for the generated audio.
        lang: Language code handed to gTTS (for example "en" or "ur").

    Returns:
        *output_file* when synthesis and saving succeed, otherwise ``None``
        (the failure reason is printed, not raised).

    NOTE(review): gTTS is documented as producing MP3 data, so the default
    ".wav" name may not match the actual content -- confirm.
    """
    try:
        gTTS(text=text, lang=lang).save(output_file)
    except Exception as err:
        # Best-effort: report the problem and signal failure with None.
        print("TTS generation failed:", err)
        return None
    return output_file
def process_inputs(audio_filePath, image_filePath, language):
    """Handle one consultation: transcribe, analyze the image, voice the reply.

    Args:
        audio_filePath: Path to the patient's recording; may be empty/None
            when nothing was recorded.
        image_filePath: Path to the uploaded image; may be empty/None when no
            image was provided.
        language: "Urdu" translates the reply to Urdu and speaks it in Urdu;
            any other value keeps the English reply and voice.

    Returns:
        A tuple ``(speech_to_text_output, doctor_response, output_audio)``
        matching the three Gradio outputs (transcript textbox, reply textbox,
        audio player). ``output_audio`` is ``None`` when Urdu speech
        generation failed, so no stale audio file is served.
    """
    # NOTE(review): a single fixed file name is shared by every request;
    # concurrent users would overwrite each other's audio -- confirm whether
    # that matters for this deployment.
    output_file = "final.wav"

    # Speech -> text. Without a recording there is nothing to transcribe, so
    # use an empty transcript instead of passing None to the transcription
    # helper.
    if audio_filePath:
        speech_to_text_output = transcription_with_groq(
            GROQ_API_KEY=os.getenv("GROQ_API_KEY"),
            audio_filePath=audio_filePath,
            sst_model="whisper-large-v3",
        )
    else:
        speech_to_text_output = ""

    # Image analysis
    if image_filePath:
        # Separate the instructions from the patient's words; plain
        # concatenation glued the transcript onto the prompt's last word.
        query = "\n".join(
            part for part in (system_prompt, speech_to_text_output) if part
        )
        doctor_response_en = analyze_image_with_query(
            query=query,
            encoded_image=load_image(image_filePath),
            model="meta-llama/llama-4-scout-17b-16e-instruct",
        )
    else:
        doctor_response_en = "No image provided for me to analyze"

    # Translation + voice selection
    if language == "Urdu":
        # Translate the English reply to Urdu, then speak it with gTTS.
        doctor_response = GoogleTranslator(source="en", target="ur").translate(
            doctor_response_en
        )
        # Keep the helper's result: it returns None on failure, which must
        # not be replaced by the path of a possibly stale file.
        output_audio = tts_with_language_support(
            text=doctor_response, output_file=output_file, lang="ur"
        )
    else:
        # English voice via the gTTS-based helper.
        doctor_response = doctor_response_en
        text_to_speech_with_gtts_new_autoplay(
            input_text=doctor_response,
            output_file=output_file,
        )
        # The helper's return value is not relied upon; the audio is expected
        # at the path it was asked to write.
        output_audio = output_file

    return speech_to_text_output, doctor_response, output_audio
# Gradio Interface
# Input widgets, listed in the positional order process_inputs expects:
# (audio_filePath, image_filePath, language).
input_components = [
    gr.Audio(sources=["microphone"], type="filepath", label="Patient's Voice"),
    gr.Image(type="filepath", label="Upload Medical Image"),
    gr.Radio(choices=["English", "Urdu"], value="English", label="Select Language"),
]

# Output widgets, matching the tuple returned by process_inputs:
# (transcript, doctor's reply, path to the spoken reply).
output_components = [
    gr.Textbox(label="Speech to Text"),
    gr.Textbox(label="Doctor's Response"),
    gr.Audio(label="Doctor's Voice (Auto Play)", autoplay=True, type="filepath"),
]

# NOTE(review): the "β" in the title looks like a mis-encoded character --
# confirm the intended symbol before changing it.
iface = gr.Interface(
    fn=process_inputs,
    inputs=input_components,
    outputs=output_components,
    title="AI Doctor β Developed by Danish Khan",
)

# Start the web app as soon as this module is executed.
iface.launch(debug=True)