# ai-doctor / app.py
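"""AI Doctor: a Gradio app that transcribes the patient's voice with Groq's Whisper,
analyzes an uploaded medical image with a Llama vision model, and speaks the
doctor's reply in English or Urdu (translated and voiced with gTTS)."""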
from dotenv import load_dotenv
import os
import gradio as gr
from brain_of_doctor import load_image, analyze_image_with_query
from voice_of_the_patient import record_audio, transcription_with_groq
from voice_of_the_doctor import (
text_to_speech_with_gtts_new_autoplay,
)
from deep_translator import GoogleTranslator
from gtts import gTTS
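# Load API keys (e.g. GROQ_API_KEY) from a local .env file into the environment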
load_dotenv()
system_prompt = """You have to act as a professional doctor. I know you are not, but this is for learning purposes.
What's in this image? Do you find anything wrong with it medically?
If you make a differential, suggest some remedies for them. Do not add any numbers or special characters in
your response. Your response should be in one long paragraph. Also, always answer as if you are answering a real person.
Do not say 'In the image I see' but say 'With what I see, I think you have ...'.
Do not respond as an AI model in markdown; your answer should mimic that of an actual doctor, not an AI bot.
Keep your answer concise (max 2 sentences). No preamble, start your answer right away please."""
# ✅ Helper for Urdu TTS (using gTTS)
def tts_with_language_support(text, output_file="final.wav", lang="en"):
    try:
        # Note: gTTS always writes MP3-encoded audio, regardless of the file extension
        tts = gTTS(text=text, lang=lang)
        tts.save(output_file)
        return output_file
    except Exception as e:
        print("TTS generation failed:", e)
        return None
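# End-to-end pipeline: transcribe the patient's audio, run the vision model over the
# uploaded image, then translate and voice the doctor's reply in the selected language.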
def process_inputs(audio_filePath, image_filePath, language):
    # Guard: Gradio passes None for the microphone input if nothing was recorded
    if not audio_filePath:
        return "No audio recorded.", "Please record your question first.", None

    # Speech → text
    speech_to_text_output = transcription_with_groq(
        GROQ_API_KEY=os.getenv("GROQ_API_KEY"),
        audio_filePath=audio_filePath,
        sst_model="whisper-large-v3"
    )

    # Image analysis
    if image_filePath:
        doctor_response_en = analyze_image_with_query(
            query=system_prompt + speech_to_text_output,
            encoded_image=load_image(image_filePath),
            model="meta-llama/llama-4-scout-17b-16e-instruct"
        )
    else:
        doctor_response_en = "No image provided for me to analyze"
    # Translation + voice selection
    if language == "Urdu":
        # Translate the English response to Urdu
        doctor_response = GoogleTranslator(source='en', target='ur').translate(doctor_response_en)
        # Generate the Urdu voice with gTTS
        output_audio = tts_with_language_support(text=doctor_response, output_file="final.wav", lang="ur")
    else:
        # English voice via the gTTS helper in voice_of_the_doctor
        doctor_response = doctor_response_en
        text_to_speech_with_gtts_new_autoplay(
            input_text=doctor_response,
            output_file="final.wav"
        )
        # The helper writes the audio to disk, so point Gradio at that file directly
        output_audio = "final.wav"

    return speech_to_text_output, doctor_response, output_audio
# ✅ Gradio Interface
iface = gr.Interface(
fn=process_inputs,
inputs=[
gr.Audio(sources=["microphone"], type="filepath", label="Patient's Voice"),
gr.Image(type="filepath", label="Upload Medical Image"),
gr.Radio(choices=["English", "Urdu"], value="English", label="Select Language")
],
outputs=[
gr.Textbox(label="Speech to Text"),
gr.Textbox(label="Doctor's Response"),
gr.Audio(label="Doctor's Voice (Auto Play)", autoplay=True, type="filepath")
],
title="AI Doctor β€” Developed by Danish Khan"
)
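# debug=True blocks the main thread and surfaces errors in the console while developing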
iface.launch(debug=True)