# MedCodeMCP / app.py
import json
import logging
import sys

import gradio as gr
from llama_cpp import Llama

from services.embeddings import configure_embeddings
from services.indexing import create_symptom_index
from services.llm import build_llm
from utils.model_configuration_utils import select_best_model, ensure_model
from utils.voice_input_utils import update_live_transcription, format_response_for_user

logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)
logger = logging.getLogger(__name__)
# ========== Model setup ==========
MODEL_NAME, REPO_ID = select_best_model()
model_path = ensure_model()
print(f"Using model: {MODEL_NAME} from {REPO_ID}", flush=True)
print(f"Model path: {model_path}", flush=True)
# ========== LLM initialization ==========
print("\n<<< before build_llm: ", flush=True)
llm = build_llm(model_path)
print(">>> after build_llm", flush=True)
# ========== Embeddings & index setup ==========
print("\n<<< before configure_embeddings: ", flush=True)
configure_embeddings()
print(">>> after configure_embeddings", flush=True)
print("Embeddings configured and ready", flush=True)
print("\n<<< before create_symptom_index: ", flush=True)
symptom_index = create_symptom_index()
print(">>> after create_symptom_index", flush=True)
print("Symptom index built successfully. Ready for queries.", flush=True)
# ========== Prompt template ==========
SYSTEM_PROMPT = (
    "You are a medical assistant helping a user find the most relevant ICD-10 code based on their symptoms.\n"
    "At each turn, determine the top three most relevant ICD-10 codes based on the user's input.\n"
    "Assign each code you consider relevant a confidence score from 1 to 100.\n"
    "Ask the user questions whose answers will raise or lower your confidence score for each code.\n"
    "Replace low-confidence codes with new ones as you learn more.\n"
    "Your goal is to find the most relevant codes with high confidence.\n"
    "When you have high confidence in a code, provide it to the user.\n"
    "Maintain a conversational tone and explain your reasoning step by step.\n"
    "If you need more information, ask the user clarifying questions.\n"
    "End your response with a summary of the top codes and their confidence scores, "
    "and state any follow-up question clearly.\n"
)
def truncate_prompt(prompt, max_tokens=2048):
    # Whitespace fallback only: splitting on spaces merely approximates real
    # model tokens. Prefer truncate_prompt_llama below once the model is loaded.
    tokens = prompt.split()
    if len(tokens) > max_tokens:
        tokens = tokens[:max_tokens]
    return " ".join(tokens)
# Low-level llama_cpp handle, used for tokenize/detokenize and raw completions.
# Named `llama` so it does not shadow the `llm` configured by build_llm above.
llama = Llama(model_path=model_path)

def truncate_prompt_llama(prompt, max_tokens=2048):
    # Tokenize with the model's own tokenizer so the budget matches the
    # context window exactly.
    tokens = llama.tokenize(prompt.encode("utf-8"))
    if len(tokens) > max_tokens:
        # Truncate the token list and decode back to a string.
        tokens = tokens[:max_tokens]
        prompt = llama.detokenize(tokens).decode("utf-8", errors="ignore")
    return prompt
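# Example (hypothetical values): truncate_prompt_llama(long_prompt, max_tokens=4)
# keeps roughly the first four tokens' worth of text. Truncation drops the
# *end* of the prompt, which is why on_submit puts the system instructions
# first and the retrieved context last.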
# ========== Generator handler ==========
def on_submit(symptoms_text, history):
    log = []
    print("on_submit called", flush=True)

    # Acknowledge receipt immediately so the UI shows progress.
    msg = "πŸ” Received input"
    log.append(msg)
    print(msg, flush=True)
    history = (history or []) + [{"role": "assistant", "content": "Processing your request..."}]
    yield history, None, "\n".join(log)
    # Validate before doing any retrieval work.
    if not symptoms_text or not symptoms_text.strip():
        msg = "❌ No symptoms provided"
        log.append(msg)
        print(msg, flush=True)
        result = {"error": "No input provided", "diagnoses": [], "confidences": [], "follow_up": []}
        yield history, result, "\n".join(log)
        return
    # Clean input
    cleaned = symptoms_text.strip()
    msg = f"πŸ”„ Cleaned text: {cleaned}"
    log.append(msg)
    print(msg, flush=True)
    yield history, None, "\n".join(log)
    # Semantic query against the symptom index.
    msg = "πŸ” Running semantic query"
    log.append(msg)
    print(msg, flush=True)
    yield history, None, "\n".join(log)

    qe = symptom_index.as_query_engine(similarity_top_k=5)
    hits = qe.query(cleaned)
    msg = f"πŸ” Retrieved {len(getattr(hits, 'source_nodes', []))} context entries"
    log.append(msg)
    print(msg, flush=True)
    history = history + [{"role": "assistant", "content": msg}]
    yield history, None, "\n".join(log)
    # Build the prompt from a minimal slice of retrieved context.
    context_list = []
    for node in getattr(hits, "source_nodes", [])[:3]:
        md = getattr(node, "metadata", {}) or {}
        context_list.append(f"{md.get('code', '')}: {md.get('description', '')}")
    context_text = "\n".join(context_list)

    prompt = "\n".join([
        SYSTEM_PROMPT,
        f"User symptoms: '{cleaned}'",
        f"Relevant ICD-10 context:\n{context_text}",
    ])
    prompt = truncate_prompt_llama(prompt, max_tokens=2048)
    msg = "✏️ Prompt built"
    log.append(msg)
    print(msg, flush=True)
    yield history, None, "\n".join(log)
    # Call the local model. llama_cpp returns a completion dict; LlamaIndex
    # wrappers return objects with .text/.content, so handle both shapes.
    response = llama(prompt=prompt)
    raw = response
    if isinstance(raw, dict) and "choices" in raw:
        raw = raw["choices"][0].get("text", "")
    elif hasattr(raw, "text"):
        raw = raw.text
    elif hasattr(raw, "content"):
        raw = raw.content
    # Coerce to a dict, treating non-JSON output as a follow-up question.
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except Exception:
            raw = {"diagnoses": [], "confidences": [], "follow_up": raw}
    assistant_msg = format_response_for_user(raw)
    history = history + [{"role": "assistant", "content": assistant_msg}]
    msg = "βœ… Final response appended"
    log.append(msg)
    print(msg, flush=True)
    yield history, raw, "\n".join(log)
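# on_submit is a generator, so Gradio streams each yield to the UI as it
# arrives. Outside Gradio it can be driven the same way (illustrative only):
#   for history, result, log in on_submit("persistent dry cough", []):
#       print(log.splitlines()[-1])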
# ========== Gradio UI ==========
with gr.Blocks(theme="default") as demo:
    gr.Markdown("""
    # πŸ₯ Medical Symptom to ICD-10 Code Assistant
    ## Describe symptoms by typing or speaking.
    Debug log updates live below.
    """)
    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Type your symptoms",
                placeholder="I'm feeling under the weather...",
                lines=3
            )
            microphone = gr.Audio(
                sources=["microphone"],
                streaming=True,
                type="numpy",
                label="Or speak your symptoms..."
            )
            submit_btn = gr.Button("Submit", variant="primary")
            clear_btn = gr.Button("Clear Chat", variant="secondary")
            chatbot = gr.Chatbot(
                label="Medical Consultation",
                height=500,
                type="messages"
            )
            json_output = gr.JSON(label="Diagnosis JSON")
            debug_box = gr.Textbox(label="Debug log", lines=10)
        with gr.Column(scale=1):
            with gr.Accordion("API Keys (optional)", open=False):
                api_key = gr.Textbox(label="OpenAI Key", type="password")
            model_selector = gr.Dropdown(
                choices=["OpenAI", "Modal", "Anthropic", "MistralAI", "Nebius", "Hyperbolic", "SambaNova"],
                value="OpenAI",
                label="Model Provider"
            )
            temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
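            # Note: the provider, key, and temperature controls are not yet
            # wired into on_submit; the handler always uses the local model.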
    # Bindings
    submit_btn.click(
        fn=on_submit,
        inputs=[text_input, chatbot],
        outputs=[chatbot, json_output, debug_box],
        queue=True
    )
    clear_btn.click(
        lambda: (None, {}, ""),
        None,
        [chatbot, json_output, debug_box],
        queue=False
    )
    microphone.stream(
        fn=update_live_transcription,
        inputs=[microphone],
        outputs=[text_input],
        queue=True
    )
    # --- About the Creator ---
    gr.Markdown("""
    ---
    ### πŸ‘‹ About the Creator

    Hi! I'm Graham Paasch, an experienced technology professional!

    πŸŽ₯ **Check out my YouTube channel** for more tech content:
    [Subscribe to my channel](https://www.youtube.com/channel/UCg3oUjrSYcqsL9rGk1g_lPQ)

    πŸ’Ό **Looking for a skilled developer?**
    I'm currently seeking new opportunities! View my experience and connect on [LinkedIn](https://www.linkedin.com/in/grahampaasch/)

    ⭐ If you found this tool helpful, please consider:
    - Subscribing to my YouTube channel
    - Connecting on LinkedIn
    - Sharing this tool with others in healthcare tech
    """)
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True, show_api=True, mcp_server=True)