hponepyae committed (verified)
Commit a91bbfc · 1 Parent(s): 1fa102b

Update app.py

Files changed (1)
  1. app.py +58 -44
app.py CHANGED
@@ -5,11 +5,12 @@ import torch
 import os
 import spaces
 
-# --- Initialize the Model Pipeline (No changes here) ---
+# --- Initialize the Model Pipeline (No changes) ---
 print("Loading MedGemma model...")
 try:
+    # Using "image-to-text" is more robust for modern multimodal chat models.
     pipe = pipeline(
-        "image-text-to-text",
+        "image-to-text",
         model="google/medgemma-4b-it",
         torch_dtype=torch.bfloat16,
         device_map="auto",
@@ -21,11 +22,12 @@ except Exception as e:
     model_loaded = False
     print(f"Error loading model: {e}")
 
-# --- Core Analysis Function (Corrected) ---
+# --- Core Analysis Function (Final Corrected Version) ---
 @spaces.GPU()
-def analyze_symptoms(symptom_image, symptoms_text):
+def analyze_symptoms(symptom_image: Image.Image, symptoms_text: str):
     """
-    Analyzes user's symptoms using the correct prompt format and keyword arguments for MedGemma.
+    Analyzes user's symptoms using the officially recommended chat format
+    for the MedGemma multimodal model.
     """
     if not model_loaded:
         return "Error: The AI model could not be loaded. Please check the Space logs."
@@ -35,51 +37,62 @@ def analyze_symptoms(symptom_image, symptoms_text):
         return "Please describe your symptoms or upload an image for analysis."
 
     try:
-        # --- PROMPT LOGIC (Unchanged) ---
-        instruction = (
+        # --- DEFINITIVE CHAT-BASED PROMPT LOGIC ---
+
+        # 1. System Prompt: This sets the AI's persona and overall goal.
+        system_instruction = (
             "You are an expert, empathetic AI medical assistant. "
-            "Analyze the potential medical condition based on the following information. "
+            "Analyze the potential medical condition based on the user's input. "
             "Provide a list of possible conditions, your reasoning, and a clear, "
-            "actionable next-steps plan. Start your analysis by describing the user-provided "
-            "information (text and/or image)."
+            "actionable next-steps plan. Begin your analysis by describing the information "
+            "the user provided."
         )
-        prompt_parts = ["<start_of_turn>user"]
-        if symptoms_text:
-            prompt_parts.append(symptoms_text)
+
+        # 2. User Content: This must be a list of dictionaries for multimodal input.
+        user_content = []
+
+        # The model requires some form of text. If the user provides none,
+        # we add a generic prompt to accompany the image.
+        text_to_send = symptoms_text if symptoms_text else "Please analyze this medical image."
+        user_content.append({"type": "text", "text": text_to_send})
+
+        # Add the image part if it exists.
         if symptom_image:
-            prompt_parts.append("<image>")
-        prompt_parts.append(instruction)
-        prompt_parts.append("<start_of_turn>model")
-        prompt = "\n".join(prompt_parts)
-
-        print("Generating pipeline output...")
-
-        # --- CORRECTED & ROBUST PIPELINE CALL ---
-        # We build a dictionary of all arguments to pass to the pipeline.
-        # This avoids the TypeError by ensuring all arguments are passed explicitly by keyword.
+            user_content.append({"type": "image", "image": symptom_image})
+
+        # 3. Construct the full message list for the pipeline
+        messages = [
+            {"role": "system", "content": system_instruction},
+            {"role": "user", "content": user_content},
+        ]
+
+        print("Generating pipeline output with chat format...")
 
-        pipeline_args = {
-            "prompt": prompt,
-            "max_new_tokens": 512,
-            "do_sample": True,
-            "temperature": 0.7
-        }
-
-        # The `images` argument should be a list of PIL Images.
-        # We only add it to our arguments dictionary if an image is provided.
-        if symptom_image:
-            pipeline_args["images"] = [symptom_image]
-
-        # We use the ** syntax to unpack the dictionary into keyword arguments.
-        # This results in a call like: pipe(prompt=..., images=..., max_new_tokens=...)
-        output = pipe(**pipeline_args)
-
+        # --- CORRECTED PIPELINE CALL ---
+        # Pass the `messages` list directly. The pipeline's processor, which knows
+        # the model's chat template, will format it correctly.
+        output = pipe(
+            messages,
+            max_new_tokens=512,
+            do_sample=True,
+            temperature=0.7
+        )
+
         print("Pipeline Output:", output)
 
-        # --- SIMPLIFIED OUTPUT PROCESSING (Unchanged) ---
-        if output and isinstance(output, list) and 'generated_text' in output[0]:
-            full_text = output[0]['generated_text']
-            result = full_text.split("<start_of_turn>model\n")[-1]
+        # --- ROBUST OUTPUT PROCESSING ---
+        # The output from a chat-templated pipeline call is a list containing the full
+        # conversation history, including the newly generated assistant message.
+        if output and isinstance(output, list) and output[0].get('generated_text'):
+            # The generated_text contains the full conversation history
+            full_conversation = output[0]['generated_text']
+            # The last message in the list is the AI's response.
+            assistant_message = full_conversation[-1]
+            if assistant_message['role'] == 'assistant':
+                result = assistant_message['content']
+            else:
+                # Fallback in case the last message isn't from the assistant
+                result = str(assistant_message)
         else:
             result = "The model did not return a valid response. Please try again."
 
@@ -91,7 +104,8 @@ def analyze_symptoms(symptom_image, symptoms_text):
         print(f"An exception occurred during analysis: {type(e).__name__}: {e}")
         return f"An error occurred during analysis. Please check the logs for details: {str(e)}"
 
-# --- Gradio Interface (No changes needed) ---
+
+# --- Create the Gradio Interface (No changes needed) ---
 with gr.Blocks(theme=gr.themes.Soft(), title="AI Symptom Analyzer") as demo:
     gr.HTML("""
         <div style="text-align: center; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 2rem; border-radius: 10px; margin-bottom: 2rem;">
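The core of this commit is the switch from a hand-built <start_of_turn> prompt string to a chat-message payload and conversation-style output parsing. The minimal, standalone sketch below mirrors that logic without loading any model: build_messages, extract_reply, and mock_output are hypothetical names introduced here for illustration, and the mocked output only imitates the shape the new code assumes the pipeline returns; the real transformers pipeline output may differ.

# Standalone sketch of the chat payload and output parsing used in this commit.
# No model is loaded; mock_output only mimics the structure the new code expects.

def build_messages(symptoms_text, symptom_image=None):
    """Assemble the system/user chat messages that the pipeline call receives."""
    user_content = [{"type": "text",
                     "text": symptoms_text or "Please analyze this medical image."}]
    if symptom_image is not None:
        user_content.append({"type": "image", "image": symptom_image})
    return [
        {"role": "system", "content": "You are an expert, empathetic AI medical assistant."},
        {"role": "user", "content": user_content},
    ]

def extract_reply(output):
    """Pull the assistant's text out of a conversation-style generated_text list."""
    if output and isinstance(output, list) and output[0].get("generated_text"):
        last = output[0]["generated_text"][-1]
        if last.get("role") == "assistant":
            return last.get("content")
        return str(last)
    return "The model did not return a valid response. Please try again."

if __name__ == "__main__":
    messages = build_messages("Persistent cough and mild fever for three days.")
    # Mocked pipeline result: the prior conversation plus one assistant message.
    mock_output = [{"generated_text": messages + [
        {"role": "assistant", "content": "Possible conditions include ..."}]}]
    print(extract_reply(mock_output))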