hponepyae committed
Commit 4334aa5 · verified · 1 Parent(s): 9f24600

Update app.py

Files changed (1):
  app.py +29 -24
app.py CHANGED
@@ -30,8 +30,8 @@ except Exception as e:
 @spaces.GPU()
 def analyze_symptoms(symptom_image: Image.Image, symptoms_text: str):
     """
-    Analyzes symptoms by MANUALLY constructing the prompt string to ensure all special
-    tokens are correctly placed, bypassing the faulty chat template abstraction.
+    Analyzes symptoms using the definitive two-step templating and processing method
+    required by modern multimodal chat models.
     """
     if not model_loaded:
         return "Error: The AI model could not be loaded. Please check the Space logs."
@@ -41,51 +41,56 @@ def analyze_symptoms(symptom_image: Image.Image, symptoms_text: str):
         return "Please describe your symptoms or upload an image for analysis."
 
     try:
-        # --- DEFINITIVE MANUAL PROMPT CONSTRUCTION ---
-
+        # --- STEP 1: Build the structured messages list ---
         system_instruction = (
             "You are an expert, empathetic AI medical assistant. "
             "Analyze the potential medical condition based on the following information. "
             "Provide a list of possible conditions, your reasoning, and a clear, actionable next-steps plan."
         )
 
-        # 1. Manually build the prompt string as a list of parts.
-        prompt_parts = ["<start_of_turn>user"]
-
-        # 2. CRUCIAL: Add the <image> placeholder *only* if an image exists.
+        # The 'content' for a user's turn is a LIST of dictionaries.
+        user_content_list = []
         if symptom_image:
-            prompt_parts.append("<image>")
+            # Add a placeholder dictionary for the image.
+            user_content_list.append({"type": "image"})
 
-        # 3. Add all text content.
-        prompt_parts.append(f"{symptoms_text}\n\n{system_instruction}")
-
-        # 4. Signal the start of the model's turn.
-        prompt_parts.append("<start_of_turn>model")
-
-        # 5. Join all parts into a single string. This is our final prompt.
-        prompt = "\n".join(prompt_parts)
+        # Add the dictionary for the text.
+        text_content = f"{symptoms_text}\n\n{system_instruction}"
+        user_content_list.append({"type": "text", "text": text_content})
+
+        messages = [
+            {"role": "user", "content": user_content_list}
+        ]
+
+        # --- STEP 2: Generate the prompt string using the official template ---
+        # This will correctly create a string with all special tokens, including <image>.
+        prompt = processor.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
 
-        # 6. Use the processor with our manually built prompt. It will now find the <image>
-        #    token and correctly process the associated image object.
+        # --- STEP 3: Process the prompt string and image together ---
+        # This is where the prompt's <image> token is linked to the actual image data.
         inputs = processor(
             text=prompt,
-            images=symptom_image, # This will be None for text-only, which is now handled correctly.
+            images=symptom_image, # This can be None for text-only cases
             return_tensors="pt"
         ).to(model.device)
 
-        # 7. Generation parameters
+        # Generation parameters
         generate_kwargs = {
            "max_new_tokens": 512,
            "do_sample": True,
            "temperature": 0.7,
         }
 
-        print("Generating model output with manually constructed prompt...")
+        print("Generating model output with the definitive two-step process...")
 
-        # 8. Generate the response
+        # Generate the response
         generate_ids = model.generate(**inputs, **generate_kwargs)
 
-        # 9. Decode only the newly generated tokens. This logic is correct.
+        # Decode only the newly generated tokens
        input_token_len = inputs["input_ids"].shape[-1]
        result = processor.batch_decode(generate_ids[:, input_token_len:], skip_special_tokens=True)[0]
 
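
For reference, the prompt string that the removed code assembled by hand (its parts joined with newlines, the <image> line present only when an image was uploaded) looked roughly like this:

<start_of_turn>user
<image>
{symptoms_text}

{system_instruction}
<start_of_turn>model

Note that the manual version never emitted an <end_of_turn> token to close the user turn, which Gemma-style chat templates normally include; drift like that is exactly what delegating to apply_chat_template avoids.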
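
Below is a minimal, self-contained sketch of the same two-step flow outside the Space. The checkpoint name, the Auto* classes, and the sample symptom text are placeholder assumptions for illustration only; the diff does not show which model or processor classes the Space actually loads.

import torch
from transformers import AutoModelForImageTextToText, AutoProcessor

MODEL_ID = "google/gemma-3-4b-it"  # hypothetical checkpoint; not taken from this commit

processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForImageTextToText.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)

def analyze(symptom_image, symptoms_text):
    # STEP 1: a user turn's 'content' is a list of typed parts.
    content = []
    if symptom_image is not None:  # symptom_image is a PIL.Image or None
        content.append({"type": "image"})
    content.append({"type": "text", "text": symptoms_text})
    messages = [{"role": "user", "content": content}]

    # STEP 2: render the prompt with the official chat template so the
    # turn markers and image placeholder land exactly where the model expects.
    prompt = processor.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # STEP 3: tokenize the text and preprocess the pixels together; this is
    # where the template's image token gets bound to the actual image data.
    inputs = processor(
        text=prompt, images=symptom_image, return_tensors="pt"
    ).to(model.device)

    generate_ids = model.generate(
        **inputs, max_new_tokens=512, do_sample=True, temperature=0.7
    )

    # Decode only the newly generated tokens, as the commit does.
    new_tokens = generate_ids[:, inputs["input_ids"].shape[-1]:]
    return processor.batch_decode(new_tokens, skip_special_tokens=True)[0]

# Text-only usage: with images=None the template emits no image token and
# the processor skips pixel preprocessing.
# print(analyze(None, "Persistent dry cough for two weeks, no fever."))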