hponepyae committed
Commit 4334aa5 · verified · 1 Parent(s): 9f24600

Update app.py

Files changed (1):
  app.py +29 -24
app.py CHANGED
@@ -30,8 +30,8 @@ except Exception as e:
 @spaces.GPU()
 def analyze_symptoms(symptom_image: Image.Image, symptoms_text: str):
     """
-    Analyzes symptoms by MANUALLY constructing the prompt string to ensure all special
-    tokens are correctly placed, bypassing the faulty chat template abstraction.
+    Analyzes symptoms using the definitive two-step templating and processing method
+    required by modern multimodal chat models.
     """
     if not model_loaded:
         return "Error: The AI model could not be loaded. Please check the Space logs."
@@ -41,51 +41,56 @@ def analyze_symptoms(symptom_image: Image.Image, symptoms_text: str):
         return "Please describe your symptoms or upload an image for analysis."
 
     try:
-        # --- DEFINITIVE MANUAL PROMPT CONSTRUCTION ---
-
+        # --- STEP 1: Build the structured messages list ---
         system_instruction = (
             "You are an expert, empathetic AI medical assistant. "
             "Analyze the potential medical condition based on the following information. "
             "Provide a list of possible conditions, your reasoning, and a clear, actionable next-steps plan."
         )
 
-        # 1. Manually build the prompt string as a list of parts.
-        prompt_parts = ["<start_of_turn>user"]
-
-        # 2. CRUCIAL: Add the <image> placeholder *only* if an image exists.
+        # The 'content' for a user's turn is a LIST of dictionaries.
+        user_content_list = []
         if symptom_image:
-            prompt_parts.append("<image>")
+            # Add a placeholder dictionary for the image.
+            user_content_list.append({"type": "image"})
 
-        # 3. Add all text content.
-        prompt_parts.append(f"{symptoms_text}\n\n{system_instruction}")
-
-        # 4. Signal the start of the model's turn.
-        prompt_parts.append("<start_of_turn>model")
-
-        # 5. Join all parts into a single string. This is our final prompt.
-        prompt = "\n".join(prompt_parts)
+        # Add the dictionary for the text.
+        text_content = f"{symptoms_text}\n\n{system_instruction}"
+        user_content_list.append({"type": "text", "text": text_content})
+
+        messages = [
+            {"role": "user", "content": user_content_list}
+        ]
+
+        # --- STEP 2: Generate the prompt string using the official template ---
+        # This will correctly create a string with all special tokens, including <image>.
+        prompt = processor.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
 
-        # 6. Use the processor with our manually built prompt. It will now find the <image>
-        #    token and correctly process the associated image object.
+        # --- STEP 3: Process the prompt string and image together ---
+        # This is where the prompt's <image> token is linked to the actual image data.
         inputs = processor(
             text=prompt,
-            images=symptom_image, # This will be None for text-only, which is now handled correctly.
+            images=symptom_image, # This can be None for text-only cases
             return_tensors="pt"
         ).to(model.device)
 
-        # 7. Generation parameters
+        # Generation parameters
         generate_kwargs = {
            "max_new_tokens": 512,
            "do_sample": True,
            "temperature": 0.7,
         }
 
-        print("Generating model output with manually constructed prompt...")
+        print("Generating model output with the definitive two-step process...")
 
-        # 8. Generate the response
+        # Generate the response
         generate_ids = model.generate(**inputs, **generate_kwargs)
 
-        # 9. Decode only the newly generated tokens. This logic is correct.
+        # Decode only the newly generated tokens
        input_token_len = inputs["input_ids"].shape[-1]
        result = processor.batch_decode(generate_ids[:, input_token_len:], skip_special_tokens=True)[0]
 
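
For reference, the prompt string that the removed code assembled by hand (its parts joined with newlines, the <image> line present only when an image was uploaded) looked roughly like this:

<start_of_turn>user
<image>
{symptoms_text}

{system_instruction}
<start_of_turn>model

Note that the manual version never emitted an <end_of_turn> token to close the user turn, which Gemma-style chat templates normally include; drift like that is exactly what delegating to apply_chat_template avoids.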
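
Below is a minimal, self-contained sketch of the same two-step flow outside the Space. The checkpoint name, the Auto* classes, and the sample symptom text are placeholder assumptions for illustration only; the diff does not show which model or processor classes the Space actually loads.

import torch
from transformers import AutoModelForImageTextToText, AutoProcessor

MODEL_ID = "google/gemma-3-4b-it"  # hypothetical checkpoint; not taken from this commit

processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForImageTextToText.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)

def analyze(symptom_image, symptoms_text):
    # STEP 1: a user turn's 'content' is a list of typed parts.
    content = []
    if symptom_image is not None:  # symptom_image is a PIL.Image or None
        content.append({"type": "image"})
    content.append({"type": "text", "text": symptoms_text})
    messages = [{"role": "user", "content": content}]

    # STEP 2: render the prompt with the official chat template so the
    # turn markers and image placeholder land exactly where the model expects.
    prompt = processor.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # STEP 3: tokenize the text and preprocess the pixels together; this is
    # where the template's image token gets bound to the actual image data.
    inputs = processor(
        text=prompt, images=symptom_image, return_tensors="pt"
    ).to(model.device)

    generate_ids = model.generate(
        **inputs, max_new_tokens=512, do_sample=True, temperature=0.7
    )

    # Decode only the newly generated tokens, as the commit does.
    new_tokens = generate_ids[:, inputs["input_ids"].shape[-1]:]
    return processor.batch_decode(new_tokens, skip_special_tokens=True)[0]

# Text-only usage: with images=None the template emits no image token and
# the processor skips pixel preprocessing.
# print(analyze(None, "Persistent dry cough for two weeks, no fever."))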