Update app.py
Browse files
Update the logic to separate the <think> block and the JSON block in the generated 'response'.
app.py
CHANGED
@@ -133,7 +133,39 @@ class BasicModel:
|
|
133 |
response = outputs[0]["generated_text"]
|
134 |
assert isinstance(response, str)
|
135 |
|
136 |
-
# 3.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
if stop_sequences:
|
138 |
# find the earliest occurrence of any stop token
|
139 |
cuts = [response.find(s) for s in stop_sequences if response.find(s) != -1]
|
@@ -142,7 +174,7 @@ class BasicModel:
|
|
142 |
|
143 |
print(f"\n--- RAW MODEL OUTPUT ---\n{response}\n------------------------")
|
144 |
|
145 |
-
#
|
146 |
try:
|
147 |
# The agent's response is expected to be a JSON string, according to the system prompt
|
148 |
# Parse it into a Python dictionary
|
@@ -178,7 +210,7 @@ class BasicModel:
|
|
178 |
|
179 |
print(f"\n--- FIXED AND FINAL OUTPUT ---\n{response}\n----------------------------")
|
180 |
|
181 |
-
#
|
182 |
return ChatMessage(role="assistant", content=response)
|
183 |
# return {
|
184 |
# "role": 'assistant',
|
|
|
133 |
response = outputs[0]["generated_text"]
|
134 |
assert isinstance(response, str)
|
135 |
|
136 |
+
# 3. The response consists of a <think></think> block and a JSON block. We want to separate the two blocks.
|
137 |
+
## Save the <think></think> block to print in the log
|
138 |
+
try:
|
139 |
+
# This regex finds the content between <think> and </think>
|
140 |
+
thought_pattern = r"<think>(.*?)</think>"
|
141 |
+
# regex.DOTALL allows the '.' to match newline characters
|
142 |
+
match = regex.search(thought_pattern, response, flags=regex.DOTALL)
|
143 |
+
if match:
|
144 |
+
# .group(1) gets the content captured by the (.*?) part of the pattern
|
145 |
+
thought_content = match.group(1).strip()
|
146 |
+
print(f"--- CAPTURED THOUGHT ---\n{thought_content}\n------------------------")
|
147 |
+
except Exception as e:
|
148 |
+
# This is just for logging, so we don't want it to crash the main process.
|
149 |
+
print(f"Could not extract <think> block: {e}")
|
150 |
+
|
151 |
+
## Pass the JSON block as the 'response'
|
152 |
+
try:
|
153 |
+
# Find the first '{' and the last '}'
|
154 |
+
start_index = response.find('{')
|
155 |
+
end_index = response.rfind('}')
|
156 |
+
# If both braces are found, extract the substring
|
157 |
+
if start_index != -1 and end_index != -1 and end_index > start_index:
|
158 |
+
json_substring = response[start_index : end_index + 1]
|
159 |
+
# Try to parse the substring to ensure it's valid JSON
|
160 |
+
json.loads(json_substring)
|
161 |
+
# If it's valid, we replace the original response with just the clean JSON part.
|
162 |
+
response = json_substring
|
163 |
+
# If no valid JSON is found, we proceed with the original response string.
|
164 |
+
except (json.JSONDecodeError, TypeError):
|
165 |
+
# Pass silently and let the framework handle the potentially malformed string.
|
166 |
+
pass
|
167 |
+
|
168 |
+
# 4. Optionally map SmolAgents’ stop_sequences → HF pipeline’s 'stop'
|
169 |
if stop_sequences:
|
170 |
# find the earliest occurrence of any stop token
|
171 |
cuts = [response.find(s) for s in stop_sequences if response.find(s) != -1]
|
|
|
174 |
|
175 |
print(f"\n--- RAW MODEL OUTPUT ---\n{response}\n------------------------")
|
176 |
|
177 |
+
# 5. NEW: Parse, Fix, and Re-serialize the agent's code output
|
178 |
try:
|
179 |
# The agent's response is expected to be a JSON string, according to the system prompt
|
180 |
# Parse it into a Python dictionary
|
|
|
210 |
|
211 |
print(f"\n--- FIXED AND FINAL OUTPUT ---\n{response}\n----------------------------")
|
212 |
|
213 |
+
# 6. Wrap back into a chat message dict
|
214 |
return ChatMessage(role="assistant", content=response)
|
215 |
# return {
|
216 |
# "role": 'assistant',
|