import re


def parse_agent_response(response):
    """Parse an agent API response into a structured result dict.

    Args:
        response: iterable of message dicts, each with optional keys
            'content', 'role', and 'metadata'.

    Returns:
        dict with keys:
            'final_answer' (str) - text after the last "Final answer:" marker,
            'execution_steps' (list of {'title': str, 'details': list[str]}),
            'search_links' (list of str) - raw URLs found in execution logs.
    """
    result = {
        "final_answer": "",
        "execution_steps": [],
        "search_links": [],
    }
    current_step = {}
    link_pattern = re.compile(r'https?://\S+')
    # Matches the "extremely detailed" outcome section, up to the next
    # "### 3" header or end of text.
    detail_pattern = re.compile(
        r'### 2\. Task outcome \(extremely detailed version\):(.+?)(?=### 3|\Z)',
        re.DOTALL,
    )

    for msg in response:
        content = str(msg.get('content', ''))
        metadata = msg.get('metadata', {})

        # Extract the final answer.
        if "Final answer:" in content:
            result["final_answer"] = _extract_final_answer(content)

        # Extract execution steps (assistant messages only).
        if msg.get('role') == 'assistant' and content.startswith("**Step"):
            current_step = {
                "title": content.strip('* '),
                "details": [],
            }
            result["execution_steps"].append(current_step)
        elif current_step and msg.get('role') == 'assistant':
            # Pull the detailed-analysis section out of follow-up messages.
            if (detail_match := detail_pattern.search(content)):
                # Remove bold markers (**...) and code fences (```...).
                cleaned_content = re.sub(
                    r'\*{2,}|`{3,}', '', detail_match.group(1)
                ).strip()
                current_step["details"].append(cleaned_content)

        # Collect links found in execution-log messages.
        if metadata.get('title') == '📝 Execution Logs':
            result["search_links"].extend(link_pattern.findall(content))

    return result


def _extract_final_answer(content):
    """Return the text after 'Final answer:' with bold markers removed."""
    answer = content.split("Final answer:")[-1]
    return re.sub(r'\*{2,}', '', answer).strip()


def save_as_markdown(result, filename):
    """Write a parsed result (from parse_agent_response) to *filename*
    as a Markdown report: final answer, deduplicated execution steps,
    and a cleaned, deduplicated list of reference links."""
    with open(filename, 'w', encoding='utf-8') as f:
        # Final-answer section.
        f.write("## Novelty Research Report\n")
        f.write(result["final_answer"] + "\n")

        # Execution-steps section, with duplicate steps removed.
        if result["execution_steps"]:
            f.write("\n### Execution Steps\n")
            seen_steps = set()
            unique_steps = []
            for step in result["execution_steps"]:
                # A step's identity is its title plus its detail lines.
                step_content = (step['title'], tuple(step['details']))
                if step_content not in seen_steps:
                    seen_steps.add(step_content)
                    unique_steps.append(step)
            # Emit the deduplicated steps.
            for step in unique_steps:
                f.write(f"\n#### {step['title']}\n")
                f.write('\n'.join(step["details"]) + "\n")

        # References section: clean, deduplicate, emit each link once.
        if result["search_links"]:
            f.write("\n### Relevant References\n")
            seen = set()
            for raw_link in result["search_links"]:
                link = raw_link.split('?')[0]     # drop URL query parameters
                link = re.sub(r'\).*', '', link)  # drop first ')' and beyond
                # BUG FIX: was strip('\\n. '), which stripped the literal
                # characters '\' and 'n' and truncated URLs ending in 'n';
                # strip actual newlines, dots, and spaces instead.
                link = link.strip('\n. ')
                if link and link.startswith('http') and link not in seen:
                    seen.add(link)
                    f.write(f"- {link}\n")
            # BUG FIX: removed a trailing write that re-dumped every raw,
            # uncleaned link (duplicates included) after the cleaned list.


if __name__ == "__main__":
    import json

    # Load test data from a local JSON file.
    with open(
        r"D:\007.Projects\008.deep_research\smolagents\examples\open_deep_research\last_result.json",
        encoding="utf-8",
    ) as f:
        test_data = json.load(f)

    parsed_result = parse_agent_response(test_data)
    report_path = r"D:\007.Projects\008.deep_research\smolagents\examples\open_deep_research\analysis_report.md"
    save_as_markdown(parsed_result, report_path)
    print(f"完整分析报告已保存至:{report_path}")