Spaces:
Running
Running
import re | |
def parse_agent_response(response):
    """Parse a list of agent chat messages into a structured result.

    Args:
        response: Iterable of message dicts. Each message is read through
            ``.get`` for the keys ``content``, ``role`` and ``metadata``;
            missing or ``None`` values are tolerated.

    Returns:
        A dict with three keys:
            ``final_answer``: text following the last "Final answer:" marker
                seen in any message (empty string if none was seen).
            ``execution_steps``: list of ``{"title", "details"}`` dicts, one
                per assistant message whose content starts with "**Step".
            ``search_links``: every URL found in messages whose metadata
                title is "📝 Execution Logs", in order, duplicates included.
    """
    result = {
        "final_answer": "",
        "execution_steps": [],
        "search_links": [],
    }
    current_step = {}
    link_pattern = re.compile(r'https?://\S+')
    # Captures the body of the "extremely detailed" outcome section, up to
    # the next "### 3" heading or the end of the message.
    detail_pattern = re.compile(
        r'### 2\. Task outcome \(extremely detailed version\):(.+?)(?=### 3|\Z)',
        re.DOTALL,
    )

    for msg in response:
        raw_content = msg.get('content')
        # A null content must not turn into the literal text "None".
        content = '' if raw_content is None else str(raw_content)
        # The key may be present with a None value, in which case the
        # default passed to .get() is not used; fall back to an empty dict.
        metadata = msg.get('metadata') or {}
        is_assistant = msg.get('role') == 'assistant'

        # Final answer: the last message containing the marker wins.
        if "Final answer:" in content:
            result["final_answer"] = _extract_final_answer(content)

        # Execution steps (assistant messages only).
        if is_assistant and content.startswith("**Step"):
            current_step = {
                "title": content.strip('* '),
                "details": [],
            }
            result["execution_steps"].append(current_step)
        elif current_step and is_assistant:
            # Attach the detailed outcome section to the most recent step,
            # with bold markers and code fences removed.
            detail_match = detail_pattern.search(content)
            if detail_match:
                cleaned_content = re.sub(
                    r'\*{2,}|`{3,}', '', detail_match.group(1)
                ).strip()
                current_step["details"].append(cleaned_content)

        # Links are collected only from execution-log messages.
        if metadata.get('title') == '📝 Execution Logs':
            result["search_links"].extend(link_pattern.findall(content))

    return result
def _extract_final_answer(content): | |
"""提取并清理最终答案""" | |
answer = content.split("Final answer:")[-1] | |
return re.sub(r'\*{2,}', '', answer).strip() | |
def save_as_markdown(result, filename):
    """Write the parsed agent result to ``filename`` as a Markdown report.

    Args:
        result: Dict with the keys ``final_answer`` (str),
            ``execution_steps`` (list of dicts with ``title`` and
            ``details``) and ``search_links`` (list of URL strings).
        filename: Path of the report file; it is created or overwritten
            using UTF-8.

    Raises:
        KeyError: If ``result`` lacks one of the expected keys. The report
            is rendered before the file is opened, so nothing is written
            in that case.

    The report has a final-answer section, an "Execution Steps" section with
    identical steps (same title and details) listed once, and a "Relevant
    References" section containing each cleaned link once.
    """
    lines = [
        "## Novelty Research Report\n",
        result["final_answer"] + "\n",
    ]

    steps = result["execution_steps"]
    if steps:
        lines.append("\n### Execution Steps\n")
        seen_steps = set()
        for step in steps:
            # Title plus details identifies a step for de-duplication.
            step_key = (step['title'], tuple(step['details']))
            if step_key in seen_steps:
                continue
            seen_steps.add(step_key)
            lines.append(f"\n#### {step['title']}\n")
            lines.append('\n'.join(step["details"]) + "\n")

    raw_links = result["search_links"]
    if raw_links:
        lines.append("\n### Relevant References\n")
        seen_links = set()
        for raw_link in raw_links:
            link = _clean_link(raw_link)
            if link.startswith('http') and link not in seen_links:
                seen_links.add(link)
                lines.append(f"- {link}\n")
        # The raw link list is intentionally NOT appended afterwards: doing
        # so would repeat every reference and undo the cleanup above.

    with open(filename, 'w', encoding='utf-8') as f:
        f.writelines(lines)


def _clean_link(raw_link):
    """Normalize one URL: drop the query string and surrounding junk.

    Removes everything from the first "?" and from the first ")" onwards,
    then trims leading/trailing dots, spaces, backslashes and literal
    two-character ``\\n`` sequences. A plain trailing "n" that belongs to
    the URL is preserved.
    """
    link = raw_link.split('?')[0]      # drop URL parameters
    link = re.sub(r'\).*', '', link)   # drop the first ")" and what follows
    # Presumably the literal backslash-n comes from stringified log text;
    # match it as a unit so ordinary "n" characters are never stripped.
    junk = r'(?:\\n|[\\. ])+'
    link = re.sub(r'\A' + junk, '', link)
    link = re.sub(junk + r'\Z', '', link)
    return link
if __name__ == "__main__":
    import json

    # Output location of the Markdown report.
    report_path = r"D:\007.Projects\008.deep_research\smolagents\examples\open_deep_research\analysis_report.md"

    # Load the message list from a local JSON file.
    with open(
        r"D:\007.Projects\008.deep_research\smolagents\examples\open_deep_research\last_result.json",
        encoding="utf-8",
    ) as f:
        test_data = json.load(f)

    # Parse the messages, then write the report.
    parsed_result = parse_agent_response(test_data)
    save_as_markdown(parsed_result, report_path)

    print(f"完整分析报告已保存至:{report_path}")