Spaces:

Ayanami0730
/

DeepResearch-Leaderboard

Running

App Files Files Community

DeepResearch-Leaderboard / tabs /leaderboard_tab.py

Ayanami0730

fix nvidia-aiq-research-assistant data

0b05f4c 29 days ago

raw

history blame contribute delete

9.7 kB

	from __future__ import annotations
	import gradio as gr
	import pandas as pd
	from pathlib import Path
	from typing import Union

	# Project root: two directory levels above this file.
	BASE_DIR = Path(__file__).resolve().parent.parent
	DATA_PATH = BASE_DIR.joinpath("data", "leaderboard.csv")

	# Constants used to mark rows of the highlighted category.
	CATEGORY_TO_HIGHLIGHT = "Deep Research Agent"
	HIGHLIGHT_EMOJI = "🚀"

	# Maps the CSV metric column names to the abbreviated headers shown in the table.
	COLUMN_RENAME_MAP = {
	    "overall_score": "overall",
	    "comprehensiveness": "comp.",
	    "insight": "insight",
	    "instruction_following": "inst.",
	    "readability": "read.",
	    "citation_accuracy": "c.acc.",
	    "effective_citations": "eff.c.",
	}

	# Category name -> list of model names belonging to that category.
	# Models not listed under any category are labelled "Others" by load_leaderboard().
	# NOTE(review): "nvidia-aiq-research-assistant" is categorized as "LLM with Search"
	# here, but appears under the "Deep Research Agent" comment header in the link and
	# license mappings of this file -- confirm which grouping is intended.
	MODEL_CATEGORIES = {
	    "Deep Research Agent": [
	        "gemini-2.5-pro-deepresearch",
	        "grok-deeper-search",
	        "openai-deepresearch",
	        "perplexity-Research",
	        "doubao-deepresearch",
	        "kimi-researcher",
	        "claude-research",
	        "langchain-open-deep-research",
	    ],
	    "LLM with Search": [
	        "claude-3-7-sonnet-with-search",
	        "claude-3-5-sonnet-with-search",
	        "sonar-reasoning-pro",
	        "sonar-reasoning",
	        "sonar-pro",
	        "sonar",
	        "gemini-2.5-pro-preview-05-06",
	        "gpt-4o-search-preview",
	        "gpt-4.1",
	        "gemini-2.5-flash-preview-04-17",
	        "gpt-4o-mini-search-preview",
	        "nvidia-aiq-research-assistant",
	        "gpt-4.1-mini",
	    ],
	}

	# Model name -> URL used to make the model name clickable in the table.
	# An empty string means no link has been configured yet for that model.
	MODEL_LINKS = {
	    # Deep Research Agent
	    "gemini-2.5-pro-deepresearch": "https://gemini.google/overview/deep-research/",
	    "grok-deeper-search": "https://x.ai/news/grok-3",
	    "openai-deepresearch": "https://openai.com/zh-Hans-CN/index/introducing-deep-research/",
	    "perplexity-Research": "https://www.perplexity.ai/hub/blog/introducing-perplexity-deep-research",
	    "doubao-deepresearch": "https://www.doubao.com/chat/",
	    "kimi-researcher": "https://moonshotai.github.io/Kimi-Researcher/",
	    "claude-research": "https://www.anthropic.com/news/research",
	    "nvidia-aiq-research-assistant": "https://github.com/NVIDIA-AI-Blueprints/aiq-research-assistant",
	    "langchain-open-deep-research": "https://github.com/langchain-ai/open_deep_research",

	    # LLM with Search
	    "claude-3-7-sonnet-with-search": "",
	    "claude-3-5-sonnet-with-search": "",
	    "sonar-reasoning-pro": "",
	    "sonar-reasoning": "",
	    "sonar-pro": "",
	    "sonar": "",
	    "gemini-2.5-pro-preview-05-06": "",
	    "gpt-4o-search-preview": "",
	    "gpt-4.1": "",
	    "gemini-2.5-flash-preview-04-17": "",
	    "gpt-4o-mini-search-preview": "",
	    "gpt-4.1-mini": "",
	}

	# Model name -> license label shown in the "license_type" column.
	# Models missing from this mapping are labelled "Unknown" by load_leaderboard().
	MODEL_LICENSE_TYPE = {
	    # Deep Research Agent
	    "gemini-2.5-pro-deepresearch": "Proprietary",
	    "grok-deeper-search": "Proprietary",
	    "openai-deepresearch": "Proprietary",
	    "perplexity-Research": "Proprietary",
	    "doubao-deepresearch": "Proprietary",
	    "kimi-researcher": "Proprietary",
	    "claude-research": "Proprietary",
	    "nvidia-aiq-research-assistant": "Apache 2.0",
	    "langchain-open-deep-research": "MIT",  # TODO: the exact license still needs to be confirmed

	    # LLM with Search
	    "claude-3-7-sonnet-with-search": "Proprietary",
	    "claude-3-5-sonnet-with-search": "Proprietary",
	    "sonar-reasoning-pro": "Proprietary",
	    "sonar-reasoning": "Proprietary",
	    "sonar-pro": "Proprietary",
	    "sonar": "Proprietary",
	    "gemini-2.5-pro-preview-05-06": "Proprietary",
	    "gpt-4o-search-preview": "Proprietary",
	    "gpt-4.1": "Proprietary",
	    "gemini-2.5-flash-preview-04-17": "Proprietary",
	    "gpt-4o-mini-search-preview": "Proprietary",
	    "gpt-4.1-mini": "Proprietary",
	}

	def load_leaderboard() -> pd.DataFrame:
	    """Read the leaderboard CSV and annotate each row with category and license.

	    Column names are stripped of surrounding whitespace. Two columns are
	    added, both derived from the ``model`` column: ``category`` (looked up in
	    ``MODEL_CATEGORIES``, ``"Others"`` when unlisted) and ``license_type``
	    (looked up in ``MODEL_LICENSE_TYPE``, ``"Unknown"`` when unlisted).

	    Returns:
	        The annotated leaderboard as a DataFrame.

	    Raises:
	        FileNotFoundError: If ``DATA_PATH`` does not exist.
	    """
	    if not DATA_PATH.exists():
	        raise FileNotFoundError(
	            f"Leaderboard file not found: {DATA_PATH}.\n"
	            "→ 先运行 rank_leaderboard.py 生成 data/leaderboard.csv"
	        )

	    board = pd.read_csv(DATA_PATH)
	    board.columns = [header.strip() for header in board.columns]

	    # Invert MODEL_CATEGORIES into model -> category. setdefault keeps the
	    # first category a model is listed under, matching an in-order scan.
	    category_by_model = {}
	    for category, members in MODEL_CATEGORIES.items():
	        for member in members:
	            category_by_model.setdefault(member, category)

	    model_names = board['model']
	    board['category'] = model_names.apply(
	        lambda name: category_by_model.get(name, "Others")
	    )
	    board['license_type'] = model_names.apply(
	        lambda name: MODEL_LICENSE_TYPE.get(name, "Unknown")
	    )
	    return board

	def make_ranked(df: pd.DataFrame) -> pd.DataFrame:
	    """Return a display-ready copy of ``df`` ranked by overall score.

	    The frame is sorted by ``overall_score`` (descending), gets a 1-based
	    ``Rank`` column, has its metric columns renamed via ``COLUMN_RENAME_MAP``
	    and rounded to two decimals, and has every ``model`` cell replaced by an
	    HTML snippet: an ``<a>`` link when ``MODEL_LINKS`` holds a non-blank URL
	    for the model, otherwise a ``<span class="model-name">``. Models whose
	    category equals ``CATEGORY_TO_HIGHLIGHT`` are colored and prefixed with
	    ``HIGHLIGHT_EMOJI``.

	    Args:
	        df: Leaderboard rows; must contain the ``overall_score``, ``model``
	            and ``category`` columns. May be empty.

	    Returns:
	        A new DataFrame; ``df`` itself is not modified.
	    """
	    import html  # local import: only needed to escape text placed into HTML

	    ranked = df.sort_values(by='overall_score', ascending=False).reset_index(drop=True)
	    ranked.insert(0, "Rank", range(1, len(ranked) + 1))

	    # Switch to the abbreviated column headers.
	    ranked = ranked.rename(columns=COLUMN_RENAME_MAP)

	    def round_score(value):
	        """Round numeric cells to 2 decimals; leave anything else untouched."""
	        if pd.isna(value):
	            return value
	        try:
	            return round(float(value), 2)
	        except (TypeError, ValueError):
	            # Placeholders such as "-" (or any other non-numeric text) are
	            # kept verbatim instead of aborting the whole table.
	            return value

	    numeric_columns = ['overall', 'comp.', 'insight', 'inst.', 'read.', 'c.acc.', 'eff.c.']
	    for col in numeric_columns:
	        if col in ranked.columns:
	            ranked[col] = ranked[col].apply(round_score)

	    def format_model_name(model_name, category):
	        """Build the HTML shown in the ``model`` cell for one row."""
	        link = MODEL_LINKS.get(model_name, "")
	        # Escape the name so characters like & < > " cannot break the markup.
	        safe_name = html.escape(str(model_name))

	        # Highlight only the configured category.
	        if category == CATEGORY_TO_HIGHLIGHT:
	            display_name = f'<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} {safe_name}</span>'
	        else:
	            display_name = safe_name

	        # Wrap in an <a> tag when a link is configured.
	        if link and link.strip():
	            safe_link = html.escape(link)
	            return f'<a href="{safe_link}" target="_blank" style="text-decoration: none;">{display_name}</a>'
	        # No link: emit a span carrying the model name so a link or click
	        # handler can be attached later.
	        return f'<span class="model-name" data-model="{safe_name}">{display_name}</span>'

	    # A plain comprehension is used instead of DataFrame.apply(axis=1): on an
	    # empty frame (e.g. a search with no matches) apply() has to infer its
	    # result type by calling the function on a placeholder row, whereas this
	    # simply yields an empty column.
	    ranked['model'] = [
	        format_model_name(name, category)
	        for name, category in zip(ranked['model'], ranked['category'])
	    ]

	    return ranked

	def filter_data(search_text: str, selected_categories: list):
	    """Reload the leaderboard, apply the UI filters, and return the ranked table.

	    Args:
	        search_text: Text typed into the search box. Matched as a literal,
	            case-insensitive substring of the ``model`` column; blank or
	            ``None`` disables the search filter.
	        selected_categories: Category names to keep. An empty selection
	            disables the category filter (all rows are kept).

	    Returns:
	        The filtered rows processed by ``make_ranked``.
	    """
	    df = load_leaderboard()

	    query = (search_text or "").strip()
	    if query:
	        # regex=False: the query is free text typed by a user, not a pattern.
	        # With the default regex matching, "." in "gpt-4.1" would match any
	        # character and input such as "(" or "[" would raise re.error.
	        df = df[df['model'].str.contains(query, case=False, na=False, regex=False)]

	    if selected_categories:
	        df = df[df['category'].isin(selected_categories)]

	    return make_ranked(df)

	def create_leaderboard_tab():
	    """Build the "🏆Leaderboard" tab: search box, category filter, table and legend.

	    The table is initialised with the full ranked leaderboard and is
	    recomputed through ``filter_data`` whenever the search box or the
	    category checkboxes change.

	    Returns:
	        The search ``gr.Textbox`` component.
	    """
	    category_names = list(MODEL_CATEGORIES.keys())

	    with gr.Tab("🏆Leaderboard"):
	        with gr.Row():
	            with gr.Column(scale=1):
	                search_box = gr.Textbox(
	                    label="Model Search",
	                    placeholder="Entering model name to search...",
	                    value="",
	                )
	            with gr.Column(scale=2):
	                # All categories are ticked by default.
	                category_checkboxes = gr.CheckboxGroup(
	                    label="Model Categories",
	                    choices=category_names,
	                    value=list(category_names),
	                )

	        # Unfiltered ranking shown when the tab is first built.
	        initial_df = make_ranked(load_leaderboard())

	        # Every column is rendered as text except "model", which holds HTML markup.
	        datatypes = [
	            "html" if column == 'model' else "str"
	            for column in initial_df.columns
	        ]

	        # Widths, in order: Rank, model, seven metric columns, category, license_type.
	        metric_widths = ["100px"] * 7
	        table = gr.Dataframe(
	            value=initial_df,
	            datatype=datatypes,
	            max_height=600,
	            show_label=False,
	            elem_id="leaderboard_table",
	            interactive=False,  # read-only table
	            wrap=False,
	            column_widths=["80px", "350px", *metric_widths, "200px", "150px"],
	        )

	        def update_display(search_text, selected_categories):
	            return filter_data(search_text, selected_categories)

	        # Both controls trigger the same refresh with the same inputs.
	        for control in (search_box, category_checkboxes):
	            control.change(
	                fn=update_display,
	                inputs=[search_box, category_checkboxes],
	                outputs=table,
	            )

	        # Legend displayed underneath the table.
	        legend = f"""
	### 📊 Column Descriptions
	- Rank: Model ranking based on overall score
	- model: Model name (<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} = {CATEGORY_TO_HIGHLIGHT}</span>)
	- overall: Overall Score (weighted average of all metrics)
	- comp.: Comprehensiveness - How thorough and complete the research is
	- insight: Insight Quality - Depth and value of analysis
	- inst.: Instruction Following - Adherence to user instructions
	- read.: Readability - Clarity and organization of content
	- c.acc.: Citation Accuracy - Correctness of references
	- eff.c.: Effective Citations - Relevance and quality of sources
	- category: Model category
	- license_type: The software license type of the model/service

	💡 Tip: Model names are clickable when links are available. Visit the GitHub repositories for more details!
	"""
	        with gr.Row():
	            gr.Markdown(legend)

	    return search_box