DeepResearch-Leaderboard / tabs /leaderboard_tab.py
Ayanami0730's picture
fix nvidia-aiq-research-assistant data
0b05f4c
from __future__ import annotations
import gradio as gr
import pandas as pd
from pathlib import Path
from typing import Union
# Paths are anchored two directory levels above this file rather than on the
# current working directory.
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_PATH = BASE_DIR.joinpath("data", "leaderboard.csv")

# Constants used to mark the highlighted category in the table and legend.
CATEGORY_TO_HIGHLIGHT = "Deep Research Agent"
HIGHLIGHT_EMOJI = "🚀"
# Maps the long metric column names to the abbreviated headers shown in the
# leaderboard table.
COLUMN_RENAME_MAP = {
    "overall_score": "overall",
    "comprehensiveness": "comp.",
    "insight": "insight",
    "instruction_following": "inst.",
    "readability": "read.",
    "citation_accuracy": "c.acc.",
    "effective_citations": "eff.c.",
}
# Category name -> model names belonging to that category. The keys also
# serve as the choices of the category filter in the UI.
MODEL_CATEGORIES = {
    "Deep Research Agent": [
        "gemini-2.5-pro-deepresearch",
        "grok-deeper-search",
        "openai-deepresearch",
        "perplexity-Research",
        "doubao-deepresearch",
        "kimi-researcher",
        "claude-research",
        "langchain-open-deep-research",
    ],
    "LLM with Search": [
        "claude-3-7-sonnet-with-search",
        "claude-3-5-sonnet-with-search",
        "sonar-reasoning-pro",
        "sonar-reasoning",
        "sonar-pro",
        "sonar",
        "gemini-2.5-pro-preview-05-06",
        "gpt-4o-search-preview",
        "gpt-4.1",
        "gemini-2.5-flash-preview-04-17",
        "gpt-4o-mini-search-preview",
        # NOTE(review): the link and license tables in this module group this
        # entry with the deep-research agents -- confirm the intended category.
        "nvidia-aiq-research-assistant",
        "gpt-4.1-mini",
    ],
}
# Model name -> home page URL. An empty string means no link has been
# provided yet; such models are rendered without a hyperlink.
MODEL_LINKS = {
    # Deep Research Agent
    "gemini-2.5-pro-deepresearch": "https://gemini.google/overview/deep-research/",
    "grok-deeper-search": "https://x.ai/news/grok-3",
    "openai-deepresearch": "https://openai.com/zh-Hans-CN/index/introducing-deep-research/",
    "perplexity-Research": "https://www.perplexity.ai/hub/blog/introducing-perplexity-deep-research",
    "doubao-deepresearch": "https://www.doubao.com/chat/",
    "kimi-researcher": "https://moonshotai.github.io/Kimi-Researcher/",
    "claude-research": "https://www.anthropic.com/news/research",
    "nvidia-aiq-research-assistant": "https://github.com/NVIDIA-AI-Blueprints/aiq-research-assistant",
    "langchain-open-deep-research": "https://github.com/langchain-ai/open_deep_research",
    # LLM with Search
    "claude-3-7-sonnet-with-search": "",
    "claude-3-5-sonnet-with-search": "",
    "sonar-reasoning-pro": "",
    "sonar-reasoning": "",
    "sonar-pro": "",
    "sonar": "",
    "gemini-2.5-pro-preview-05-06": "",
    "gpt-4o-search-preview": "",
    "gpt-4.1": "",
    "gemini-2.5-flash-preview-04-17": "",
    "gpt-4o-mini-search-preview": "",
    "gpt-4.1-mini": "",
}
# Model name -> license label shown in the ``license_type`` column.
MODEL_LICENSE_TYPE = {
    # Deep Research Agent
    "gemini-2.5-pro-deepresearch": "Proprietary",
    "grok-deeper-search": "Proprietary",
    "openai-deepresearch": "Proprietary",
    "perplexity-Research": "Proprietary",
    "doubao-deepresearch": "Proprietary",
    "kimi-researcher": "Proprietary",
    "claude-research": "Proprietary",
    "nvidia-aiq-research-assistant": "Apache 2.0",
    "langchain-open-deep-research": "MIT",  # TODO: confirm the exact license
    # LLM with Search
    "claude-3-7-sonnet-with-search": "Proprietary",
    "claude-3-5-sonnet-with-search": "Proprietary",
    "sonar-reasoning-pro": "Proprietary",
    "sonar-reasoning": "Proprietary",
    "sonar-pro": "Proprietary",
    "sonar": "Proprietary",
    "gemini-2.5-pro-preview-05-06": "Proprietary",
    "gpt-4o-search-preview": "Proprietary",
    "gpt-4.1": "Proprietary",
    "gemini-2.5-flash-preview-04-17": "Proprietary",
    "gpt-4o-mini-search-preview": "Proprietary",
    "gpt-4.1-mini": "Proprietary",
}
def load_leaderboard() -> pd.DataFrame:
    """Read the leaderboard CSV and tag every row with category and license.

    Column names are stripped of surrounding whitespace. Two columns are
    added, both derived from the ``model`` column:

    * ``category`` -- the first ``MODEL_CATEGORIES`` key whose list contains
      the model name, or ``"Others"`` when no list contains it.
    * ``license_type`` -- the ``MODEL_LICENSE_TYPE`` entry for the model, or
      ``"Unknown"`` when there is none.

    Returns:
        The loaded frame including the two extra columns.

    Raises:
        FileNotFoundError: If ``DATA_PATH`` does not exist.
    """
    if not DATA_PATH.exists():
        raise FileNotFoundError(
            f"Leaderboard file not found: {DATA_PATH}.\n"
            "→ 先运行 rank_leaderboard.py 生成 data/leaderboard.csv"
        )

    board = pd.read_csv(DATA_PATH)
    board.columns = [header.strip() for header in board.columns]

    # Invert MODEL_CATEGORIES into a model -> category lookup. setdefault
    # keeps the first category seen, so a model listed under several
    # categories resolves to the earliest one.
    category_of = {}
    for category, members in MODEL_CATEGORIES.items():
        for member in members:
            category_of.setdefault(member, category)

    board['category'] = board['model'].apply(
        lambda name: category_of.get(name, "Others")
    )
    board['license_type'] = board['model'].apply(
        lambda name: MODEL_LICENSE_TYPE.get(name, "Unknown")
    )
    return board
def make_ranked(df: pd.DataFrame) -> pd.DataFrame:
    """Rank the leaderboard by overall score and format it for display.

    Rows are sorted by ``overall_score`` in descending order and given a
    1-based ``Rank`` column in first position. Metric columns are renamed via
    ``COLUMN_RENAME_MAP`` and rounded to two decimals; ``"-"`` placeholders
    and missing values are left untouched. The ``model`` column is replaced
    by an HTML snippet: models in ``CATEGORY_TO_HIGHLIGHT`` are coloured and
    prefixed with ``HIGHLIGHT_EMOJI``, and models with a non-blank entry in
    ``MODEL_LINKS`` are wrapped in an ``<a>`` tag.

    Args:
        df: Leaderboard rows. Must contain the ``overall_score``, ``model``
            and ``category`` columns. May be empty.

    Returns:
        A new, formatted frame; ``df`` itself is not modified.
    """
    ranked = df.sort_values(by='overall_score', ascending=False).reset_index(drop=True)
    ranked.insert(0, "Rank", range(1, len(ranked) + 1))

    # Switch to the abbreviated column headers.
    ranked = ranked.rename(columns=COLUMN_RENAME_MAP)

    def round_score(value):
        # Round real numbers only; keep "-" placeholders and NaN as they are.
        if value != "-" and pd.notna(value):
            return round(float(value), 2)
        return value

    numeric_columns = ['overall', 'comp.', 'insight', 'inst.', 'read.', 'c.acc.', 'eff.c.']
    for col in numeric_columns:
        if col in ranked.columns:
            ranked[col] = ranked[col].apply(round_score)

    def format_model_name(model_name, category):
        """Return the HTML shown in the model cell for one row."""
        link = MODEL_LINKS.get(model_name, "")

        # Highlight only models of the designated category.
        if category == CATEGORY_TO_HIGHLIGHT:
            display_name = f'<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} {model_name}</span>'
        else:
            display_name = model_name

        # Wrap in an anchor when a link is available.
        if link and link.strip():
            return f'<a href="{link}" target="_blank" style="text-decoration: none;">{display_name}</a>'
        # No link: emit a tagged span so a link or handler can be attached later.
        return f'<span class="model-name" data-model="{model_name}">{display_name}</span>'

    # Build the column from the two source columns instead of a row-wise
    # DataFrame.apply: on an empty frame (e.g. a search with no matches)
    # apply(axis=1) returns an empty DataFrame rather than a Series, and
    # assigning that to a single column raises.
    ranked['model'] = [
        format_model_name(model_name, category)
        for model_name, category in zip(ranked['model'], ranked['category'])
    ]
    return ranked
def filter_data(search_text: str, selected_categories: list):
    """Reload the leaderboard, apply the UI filters and return the ranked table.

    Args:
        search_text: Text to look for in the model name. Matching is a
            case-insensitive literal substring match; blank or ``None``
            disables the name filter.
        selected_categories: Categories to keep. An empty selection disables
            the category filter.

    Returns:
        The filtered rows, ranked and formatted by ``make_ranked``.
    """
    df = load_leaderboard()

    query = (search_text or "").strip()
    if query:
        # regex=False: model names contain characters such as "." that are
        # regex metacharacters, and partially typed input like "(" would
        # otherwise raise while the user is still typing.
        df = df[df['model'].str.contains(query, case=False, na=False, regex=False)]

    if selected_categories:
        df = df[df['category'].isin(selected_categories)]

    return make_ranked(df)
def create_leaderboard_tab():
    """Build the "Leaderboard" tab: filters, results table and column legend.

    Must be called inside an active Gradio layout context. The table is
    recomputed through ``filter_data`` whenever the search box or the
    category selection changes.

    Returns:
        The search ``gr.Textbox`` component.
    """
    with gr.Tab("🏆Leaderboard"):
        category_names = list(MODEL_CATEGORIES)

        with gr.Row():
            with gr.Column(scale=1):
                search_box = gr.Textbox(
                    label="Model Search",
                    placeholder="Entering model name to search...",
                    value="",
                )
            with gr.Column(scale=2):
                # Every category is selected by default.
                category_checkboxes = gr.CheckboxGroup(
                    label="Model Categories",
                    choices=category_names,
                    value=list(category_names),
                )

        # Initial table contents (no styling applied).
        initial_df = make_ranked(load_leaderboard())

        # Every column is rendered as plain text except ``model``, which
        # carries HTML markup.
        model_position = initial_df.columns.get_loc('model')
        datatypes = [
            "html" if position == model_position else "str"
            for position in range(len(initial_df.columns))
        ]

        table = gr.Dataframe(
            value=initial_df,
            datatype=datatypes,
            max_height=600,  # cap the table height
            show_label=False,
            elem_id="leaderboard_table",
            interactive=False,  # read-only
            wrap=False,  # do not wrap cell text
            # One width per column; the 350px slot is meant for the model column.
            column_widths=["80px", "350px", "100px", "100px", "100px", "100px", "100px", "100px", "100px", "200px", "150px"],
        )

        def update_display(search_text, selected_categories):
            return filter_data(search_text, selected_categories)

        # Refresh the table when either filter control changes.
        for control in (search_box, category_checkboxes):
            control.change(
                fn=update_display,
                inputs=[search_box, category_checkboxes],
                outputs=table,
            )

        # Column legend below the table.
        with gr.Row():
            gr.Markdown(f"""
### 📊 Column Descriptions
- **Rank**: Model ranking based on overall score
- **model**: Model name (<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} = {CATEGORY_TO_HIGHLIGHT}</span>)
- **overall**: Overall Score (weighted average of all metrics)
- **comp.**: Comprehensiveness - How thorough and complete the research is
- **insight**: Insight Quality - Depth and value of analysis
- **inst.**: Instruction Following - Adherence to user instructions
- **read.**: Readability - Clarity and organization of content
- **c.acc.**: Citation Accuracy - Correctness of references
- **eff.c.**: Effective Citations - Relevance and quality of sources
- **category**: Model category
- **license_type**: The software license type of the model/service
💡 **Tip**: Model names are clickable when links are available. Visit the GitHub repositories for more details!
""")

    return search_box