File size: 9,704 Bytes
71d9111 927e909 d115fb4 927e909 71d9111 927e909 71d9111 927e909 b91c8cc 927e909 1d11ffb de24ae3 927e909 1d11ffb 927e909 1d11ffb 927e909 0b05f4c 1d11ffb 927e909 de24ae3 927e909 71d9111 927e909 de24ae3 927e909 de24ae3 927e909 8d76bf3 b91c8cc 8d76bf3 b91c8cc de24ae3 141f575 71d9111 141f575 927e909 141f575 71d9111 927e909 71d9111 927e909 71d9111 b91c8cc 927e909 71d9111 927e909 71d9111 0b05f4c 71d9111 0b05f4c 927e909 71d9111 927e909 b91c8cc 927e909 71d9111 de24ae3 71d9111 927e909 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 |
from __future__ import annotations
import gradio as gr
import pandas as pd
from pathlib import Path
from typing import Union
# Paths are resolved relative to this file: the base directory is the
# grandparent directory of the module.
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_PATH = BASE_DIR.joinpath("data", "leaderboard.csv")

# Constants used for highlighting: the category whose models are marked,
# and the emoji placed in front of their names.
CATEGORY_TO_HIGHLIGHT = "Deep Research Agent"
HIGHLIGHT_EMOJI = "🚀"
# Column rename map: full metric column name -> abbreviated header.
COLUMN_RENAME_MAP = {
    "overall_score": "overall",
    "comprehensiveness": "comp.",
    "insight": "insight",
    "instruction_following": "inst.",
    "readability": "read.",
    "citation_accuracy": "c.acc.",
    "effective_citations": "eff.c.",
}
# Category -> list of model names belonging to that category.
MODEL_CATEGORIES = {
    "Deep Research Agent": [
        "gemini-2.5-pro-deepresearch",
        "grok-deeper-search",
        "openai-deepresearch",
        "perplexity-Research",
        "doubao-deepresearch",
        "kimi-researcher",
        "claude-research",
        "langchain-open-deep-research",
    ],
    "LLM with Search": [
        "claude-3-7-sonnet-with-search",
        "claude-3-5-sonnet-with-search",
        "sonar-reasoning-pro",
        "sonar-reasoning",
        "sonar-pro",
        "sonar",
        "gemini-2.5-pro-preview-05-06",
        "gpt-4o-search-preview",
        "gpt-4.1",
        "gemini-2.5-flash-preview-04-17",
        "gpt-4o-mini-search-preview",
        # NOTE(review): MODEL_LINKS and MODEL_LICENSE_TYPE list this entry
        # under their "Deep Research Agent" comment - confirm the category.
        "nvidia-aiq-research-assistant",
        "gpt-4.1-mini",
    ],
}
# Model name -> reference URL. An empty string means no link has been set
# for that model yet; concrete links can be filled in later.
MODEL_LINKS = {
    # Deep Research Agent
    "gemini-2.5-pro-deepresearch": "https://gemini.google/overview/deep-research/",
    "grok-deeper-search": "https://x.ai/news/grok-3",
    "openai-deepresearch": "https://openai.com/zh-Hans-CN/index/introducing-deep-research/",
    "perplexity-Research": "https://www.perplexity.ai/hub/blog/introducing-perplexity-deep-research",
    "doubao-deepresearch": "https://www.doubao.com/chat/",
    "kimi-researcher": "https://moonshotai.github.io/Kimi-Researcher/",
    "claude-research": "https://www.anthropic.com/news/research",
    "nvidia-aiq-research-assistant": "https://github.com/NVIDIA-AI-Blueprints/aiq-research-assistant",
    "langchain-open-deep-research": "https://github.com/langchain-ai/open_deep_research",
    # LLM with Search
    "claude-3-7-sonnet-with-search": "",
    "claude-3-5-sonnet-with-search": "",
    "sonar-reasoning-pro": "",
    "sonar-reasoning": "",
    "sonar-pro": "",
    "sonar": "",
    "gemini-2.5-pro-preview-05-06": "",
    "gpt-4o-search-preview": "",
    "gpt-4.1": "",
    "gemini-2.5-flash-preview-04-17": "",
    "gpt-4o-mini-search-preview": "",
    "gpt-4.1-mini": "",
}
# Model name -> license type label.
MODEL_LICENSE_TYPE = {
    # Deep Research Agent
    "gemini-2.5-pro-deepresearch": "Proprietary",
    "grok-deeper-search": "Proprietary",
    "openai-deepresearch": "Proprietary",
    "perplexity-Research": "Proprietary",
    "doubao-deepresearch": "Proprietary",
    "kimi-researcher": "Proprietary",
    "claude-research": "Proprietary",
    "nvidia-aiq-research-assistant": "Apache 2.0",
    "langchain-open-deep-research": "MIT",  # TODO: confirm the exact license
    # LLM with Search
    "claude-3-7-sonnet-with-search": "Proprietary",
    "claude-3-5-sonnet-with-search": "Proprietary",
    "sonar-reasoning-pro": "Proprietary",
    "sonar-reasoning": "Proprietary",
    "sonar-pro": "Proprietary",
    "sonar": "Proprietary",
    "gemini-2.5-pro-preview-05-06": "Proprietary",
    "gpt-4o-search-preview": "Proprietary",
    "gpt-4.1": "Proprietary",
    "gemini-2.5-flash-preview-04-17": "Proprietary",
    "gpt-4o-mini-search-preview": "Proprietary",
    "gpt-4.1-mini": "Proprietary",
}
def load_leaderboard() -> pd.DataFrame:
    """Read the leaderboard CSV and tag every row with category and license.

    Returns:
        The contents of ``DATA_PATH`` with whitespace-stripped column names
        and two added columns: ``category`` (looked up in
        ``MODEL_CATEGORIES``, ``"Others"`` for unlisted models) and
        ``license_type`` (looked up in ``MODEL_LICENSE_TYPE``, ``"Unknown"``
        for unlisted models).

    Raises:
        FileNotFoundError: If ``DATA_PATH`` does not exist.
    """
    if not DATA_PATH.exists():
        raise FileNotFoundError(
            f"Leaderboard file not found: {DATA_PATH}.\n"
            "→ 先运行 rank_leaderboard.py 生成 data/leaderboard.csv"
        )

    frame = pd.read_csv(DATA_PATH)
    frame.columns = [name.strip() for name in frame.columns]

    # Invert MODEL_CATEGORIES into model -> category. setdefault keeps the
    # first category encountered if a model is listed more than once.
    category_of = {}
    for category, members in MODEL_CATEGORIES.items():
        for member in members:
            category_of.setdefault(member, category)

    model_names = frame['model']
    frame['category'] = model_names.apply(
        lambda name: category_of.get(name, "Others")
    )
    frame['license_type'] = model_names.apply(
        lambda name: MODEL_LICENSE_TYPE.get(name, "Unknown")
    )
    return frame
def make_ranked(df: pd.DataFrame) -> pd.DataFrame:
    """Sort, rank and format a leaderboard frame for display.

    Args:
        df: Frame containing at least the ``overall_score``, ``model`` and
            ``category`` columns. It is not modified.

    Returns:
        A new frame sorted by ``overall_score`` (descending) with a leading
        1-based ``Rank`` column, metric columns renamed through
        ``COLUMN_RENAME_MAP`` and rounded to two decimals, and the ``model``
        column replaced by an HTML snippet (a link when ``MODEL_LINKS`` has a
        non-blank URL, a plain ``<span>`` otherwise). An empty input yields an
        empty frame with the same columns.
    """
    # Function-scope import so the block does not depend on a module-level
    # import being added elsewhere.
    from html import escape

    ranked = df.sort_values(by='overall_score', ascending=False).reset_index(drop=True)
    ranked.insert(0, "Rank", range(1, len(ranked) + 1))

    # Switch to the abbreviated column headers.
    ranked = ranked.rename(columns=COLUMN_RENAME_MAP)

    def round_score(value):
        # The "-" placeholder and missing values are passed through as-is;
        # everything else is rounded to two decimals.
        if value == "-" or pd.isna(value):
            return value
        return round(float(value), 2)

    numeric_columns = ['overall', 'comp.', 'insight', 'inst.', 'read.', 'c.acc.', 'eff.c.']
    for col in numeric_columns:
        if col in ranked.columns:
            ranked[col] = ranked[col].apply(round_score)

    def format_model_name(model_name, category):
        link = MODEL_LINKS.get(model_name, "")
        # Escape the name: it is interpolated into both element content and
        # a double-quoted attribute value.
        safe_name = escape(str(model_name), quote=True)

        # Highlight models of the designated category.
        if category == CATEGORY_TO_HIGHLIGHT:
            display_name = f'<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} {safe_name}</span>'
        else:
            display_name = safe_name

        # Wrap in an <a> tag when a non-blank link is configured.
        if link and link.strip():
            safe_link = escape(link, quote=True)
            return f'<a href="{safe_link}" target="_blank" style="text-decoration: none;">{display_name}</a>'
        # No link: emit a span carrying the model name so a link or click
        # handler can be attached later.
        return f'<span class="model-name" data-model="{safe_name}">{display_name}</span>'

    # Build the column from a plain list rather than DataFrame.apply(axis=1):
    # on an empty frame, row-wise apply may return a DataFrame instead of a
    # Series (pandas-version dependent), which breaks the column assignment.
    ranked['model'] = [
        format_model_name(model_name, category)
        for model_name, category in zip(ranked['model'], ranked['category'])
    ]
    return ranked
def filter_data(search_text: str, selected_categories: list) -> pd.DataFrame:
    """Load the leaderboard, apply the search and category filters, and rank it.

    Args:
        search_text: Text to look for in the model name. Matching is a
            case-insensitive literal substring test; blank or ``None`` text
            disables the filter.
        selected_categories: Categories to keep. An empty selection disables
            the category filter.

    Returns:
        The filtered frame after processing by ``make_ranked``.
    """
    df = load_leaderboard()

    query = (search_text or "").strip()
    if query:
        # regex=False: user input is a model name, not a pattern. With the
        # default regex matching, "." matches any character and input such as
        # "(" raises re.error.
        df = df[df['model'].str.contains(query, case=False, na=False, regex=False)]

    if selected_categories:
        df = df[df['category'].isin(selected_categories)]

    return make_ranked(df)
def create_leaderboard_tab():
    """Build the "🏆Leaderboard" tab and return its search box.

    The tab holds a model search box, a category checkbox group, a read-only
    table filled from ``make_ranked(load_leaderboard())``, and a Markdown
    legend describing the columns. Changing either the search box or the
    checkboxes re-runs ``filter_data`` and replaces the table contents.

    Returns:
        The search ``gr.Textbox`` component.
    """
    with gr.Tab("🏆Leaderboard"):
        with gr.Row():
            with gr.Column(scale=1):
                search_box = gr.Textbox(
                    label="Model Search",
                    placeholder="Entering model name to search...",
                    value="",
                )
            with gr.Column(scale=2):
                category_checkboxes = gr.CheckboxGroup(
                    label="Model Categories",
                    choices=list(MODEL_CATEGORIES),
                    value=list(MODEL_CATEGORIES),
                )

        # Initial, unfiltered table contents.
        initial_df = make_ranked(load_leaderboard())

        # Every column is plain text except 'model', which carries HTML.
        datatypes = [
            "html" if column == 'model' else "str"
            for column in initial_df.columns
        ]

        table = gr.Dataframe(
            value=initial_df,
            datatype=datatypes,
            max_height=600,
            show_label=False,
            elem_id="leaderboard_table",
            interactive=False,  # read-only
            wrap=False,  # no line wrapping
            # One width per column; the model column gets 350px.
            column_widths=["80px", "350px", "100px", "100px", "100px", "100px", "100px", "100px", "100px", "200px", "150px"],
        )

        def update_display(search_text, selected_categories):
            return filter_data(search_text, selected_categories)

        # Both controls trigger the same refresh of the table.
        for control in (search_box, category_checkboxes):
            control.change(
                fn=update_display,
                inputs=[search_box, category_checkboxes],
                outputs=table,
            )

        # Column legend at the bottom of the tab.
        with gr.Row():
            gr.Markdown(f"""
### 📊 Column Descriptions
- **Rank**: Model ranking based on overall score
- **model**: Model name (<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} = {CATEGORY_TO_HIGHLIGHT}</span>)
- **overall**: Overall Score (weighted average of all metrics)
- **comp.**: Comprehensiveness - How thorough and complete the research is
- **insight**: Insight Quality - Depth and value of analysis
- **inst.**: Instruction Following - Adherence to user instructions
- **read.**: Readability - Clarity and organization of content
- **c.acc.**: Citation Accuracy - Correctness of references
- **eff.c.**: Effective Citations - Relevance and quality of sources
- **category**: Model category
- **license_type**: The software license type of the model/service
💡 **Tip**: Model names are clickable when links are available. Visit the GitHub repositories for more details!
""")

    return search_box