File size: 9,704 Bytes
71d9111
927e909
 
 
d115fb4
927e909
 
 
 
 
71d9111
927e909
71d9111
927e909
b91c8cc
927e909
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d11ffb
 
 
de24ae3
 
927e909
 
 
1d11ffb
 
 
 
 
 
927e909
1d11ffb
927e909
 
0b05f4c
1d11ffb
927e909
 
 
de24ae3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
927e909
 
 
 
 
 
 
 
 
 
 
 
 
71d9111
927e909
de24ae3
 
 
927e909
de24ae3
927e909
 
 
 
 
 
 
 
 
8d76bf3
b91c8cc
 
 
8d76bf3
 
 
 
b91c8cc
de24ae3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141f575
71d9111
141f575
927e909
 
 
 
 
 
 
 
 
141f575
71d9111
927e909
 
 
 
71d9111
 
 
 
 
 
 
 
 
 
 
 
927e909
71d9111
b91c8cc
927e909
71d9111
 
 
 
 
 
 
927e909
71d9111
 
0b05f4c
71d9111
0b05f4c
 
 
 
927e909
 
 
71d9111
 
927e909
b91c8cc
927e909
 
 
 
 
 
 
 
 
 
 
71d9111
 
 
 
 
 
 
 
 
 
 
 
 
 
de24ae3
 
 
71d9111
927e909
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
from __future__ import annotations
import gradio as gr
import pandas as pd
from pathlib import Path
from typing import Union

# Paths are resolved relative to this file rather than the working directory:
# BASE_DIR is the parent of the directory that contains this module
# (presumably the project root next to the main script — confirm), and the
# leaderboard CSV is expected at <BASE_DIR>/data/leaderboard.csv.
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_PATH = BASE_DIR / "data" / "leaderboard.csv"

# Highlighting constants: models whose category equals CATEGORY_TO_HIGHLIGHT
# are rendered in colour and prefixed with HIGHLIGHT_EMOJI in the table.
CATEGORY_TO_HIGHLIGHT = "Deep Research Agent"
HIGHLIGHT_EMOJI = "🚀"

# Maps the CSV's score column names to the abbreviated headers shown in the
# leaderboard table.
COLUMN_RENAME_MAP = {
    'overall_score': 'overall',
    'comprehensiveness': 'comp.',
    'insight': 'insight',
    'instruction_following': 'inst.',
    'readability': 'read.',
    'citation_accuracy': 'c.acc.',
    'effective_citations': 'eff.c.'
}

# Category name -> list of model identifiers belonging to that category.
# Models that appear in no list are labelled "Others" by load_leaderboard().
# The keys are also used as the choices of the category filter in the UI.
MODEL_CATEGORIES = {
    "Deep Research Agent": [
        "gemini-2.5-pro-deepresearch",
        "grok-deeper-search",
        "openai-deepresearch", 
        "perplexity-Research",
        "doubao-deepresearch",
        "kimi-researcher",
        "claude-research",
        "langchain-open-deep-research"
    ],
    "LLM with Search": [
        "claude-3-7-sonnet-with-search",
        "claude-3-5-sonnet-with-search",
        "sonar-reasoning-pro",
        "sonar-reasoning",
        "sonar-pro",
        "sonar",
        "gemini-2.5-pro-preview-05-06",
        "gpt-4o-search-preview",
        "gpt-4.1",
        "gemini-2.5-flash-preview-04-17",
        "gpt-4o-mini-search-preview",
        "nvidia-aiq-research-assistant",
        "gpt-4.1-mini"
    ]
}

# Model identifier -> URL used to turn the model name into a hyperlink.
# An empty string means "no link yet"; a concrete URL can be filled in later.
MODEL_LINKS = {
    # Deep Research Agent
    # NOTE(review): "nvidia-aiq-research-assistant" is grouped here, but
    # MODEL_CATEGORIES lists it under "LLM with Search" — confirm which is intended.
    "gemini-2.5-pro-deepresearch": "https://gemini.google/overview/deep-research/",
    "grok-deeper-search": "https://x.ai/news/grok-3",
    "openai-deepresearch": "https://openai.com/zh-Hans-CN/index/introducing-deep-research/",
    "perplexity-Research": "https://www.perplexity.ai/hub/blog/introducing-perplexity-deep-research",
    "doubao-deepresearch": "https://www.doubao.com/chat/",
    "kimi-researcher": "https://moonshotai.github.io/Kimi-Researcher/",
    "claude-research": "https://www.anthropic.com/news/research",
    "nvidia-aiq-research-assistant": "https://github.com/NVIDIA-AI-Blueprints/aiq-research-assistant",
    "langchain-open-deep-research": "https://github.com/langchain-ai/open_deep_research",
    
    # LLM with Search
    "claude-3-7-sonnet-with-search": "",
    "claude-3-5-sonnet-with-search": "",
    "sonar-reasoning-pro": "",
    "sonar-reasoning": "",
    "sonar-pro": "",
    "sonar": "",
    "gemini-2.5-pro-preview-05-06": "",
    "gpt-4o-search-preview": "",
    "gpt-4.1": "",
    "gemini-2.5-flash-preview-04-17": "",
    "gpt-4o-mini-search-preview": "",
    "gpt-4.1-mini": ""
}

# Model identifier -> license label shown in the "license_type" column.
# Models missing from this mapping are reported as "Unknown" by load_leaderboard().
MODEL_LICENSE_TYPE = {
    # Deep Research Agent
    # NOTE(review): "nvidia-aiq-research-assistant" is grouped here, but
    # MODEL_CATEGORIES lists it under "LLM with Search" — confirm which is intended.
    "gemini-2.5-pro-deepresearch": "Proprietary",
    "grok-deeper-search": "Proprietary", 
    "openai-deepresearch": "Proprietary",
    "perplexity-Research": "Proprietary",
    "doubao-deepresearch": "Proprietary",
    "kimi-researcher": "Proprietary",
    "claude-research": "Proprietary",
    "nvidia-aiq-research-assistant": "Apache 2.0",
    "langchain-open-deep-research": "MIT",  # TODO: the exact license still needs to be confirmed
    
    # LLM with Search
    "claude-3-7-sonnet-with-search": "Proprietary",
    "claude-3-5-sonnet-with-search": "Proprietary",
    "sonar-reasoning-pro": "Proprietary",
    "sonar-reasoning": "Proprietary", 
    "sonar-pro": "Proprietary",
    "sonar": "Proprietary",
    "gemini-2.5-pro-preview-05-06": "Proprietary",
    "gpt-4o-search-preview": "Proprietary",
    "gpt-4.1": "Proprietary",
    "gemini-2.5-flash-preview-04-17": "Proprietary",
    "gpt-4o-mini-search-preview": "Proprietary",
    "gpt-4.1-mini": "Proprietary"
}

def load_leaderboard() -> pd.DataFrame:
    """Read the leaderboard CSV and tag each row with a category and a license.

    Returns:
        The CSV contents with whitespace-stripped column names plus two derived
        columns: ``category`` (the first ``MODEL_CATEGORIES`` entry that lists
        the model, otherwise ``"Others"``) and ``license_type`` (looked up in
        ``MODEL_LICENSE_TYPE``, otherwise ``"Unknown"``).

    Raises:
        FileNotFoundError: If ``DATA_PATH`` does not exist.
    """
    if not DATA_PATH.exists():
        missing_msg = (
            f"Leaderboard file not found: {DATA_PATH}.\n"
            "→ 先运行 rank_leaderboard.py 生成 data/leaderboard.csv"
        )
        raise FileNotFoundError(missing_msg)

    board = pd.read_csv(DATA_PATH)
    # Header cells may carry stray whitespace; normalise them before lookups.
    board.columns = [header.strip() for header in board.columns]

    def category_of(name):
        # The first category whose member list contains the model wins.
        return next(
            (label for label, members in MODEL_CATEGORIES.items() if name in members),
            "Others",
        )

    board['category'] = board['model'].apply(category_of)
    board['license_type'] = board['model'].apply(
        lambda name: MODEL_LICENSE_TYPE.get(name, "Unknown")
    )
    return board

def make_ranked(df: pd.DataFrame) -> pd.DataFrame:
    """Turn raw leaderboard rows into the ranked, display-ready table.

    The rows are ordered by ``overall_score`` (highest first), a 1-based
    ``Rank`` column is prepended, score columns are renamed through
    ``COLUMN_RENAME_MAP`` and rounded to two decimals, and the ``model`` column
    is replaced by an HTML snippet (a link when ``MODEL_LINKS`` has a URL, a
    plain ``<span>`` otherwise; highlighted when the row's category equals
    ``CATEGORY_TO_HIGHLIGHT``).

    Args:
        df: Leaderboard rows containing at least the ``overall_score``,
            ``model`` and ``category`` columns. May be empty.

    Returns:
        A new DataFrame; ``df`` itself is not modified.

    Raises:
        KeyError: If a required column is missing.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from html import escape

    # Compare scores numerically even if the column was parsed as text (for
    # example because some rows hold the "-" placeholder); plain string sorting
    # would be lexicographic and mixed str/float values would not sort at all.
    ranked = df.sort_values(
        by='overall_score',
        ascending=False,
        key=lambda scores: pd.to_numeric(scores, errors='coerce'),
    ).reset_index(drop=True)
    ranked.insert(0, "Rank", range(1, len(ranked) + 1))

    # Switch to the abbreviated column headers.
    ranked = ranked.rename(columns=COLUMN_RENAME_MAP)

    def _format_score(value):
        """Round a numeric score to two decimals; leave placeholders untouched."""
        if value == "-" or pd.isna(value):
            return value
        try:
            return round(float(value), 2)
        except (TypeError, ValueError):
            # Any other non-numeric placeholder is shown as-is instead of crashing.
            return value

    numeric_columns = ['overall', 'comp.', 'insight', 'inst.', 'read.', 'c.acc.', 'eff.c.']
    for col in numeric_columns:
        if col in ranked.columns:
            ranked[col] = ranked[col].apply(_format_score)

    def _format_model_cell(model_name, category):
        """Return the HTML shown in the model column for one row."""
        link = MODEL_LINKS.get(model_name, "")
        # Names and URLs end up inside HTML markup, so escape them.
        safe_name = escape(str(model_name))

        if category == CATEGORY_TO_HIGHLIGHT:
            display_name = f'<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} {safe_name}</span>'
        else:
            display_name = safe_name

        if link and link.strip():
            return (
                f'<a href="{escape(link)}" target="_blank" '
                f'style="text-decoration: none;">{display_name}</a>'
            )
        # No link yet: keep a hook element so one can be attached later.
        return f'<span class="model-name" data-model="{safe_name}">{display_name}</span>'

    # Build the column from a plain list: DataFrame.apply(axis=1) returns a
    # DataFrame (not a Series) for an empty frame, which made the assignment
    # fail whenever a filter matched no rows.
    ranked['model'] = [
        _format_model_cell(name, category)
        for name, category in zip(ranked['model'], ranked['category'])
    ]

    return ranked

def filter_data(search_text: str, selected_categories: list) -> pd.DataFrame:
    """Reload the leaderboard and return the ranked rows matching the filters.

    Args:
        search_text: Case-insensitive substring to look for in the model name.
            Empty, whitespace-only or ``None`` disables the name filter.
        selected_categories: Categories to keep. An empty (or ``None``)
            selection disables the category filter.

    Returns:
        The filtered rows formatted by ``make_ranked``.

    Raises:
        FileNotFoundError: Propagated from ``load_leaderboard`` when the CSV
            is missing.
    """
    df = load_leaderboard()

    query = (search_text or "").strip()
    if query:
        # Match the text literally: with the default regex=True, input such as
        # "(" or "[" raises re.error and "." in "gpt-4.1" matches any character.
        df = df[df['model'].str.contains(query, case=False, na=False, regex=False)]

    if selected_categories:
        df = df[df['category'].isin(selected_categories)]

    return make_ranked(df)

def create_leaderboard_tab():
    """Assemble the "🏆Leaderboard" tab: search box, category filter, table, legend.

    The table is filled once at build time from ``load_leaderboard()`` and is
    re-filtered through ``filter_data`` whenever the search text or the
    category selection changes.

    Returns:
        The search ``gr.Textbox`` component.
    """
    with gr.Tab("🏆Leaderboard"):
        with gr.Row():
            with gr.Column(scale=1):
                search_box = gr.Textbox(
                    value="",
                    placeholder="Entering model name to search...",
                    label="Model Search",
                )
            with gr.Column(scale=2):
                # All known categories are offered and pre-selected.
                category_checkboxes = gr.CheckboxGroup(
                    choices=list(MODEL_CATEGORIES),
                    value=list(MODEL_CATEGORIES),
                    label="Model Categories",
                )

        # Unfiltered ranking shown when the tab first renders.
        initial_df = make_ranked(load_leaderboard())

        # Every column is rendered as plain text except "model", whose cells
        # carry HTML markup.
        datatypes = ["str" for _ in initial_df.columns]
        datatypes[initial_df.columns.get_loc('model')] = "html"

        table = gr.Dataframe(
            value=initial_df,
            datatype=datatypes,
            elem_id="leaderboard_table",
            show_label=False,
            interactive=False,  # read-only table
            wrap=False,  # keep each cell on a single line
            max_height=600,
            # One width per column; the model column gets 350px.
            column_widths=["80px", "350px", "100px", "100px", "100px", "100px", "100px", "100px", "100px", "200px", "150px"],
        )

        def update_display(search_text, selected_categories):
            return filter_data(search_text, selected_categories)

        # Refresh the table when either the search text or the category
        # selection changes.
        for control in (search_box, category_checkboxes):
            control.change(
                fn=update_display,
                inputs=[search_box, category_checkboxes],
                outputs=table,
            )

        # Legend explaining the columns, shown below the table.
        with gr.Row():
            gr.Markdown(f"""
            ### 📊 Column Descriptions
            - **Rank**: Model ranking based on overall score
            - **model**: Model name (<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} = {CATEGORY_TO_HIGHLIGHT}</span>)
            - **overall**: Overall Score (weighted average of all metrics)
            - **comp.**: Comprehensiveness - How thorough and complete the research is
            - **insight**: Insight Quality - Depth and value of analysis
            - **inst.**: Instruction Following - Adherence to user instructions
            - **read.**: Readability - Clarity and organization of content
            - **c.acc.**: Citation Accuracy - Correctness of references
            - **eff.c.**: Effective Citations - Relevance and quality of sources
            - **category**: Model category
            - **license_type**: The software license type of the model/service
            
            💡 **Tip**: Model names are clickable when links are available. Visit the GitHub repositories for more details!
            """)

    return search_box