# Hugging Face Space: Ayanami0730/DeepResearch-Leaderboard
# (status: Running; file size: 9,704 bytes)

from __future__ import annotations
import gradio as gr
import pandas as pd
from pathlib import Path
from typing import Union

# Directory two levels above this file; data paths are anchored here
# (per the original note, an adjustment relative to the main script).
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_PATH = BASE_DIR.joinpath("data", "leaderboard.csv")

# Constants used to mark rows of one category in the table.
CATEGORY_TO_HIGHLIGHT = "Deep Research Agent"
HIGHLIGHT_EMOJI = "🚀"

# Maps the metric column names found in the leaderboard data to the
# abbreviated headers shown in the table.
COLUMN_RENAME_MAP = {
    "overall_score": "overall",
    "comprehensiveness": "comp.",
    "insight": "insight",
    "instruction_following": "inst.",
    "readability": "read.",
    "citation_accuracy": "c.acc.",
    "effective_citations": "eff.c.",
}

# Category name -> models belonging to it. Models listed in no category
# are labelled "Others" when the leaderboard is loaded.
# NOTE(review): "nvidia-aiq-research-assistant" is filed under
# "LLM with Search" here, while MODEL_LINKS and MODEL_LICENSE_TYPE list it
# in their "Deep Research Agent" comment group -- confirm which is intended.
MODEL_CATEGORIES = {
    "Deep Research Agent": [
        "gemini-2.5-pro-deepresearch",
        "grok-deeper-search",
        "openai-deepresearch",
        "perplexity-Research",
        "doubao-deepresearch",
        "kimi-researcher",
        "claude-research",
        "langchain-open-deep-research",
    ],
    "LLM with Search": [
        "claude-3-7-sonnet-with-search",
        "claude-3-5-sonnet-with-search",
        "sonar-reasoning-pro",
        "sonar-reasoning",
        "sonar-pro",
        "sonar",
        "gemini-2.5-pro-preview-05-06",
        "gpt-4o-search-preview",
        "gpt-4.1",
        "gemini-2.5-flash-preview-04-17",
        "gpt-4o-mini-search-preview",
        "nvidia-aiq-research-assistant",
        "gpt-4.1-mini",
    ],
}

# Model name -> homepage URL. An empty string means "no link yet": such
# models are rendered as plain, non-clickable names. Fill in URLs here as
# they become available.
MODEL_LINKS = {
    # Deep Research Agent
    "gemini-2.5-pro-deepresearch": "https://gemini.google/overview/deep-research/",
    "grok-deeper-search": "https://x.ai/news/grok-3",
    "openai-deepresearch": "https://openai.com/zh-Hans-CN/index/introducing-deep-research/",
    "perplexity-Research": "https://www.perplexity.ai/hub/blog/introducing-perplexity-deep-research",
    "doubao-deepresearch": "https://www.doubao.com/chat/",
    "kimi-researcher": "https://moonshotai.github.io/Kimi-Researcher/",
    "claude-research": "https://www.anthropic.com/news/research",
    "nvidia-aiq-research-assistant": "https://github.com/NVIDIA-AI-Blueprints/aiq-research-assistant",
    "langchain-open-deep-research": "https://github.com/langchain-ai/open_deep_research",

    # LLM with Search
    "claude-3-7-sonnet-with-search": "",
    "claude-3-5-sonnet-with-search": "",
    "sonar-reasoning-pro": "",
    "sonar-reasoning": "",
    "sonar-pro": "",
    "sonar": "",
    "gemini-2.5-pro-preview-05-06": "",
    "gpt-4o-search-preview": "",
    "gpt-4.1": "",
    "gemini-2.5-flash-preview-04-17": "",
    "gpt-4o-mini-search-preview": "",
    "gpt-4.1-mini": "",
}

# Model name -> license label shown in the table. Models missing from this
# map are shown as "Unknown".
MODEL_LICENSE_TYPE = {
    # Deep Research Agent
    "gemini-2.5-pro-deepresearch": "Proprietary",
    "grok-deeper-search": "Proprietary",
    "openai-deepresearch": "Proprietary",
    "perplexity-Research": "Proprietary",
    "doubao-deepresearch": "Proprietary",
    "kimi-researcher": "Proprietary",
    "claude-research": "Proprietary",
    "nvidia-aiq-research-assistant": "Apache 2.0",
    "langchain-open-deep-research": "MIT",  # TODO: confirm the actual license

    # LLM with Search
    "claude-3-7-sonnet-with-search": "Proprietary",
    "claude-3-5-sonnet-with-search": "Proprietary",
    "sonar-reasoning-pro": "Proprietary",
    "sonar-reasoning": "Proprietary",
    "sonar-pro": "Proprietary",
    "sonar": "Proprietary",
    "gemini-2.5-pro-preview-05-06": "Proprietary",
    "gpt-4o-search-preview": "Proprietary",
    "gpt-4.1": "Proprietary",
    "gemini-2.5-flash-preview-04-17": "Proprietary",
    "gpt-4o-mini-search-preview": "Proprietary",
    "gpt-4.1-mini": "Proprietary",
}

def load_leaderboard() -> pd.DataFrame:
    """Read the leaderboard CSV and tag each row with category and license.

    Column names are stripped of surrounding whitespace. Two columns are
    added, both derived from the ``model`` column:

    * ``category`` -- the first key of ``MODEL_CATEGORIES`` whose list
      contains the model name, or ``"Others"`` when none does.
    * ``license_type`` -- the ``MODEL_LICENSE_TYPE`` entry for the model,
      or ``"Unknown"`` when it has none.

    Returns:
        The loaded frame with the two extra columns.

    Raises:
        FileNotFoundError: If ``DATA_PATH`` does not exist.
    """
    if not DATA_PATH.exists():
        raise FileNotFoundError(
            f"Leaderboard file not found: {DATA_PATH}.\n"
            "→ 先运行 rank_leaderboard.py 生成 data/leaderboard.csv"
        )

    board = pd.read_csv(DATA_PATH)
    board.columns = [name.strip() for name in board.columns]

    def _category_of(model_name):
        # First matching category wins; unlisted models fall back to "Others".
        matches = (
            category
            for category, members in MODEL_CATEGORIES.items()
            if model_name in members
        )
        return next(matches, "Others")

    board['category'] = board['model'].apply(_category_of)
    board['license_type'] = board['model'].apply(
        lambda model_name: MODEL_LICENSE_TYPE.get(model_name, "Unknown")
    )
    return board

def make_ranked(df: pd.DataFrame) -> pd.DataFrame:
    """Sort the leaderboard by overall score and prepare it for display.

    Steps, in order:

    1. Sort by ``overall_score`` (descending) and prepend a 1-based
       ``Rank`` column.
    2. Rename the metric columns to their short headers via
       ``COLUMN_RENAME_MAP``.
    3. Round numeric metric values to two decimals; missing values and
       non-numeric placeholders such as ``"-"`` are left untouched.
    4. Replace the ``model`` column with an HTML snippet: models of
       ``CATEGORY_TO_HIGHLIGHT`` are coloured and prefixed with
       ``HIGHLIGHT_EMOJI``; models with a non-empty ``MODEL_LINKS`` entry
       become ``<a>`` links, all others a ``<span class="model-name">``.

    Args:
        df: Frame containing at least the ``model``, ``category`` and
            ``overall_score`` columns. It is not modified.

    Returns:
        A new, ranked and formatted frame. An empty input yields an empty
        frame with the same columns plus ``Rank``.
    """
    import html  # local import: only needed for escaping the model-cell markup

    ranked = df.sort_values(by='overall_score', ascending=False).reset_index(drop=True)
    ranked.insert(0, "Rank", range(1, len(ranked) + 1))

    # Switch to the abbreviated column headers.
    ranked = ranked.rename(columns=COLUMN_RENAME_MAP)

    def _round_score(value):
        # Keep missing values and the "-" placeholder exactly as they are.
        if pd.isna(value) or value == "-":
            return value
        try:
            return round(float(value), 2)
        except (TypeError, ValueError):
            # Any other non-numeric placeholder is shown unchanged rather
            # than crashing the whole table.
            return value

    # The metric columns are exactly the renamed ones, so derive the list
    # from the rename map instead of repeating it.
    for col in COLUMN_RENAME_MAP.values():
        if col in ranked.columns:
            ranked[col] = ranked[col].apply(_round_score)

    def _model_cell(model_name, category):
        # The cell is rendered as HTML, so escape everything that comes
        # from data before placing it in markup or attribute values.
        safe_name = html.escape(str(model_name))

        if category == CATEGORY_TO_HIGHLIGHT:
            display_name = f'<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} {safe_name}</span>'
        else:
            display_name = safe_name

        link = MODEL_LINKS.get(model_name, "")
        if link and link.strip():
            return (
                f'<a href="{html.escape(link)}" target="_blank" '
                f'style="text-decoration: none;">{display_name}</a>'
            )
        # No link yet: keep a hook (class + data attribute) for future use.
        return f'<span class="model-name" data-model="{safe_name}">{display_name}</span>'

    # Built with a comprehension rather than DataFrame.apply(axis=1): on an
    # empty frame (e.g. a search with no matches) row-wise apply can return
    # a DataFrame instead of a Series, which breaks the column assignment.
    ranked['model'] = [
        _model_cell(model_name, category)
        for model_name, category in zip(ranked['model'], ranked['category'])
    ]

    return ranked

def filter_data(search_text: str, selected_categories: list) -> pd.DataFrame:
    """Reload the leaderboard, apply the UI filters, and rank the result.

    Args:
        search_text: Case-insensitive substring to look for in the model
            name. Blank (or ``None``) disables the name filter.
        selected_categories: Category names to keep. An empty list
            disables the category filter, so every row is kept.

    Returns:
        The filtered frame, ranked and formatted by ``make_ranked``.

    Raises:
        FileNotFoundError: Propagated from ``load_leaderboard`` when the
            CSV is missing.
    """
    df = load_leaderboard()

    query = (search_text or "").strip()
    if query:
        # regex=False: the query is literal user input. With the default
        # regex matching, typing "(" or "[" raises re.error, and the "."
        # in "gpt-4.1" would match any character.
        df = df[df['model'].str.contains(query, case=False, na=False, regex=False)]

    if selected_categories:
        df = df[df['category'].isin(selected_categories)]

    return make_ranked(df)

def create_leaderboard_tab():
    """Build the "🏆Leaderboard" tab and wire up its filters.

    The tab contains a model-name search box, a category checkbox group
    (all categories ticked initially), the read-only leaderboard table and
    a legend explaining the columns. Changing either filter reloads and
    re-filters the data via ``filter_data`` and refreshes the table.

    Returns:
        The search ``gr.Textbox`` component.
    """
    with gr.Tab("🏆Leaderboard"):
        # Filter controls: search box (1/3 width) next to the category picker.
        with gr.Row():
            with gr.Column(scale=1):
                search_box = gr.Textbox(
                    label="Model Search",
                    placeholder="Entering model name to search...",
                    value="",
                )
            with gr.Column(scale=2):
                category_checkboxes = gr.CheckboxGroup(
                    label="Model Categories",
                    choices=list(MODEL_CATEGORIES),
                    value=list(MODEL_CATEGORIES),
                )

        # Unfiltered table shown on first render.
        initial_df = make_ranked(load_leaderboard())

        # Every column is plain text except "model", which carries HTML.
        model_position = initial_df.columns.get_loc('model')
        datatypes = ["str"] * len(initial_df.columns)
        datatypes[model_position] = "html"

        # Rank, model (wide), seven metric columns, category, license_type.
        widths = ["80px", "350px"] + ["100px"] * 7 + ["200px", "150px"]

        table = gr.Dataframe(
            value=initial_df,
            datatype=datatypes,
            max_height=600,
            show_label=False,
            elem_id="leaderboard_table",
            interactive=False,  # read-only
            wrap=False,
            column_widths=widths,
        )

        def update_display(search_text, selected_categories):
            return filter_data(search_text, selected_categories)

        # Both filters trigger the same refresh with the same inputs.
        filter_inputs = [search_box, category_checkboxes]
        for control in (search_box, category_checkboxes):
            control.change(
                fn=update_display,
                inputs=filter_inputs,
                outputs=table,
            )

        # Legend below the table.
        with gr.Row():
            gr.Markdown(f"""
            ### 📊 Column Descriptions
            - **Rank**: Model ranking based on overall score
            - **model**: Model name (<span style="color: #823AFF;">{HIGHLIGHT_EMOJI} = {CATEGORY_TO_HIGHLIGHT}</span>)
            - **overall**: Overall Score (weighted average of all metrics)
            - **comp.**: Comprehensiveness - How thorough and complete the research is
            - **insight**: Insight Quality - Depth and value of analysis
            - **inst.**: Instruction Following - Adherence to user instructions
            - **read.**: Readability - Clarity and organization of content
            - **c.acc.**: Citation Accuracy - Correctness of references
            - **eff.c.**: Effective Citations - Relevance and quality of sources
            - **category**: Model category
            - **license_type**: The software license type of the model/service
            
            💡 **Tip**: Model names are clickable when links are available. Visit the GitHub repositories for more details!
            """)

    return search_box