File size: 6,109 Bytes
927e909
 
 
 
 
 
 
 
 
 
1d11ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
927e909
 
1d11ffb
 
 
927e909
1d11ffb
 
927e909
1d11ffb
927e909
1d11ffb
 
 
 
 
927e909
1d11ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
927e909
1d11ffb
927e909
1d11ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
927e909
 
 
 
 
 
 
 
1d11ffb
 
927e909
1d11ffb
 
927e909
 
 
1d11ffb
927e909
1d11ffb
 
 
927e909
1d11ffb
927e909
 
1d11ffb
927e909
 
 
 
1d11ffb
927e909
 
 
 
 
1d11ffb
 
927e909
 
 
 
 
1d11ffb
 
 
 
 
927e909
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import os
import csv
from pathlib import Path
from collections import defaultdict


def parse_race_result(race_result_file):
    """Parse a race_result.txt file and return per-dimension scores.

    Relevant lines have the form ``<Label>: <fraction in [0, 1]>``; each
    fraction is scaled to a percentage (* 100).

    Args:
        race_result_file: Path or str pointing at the race_result.txt file.

    Returns:
        dict mapping 'comprehensiveness', 'insight', 'instruction_following',
        'readability' and 'overall_score' to percentage floats. A key is
        absent when its line is missing or its value is not numeric.
    """
    # File label -> result-dict key.
    key_map = {
        'Comprehensiveness': 'comprehensiveness',
        'Insight': 'insight',
        'Instruction Following': 'instruction_following',
        'Readability': 'readability',
        'Overall Score': 'overall_score',
    }
    scores = {}

    with open(race_result_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if ':' not in line:
                continue
            key, value = line.split(':', 1)
            key = key.strip()
            # Only convert values for known labels: the original converted
            # every "key: value" line to float first, so any non-numeric
            # line containing a colon aborted the whole parse.
            if key not in key_map:
                continue
            try:
                scores[key_map[key]] = float(value.strip()) * 100
            except ValueError:
                # Malformed numeric value; skip the line rather than crash.
                continue

    return scores


def parse_fact_result(fact_result_file):
    """Parse a fact_result.txt file and return citation metrics.

    Args:
        fact_result_file: pathlib.Path to the fact_result.txt file.

    Returns:
        dict with optional keys 'citation_accuracy' (a percentage, from
        ``valid_rate``) and 'effective_citations'. Empty when the file does
        not exist.

    Note:
        Both ``total_valid_citations`` and ``supported_per_task`` write
        'effective_citations'; whichever appears later in the file wins
        (preserved from the original implementation).
    """
    citation_scores = {}

    if not fact_result_file.exists():
        return citation_scores

    with open(fact_result_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if ':' not in line:
                continue
            key, value = line.split(':', 1)
            key = key.strip()
            try:
                value = float(value.strip())
            except ValueError:
                # Non-numeric value (e.g. a header line with a colon); skip
                # instead of aborting the whole parse as the original did.
                continue

            if key == 'valid_rate':
                # valid_rate is stored as a fraction; report a percentage.
                citation_scores['citation_accuracy'] = value * 100
            elif key in ('total_valid_citations', 'supported_per_task'):
                citation_scores['effective_citations'] = value

    return citation_scores


def process_model_data(model_dir):
    """Process one model directory and assemble its leaderboard row.

    Reads ``race_result.txt`` inside *model_dir* and, when available, the
    matching ``data/fact_results/<model>/fact_result.txt`` for citation
    metrics.

    Args:
        model_dir: pathlib.Path to the model's raw-results directory.

    Returns:
        dict with the model name, the five RACE scores (percentages) and the
        citation metrics (None when unavailable), or None when required data
        is missing or parsing fails.
    """
    model_name = model_dir.name
    race_result_file = model_dir / "race_result.txt"

    if not race_result_file.exists():
        print(f"警告: 模型 {model_name} 的文件夹中未找到 race_result.txt")
        return None

    print(f"正在处理模型: {model_name}")

    try:
        scores = parse_race_result(race_result_file)

        if not scores:
            print(f"  - 警告: 未能解析到有效分数")
            return None

        # Locate the companion fact_result.txt (citation metrics); the
        # fact_results tree mirrors raw_results by model name.
        project_root = Path(__file__).parent.parent
        fact_results_dir = project_root / "data" / "fact_results"
        fact_result_file = fact_results_dir / model_name / "fact_result.txt"

        citation_scores = parse_fact_result(fact_result_file)

        if citation_scores:
            # BUGFIX: formatting the 'N/A' fallback with ':.2f' raised
            # ValueError, which the except below swallowed — the model was
            # silently dropped. Format each metric only when present.
            accuracy = citation_scores.get('citation_accuracy')
            accuracy_text = f"{accuracy:.2f}%" if accuracy is not None else "N/A"
            citations = citation_scores.get('effective_citations', 'N/A')
            print(f"  - 总分: {scores['overall_score']:.2f}, 引用准确率: {accuracy_text}, 有效引用数: {citations}")
        else:
            print(f"  - 总分: {scores['overall_score']:.2f}, 引用数据: 未找到")

        result = {
            'model': model_name,
            'overall_score': scores['overall_score'],
            'comprehensiveness': scores['comprehensiveness'],
            'insight': scores['insight'],
            'instruction_following': scores['instruction_following'],
            'readability': scores['readability'],
            'citation_accuracy': citation_scores.get('citation_accuracy'),
            'effective_citations': citation_scores.get('effective_citations'),
        }

        return result

    except Exception as e:
        # Broad catch is deliberate: a single broken model folder must not
        # abort the whole leaderboard run.
        print(f"  - 错误: 处理文件时出错: {e}")
        return None


def rank_leaderboard():
    """Build the leaderboard CSV from every model folder under data/raw_results.

    Rows are sorted by ``overall_score`` descending and written to
    ``data/leaderboard.csv``; missing citation metrics are written as "-".
    """
    project_root = Path(__file__).parent.parent
    input_dir = project_root / "data" / "raw_results"
    output_file = project_root / "data" / "leaderboard.csv"

    # Guard: iterdir() raises FileNotFoundError when the directory is absent.
    if not input_dir.is_dir():
        print("未找到任何模型文件夹")
        return

    model_dirs = [d for d in input_dir.iterdir() if d.is_dir()]
    print(f"找到 {len(model_dirs)} 个模型文件夹")

    if not model_dirs:
        print("未找到任何模型文件夹")
        return

    model_results = []
    for model_dir in model_dirs:
        try:
            result = process_model_data(model_dir)
            if result:
                model_results.append(result)
        except Exception as e:
            # One bad folder must not abort the whole run.
            print(f"处理文件夹 {model_dir.name} 时出错: {e}")
            continue

    # Rank by overall score, best first.
    model_results.sort(key=lambda x: x['overall_score'], reverse=True)

    fieldnames = [
        'model', 'overall_score', 'comprehensiveness', 'insight',
        'instruction_following', 'readability', 'citation_accuracy',
        'effective_citations',
    ]

    def fmt(value):
        # Two-decimal formatting; "-" marks a missing (None) metric.
        return f"{value:.2f}" if value is not None else "-"

    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for result in model_results:
            row = {'model': result['model']}
            for field in fieldnames[1:]:
                row[field] = fmt(result[field])
            writer.writerow(row)

    print(f"\n排行榜已保存到: {output_file}")
    print(f"共处理了 {len(model_results)} 个模型")


# Script entry point: build the leaderboard CSV, then report completion.
if __name__ == "__main__":
    rank_leaderboard()
    print("排行榜计算完成!")