File size: 6,109 Bytes
927e909
 
 
 
 
 
 
 
 
 
1d11ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
927e909
 
1d11ffb
 
 
927e909
1d11ffb
 
927e909
1d11ffb
927e909
1d11ffb
 
 
 
 
927e909
1d11ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
927e909
1d11ffb
927e909
1d11ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
927e909
 
 
 
 
 
 
 
1d11ffb
 
927e909
1d11ffb
 
927e909
 
 
1d11ffb
927e909
1d11ffb
 
 
927e909
1d11ffb
927e909
 
1d11ffb
927e909
 
 
 
1d11ffb
927e909
 
 
 
 
1d11ffb
 
927e909
 
 
 
 
1d11ffb
 
 
 
 
927e909
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import os
import csv
from pathlib import Path
from collections import defaultdict


def parse_race_result(race_result_file):
    """Parse a race_result.txt file and return per-dimension scores.

    Relevant lines have the form ``<Label>: <fraction in [0, 1]>``; each
    fraction is scaled to a percentage (* 100).

    Args:
        race_result_file: Path or str pointing at the race_result.txt file.

    Returns:
        dict mapping 'comprehensiveness', 'insight', 'instruction_following',
        'readability' and 'overall_score' to percentage floats. A key is
        absent when its line is missing or its value is not numeric.
    """
    # File label -> result-dict key.
    key_map = {
        'Comprehensiveness': 'comprehensiveness',
        'Insight': 'insight',
        'Instruction Following': 'instruction_following',
        'Readability': 'readability',
        'Overall Score': 'overall_score',
    }
    scores = {}

    with open(race_result_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if ':' not in line:
                continue
            key, value = line.split(':', 1)
            key = key.strip()
            # Only convert values for known labels: the original converted
            # every "key: value" line to float first, so any non-numeric
            # line containing a colon aborted the whole parse.
            if key not in key_map:
                continue
            try:
                scores[key_map[key]] = float(value.strip()) * 100
            except ValueError:
                # Malformed numeric value; skip the line rather than crash.
                continue

    return scores


def parse_fact_result(fact_result_file):
    """Parse a fact_result.txt file and return citation metrics.

    Args:
        fact_result_file: pathlib.Path to the fact_result.txt file.

    Returns:
        dict with optional keys 'citation_accuracy' (a percentage, from
        ``valid_rate``) and 'effective_citations'. Empty when the file does
        not exist.

    Note:
        Both ``total_valid_citations`` and ``supported_per_task`` write
        'effective_citations'; whichever appears later in the file wins
        (preserved from the original implementation).
    """
    citation_scores = {}

    if not fact_result_file.exists():
        return citation_scores

    with open(fact_result_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if ':' not in line:
                continue
            key, value = line.split(':', 1)
            key = key.strip()
            try:
                value = float(value.strip())
            except ValueError:
                # Non-numeric value (e.g. a header line with a colon); skip
                # instead of aborting the whole parse as the original did.
                continue

            if key == 'valid_rate':
                # valid_rate is stored as a fraction; report a percentage.
                citation_scores['citation_accuracy'] = value * 100
            elif key in ('total_valid_citations', 'supported_per_task'):
                citation_scores['effective_citations'] = value

    return citation_scores


def process_model_data(model_dir):
    """Process one model directory and assemble its leaderboard row.

    Reads ``race_result.txt`` inside *model_dir* and, when available, the
    matching ``data/fact_results/<model>/fact_result.txt`` for citation
    metrics.

    Args:
        model_dir: pathlib.Path to the model's raw-results directory.

    Returns:
        dict with the model name, the five RACE scores (percentages) and the
        citation metrics (None when unavailable), or None when required data
        is missing or parsing fails.
    """
    model_name = model_dir.name
    race_result_file = model_dir / "race_result.txt"

    if not race_result_file.exists():
        print(f"警告: 模型 {model_name} 的文件夹中未找到 race_result.txt")
        return None

    print(f"正在处理模型: {model_name}")

    try:
        scores = parse_race_result(race_result_file)

        if not scores:
            print(f"  - 警告: 未能解析到有效分数")
            return None

        # Locate the companion fact_result.txt (citation metrics); the
        # fact_results tree mirrors raw_results by model name.
        project_root = Path(__file__).parent.parent
        fact_results_dir = project_root / "data" / "fact_results"
        fact_result_file = fact_results_dir / model_name / "fact_result.txt"

        citation_scores = parse_fact_result(fact_result_file)

        if citation_scores:
            # BUGFIX: formatting the 'N/A' fallback with ':.2f' raised
            # ValueError, which the except below swallowed — the model was
            # silently dropped. Format each metric only when present.
            accuracy = citation_scores.get('citation_accuracy')
            accuracy_text = f"{accuracy:.2f}%" if accuracy is not None else "N/A"
            citations = citation_scores.get('effective_citations', 'N/A')
            print(f"  - 总分: {scores['overall_score']:.2f}, 引用准确率: {accuracy_text}, 有效引用数: {citations}")
        else:
            print(f"  - 总分: {scores['overall_score']:.2f}, 引用数据: 未找到")

        result = {
            'model': model_name,
            'overall_score': scores['overall_score'],
            'comprehensiveness': scores['comprehensiveness'],
            'insight': scores['insight'],
            'instruction_following': scores['instruction_following'],
            'readability': scores['readability'],
            'citation_accuracy': citation_scores.get('citation_accuracy'),
            'effective_citations': citation_scores.get('effective_citations'),
        }

        return result

    except Exception as e:
        # Broad catch is deliberate: a single broken model folder must not
        # abort the whole leaderboard run.
        print(f"  - 错误: 处理文件时出错: {e}")
        return None


def rank_leaderboard():
    """Build the leaderboard CSV from every model folder under data/raw_results.

    Rows are sorted by ``overall_score`` descending and written to
    ``data/leaderboard.csv``; missing citation metrics are written as "-".
    """
    project_root = Path(__file__).parent.parent
    input_dir = project_root / "data" / "raw_results"
    output_file = project_root / "data" / "leaderboard.csv"

    # Guard: iterdir() raises FileNotFoundError when the directory is absent.
    if not input_dir.is_dir():
        print("未找到任何模型文件夹")
        return

    model_dirs = [d for d in input_dir.iterdir() if d.is_dir()]
    print(f"找到 {len(model_dirs)} 个模型文件夹")

    if not model_dirs:
        print("未找到任何模型文件夹")
        return

    model_results = []
    for model_dir in model_dirs:
        try:
            result = process_model_data(model_dir)
            if result:
                model_results.append(result)
        except Exception as e:
            # One bad folder must not abort the whole run.
            print(f"处理文件夹 {model_dir.name} 时出错: {e}")
            continue

    # Rank by overall score, best first.
    model_results.sort(key=lambda x: x['overall_score'], reverse=True)

    fieldnames = [
        'model', 'overall_score', 'comprehensiveness', 'insight',
        'instruction_following', 'readability', 'citation_accuracy',
        'effective_citations',
    ]

    def fmt(value):
        # Two-decimal formatting; "-" marks a missing (None) metric.
        return f"{value:.2f}" if value is not None else "-"

    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for result in model_results:
            row = {'model': result['model']}
            for field in fieldnames[1:]:
                row[field] = fmt(result[field])
            writer.writerow(row)

    print(f"\n排行榜已保存到: {output_file}")
    print(f"共处理了 {len(model_results)} 个模型")


# Script entry point: build the leaderboard CSV, then report completion.
if __name__ == "__main__":
    rank_leaderboard()
    print("排行榜计算完成!")