File size: 740 Bytes
8474f02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from .base_benchmark import BaseBenchmark, BenchmarkResult
from .mmlu_benchmark import MMLUBenchmark
from .gsm8k_benchmark import GSM8KBenchmark
from .humaneval_benchmark import HumanEvalBenchmark
from .gpqa_benchmark import GPQABenchmark
from .math_benchmark import MATHBenchmark

# Lookup table from benchmark name to the benchmark class imported above.
# Keys are lowercase because get_benchmark() lowercases the requested name
# before consulting this table.
BENCHMARK_REGISTRY = {
    "mmlu": MMLUBenchmark,
    "gsm8k": GSM8KBenchmark,
    "humaneval": HumanEvalBenchmark,
    "gpqa": GPQABenchmark,
    "math": MATHBenchmark,
}

def get_benchmark(name: str) -> BaseBenchmark:
    """Create a new benchmark instance for the given registry name.

    The lookup ignores letter case and surrounding whitespace, so
    ``"MMLU"`` and ``" mmlu "`` both resolve to the ``"mmlu"`` entry.

    Args:
        name: Key of the benchmark in ``BENCHMARK_REGISTRY``.

    Returns:
        A new instance of the registered class, constructed with no
        arguments.

    Raises:
        ValueError: If ``name`` does not match any registered benchmark.
    """
    # Normalize once so the membership test and the lookup cannot disagree.
    key = name.strip().lower()
    try:
        # Guard only the lookup: a KeyError raised inside a benchmark
        # constructor must not be misreported as an unknown name.
        benchmark_cls = BENCHMARK_REGISTRY[key]
    except KeyError:
        raise ValueError(
            f"Unknown benchmark: {name}. Available: {list(BENCHMARK_REGISTRY.keys())}"
        ) from None
    return benchmark_cls()