Spaces:
Running
Running
File size: 740 Bytes
8474f02 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
from .base_benchmark import BaseBenchmark, BenchmarkResult
from .mmlu_benchmark import MMLUBenchmark
from .gsm8k_benchmark import GSM8KBenchmark
from .humaneval_benchmark import HumanEvalBenchmark
from .gpqa_benchmark import GPQABenchmark
from .math_benchmark import MATHBenchmark
# Maps each lowercase benchmark name to the class that implements it.
# get_benchmark() lowercases the requested name before looking it up here,
# so every key in this table must itself be lowercase.
BENCHMARK_REGISTRY = {
    "mmlu": MMLUBenchmark,
    "gsm8k": GSM8KBenchmark,
    "humaneval": HumanEvalBenchmark,
    "gpqa": GPQABenchmark,
    "math": MATHBenchmark,
}
def get_benchmark(name: str) -> BaseBenchmark:
    """Create the benchmark registered under ``name``.

    The lookup is case-insensitive: ``name`` is lowercased before it is
    matched against the keys of ``BENCHMARK_REGISTRY``.

    Args:
        name: Registry key of the benchmark to create.

    Returns:
        A new instance of the registered class, constructed with no
        arguments.

    Raises:
        ValueError: If the lowercased name is not a key of
            ``BENCHMARK_REGISTRY``. The message echoes the name as given
            and lists the available keys.
    """
    # Normalize once so the membership test and the lookup use the same key.
    key = name.lower()
    if key in BENCHMARK_REGISTRY:
        return BENCHMARK_REGISTRY[key]()
    raise ValueError(f"Unknown benchmark: {name}. Available: {list(BENCHMARK_REGISTRY.keys())}")