// NOTE(review): lines above this file's import were web-scrape residue from a
// GitHub commit page (author avatar text / commit message / hash d7502bf).
// Preserved here as a comment so the file compiles.
import { Benchmark } from "./types";
/**
 * Benchmark scores and API pricing for DeepSeek models, transcribed from the
 * models' Hugging Face cards (see `source` on each entry).
 *
 * Conventions:
 * - `inputPrice` / `outputPrice`: USD per 1M tokens. Values match DeepSeek's
 *   published API rates at time of writing — TODO(review): confirm against the
 *   current DeepSeek pricing page; the original author marked them tentative.
 * - Scores are percentages (pass@1 / accuracy) unless noted otherwise.
 * - `lcb` = LiveCodeBench; `aime_24` = AIME 2024; `mmlu` holds MMLU-Redux
 *   where the model card only reports that variant.
 * - Metrics not present in the `Benchmark` type are kept as commented-out
 *   lines so the data isn't lost if the schema grows.
 */
export const deepseekBenchmarks: Benchmark[] = [
{
model: "DeepSeek-R1-0528",
provider: "DeepSeek",
inputPrice: 0.55, // USD / 1M input tokens — verify against current DeepSeek pricing
outputPrice: 2.19, // USD / 1M output tokens
benchmark: {
aime_24: 91.4,
aime_2025: 87.5,
gpqa_diamond: 81.0,
gpqa: 81.0, // duplicated from gpqa_diamond for schema compatibility; drop once callers use gpqa_diamond only
mmlu_pro: 85.0,
mmlu: 93.4, // model card reports MMLU-Redux; stored under `mmlu`
simpleqa: 27.8,
lcb: 73.3, // LiveCodeBench
aider_polyglot: 71.6,
swe_bench_verified: 57.6,
// Optional or less frequent benchmarks:
humanitys_last_exam: 17.7,
// Reported on the model card but not in BenchmarkMetric (kept for reference):
// codeforces_div1: 1930,
// frames: 83.0,
tau_bench_airline: 53.5,
tau_bench_retail: 63.9,
// bfcl_v3_multiturn: 37.0,
// cnmo_2024: 86.9,
// hmmt_2025: 79.4,
},
source: "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
},
{
model: "DeepSeek-V3-0324",
provider: "DeepSeek",
inputPrice: 0.27, // USD / 1M input tokens — verify against current DeepSeek pricing
outputPrice: 1.10, // USD / 1M output tokens
benchmark: {
// "From V3" = score inherited from the original DeepSeek-V3 card;
// "Updated in V3-0324" = re-reported on the 0324 card.
mmlu: 87.1, // From original DeepSeek-V3
mmlu_pro: 81.2, // Updated in V3-0324
gpqa: 68.4, // Updated in V3-0324
gpqa_diamond: 59.1, // From V3
aime_24: 59.4, // Updated in V3-0324
lcb: 49.2, // Updated LiveCodeBench
simpleqa: 24.9, // From V3
aider_polyglot: 49.6, // From V3
swe_bench_verified: 42.0 // From V3
},
source: "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324",
},
{
model: "DeepSeek-V3",
provider: "DeepSeek",
inputPrice: 0.27, // USD / 1M input tokens — verify against current DeepSeek pricing
outputPrice: 1.10, // USD / 1M output tokens
benchmark: {
mmlu: 87.1,
mmlu_pro: 64.4,
// mmlu_redux: 86.2, // Commented: not in BenchmarkMetric
gpqa_diamond: 59.1,
simpleqa: 24.9,
aime_24: 39.2,
lcb: 37.6, // LiveCodeBench (Pass@1)
aider_polyglot: 49.6,
swe_bench_verified: 42.0,
// Reported on the model card but not in the current schema (kept for reference):
// humanitys_last_exam: undefined,
// codeforces: 51.6,
// drop: 89.0,
// gsm8k: 89.3,
// math_em: 61.6,
// mgsm: 79.8,
// cmath: 90.7,
// cruxeval_i: 67.3,
// cruxeval_o: 69.8,
// triviaqa: 82.9,
// naturalquestions: 40.0,
// agieval: 79.6,
// hellaSwag: 88.9,
// piqa: 84.7,
// winogrande: 84.9,
},
source: "https://huggingface.co/deepseek-ai/DeepSeek-V3",
},
{
model: "DeepSeek-R1",
provider: "DeepSeek",
inputPrice: 0.55, // USD / 1M input tokens — verify against current DeepSeek pricing
outputPrice: 2.19, // USD / 1M output tokens
benchmark: {
mmlu: 90.8,
mmlu_pro: 84.0,
gpqa_diamond: 71.5,
simpleqa: 30.1,
lcb: 65.9, // LiveCodeBench (Pass@1-CoT)
swe_bench_verified: 49.2,
aider_polyglot: 53.3,
aime_24: 79.8,
// Not reported on the R1 card / not in the current schema:
// aime_2025: undefined, // not provided
// gpqa: undefined, // use gpqa_diamond
// egoschema: undefined,
// mmmu: undefined,
// loft: undefined,
// humanitys_last_exam: undefined, // optional
},
source: "https://huggingface.co/deepseek-ai/DeepSeek-R1",
},
];