import { Benchmark } from "./types";

/**
 * Benchmark scores and pricing for Google (Gemini) models.
 *
 * Each entry carries the model's display name, its provider, an input and an
 * output price, the URL the figures were taken from, and a `benchmark` map of
 * score by benchmark key. Not every model reports every benchmark, so the set
 * of keys differs between entries.
 *
 * Models with context-length-tiered pricing appear once per tier: those
 * entries repeat the same scores and differ only in `model` and price.
 *
 * NOTE(review): prices are presumably USD per 1M tokens and scores are
 * presumably percentages — confirm against the `Benchmark` type in ./types.
 */
export const googleBenchmarks: Benchmark[] = [
  // --- Gemini 2.5 Pro (one entry per context-length pricing tier) ---
  {
    model: "Gemini 2.5 Pro (Thinking-enabled, <=200k context)",
    provider: "Google",
    inputPrice: 1.25,
    outputPrice: 10.0,
    source: "https://storage.googleapis.com/deepmind-media/gemini/gemini_v2_5_report.pdf",
    benchmark: {
      livecodebench_v6: 69.0,
      aider_polyglot: 82.2,
      swe_bench_verified: 67.2,
      gpqa_diamond: 86.4,
      aime_2025: 88.0,
      humanitys_last_exam: 21.6,
      simpleqa: 54.0,
      facts_grounding: 87.8,
      global_mmlu_lite: 89.2,
      mrcr_v2_avg_128k: 58.0,
      mrcr_v2_pointwise_1m: 16.4,
      mmmu: 82.0,
    },
  },
  {
    model: "Gemini 2.5 Pro (Thinking-enabled, >200k context)",
    provider: "Google",
    inputPrice: 2.5,
    outputPrice: 15.0,
    source: "https://storage.googleapis.com/deepmind-media/gemini/gemini_v2_5_report.pdf",
    benchmark: {
      livecodebench_v6: 69.0,
      aider_polyglot: 82.2,
      swe_bench_verified: 67.2,
      gpqa_diamond: 86.4,
      aime_2025: 88.0,
      humanitys_last_exam: 21.6,
      simpleqa: 54.0,
      facts_grounding: 87.8,
      global_mmlu_lite: 89.2,
      mrcr_v2_avg_128k: 58.0,
      mrcr_v2_pointwise_1m: 16.4,
      mmmu: 82.0,
    },
  },

  // --- Gemini 2.5 Pro dated pre-release snapshots ---
  // These two entries carry no `facts_grounding` score.
  // NOTE(review): their mrcr_v2_* scores are far higher than those of the
  // report-sourced 2.5 Pro entries above — confirm they measure the same
  // benchmark version.
  {
    model: "Gemini 2.5 Pro Experimental (03-25)",
    provider: "Google",
    inputPrice: 1.25,
    outputPrice: 10.0,
    source: "https://blog.google/products/gemini/gemini-2-5-pro-updates/",
    benchmark: {
      livecodebench_v6: 70.4,
      aider_polyglot: 74.0,
      swe_bench_verified: 63.8,
      gpqa_diamond: 84.0,
      aime_2025: 86.7,
      humanitys_last_exam: 18.8,
      simpleqa: 52.9,
      global_mmlu_lite: 89.8,
      mrcr_v2_avg_128k: 94.5,
      mrcr_v2_pointwise_1m: 83.1,
      mmmu: 81.7,
    },
  },
  {
    model: "Gemini 2.5 Pro Preview (05-06)",
    provider: "Google",
    inputPrice: 1.25,
    outputPrice: 10.0,
    source: "https://blog.google/products/gemini/gemini-2-5-pro-updates/",
    benchmark: {
      livecodebench_v6: 75.6,
      aider_polyglot: 76.5,
      swe_bench_verified: 63.2,
      gpqa_diamond: 83.0,
      aime_2025: 83.0,
      humanitys_last_exam: 17.8,
      simpleqa: 50.8,
      global_mmlu_lite: 88.6,
      mrcr_v2_avg_128k: 93.0,
      mrcr_v2_pointwise_1m: 82.9,
      mmmu: 79.6,
    },
  },

  // --- Gemini 2.5 Flash ---
  {
    model: "Gemini 2.5 Flash (Thinking-enabled, default)",
    provider: "Google",
    inputPrice: 0.30,
    // NOTE(review): differs from the Non-Thinking entry's output price (2.50)
    // although both cite the same source — confirm against current pricing.
    outputPrice: 3.5,
    source: "https://blog.google/products/gemini/gemini-2-5-model-family-expands/",
    benchmark: {
      livecodebench_v6: 55.4,
      aider_polyglot: 56.7,
      swe_bench_verified: 60.3,
      gpqa_diamond: 82.8,
      aime_2025: 72.0,
      humanitys_last_exam: 11.0,
      simpleqa: 26.9,
      facts_grounding: 85.3,
      global_mmlu_lite: 88.4,
      mrcr_v2_avg_128k: 54.3,
      mrcr_v2_pointwise_1m: 21.0,
      mmmu: 79.7,
    },
  },
  {
    model: "Gemini 2.5 Flash (Non-Thinking)",
    provider: "Google",
    inputPrice: 0.30,
    outputPrice: 2.50,
    source: "https://blog.google/products/gemini/gemini-2-5-model-family-expands/",
    benchmark: {
      humanitys_last_exam: 8.4,
      gpqa_diamond: 78.3,
      aime_2025: 61.6,
      livecodebench_v6: 41.1,
      aider_polyglot: 44.0,
      swe_bench_verified: 50.0,
      simpleqa: 25.8,
      facts_grounding: 83.4,
      mmmu: 76.9,
      mrcr_v2_avg_128k: 34.1,
      mrcr_v2_pointwise_1m: 16.8,
      global_mmlu_lite: 85.8,
    },
  },

  // --- Gemini 2.5 Flash-Lite ---
  {
    model: "Gemini 2.5 Flash-Lite (Non-Thinking)",
    provider: "Google",
    inputPrice: 0.10,
    outputPrice: 0.40,
    source: "https://blog.google/products/gemini/gemini-2-5-model-family-expands/",
    benchmark: {
      humanitys_last_exam: 5.1,
      gpqa_diamond: 64.6,
      aime_2025: 49.8,
      livecodebench_v6: 33.7,
      aider_polyglot: 26.7,
      swe_bench_verified: 42.6,
      simpleqa: 10.7,
      facts_grounding: 84.1,
      mmmu: 72.9,
      mrcr_v2_avg_128k: 16.6,
      mrcr_v2_pointwise_1m: 4.1,
      global_mmlu_lite: 81.1,
    },
  },
  {
    model: "Gemini 2.5 Flash-Lite (Thinking)",
    provider: "Google",
    inputPrice: 0.10,
    outputPrice: 0.40,
    source: "https://blog.google/products/gemini/gemini-2-5-model-family-expands/",
    benchmark: {
      humanitys_last_exam: 6.9,
      gpqa_diamond: 66.7,
      aime_2025: 63.1,
      livecodebench_v6: 34.3,
      aider_polyglot: 27.1,
      swe_bench_verified: 44.9,
      simpleqa: 13.0,
      facts_grounding: 86.8,
      mmmu: 72.9,
      mrcr_v2_avg_128k: 30.6,
      mrcr_v2_pointwise_1m: 5.4,
      global_mmlu_lite: 84.5,
    },
  },

  // --- Gemini 2.0 Flash-Lite (no mrcr_v2_* or mmmu scores recorded) ---
  {
    model: "Gemini 2.0 Flash-Lite",
    provider: "Google",
    // NOTE(review): priced identically to "Gemini 2.0 Flash" further down —
    // confirm the Lite tier is not meant to be cheaper.
    inputPrice: 0.10,
    outputPrice: 0.40,
    source: "https://storage.googleapis.com/deepmind-media/gemini/gemini_v2_5_report.pdf",
    benchmark: {
      livecodebench_v6: 29.1,
      aider_polyglot: 10.5,
      swe_bench_verified: 23.1,
      gpqa_diamond: 50.5,
      aime_2025: 23.8,
      humanitys_last_exam: 4.6,
      simpleqa: 16.5,
      facts_grounding: 82.4,
      global_mmlu_lite: 78.0,
    },
  },

  // --- Gemini Diffusion (reports a different, code-heavy benchmark set) ---
  {
    model: "Gemini Diffusion",
    provider: "Google",
    // NOTE(review): 0 presumably stands for "no published price" — confirm how
    // consumers of this list treat a zero price.
    inputPrice: 0,
    outputPrice: 0,
    source: "https://deepmind.google/models/gemini-diffusion/",
    benchmark: {
      livecodebench_v6: 30.9,
      bigcodebench: 45.4,
      lbpp_v2: 56.8,
      swe_bench_verified: 22.9,
      humaneval: 89.6,
      mbpp: 76.0,
      gpqa_diamond: 40.4,
      aime_2025: 23.3,
      bigbench_extra_hard: 15.0,
      global_mmlu_lite: 69.1,
    },
  },

  // --- Gemini 2.0 Flash (no aider_polyglot or swe_bench_verified recorded) ---
  {
    model: "Gemini 2.0 Flash",
    provider: "Google",
    inputPrice: 0.1,
    outputPrice: 0.4,
    source: "https://storage.googleapis.com/deepmind-media/gemini/gemini_v2_5_report.pdf",
    benchmark: {
      aime_2025: 29.7,
      gpqa_diamond: 65.2,
      simpleqa: 29.9,
      global_mmlu_lite: 83.4,
      livecodebench_v6: 29.1,
      mmmu: 69.3,
      facts_grounding: 84.6,
      humanitys_last_exam: 5.1,
      mrcr_v2_avg_128k: 19.0,
      mrcr_v2_pointwise_1m: 5.3,
    },
  },

  // --- Gemini 1.5 Pro (one entry per context-length pricing tier) ---
  {
    model: "Gemini 1.5 Pro (<=128k context)",
    provider: "Google",
    inputPrice: 1.25,
    outputPrice: 5.00,
    source: "https://storage.googleapis.com/deepmind-media/gemini/gemini_v2_5_report.pdf",
    benchmark: {
      livecodebench_v6: 29.7,
      aider_polyglot: 16.9,
      swe_bench_verified: 34.2,
      gpqa_diamond: 58.1,
      aime_2025: 17.5,
      humanitys_last_exam: 4.6,
      simpleqa: 24.9,
      facts_grounding: 80.0,
      global_mmlu_lite: 80.8,
      mrcr_v2_avg_128k: 26.2,
      mrcr_v2_pointwise_1m: 12.1,
      mmmu: 67.7,
    },
  },
  {
    model: "Gemini 1.5 Pro (>128k context)",
    provider: "Google",
    inputPrice: 2.50,
    outputPrice: 10.00,
    source: "https://storage.googleapis.com/deepmind-media/gemini/gemini_v2_5_report.pdf",
    benchmark: {
      livecodebench_v6: 29.7,
      aider_polyglot: 16.9,
      swe_bench_verified: 34.2,
      gpqa_diamond: 58.1,
      aime_2025: 17.5,
      humanitys_last_exam: 4.6,
      simpleqa: 24.9,
      facts_grounding: 80.0,
      global_mmlu_lite: 80.8,
      mrcr_v2_avg_128k: 26.2,
      mrcr_v2_pointwise_1m: 12.1,
      mmmu: 67.7,
    },
  },

  // --- Gemini 1.5 Flash (one entry per context-length pricing tier) ---
  // No humanitys_last_exam score is recorded for these entries.
  {
    model: "Gemini 1.5 Flash (<=128k context)",
    provider: "Google",
    inputPrice: 0.075,
    outputPrice: 0.30,
    source: "https://storage.googleapis.com/deepmind-media/gemini/gemini_v2_5_report.pdf",
    benchmark: {
      livecodebench_v6: 30.3,
      aider_polyglot: 2.8,
      swe_bench_verified: 19.7,
      gpqa_diamond: 50.0,
      aime_2025: 14.7,
      simpleqa: 8.6,
      facts_grounding: 82.9,
      global_mmlu_lite: 72.5,
      mrcr_v2_avg_128k: 18.4,
      mrcr_v2_pointwise_1m: 10.2,
      mmmu: 58.3,
    },
  },
  {
    model: "Gemini 1.5 Flash (>128k context)",
    provider: "Google",
    inputPrice: 0.15,
    outputPrice: 0.60,
    source: "https://storage.googleapis.com/deepmind-media/gemini/gemini_v2_5_report.pdf",
    benchmark: {
      livecodebench_v6: 30.3,
      aider_polyglot: 2.8,
      swe_bench_verified: 19.7,
      gpqa_diamond: 50.0,
      aime_2025: 14.7,
      simpleqa: 8.6,
      facts_grounding: 82.9,
      global_mmlu_lite: 72.5,
      mrcr_v2_avg_128k: 18.4,
      mrcr_v2_pointwise_1m: 10.2,
      mmmu: 58.3,
    },
  },
];
|