|
import { Benchmark } from "./types"; |
|
|
|
/**
 * Published benchmark scores and list prices for Anthropic's Claude models.
 *
 * Each entry names the model, its provider, an input/output price pair, the
 * announcement page the figures were taken from (`source`), and a map of
 * benchmark key to score. Not every model reports every benchmark, so the
 * set of keys under `benchmark` varies per entry.
 *
 * NOTE(review): prices are presumably USD per million tokens and scores are
 * presumably percentages — confirm against the `Benchmark` type definition.
 */
export const anthropicBenchmarks: Benchmark[] = [
  {
    model: "Claude Opus 4",
    provider: "Anthropic",
    inputPrice: 15.0,
    outputPrice: 75.0,
    source: "https://www.anthropic.com/news/claude-4",
    benchmark: {
      swe_bench_verified: 72.5,
      gpqa_diamond: 79.6,
      aime_2025: 75.5,
      mmmlu: 88.8,
      mmmu: 76.5,
      tau_bench_retail: 81.4,
      tau_bench_airline: 59.6,
    },
  },
  {
    model: "Claude Sonnet 4",
    provider: "Anthropic",
    inputPrice: 3.0,
    outputPrice: 15.0,
    source: "https://www.anthropic.com/news/claude-4",
    benchmark: {
      swe_bench_verified: 72.7,
      gpqa_diamond: 75.4,
      aime_2025: 70.5,
      mmmlu: 86.5,
      mmmu: 74.4,
      tau_bench_retail: 80.5,
      tau_bench_airline: 60.0,
    },
  },
  {
    model: "Claude 3.7 Sonnet (Extended Thinking 64K)",
    provider: "Anthropic",
    inputPrice: 3.0,
    outputPrice: 15.0,
    source: "https://www.anthropic.com/news/claude-3-7-sonnet",
    benchmark: {
      gpqa_diamond: 78.2,
      // NOTE(review): the TAU-bench figures may belong to the run without
      // extended thinking — verify against the source table.
      tau_bench_retail: 81.2,
      tau_bench_airline: 58.4,
      mmmlu: 86.1,
      mmmu: 75.0,
      aime_24: 61.3,
    },
  },
  {
    model: "Claude 3.7 Sonnet (No Extended Thinking)",
    provider: "Anthropic",
    inputPrice: 3.0,
    outputPrice: 15.0,
    source: "https://www.anthropic.com/news/claude-3-7-sonnet",
    benchmark: {
      gpqa_diamond: 68.0,
      swe_bench_verified: 62.3,
      mmmlu: 83.2,
      mmmu: 71.8,
      // NOTE(review): this value looks high for the non-thinking run (the
      // announcement table appears to list 23.3) — verify before relying on it.
      aime_24: 51.7,
    },
  },
  {
    model: "Claude 3.5 Sonnet (New)",
    provider: "Anthropic",
    inputPrice: 3.0,
    outputPrice: 15.0,
    // The 3.7 Sonnet announcement is cited here as the source of the
    // 3.5 Sonnet (New) figures.
    source: "https://www.anthropic.com/news/claude-3-7-sonnet",
    benchmark: {
      gpqa_diamond: 65.0,
      swe_bench_verified: 49.0,
      tau_bench_retail: 71.5,
      tau_bench_airline: 48.8,
      mmmlu: 82.1,
      mmmu: 70.4,
      aime_24: 16.0,
    },
  },
  {
    model: "Claude 3.5 Haiku",
    provider: "Anthropic",
    // Haiku-tier list price (0.80 in / 4.00 out), not the Sonnet-tier 3 / 15.
    inputPrice: 0.8,
    outputPrice: 4.0,
    source: "https://www.anthropic.com/news/3-5-models-and-computer-use",
    benchmark: {
      gpqa_diamond: 41.6,
      // 3.5 Haiku's own SWE-bench Verified score; 49.0 is the
      // 3.5 Sonnet (New) result.
      swe_bench_verified: 40.6,
      tau_bench_retail: 51.0,
      tau_bench_airline: 22.8,
      humaneval: 88.1,
      // NOTE(review): 65.0 looks like the MMLU-Pro figure rather than
      // multilingual MMMLU — confirm the intended key.
      mmmlu: 65.0,
      aime_24: 5.3,
    },
  },
  {
    model: "Claude 3 Opus",
    provider: "Anthropic",
    inputPrice: 15.0,
    outputPrice: 75.0,
    source: "https://www.anthropic.com/news/claude-3-family",
    benchmark: {
      gpqa_diamond: 50.4,
      // NOTE(review): presumably the MMLU (5-shot) figure from the Claude 3
      // announcement rather than multilingual MMMLU — confirm the key.
      mmmlu: 86.8,
      mmmu: 59.4,
    },
  },
  {
    model: "Claude 3 Sonnet",
    provider: "Anthropic",
    inputPrice: 3.0,
    outputPrice: 15.0,
    source: "https://www.anthropic.com/news/claude-3-family",
    benchmark: {
      gpqa_diamond: 40.4,
      // NOTE(review): presumably MMLU (5-shot), not MMMLU — confirm the key.
      mmmlu: 79.0,
      mmmu: 53.1,
    },
  },
  {
    model: "Claude 3 Haiku",
    provider: "Anthropic",
    inputPrice: 0.25,
    outputPrice: 1.25,
    source: "https://www.anthropic.com/news/claude-3-family",
    benchmark: {
      gpqa_diamond: 33.3,
      // NOTE(review): presumably MMLU (5-shot), not MMMLU — confirm the key.
      mmmlu: 75.2,
      mmmu: 50.2,
    },
  },
];
|
|