davidpomerenke commited on
Commit
f88768f
·
verified ·
1 Parent(s): 95c4e14

Upload from GitHub Actions: Update evaluation results

Browse files
Files changed (3) hide show
  1. languages.json +3 -3
  2. models.json +196 -136
  3. results.json +2 -2
languages.json CHANGED
@@ -1291,7 +1291,7 @@
1291
  "family":"Indo-European",
1292
  "flores_path":"cat_Latn",
1293
  "fleurs_tag":"ca_es",
1294
- "commonvoice_hours":2883.0,
1295
  "commonvoice_locale":"ca",
1296
  "in_benchmark":true
1297
  },
@@ -2167,7 +2167,7 @@
2167
  "family":"Indo-European",
2168
  "flores_path":"glg_Latn",
2169
  "fleurs_tag":"gl_es",
2170
- "commonvoice_hours":164.0,
2171
  "commonvoice_locale":"gl",
2172
  "in_benchmark":true
2173
  },
@@ -3559,7 +3559,7 @@
3559
  "family":"Abkhaz-Adyge",
3560
  "flores_path":null,
3561
  "fleurs_tag":null,
3562
- "commonvoice_hours":106.0,
3563
  "commonvoice_locale":"kbd",
3564
  "in_benchmark":false
3565
  },
 
1291
  "family":"Indo-European",
1292
  "flores_path":"cat_Latn",
1293
  "fleurs_tag":"ca_es",
1294
+ "commonvoice_hours":2884.0,
1295
  "commonvoice_locale":"ca",
1296
  "in_benchmark":true
1297
  },
 
2167
  "family":"Indo-European",
2168
  "flores_path":"glg_Latn",
2169
  "fleurs_tag":"gl_es",
2170
+ "commonvoice_hours":166.0,
2171
  "commonvoice_locale":"gl",
2172
  "in_benchmark":true
2173
  },
 
3559
  "family":"Abkhaz-Adyge",
3560
  "flores_path":null,
3561
  "fleurs_tag":null,
3562
+ "commonvoice_hours":108.0,
3563
  "commonvoice_locale":"kbd",
3564
  "in_benchmark":false
3565
  },
models.json CHANGED
@@ -19,26 +19,6 @@
19
  "mgsm"
20
  ]
21
  },
22
- {
23
- "id":"anthropic\/claude-3-haiku",
24
- "name":"Claude 3 Haiku",
25
- "provider_name":"Anthropic",
26
- "cost":1.25,
27
- "hf_id":null,
28
- "size":null,
29
- "type":"closed-source",
30
- "license":null,
31
- "creation_date":1710288000000,
32
- "tasks":[
33
- "translation_from",
34
- "translation_to",
35
- "classification",
36
- "mmlu",
37
- "arc",
38
- "truthfulqa",
39
- "mgsm"
40
- ]
41
- },
42
  {
43
  "id":"anthropic\/claude-3.7-sonnet",
44
  "name":"Claude 3.7 Sonnet",
@@ -80,35 +60,15 @@
80
  ]
81
  },
82
  {
83
- "id":"arliai\/qwq-32b-arliai-rpr-v1",
84
- "name":"QwQ 32B RpR v1",
85
- "provider_name":"ArliAI",
86
- "cost":0.0,
87
- "hf_id":"ArliAI\/QwQ-32B-ArliAI-RpR-v1",
88
- "size":32763876352.0,
89
- "type":"open-source",
90
- "license":"Apache 2.0",
91
- "creation_date":1743984000000,
92
- "tasks":[
93
- "translation_from",
94
- "translation_to",
95
- "classification",
96
- "mmlu",
97
- "arc",
98
- "truthfulqa",
99
- "mgsm"
100
- ]
101
- },
102
- {
103
- "id":"cohere\/command-r-08-2024",
104
- "name":"Command R (08-2024)",
105
  "provider_name":"Cohere",
106
- "cost":0.6,
107
  "hf_id":null,
108
  "size":null,
109
  "type":"closed-source",
110
  "license":null,
111
- "creation_date":1724976000000,
112
  "tasks":[
113
  "translation_from",
114
  "translation_to",
@@ -179,46 +139,6 @@
179
  "mgsm"
180
  ]
181
  },
182
- {
183
- "id":"deepseek\/deepseek-r1",
184
- "name":"R1",
185
- "provider_name":"DeepSeek",
186
- "cost":0.0,
187
- "hf_id":"deepseek-ai\/DeepSeek-R1",
188
- "size":684531386000.0,
189
- "type":"open-source",
190
- "license":"Mit",
191
- "creation_date":1737331200000,
192
- "tasks":[
193
- "translation_from",
194
- "translation_to",
195
- "classification",
196
- "mmlu",
197
- "arc",
198
- "truthfulqa",
199
- "mgsm"
200
- ]
201
- },
202
- {
203
- "id":"deepseek\/deepseek-r1-0528-qwen3-8b",
204
- "name":"Deepseek R1 0528 Qwen3 8B",
205
- "provider_name":"DeepSeek",
206
- "cost":0.0,
207
- "hf_id":"deepseek-ai\/DeepSeek-R1-0528-Qwen3-8B",
208
- "size":8190735360.0,
209
- "type":"open-source",
210
- "license":"Mit",
211
- "creation_date":1748476800000,
212
- "tasks":[
213
- "translation_from",
214
- "translation_to",
215
- "classification",
216
- "mmlu",
217
- "arc",
218
- "truthfulqa",
219
- "mgsm"
220
- ]
221
- },
222
  {
223
  "id":"google\/gemini-2.0-flash-001",
224
  "name":"Gemini 2.0 Flash",
@@ -339,6 +259,26 @@
339
  "mgsm"
340
  ]
341
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  {
343
  "id":"meta-llama\/llama-3.1-70b-instruct",
344
  "name":"Llama 3.1 70B Instruct",
@@ -399,6 +339,26 @@
399
  "mgsm"
400
  ]
401
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
  {
403
  "id":"microsoft\/phi-4",
404
  "name":"Phi 4",
@@ -440,15 +400,15 @@
440
  ]
441
  },
442
  {
443
- "id":"mistralai\/mistral-7b-instruct-v0.3",
444
- "name":"Mistral 7B Instruct v0.3",
445
- "provider_name":"Mistral",
446
- "cost":0.05,
447
- "hf_id":"mistralai\/Mistral-7B-Instruct-v0.3",
448
- "size":7248023552.0,
449
- "type":"open-source",
450
- "license":"Apache 2.0",
451
- "creation_date":1716336000000,
452
  "tasks":[
453
  "translation_from",
454
  "translation_to",
@@ -499,6 +459,26 @@
499
  "mgsm"
500
  ]
501
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
502
  {
503
  "id":"mistralai\/mistral-small-3.1-24b-instruct",
504
  "name":"Mistral Small 3.1 24B",
@@ -540,15 +520,15 @@
540
  ]
541
  },
542
  {
543
- "id":"neversleep\/llama-3-lumimaid-70b",
544
- "name":"Llama 3 Lumimaid 70B",
545
- "provider_name":"NeverSleep",
546
- "cost":6.0,
547
- "hf_id":"NeverSleep\/Llama-3-Lumimaid-70B-v0.1",
548
- "size":70553706496.0,
549
  "type":"open-source",
550
- "license":"Cc By Nc 4.0",
551
- "creation_date":1714262400000,
552
  "tasks":[
553
  "translation_from",
554
  "translation_to",
@@ -560,15 +540,75 @@
560
  ]
561
  },
562
  {
563
- "id":"nvidia\/llama-3.1-nemotron-70b-instruct",
564
- "name":"Llama 3.1 Nemotron 70B Instruct",
565
- "provider_name":"NVIDIA",
566
- "cost":0.3,
567
- "hf_id":"nvidia\/Llama-3.1-Nemotron-70B-Instruct-HF",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
568
  "size":70553706496.0,
569
  "type":"open-source",
570
- "license":"Llama3.1",
571
- "creation_date":1728691200000,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
572
  "tasks":[
573
  "translation_from",
574
  "translation_to",
@@ -740,15 +780,15 @@
740
  ]
741
  },
742
  {
743
- "id":"qwen\/qwen-2.5-coder-32b-instruct",
744
- "name":"Qwen2.5 Coder 32B Instruct",
745
- "provider_name":"Qwen2.5 Coder 32B Instruct (free)",
746
  "cost":0.0,
747
- "hf_id":"Qwen\/Qwen2.5-Coder-32B-Instruct",
748
- "size":32763876352.0,
749
  "type":"open-source",
750
  "license":"Apache 2.0",
751
- "creation_date":1730851200000,
752
  "tasks":[
753
  "translation_from",
754
  "translation_to",
@@ -819,6 +859,26 @@
819
  "mgsm"
820
  ]
821
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
822
  {
823
  "id":"scb10x\/llama3.1-typhoon2-70b-instruct",
824
  "name":"Typhoon2 70B Instruct",
@@ -840,15 +900,15 @@
840
  ]
841
  },
842
  {
843
- "id":"tencent\/hunyuan-a13b-instruct",
844
- "name":"Hunyuan A13B Instruct",
845
- "provider_name":"Tencent",
846
  "cost":0.0,
847
- "hf_id":"tencent\/Hunyuan-A13B-Instruct",
848
- "size":80393183232.0,
849
  "type":"open-source",
850
- "license":"Other",
851
- "creation_date":1750809600000,
852
  "tasks":[
853
  "translation_from",
854
  "translation_to",
@@ -860,15 +920,15 @@
860
  ]
861
  },
862
  {
863
- "id":"thedrummer\/anubis-pro-105b-v1",
864
- "name":"Anubis Pro 105B V1",
865
- "provider_name":"TheDrummer",
866
- "cost":1.0,
867
- "hf_id":"TheDrummer\/Anubis-Pro-105B-v1",
868
- "size":104779882496.0,
869
- "type":"open-source",
870
- "license":"Other",
871
- "creation_date":1738454400000,
872
  "tasks":[
873
  "translation_from",
874
  "translation_to",
@@ -900,15 +960,15 @@
900
  ]
901
  },
902
  {
903
- "id":"z-ai\/glm-4.5v",
904
- "name":"GLM 4.5V",
905
  "provider_name":"Z.AI",
906
- "cost":1.8,
907
- "hf_id":"zai-org\/GLM-4.5V",
908
- "size":107710933120.0,
909
  "type":"open-source",
910
  "license":"Mit",
911
- "creation_date":1754784000000,
912
  "tasks":[
913
  "translation_from",
914
  "translation_to",
 
19
  "mgsm"
20
  ]
21
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  {
23
  "id":"anthropic\/claude-3.7-sonnet",
24
  "name":"Claude 3.7 Sonnet",
 
60
  ]
61
  },
62
  {
63
+ "id":"cohere\/command-r-plus-04-2024",
64
+ "name":"Command R+ (04-2024)",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  "provider_name":"Cohere",
66
+ "cost":15.0,
67
  "hf_id":null,
68
  "size":null,
69
  "type":"closed-source",
70
  "license":null,
71
+ "creation_date":1712016000000,
72
  "tasks":[
73
  "translation_from",
74
  "translation_to",
 
139
  "mgsm"
140
  ]
141
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  {
143
  "id":"google\/gemini-2.0-flash-001",
144
  "name":"Gemini 2.0 Flash",
 
259
  "mgsm"
260
  ]
261
  },
262
+ {
263
+ "id":"meta-llama\/llama-3-8b-instruct",
264
+ "name":"Llama 3 8B Instruct",
265
+ "provider_name":"Meta",
266
+ "cost":0.06,
267
+ "hf_id":"meta-llama\/Meta-Llama-3-8B-Instruct",
268
+ "size":8030261248.0,
269
+ "type":"open-source",
270
+ "license":"Llama3",
271
+ "creation_date":1713312000000,
272
+ "tasks":[
273
+ "translation_from",
274
+ "translation_to",
275
+ "classification",
276
+ "mmlu",
277
+ "arc",
278
+ "truthfulqa",
279
+ "mgsm"
280
+ ]
281
+ },
282
  {
283
  "id":"meta-llama\/llama-3.1-70b-instruct",
284
  "name":"Llama 3.1 70B Instruct",
 
339
  "mgsm"
340
  ]
341
  },
342
+ {
343
+ "id":"meta-llama\/llama-guard-3-8b",
344
+ "name":"Llama Guard 3 8B",
345
+ "provider_name":"Llama Guard 3 8B",
346
+ "cost":0.06,
347
+ "hf_id":"meta-llama\/Llama-Guard-3-8B",
348
+ "size":8030261248.0,
349
+ "type":"open-source",
350
+ "license":"Llama3.1",
351
+ "creation_date":1721606400000,
352
+ "tasks":[
353
+ "translation_from",
354
+ "translation_to",
355
+ "classification",
356
+ "mmlu",
357
+ "arc",
358
+ "truthfulqa",
359
+ "mgsm"
360
+ ]
361
+ },
362
  {
363
  "id":"microsoft\/phi-4",
364
  "name":"Phi 4",
 
400
  ]
401
  },
402
  {
403
+ "id":"microsoft\/wizardlm-2-8x22b",
404
+ "name":"WizardLM-2 8x22B",
405
+ "provider_name":"WizardLM-2 8x22B",
406
+ "cost":0.48,
407
+ "hf_id":null,
408
+ "size":null,
409
+ "type":"closed-source",
410
+ "license":null,
411
+ "creation_date":1713225600000,
412
  "tasks":[
413
  "translation_from",
414
  "translation_to",
 
459
  "mgsm"
460
  ]
461
  },
462
+ {
463
+ "id":"mistralai\/mistral-small-24b-instruct-2501",
464
+ "name":"Mistral Small 3",
465
+ "provider_name":"Mistral",
466
+ "cost":0.0,
467
+ "hf_id":"mistralai\/Mistral-Small-24B-Instruct-2501",
468
+ "size":23572403200.0,
469
+ "type":"open-source",
470
+ "license":"Apache 2.0",
471
+ "creation_date":1738022400000,
472
+ "tasks":[
473
+ "translation_from",
474
+ "translation_to",
475
+ "classification",
476
+ "mmlu",
477
+ "arc",
478
+ "truthfulqa",
479
+ "mgsm"
480
+ ]
481
+ },
482
  {
483
  "id":"mistralai\/mistral-small-3.1-24b-instruct",
484
  "name":"Mistral Small 3.1 24B",
 
520
  ]
521
  },
522
  {
523
+ "id":"nousresearch\/deephermes-3-llama-3-8b-preview",
524
+ "name":"DeepHermes 3 Llama 3 8B Preview",
525
+ "provider_name":"Nous",
526
+ "cost":0.0,
527
+ "hf_id":"NousResearch\/DeepHermes-3-Llama-3-8B-Preview",
528
+ "size":8030261248.0,
529
  "type":"open-source",
530
+ "license":"Llama3",
531
+ "creation_date":1739318400000,
532
  "tasks":[
533
  "translation_from",
534
  "translation_to",
 
540
  ]
541
  },
542
  {
543
+ "id":"nousresearch\/hermes-2-pro-llama-3-8b",
544
+ "name":"Hermes 2 Pro - Llama-3 8B",
545
+ "provider_name":"NousResearch",
546
+ "cost":0.04,
547
+ "hf_id":"NousResearch\/Hermes-2-Pro-Llama-3-8B",
548
+ "size":8030523392.0,
549
+ "type":"open-source",
550
+ "license":"Llama3",
551
+ "creation_date":1714435200000,
552
+ "tasks":[
553
+ "translation_from",
554
+ "translation_to",
555
+ "classification",
556
+ "mmlu",
557
+ "arc",
558
+ "truthfulqa",
559
+ "mgsm"
560
+ ]
561
+ },
562
+ {
563
+ "id":"nousresearch\/hermes-3-llama-3.1-405b",
564
+ "name":"Hermes 3 405B Instruct",
565
+ "provider_name":"Nous",
566
+ "cost":0.8,
567
+ "hf_id":"NousResearch\/Hermes-3-Llama-3.1-405B",
568
+ "size":405853388800.0,
569
+ "type":"open-source",
570
+ "license":"Llama3",
571
+ "creation_date":1723507200000,
572
+ "tasks":[
573
+ "translation_from",
574
+ "translation_to",
575
+ "classification",
576
+ "mmlu",
577
+ "arc",
578
+ "truthfulqa",
579
+ "mgsm"
580
+ ]
581
+ },
582
+ {
583
+ "id":"nousresearch\/hermes-3-llama-3.1-70b",
584
+ "name":"Hermes 3 70B Instruct",
585
+ "provider_name":"Nous",
586
+ "cost":0.28,
587
+ "hf_id":"NousResearch\/Hermes-3-Llama-3.1-70B",
588
  "size":70553706496.0,
589
  "type":"open-source",
590
+ "license":"Llama3",
591
+ "creation_date":1722211200000,
592
+ "tasks":[
593
+ "translation_from",
594
+ "translation_to",
595
+ "classification",
596
+ "mmlu",
597
+ "arc",
598
+ "truthfulqa",
599
+ "mgsm"
600
+ ]
601
+ },
602
+ {
603
+ "id":"openai\/gpt-3.5-turbo-0613",
604
+ "name":"GPT-3.5 Turbo (older v0613)",
605
+ "provider_name":"OpenAI",
606
+ "cost":2.0,
607
+ "hf_id":null,
608
+ "size":null,
609
+ "type":"closed-source",
610
+ "license":null,
611
+ "creation_date":1706140800000,
612
  "tasks":[
613
  "translation_from",
614
  "translation_to",
 
780
  ]
781
  },
782
  {
783
+ "id":"openai\/gpt-oss-20b",
784
+ "name":"gpt-oss-20b",
785
+ "provider_name":"OpenAI",
786
  "cost":0.0,
787
+ "hf_id":"openai\/gpt-oss-20b",
788
+ "size":21511953984.0,
789
  "type":"open-source",
790
  "license":"Apache 2.0",
791
+ "creation_date":1754265600000,
792
  "tasks":[
793
  "translation_from",
794
  "translation_to",
 
859
  "mgsm"
860
  ]
861
  },
862
+ {
863
+ "id":"sao10k\/l3-lunaris-8b",
864
+ "name":"Llama 3 8B Lunaris",
865
+ "provider_name":"Sao10K",
866
+ "cost":0.05,
867
+ "hf_id":"Sao10K\/L3-8B-Lunaris-v1",
868
+ "size":8030261248.0,
869
+ "type":"open-source",
870
+ "license":"Llama3",
871
+ "creation_date":1719360000000,
872
+ "tasks":[
873
+ "translation_from",
874
+ "translation_to",
875
+ "classification",
876
+ "mmlu",
877
+ "arc",
878
+ "truthfulqa",
879
+ "mgsm"
880
+ ]
881
+ },
882
  {
883
  "id":"scb10x\/llama3.1-typhoon2-70b-instruct",
884
  "name":"Typhoon2 70B Instruct",
 
900
  ]
901
  },
902
  {
903
+ "id":"shisa-ai\/shisa-v2-llama3.3-70b",
904
+ "name":"Shisa V2 Llama 3.3 70B ",
905
+ "provider_name":"Shisa AI",
906
  "cost":0.0,
907
+ "hf_id":"shisa-ai\/shisa-v2-llama3.3-70b",
908
+ "size":70553706496.0,
909
  "type":"open-source",
910
+ "license":"Llama3.3",
911
+ "creation_date":1744502400000,
912
  "tasks":[
913
  "translation_from",
914
  "translation_to",
 
920
  ]
921
  },
922
  {
923
+ "id":"x-ai\/grok-2-vision-1212",
924
+ "name":"Grok 2 Vision 1212",
925
+ "provider_name":"xAI",
926
+ "cost":10.0,
927
+ "hf_id":null,
928
+ "size":null,
929
+ "type":"closed-source",
930
+ "license":null,
931
+ "creation_date":1734220800000,
932
  "tasks":[
933
  "translation_from",
934
  "translation_to",
 
960
  ]
961
  },
962
  {
963
+ "id":"z-ai\/glm-4.5",
964
+ "name":"GLM 4.5",
965
  "provider_name":"Z.AI",
966
+ "cost":1.32,
967
+ "hf_id":"zai-org\/GLM-4.5",
968
+ "size":358337791296.0,
969
  "type":"open-source",
970
  "license":"Mit",
971
+ "creation_date":1752969600000,
972
  "tasks":[
973
  "translation_from",
974
  "translation_to",
results.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:649509b8373b76e51a79809fdab77badff44e5536ca3bd8e3eb409f406b6ecda
3
- size 13260774
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3425d95dc42dec7d32104ca836a9d82d256d6f4ca4cc4971bab4a43339eb3090
3
+ size 15266201