diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -8258,17 +8258,6 @@ "type": "Commercial", "license": null, "creation_date": "2024-12-05" - }, - { - "id": "anthropic/claude-3.5-sonnet", - "name": "Claude 3.5 Sonnet", - "provider_name": "Anthropic", - "cost": 15.0, - "hf_id": null, - "size": null, - "type": "Commercial", - "license": null, - "creation_date": "2024-10-22" } ], "scores": [ @@ -8448,22 +8437,6 @@ "score": 0.762029391170019, "sentence_nr": 0 }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_from", - "metric": "bleu", - "score": 0.5157250202457466, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_from", - "metric": "chrf", - "score": 0.7038347316939249, - "sentence_nr": 0 - }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", @@ -8640,22 +8613,6 @@ "score": 0.49423240120783246, "sentence_nr": 0 }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_from", - "metric": "bleu", - "score": 0.29707776197115804, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_from", - "metric": "chrf", - "score": 0.5094838456142188, - "sentence_nr": 0 - }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", @@ -8832,22 +8789,6 @@ "score": 1.0, "sentence_nr": 0 }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_from", - "metric": "bleu", - "score": 0.9878765474230741, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_from", - "metric": "chrf", - "score": 0.9958930217841712, - "sentence_nr": 0 - }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", @@ -9024,22 +8965,6 @@ "score": 0.4599756430080559, "sentence_nr": 0 }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_from", - "metric": "bleu", - "score": 0.3793970928219617, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_from", - "metric": "chrf", - "score": 0.5384125202333925, - "sentence_nr": 0 - }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", @@ -9216,22 +9141,6 @@ "score": 0.6372680189651158, "sentence_nr": 0 }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_from", - "metric": "bleu", - "score": 0.4453094933864169, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_from", - "metric": "chrf", - "score": 0.6148455639799472, - "sentence_nr": 0 - }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", @@ -9408,22 +9317,6 @@ "score": 0.5314089060682492, "sentence_nr": 0 }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_from", - "metric": "bleu", - "score": 0.37752144939477184, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_from", - "metric": "chrf", - "score": 0.5494778583745301, - "sentence_nr": 0 - }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", @@ -9600,22 +9493,6 @@ "score": 0.5216531073745614, "sentence_nr": 0 }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_from", - "metric": "bleu", - "score": 0.32060183762800015, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_from", - "metric": "chrf", - "score": 0.5258982083606875, - "sentence_nr": 0 - }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", @@ -9792,22 +9669,6 @@ "score": 0.5072760587388273, "sentence_nr": 0 }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_from", - "metric": "bleu", - "score": 0.49084794047865243, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_from", - "metric": "chrf", - "score": 0.6606364516519111, - "sentence_nr": 0 - }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", @@ -9984,22 +9845,6 @@ "score": 0.502364219831564, "sentence_nr": 0 }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_from", - "metric": "bleu", - "score": 0.4164593261612853, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_from", - "metric": "chrf", - "score": 0.5895619617012146, - "sentence_nr": 0 - }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", @@ -10176,1065 +10021,969 @@ "score": 0.6798382116037067, "sentence_nr": 0 }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_from", - "metric": "bleu", - "score": 0.5166723256924997, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_from", - "metric": "chrf", - "score": 0.7009262917135753, - "sentence_nr": 0 - }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.07407154448063642, - "sentence_nr": 1 + "score": 0.36921945860245514, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.43145434527321425, - "sentence_nr": 1 + "score": 0.5602656572610939, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.12903696060775005, - "sentence_nr": 1 + "score": 0.3963410285961713, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.456225988032654, - "sentence_nr": 1 + "score": 0.613166190285915, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.390589858528132, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.024459391267874976, - "sentence_nr": 1 + "score": 0.5498108214680063, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.12351824822447692, - "sentence_nr": 1 + "score": 0.33433503990805974, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.46822754470803873, - "sentence_nr": 1 + "score": 0.5465517653500693, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.15815751066481462, - "sentence_nr": 1 + "score": 0.37224644590020084, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.5152611872266766, - "sentence_nr": 1 + "score": 0.5716341952568125, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2820342917142487, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.4031456247133876, - "sentence_nr": 1 + "score": 0.5410881356746259, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.20669086265781264, - "sentence_nr": 1 + "score": 0.4611551555069207, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.5076721272198604, - "sentence_nr": 1 + "score": 0.6294324146720465, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.17630490037560695, - "sentence_nr": 1 + "score": 0.19685577478840446, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.48116430160978857, - "sentence_nr": 1 + "score": 0.4551749985589161, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.15611634095633747, - "sentence_nr": 1 + "score": 0.41865363173987147, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.5075814499747183, - "sentence_nr": 1 + "score": 0.6246787832833863, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.18474062565700086, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.4122750002638689, - "sentence_nr": 1 + "score": 0.3944315616553734, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.15412719160788987, - "sentence_nr": 1 + "score": 0.4044350002821056, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_from", - "metric": "chrf", - "score": 0.5010353699512481, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_from", - "metric": "bleu", - "score": 0.20387261486363278, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.4846245724468382, - "sentence_nr": 1 + "score": 0.5937440273149751, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.06647168102389285, - "sentence_nr": 1 + "score": 0.4404175157492415, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.34350832619898364, - "sentence_nr": 1 + "score": 0.6499400950194552, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.12560672881768975, - "sentence_nr": 1 + "score": 0.44294247711132617, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.4969560260291519, - "sentence_nr": 1 + "score": 0.5915660675216782, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.17077058518804336, - "sentence_nr": 1 + "score": 0.418987297037058, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5022008374701596, - "sentence_nr": 1 + "score": 0.5748791698084322, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.10784756064735967, - "sentence_nr": 1 + "score": 0.2020983719878774, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.4427230465401631, - "sentence_nr": 1 + "score": 0.4110516731487298, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.12369892692249995, - "sentence_nr": 1 + "score": 0.3660623361610902, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.44549610902403686, - "sentence_nr": 1 + "score": 0.5840165124966731, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.06656213940646748, - "sentence_nr": 1 + "score": 0.3495696951007327, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.38435741328258305, - "sentence_nr": 1 + "score": 0.5497542561082874, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.175396614619324, - "sentence_nr": 1 + "score": 0.4342485684315921, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.49736499605529066, - "sentence_nr": 1 + "score": 0.5862756549666985, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.15154395847232716, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.46053919348995803, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.1609675245202845, - "sentence_nr": 1 + "score": 0.32846260295658253, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5069863833094232, - "sentence_nr": 1 + "score": 0.5234687470369108, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.0626814220834104, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.4041678259311437, - "sentence_nr": 1 + "score": 0.2649283376124583, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.1290514243115152, - "sentence_nr": 1 + "score": 0.3504606692020456, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_from", - "metric": "chrf", - "score": 0.4766581477336301, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_from", - "metric": "bleu", - "score": 0.07595192904514617, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.41479321394739394, - "sentence_nr": 1 + "score": 0.5696982139616064, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.09735981717515908, - "sentence_nr": 1 + "score": 0.40306183496110326, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.35288934658906385, - "sentence_nr": 1 + "score": 0.6065077241830509, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.08273178236238297, - "sentence_nr": 1 + "score": 0.3756985486608933, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.36399666460809255, - "sentence_nr": 1 + "score": 0.5991443770283833, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.13012870333257068, - "sentence_nr": 1 + "score": 0.3437925129268647, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.3852835519852091, - "sentence_nr": 1 + "score": 0.5496158439811546, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4457795438900481, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.3356633416447032, - "sentence_nr": 1 + "score": 0.6608358312257032, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.12422788549118892, - "sentence_nr": 1 + "score": 0.500516497336299, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.40222210564426, - "sentence_nr": 1 + "score": 0.7019407549121803, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4389167617930115, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.28789057461471257, - "sentence_nr": 1 + "score": 0.6283965584123504, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.10467757347424328, - "sentence_nr": 1 + "score": 0.4544549777519972, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.36749853206282146, - "sentence_nr": 1 + "score": 0.6588011478075102, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0952569581727979, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.38264808953110185, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.13026649757585426, - "sentence_nr": 1 + "score": 0.4720654627116666, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.41550755035304077, - "sentence_nr": 1 + "score": 0.6517469394467796, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.13081443497119305, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.039782861678265974, - "sentence_nr": 1 + "score": 0.417733523030983, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.1175904695048123, - "sentence_nr": 1 + "score": 0.5333753443479871, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.3996881234028031, - "sentence_nr": 1 + "score": 0.6915993702819169, + "sentence_nr": 0 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.16142282195879326, - "sentence_nr": 1 + "score": 0.4892530408936975, + "sentence_nr": 0 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.4147043899005278, - "sentence_nr": 1 + "score": 0.6697286007212407, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.11564012893219777, - "sentence_nr": 1 + "score": 0.5009456904181451, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.44599783682350064, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_from", - "metric": "bleu", - "score": 0.12601482779921785, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_from", - "metric": "chrf", - "score": 0.43595665254608706, - "sentence_nr": 1 + "score": 0.6893719644090858, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.12022286401047096, - "sentence_nr": 1 + "score": 0.5011800954736271, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.48279986805368713, - "sentence_nr": 1 + "score": 0.6882325337556615, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.15350377490367967, - "sentence_nr": 1 + "score": 0.42451113499289145, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.47645148444499064, - "sentence_nr": 1 + "score": 0.6326418045965277, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.13714845589364738, - "sentence_nr": 1 + "score": 0.4824471894538444, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.45499281593451946, - "sentence_nr": 1 + "score": 0.6756807439055712, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.47347369701789205, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.38785209659947417, - "sentence_nr": 1 + "score": 0.681786235656136, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.12848168928706002, - "sentence_nr": 1 + "score": 0.5043550869731553, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.4421263683867116, - "sentence_nr": 1 + "score": 0.6689462373151898, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.18629760071299903, - "sentence_nr": 1 + "score": 0.28630516999083483, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.4381418376415505, - "sentence_nr": 1 + "score": 0.4859658293338903, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.09198045184317984, - "sentence_nr": 1 + "score": 0.4628513442989428, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.4598393646838097, - "sentence_nr": 1 + "score": 0.6651586361790265, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.07798530247118374, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.4006113700211268, - "sentence_nr": 1 + "score": 0.004953764861294584, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.14541923959059266, - "sentence_nr": 1 + "score": 0.48578120610890896, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_from", - "metric": "chrf", - "score": 0.47577612932999147, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_from", - "metric": "bleu", - "score": 0.2033664688556054, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5013013732058768, - "sentence_nr": 1 + "score": 0.6418878687312928, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.498687604330117, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.2521233582161207, - "sentence_nr": 1 + "score": 0.6495577010231699, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.40959087443621306, - "sentence_nr": 1 + "score": 0.18273944860385094, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.6348509381122925, - "sentence_nr": 1 + "score": 0.44261865187418153, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.40959087443621306, - "sentence_nr": 1 + "score": 0.1921021633645501, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.6348509381122925, - "sentence_nr": 1 + "score": 0.41090634933708026, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.3182970443542658, - "sentence_nr": 1 + "score": 0.2019984490972421, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.5953162569846108, - "sentence_nr": 1 + "score": 0.42969616197156246, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.3026566818840519, - "sentence_nr": 1 + "score": 0.23363375253301555, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.5945859352092411, - "sentence_nr": 1 + "score": 0.4539275409654266, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.18816868192268246, - "sentence_nr": 1 + "score": 0.24058995161649158, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.5179253053631742, - "sentence_nr": 1 + "score": 0.4522509933949415, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.09629060614977814, - "sentence_nr": 1 + "score": 0.34777225435927045, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.43565498999747165, - "sentence_nr": 1 + "score": 0.5603739447290761, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.40959087443621306, - "sentence_nr": 1 + "score": 0.0343688963868873, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.6348509381122925, - "sentence_nr": 1 + "score": 0.2491787368704391, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.28418123342684043, - "sentence_nr": 1 + "score": 0.2488577037192601, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.539816402671069, - "sentence_nr": 1 + "score": 0.4700612059850866, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.3765959322920135, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.6295826606382191, - "sentence_nr": 1 + "score": 0.10246901021115776, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.40801269202545287, - "sentence_nr": 1 + "score": 0.26380432026626405, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_from", - "metric": "chrf", - "score": 0.6210533025653295, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_from", - "metric": "bleu", - "score": 0.2744916220706949, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.542564139748617, - "sentence_nr": 1 + "score": 0.4634992426765033, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0867932999243575, + "score": 0.07407154448063642, "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.4201964133235075, + "score": 0.43145434527321425, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.08214106568089705, + "score": 0.12903696060775005, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3969463877642616, + "score": 0.456225988032654, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.1897299381066278, + "score": 0.0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5086851537953713, + "score": 0.024459391267874976, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.12351824822447692, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3833939462124923, + "score": 0.46822754470803873, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.07793031063789554, + "score": 0.15815751066481462, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3700181221537743, + "score": 0.5152611872266766, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", "score": 0.0, @@ -11242,63 +10991,63 @@ }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.2718653389257641, + "score": 0.4031456247133876, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.20669086265781264, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3327209336079636, + "score": 0.5076721272198604, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.1804000267306113, + "score": 0.17630490037560695, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.451798442226037, + "score": 0.48116430160978857, "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.1777835117834348, + "score": 0.15611634095633747, "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5166806073547074, + "score": 0.5075814499747183, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", "score": 0.0, @@ -11306,191 +11055,175 @@ }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.009396473650937872, + "score": 0.4122750002638689, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.13582906387565688, + "score": 0.15412719160788987, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_from", - "metric": "chrf", - "score": 0.43344913217266734, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_from", - "metric": "bleu", - "score": 0.1250453615099799, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.4413612249791572, + "score": 0.5010353699512481, "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.06647168102389285, "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.3630576975795868, + "score": 0.34350832619898364, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0744904632040495, + "score": 0.12560672881768975, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.4111163205685468, + "score": 0.4969560260291519, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.17077058518804336, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.4363130300030932, + "score": 0.5022008374701596, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.10784756064735967, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.41747276065817185, + "score": 0.4427230465401631, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.12369892692249995, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.41649654108052436, + "score": 0.44549610902403686, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.07749370908741021, + "score": 0.06656213940646748, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.3853293582383978, + "score": 0.38435741328258305, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.175396614619324, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.43622390508229153, + "score": 0.49736499605529066, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.13343258247486778, + "score": 0.15154395847232716, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.4018842345370629, + "score": 0.46053919348995803, "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.1609675245202845, "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.4250905063113662, + "score": 0.5069863833094232, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", "score": 0.0, @@ -11498,431 +11231,383 @@ }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.36347800793516216, + "score": 0.4041678259311437, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.06254678076846341, + "score": 0.1290514243115152, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_from", - "metric": "chrf", - "score": 0.3887428577633272, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_from", - "metric": "bleu", - "score": 0.1982991730465618, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.47607372633277156, + "score": 0.4766581477336301, "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.08616711094288851, + "score": 0.09735981717515908, "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3696512763473903, + "score": 0.35288934658906385, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.12894104034845807, + "score": 0.08273178236238297, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.4486368934849452, + "score": 0.36399666460809255, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.08825252192863794, + "score": 0.13012870333257068, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.4377853721520782, + "score": 0.3852835519852091, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.05345137572833361, + "score": 0.0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3829169125379508, + "score": 0.3356633416447032, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.08767210132815903, + "score": 0.12422788549118892, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.40476518002703893, + "score": 0.40222210564426, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.05422898988559086, + "score": 0.0, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.335890201952113, + "score": 0.28789057461471257, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.10467757347424328, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.34617921188455225, + "score": 0.36749853206282146, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0588222649477664, + "score": 0.0952569581727979, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3642771871011383, + "score": 0.38264808953110185, "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.10186730973904586, + "score": 0.13026649757585426, "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.43665642120840553, + "score": 0.41550755035304077, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.08248974616169381, + "score": 0.0, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.40456777770242314, + "score": 0.039782861678265974, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.10496714075880566, + "score": 0.1175904695048123, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_from", - "metric": "chrf", - "score": 0.4262440114275301, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_from", - "metric": "bleu", - "score": 0.117026611061013, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.44043117464934733, + "score": 0.3996881234028031, "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.15386029327005746, + "score": 0.11564012893219777, "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.43911482594829104, + "score": 0.44599783682350064, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.10070927557742705, + "score": 0.12601482779921785, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.43718220262892105, + "score": 0.43595665254608706, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.11478463129234825, + "score": 0.12022286401047096, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.4651957501593415, + "score": 0.48279986805368713, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.07137101582673294, + "score": 0.15350377490367967, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.4075406301092705, + "score": 0.47645148444499064, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.14738500064905094, + "score": 0.13714845589364738, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.4659728395318289, + "score": 0.45499281593451946, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0643329477522681, + "score": 0.0, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3960585990192623, + "score": 0.38785209659947417, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.17247941414020762, + "score": 0.12848168928706002, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.48320144379865687, + "score": 0.4421263683867116, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.057981164297440296, + "score": 0.18629760071299903, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.33896784137459673, + "score": 0.4381418376415505, "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.09751270821852938, + "score": 0.09198045184317984, "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.395617758442078, + "score": 0.4598393646838097, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.06301432444316532, + "score": 0.07798530247118374, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.4249725532507508, + "score": 0.4006113700211268, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.18248753930464637, + "score": 0.14541923959059266, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_from", - "metric": "chrf", - "score": 0.4759830743101189, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_from", - "metric": "bleu", - "score": 0.16678232269832466, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.4756863623605152, + "score": 0.47577612932999147, "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", "score": 0.0, @@ -11930,2219 +11615,2043 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.1946966569103724, + "score": 0.2521233582161207, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0772718393063023, + "score": 0.40959087443621306, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4203683137304257, + "score": 0.6348509381122925, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.08291357159799752, + "score": 0.40959087443621306, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4009694996956877, + "score": 0.6348509381122925, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.3182970443542658, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3714280466838255, + "score": 0.5953162569846108, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.3026566818840519, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3370100422576744, + "score": 0.5945859352092411, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.18816868192268246, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3538602132402044, + "score": 0.5179253053631742, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.167672929900467, + "score": 0.09629060614977814, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.46910779766306765, + "score": 0.43565498999747165, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.40959087443621306, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.32123020755377657, + "score": 0.6348509381122925, "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.28418123342684043, "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.47435308668900444, + "score": 0.539816402671069, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.08351211898903935, + "score": 0.3765959322920135, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.33030812447506436, + "score": 0.6295826606382191, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.07528927678469202, + "score": 0.40801269202545287, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_from", - "metric": "chrf", - "score": 0.422513417362817, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_from", - "metric": "bleu", - "score": 0.1779993767214403, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.5054922206303282, + "score": 0.6210533025653295, "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.3961285597009415, - "sentence_nr": 2 + "score": 0.0867932999243575, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6148751441350505, - "sentence_nr": 2 + "score": 0.4201964133235075, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.4923751299732868, - "sentence_nr": 2 + "score": 0.08214106568089705, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6853756490381199, - "sentence_nr": 2 + "score": 0.3969463877642616, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.3996712647649035, - "sentence_nr": 2 + "score": 0.1897299381066278, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6353525755760105, - "sentence_nr": 2 + "score": 0.5086851537953713, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.5115346945020283, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.7037574715738644, - "sentence_nr": 2 + "score": 0.3833939462124923, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.38870674200492367, - "sentence_nr": 2 + "score": 0.07793031063789554, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6484380084879691, - "sentence_nr": 2 + "score": 0.3700181221537743, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.24715873794308874, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.49051792813181655, - "sentence_nr": 2 + "score": 0.2718653389257641, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.24508104771894088, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5725552336126134, - "sentence_nr": 2 + "score": 0.3327209336079636, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.1804000267306113, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.451798442226037, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.33608213382072566, - "sentence_nr": 2 + "score": 0.1777835117834348, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6155314069125684, - "sentence_nr": 2 + "score": 0.5166806073547074, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.20801258614305904, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.26703508536995574, - "sentence_nr": 2 + "score": 0.009396473650937872, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.35315040956049437, - "sentence_nr": 2 + "score": 0.13582906387565688, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_from", - "metric": "chrf", - "score": 0.625895188503691, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_from", - "metric": "bleu", - "score": 0.33713757310040376, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6015500127828212, - "sentence_nr": 2 + "score": 0.43344913217266734, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.17374951565433233, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.45325597884524305, - "sentence_nr": 2 + "score": 0.3630576975795868, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.17743299460161885, - "sentence_nr": 2 + "score": 0.0744904632040495, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.43071271897416463, - "sentence_nr": 2 + "score": 0.4111163205685468, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.16052654068024738, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.41580120868053494, - "sentence_nr": 2 + "score": 0.4363130300030932, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.05963579607071745, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.31139762378406344, - "sentence_nr": 2 + "score": 0.41747276065817185, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.11133996756497437, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.4410280353998367, - "sentence_nr": 2 + "score": 0.41649654108052436, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.11346446511593337, - "sentence_nr": 2 + "score": 0.07749370908741021, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.3675317022605926, - "sentence_nr": 2 + "score": 0.3853293582383978, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.10742716472890976, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.42694859148910824, - "sentence_nr": 2 + "score": 0.43622390508229153, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.13343258247486778, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.4018842345370629, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.19073363590503933, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.49895382941569383, - "sentence_nr": 2 + "score": 0.4250905063113662, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.36347800793516216, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.14745870033404418, - "sentence_nr": 2 + "score": 0.06254678076846341, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.475170637938921, - "sentence_nr": 2 + "score": 0.3887428577633272, + "sentence_nr": 1 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.20590661325770857, - "sentence_nr": 2 + "score": 0.08616711094288851, + "sentence_nr": 1 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.482883619765493, - "sentence_nr": 2 + "score": 0.3696512763473903, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.26801022984888695, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_from", - "metric": "chrf", - "score": 0.5654883864995515, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_from", - "metric": "bleu", - "score": 0.21665407194210906, - "sentence_nr": 2 + "score": 0.12894104034845807, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.4344921442639243, - "sentence_nr": 2 + "score": 0.4486368934849452, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.2735429726790281, - "sentence_nr": 2 + "score": 0.08825252192863794, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5644723203818537, - "sentence_nr": 2 + "score": 0.4377853721520782, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.20223322445648179, - "sentence_nr": 2 + "score": 0.05345137572833361, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5084057058209687, - "sentence_nr": 2 + "score": 0.3829169125379508, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.26459538953931094, - "sentence_nr": 2 + "score": 0.08767210132815903, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5272178908335121, - "sentence_nr": 2 + "score": 0.40476518002703893, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.012201453805310429, - "sentence_nr": 2 + "score": 0.05422898988559086, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.063050817196087, - "sentence_nr": 2 + "score": 0.335890201952113, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.25848476545940924, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5525933856866961, - "sentence_nr": 2 + "score": 0.34617921188455225, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.13019082899297843, - "sentence_nr": 2 + "score": 0.0588222649477664, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.40512126305429846, - "sentence_nr": 2 + "score": 0.3642771871011383, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.24071298960902482, - "sentence_nr": 2 + "score": 0.10186730973904586, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5438509851618877, - "sentence_nr": 2 + "score": 0.43665642120840553, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.2063890416514164, - "sentence_nr": 2 + "score": 0.08248974616169381, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.38567678850872256, - "sentence_nr": 2 + "score": 0.40456777770242314, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.3295566054952435, - "sentence_nr": 2 + "score": 0.10496714075880566, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_from", - "metric": "chrf", - "score": 0.5816133441895466, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_from", - "metric": "bleu", - "score": 0.2748068612990203, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5326219867738043, - "sentence_nr": 2 + "score": 0.4262440114275301, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.3574583793293068, - "sentence_nr": 2 + "score": 0.15386029327005746, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.5924115119819969, - "sentence_nr": 2 + "score": 0.43911482594829104, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.37994652561206577, - "sentence_nr": 2 + "score": 0.10070927557742705, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6464467277069994, - "sentence_nr": 2 + "score": 0.43718220262892105, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.4206507730319955, - "sentence_nr": 2 + "score": 0.11478463129234825, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.678851303587664, - "sentence_nr": 2 + "score": 0.4651957501593415, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.35367180741660353, - "sentence_nr": 2 + "score": 0.07137101582673294, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6344846206551544, - "sentence_nr": 2 + "score": 0.4075406301092705, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.3563758622144919, - "sentence_nr": 2 + "score": 0.14738500064905094, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6037023613177924, - "sentence_nr": 2 + "score": 0.4659728395318289, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.2632018059331281, - "sentence_nr": 2 + "score": 0.0643329477522681, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.501302719796297, - "sentence_nr": 2 + "score": 0.3960585990192623, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.20586736678432452, - "sentence_nr": 2 + "score": 0.17247941414020762, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.5693016623172978, - "sentence_nr": 2 + "score": 0.48320144379865687, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.27075075499555246, - "sentence_nr": 2 + "score": 0.057981164297440296, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.5374328610523021, - "sentence_nr": 2 + "score": 0.33896784137459673, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.1455399826828606, - "sentence_nr": 2 + "score": 0.09751270821852938, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.4504825146558032, - "sentence_nr": 2 + "score": 0.395617758442078, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.175866555062937, - "sentence_nr": 2 + "score": 0.06301432444316532, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.2758687846643748, - "sentence_nr": 2 + "score": 0.4249725532507508, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.2948978498692003, - "sentence_nr": 2 + "score": 0.18248753930464637, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_from", - "metric": "chrf", - "score": 0.5529453973837751, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_from", - "metric": "bleu", - "score": 0.3563982585943877, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6108017012121061, - "sentence_nr": 2 + "score": 0.4759830743101189, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.22292726306270316, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.5653789747970112, - "sentence_nr": 2 + "score": 0.1946966569103724, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.09362261118571368, - "sentence_nr": 2 + "score": 0.0772718393063023, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3452056942265759, - "sentence_nr": 2 + "score": 0.4203683137304257, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.26930937054323245, - "sentence_nr": 2 + "score": 0.08291357159799752, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.5410704185827219, - "sentence_nr": 2 + "score": 0.4009694996956877, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.24634765861867908, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.55968513851572, - "sentence_nr": 2 + "score": 0.3714280466838255, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.2158914621804855, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.5448184155666022, - "sentence_nr": 2 + "score": 0.3370100422576744, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.39152357647177133, - "sentence_nr": 2 + "score": 0.3538602132402044, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.08175340974854195, - "sentence_nr": 2 + "score": 0.167672929900467, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.4308342322390109, - "sentence_nr": 2 + "score": 0.46910779766306765, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3185785286756486, - "sentence_nr": 2 + "score": 0.32123020755377657, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.3268233487541633, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6084114123608597, - "sentence_nr": 2 + "score": 0.47435308668900444, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.08351211898903935, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.04759937639788563, - "sentence_nr": 2 + "score": 0.33030812447506436, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.2500653935141143, - "sentence_nr": 2 + "score": 0.07528927678469202, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_from", - "metric": "chrf", - "score": 0.585528867886047, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_from", - "metric": "bleu", - "score": 0.18505378795140082, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.5602893668984232, - "sentence_nr": 2 + "score": 0.422513417362817, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.21403222128228389, - "sentence_nr": 2 + "score": 0.06126604215610123, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.563121432204311, - "sentence_nr": 2 + "score": 0.3837677428398438, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.18917620656425485, - "sentence_nr": 2 + "score": 0.0756907193511249, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.4346170232980484, - "sentence_nr": 2 + "score": 0.4138725093679467, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.18505378795140082, - "sentence_nr": 2 + "score": 0.08866637424249016, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.47051087423292237, - "sentence_nr": 2 + "score": 0.44876462229383973, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.09807167131529582, - "sentence_nr": 2 + "score": 0.10574428430204418, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.4646043403137081, - "sentence_nr": 2 + "score": 0.45371814600333005, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.18031307339768174, - "sentence_nr": 2 + "score": 0.09084091756463074, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.522164454804456, - "sentence_nr": 2 + "score": 0.4286741659142759, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.037874984245935134, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.13083094614009624, - "sentence_nr": 2 + "score": 0.3980589439671235, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.20064110494011925, - "sentence_nr": 2 + "score": 0.12649672885841734, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.5205761630334527, - "sentence_nr": 2 + "score": 0.41329609863930566, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.0684792839692368, - "sentence_nr": 2 + "score": 0.07465265387221826, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.3138898863773231, - "sentence_nr": 2 + "score": 0.4274000630396105, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.1573857459340795, - "sentence_nr": 2 + "score": 0.1219449069656942, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.5347526444819753, - "sentence_nr": 2 + "score": 0.4776943038671049, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.008180069062416927, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.04605877529742035, - "sentence_nr": 2 + "score": 0.4606409590817001, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.13904320686250593, - "sentence_nr": 2 + "score": 0.07996209785853586, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_from", - "metric": "chrf", - "score": 0.47169365083525167, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_from", - "metric": "bleu", - "score": 0.10065329518890631, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.47259396735190334, - "sentence_nr": 2 + "score": 0.4279137012019699, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.4054983797456263, - "sentence_nr": 2 + "score": 0.1195053737774238, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.6264774230839022, - "sentence_nr": 2 + "score": 0.4512136289975786, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.420450507904553, - "sentence_nr": 2 + "score": 0.21748353646757182, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.6503146347305717, - "sentence_nr": 2 + "score": 0.4462746462826943, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.43870712112271204, - "sentence_nr": 2 + "score": 0.24443999371485628, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.6525926696001584, - "sentence_nr": 2 + "score": 0.4991016392840656, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.4207445490015154, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.6496192656497308, - "sentence_nr": 2 + "score": 0.34155562837143877, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.2999092588227898, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5505916495384416, - "sentence_nr": 2 + "score": 0.16764957347186446, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.2772655014585435, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.4799723286048352, - "sentence_nr": 2 + "score": 0.1139393935967296, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.2516768028374535, - "sentence_nr": 2 + "score": 0.1701935252826955, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.49572209766846287, - "sentence_nr": 2 + "score": 0.4455315745640286, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.2958351954606211, - "sentence_nr": 2 + "score": 0.11941817189528041, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5202221091638364, - "sentence_nr": 2 + "score": 0.4275071634813637, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.3471036105446511, - "sentence_nr": 2 + "score": 0.08932983819566953, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5663019495273462, - "sentence_nr": 2 + "score": 0.412238728569517, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.009070964338765818, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.06852404470758497, - "sentence_nr": 2 + "score": 0.35698569920901285, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.3212066202235163, - "sentence_nr": 2 + "score": 0.1275291133503835, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_from", - "metric": "chrf", - "score": 0.5836558214123343, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_from", - "metric": "bleu", - "score": 0.3359230828063257, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5729275692707756, - "sentence_nr": 2 + "score": 0.4127884601900206, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.20051119758906127, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5334791309401924, - "sentence_nr": 2 + "score": 0.3538966478758119, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.24894072982768842, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5212235893093335, - "sentence_nr": 2 + "score": 0.4179644538349004, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.23724642034775328, - "sentence_nr": 2 + "score": 0.11116091368823534, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5175129869169551, - "sentence_nr": 2 + "score": 0.43307401079748475, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.1849419409628554, - "sentence_nr": 2 + "score": 0.07649978886725356, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5067677916637257, - "sentence_nr": 2 + "score": 0.41031664319131844, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.4263684749347053, - "sentence_nr": 2 + "score": 0.4331286519146886, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.4228574070038002, - "sentence_nr": 2 + "score": 0.2626949949898101, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.11126509848873964, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.4338923576538663, - "sentence_nr": 2 + "score": 0.3795844422349344, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.31311320826536454, - "sentence_nr": 2 + "score": 0.3964061846611735, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.17236491061326006, - "sentence_nr": 2 + "score": 0.07012053105310272, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5548663878579595, - "sentence_nr": 2 + "score": 0.3123716745719453, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.3540506408782035, - "sentence_nr": 2 + "score": 0.3856352748003268, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.08906092883748383, - "sentence_nr": 2 + "score": 0.0876062628502436, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_from", - "metric": "chrf", - "score": 0.4317746285352776, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_from", - "metric": "bleu", - "score": 0.16994548762775233, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.49416616344892494, - "sentence_nr": 2 + "score": 0.3978552283854932, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.3535002370419364, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5959879218348465, - "sentence_nr": 2 + "score": 0.43759229210123524, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.393613605227227, - "sentence_nr": 2 + "score": 0.10505106462290037, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.6492198447661237, - "sentence_nr": 2 + "score": 0.4474870048911137, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.393613605227227, - "sentence_nr": 2 + "score": 0.11534976570369744, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.6492198447661237, - "sentence_nr": 2 + "score": 0.46761329904761845, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.38333108639273095, - "sentence_nr": 2 + "score": 0.0487561532099542, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.6252821653079126, - "sentence_nr": 2 + "score": 0.3938156291645021, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.2562849004088193, - "sentence_nr": 2 + "score": 0.11378204941109882, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5767019342009202, - "sentence_nr": 2 + "score": 0.4981472095171313, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.19851743023355672, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.49793621556542356, - "sentence_nr": 2 + "score": 0.4098374118843212, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.10954782904363085, - "sentence_nr": 2 + "score": 0.11335203496873462, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5090382887002297, - "sentence_nr": 2 + "score": 0.46528080200591054, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.1614809742656655, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.4145218112165384, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.2834484329788497, - "sentence_nr": 2 + "score": 0.108829546976023, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5201572704778937, - "sentence_nr": 2 + "score": 0.4177339268402449, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.006569332862878646, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.060864196135666904, - "sentence_nr": 2 + "score": 0.008776218574747889, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.2756885721075884, - "sentence_nr": 2 + "score": 0.09026606980896171, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_from", - "metric": "chrf", - "score": 0.5867077870431389, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_from", - "metric": "bleu", - "score": 0.23380867598952562, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5162668764933175, - "sentence_nr": 2 + "score": 0.4602880143145438, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.35983766090218355, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.5862251404739759, - "sentence_nr": 2 + "score": 0.4239838444198129, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.21147734744561483, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.41020178654369294, - "sentence_nr": 2 + "score": 0.0009218289085545725, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.3563982585943877, - "sentence_nr": 2 + "score": 0.14883746844067872, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.5378970484635915, - "sentence_nr": 2 + "score": 0.4476843235219058, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.1510722413165652, - "sentence_nr": 2 + "score": 0.07999819990926477, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.43592329727028295, - "sentence_nr": 2 + "score": 0.3889987132692464, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.2465888500427759, - "sentence_nr": 2 + "score": 0.1059352062327485, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.5221084445696768, - "sentence_nr": 2 + "score": 0.4291550754056065, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.13240628161243978, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.3347576434758551, - "sentence_nr": 2 + "score": 0.3916082207331212, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.31372333533981844, - "sentence_nr": 2 + "score": 0.09171389226334559, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.5741396495481692, - "sentence_nr": 2 + "score": 0.47356776940569145, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.16136315230667173, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.48202529715173736, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.4583603882613907, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.671355324267905, - "sentence_nr": 2 + "score": 0.4750212573397775, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.10077062063331403, - "sentence_nr": 2 + "score": 0.003607064963668313, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.09760482860544632, - "sentence_nr": 2 + "score": 0.09577479457615844, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.2600884210903425, - "sentence_nr": 2 + "score": 0.11161133657801552, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_from", - "metric": "chrf", - "score": 0.531430106996609, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_from", - "metric": "bleu", - "score": 0.23380867598952562, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.5100551662629407, - "sentence_nr": 2 + "score": 0.4277891734340718, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7281051247089317, - "sentence_nr": 3 + "score": 0.3961285597009415, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.7882997401328445, - "sentence_nr": 3 + "score": 0.6148751441350505, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.5806197937310393, - "sentence_nr": 3 + "score": 0.4923751299732868, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.7346706700987636, - "sentence_nr": 3 + "score": 0.6853756490381199, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.5793367580502561, - "sentence_nr": 3 + "score": 0.3996712647649035, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.6502428441722727, - "sentence_nr": 3 + "score": 0.6353525755760105, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.4855332614117322, - "sentence_nr": 3 + "score": 0.5115346945020283, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5299556742893647, - "sentence_nr": 3 + "score": 0.7037574715738644, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.38249626297768063, - "sentence_nr": 3 + "score": 0.38870674200492367, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.40976234193505356, - "sentence_nr": 3 + "score": 0.6484380084879691, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.369345079296433, - "sentence_nr": 3 + "score": 0.24715873794308874, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5103516764863386, - "sentence_nr": 3 + "score": 0.49051792813181655, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.5357110024227318, - "sentence_nr": 3 + "score": 0.24508104771894088, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.6365941772753647, - "sentence_nr": 3 + "score": 0.5725552336126134, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", @@ -14150,7 +13659,7 @@ "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 3 + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", @@ -14158,183 +13667,167 @@ "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.6960917409740967, - "sentence_nr": 3 + "score": 0.33608213382072566, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.8209757784637755, - "sentence_nr": 3 + "score": 0.6155314069125684, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.14790264259417688, - "sentence_nr": 3 + "score": 0.20801258614305904, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.27159767590045303, - "sentence_nr": 3 + "score": 0.26703508536995574, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.4751132438608344, - "sentence_nr": 3 + "score": 0.35315040956049437, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.6849386986272349, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_from", - "metric": "bleu", - "score": 0.6244631487487835, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_from", - "metric": "chrf", - "score": 0.6931369519059803, - "sentence_nr": 3 + "score": 0.625895188503691, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.25552199116069907, - "sentence_nr": 3 + "score": 0.17374951565433233, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.3799133205289109, - "sentence_nr": 3 + "score": 0.45325597884524305, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.23386786214190372, - "sentence_nr": 3 + "score": 0.17743299460161885, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.3682311523733465, - "sentence_nr": 3 + "score": 0.43071271897416463, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.11739521786077453, - "sentence_nr": 3 + "score": 0.16052654068024738, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.22090491782919655, - "sentence_nr": 3 + "score": 0.41580120868053494, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 3 + "score": 0.05963579607071745, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.280413108453108, - "sentence_nr": 3 + "score": 0.31139762378406344, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.08635800047213174, - "sentence_nr": 3 + "score": 0.11133996756497437, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.218109371254876, - "sentence_nr": 3 + "score": 0.4410280353998367, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.139800134566647, - "sentence_nr": 3 + "score": 0.11346446511593337, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.2510112235832054, - "sentence_nr": 3 + "score": 0.3675317022605926, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0925329498915617, - "sentence_nr": 3 + "score": 0.10742716472890976, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.2110486160692096, - "sentence_nr": 3 + "score": 0.42694859148910824, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", @@ -14342,7 +13835,7 @@ "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 3 + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", @@ -14350,23 +13843,23 @@ "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.2887308472548599, - "sentence_nr": 3 + "score": 0.19073363590503933, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.41654484827391225, - "sentence_nr": 3 + "score": 0.49895382941569383, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -14374,575 +13867,527 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.12453389344594705, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.141543757252386, - "sentence_nr": 3 + "score": 0.14745870033404418, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.2594145364221844, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_from", - "metric": "bleu", - "score": 0.4185938787651429, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_from", - "metric": "chrf", - "score": 0.432962604241542, - "sentence_nr": 3 + "score": 0.475170637938921, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.4093301993048525, - "sentence_nr": 3 + "score": 0.26801022984888695, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.512762518189388, - "sentence_nr": 3 + "score": 0.5654883864995515, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.6244631487487835, - "sentence_nr": 3 + "score": 0.21665407194210906, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6931369519059803, - "sentence_nr": 3 + "score": 0.4344921442639243, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.581972638479957, - "sentence_nr": 3 + "score": 0.2735429726790281, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6970914528585833, - "sentence_nr": 3 + "score": 0.5644723203818537, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.44120063733294235, - "sentence_nr": 3 + "score": 0.20223322445648179, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.5296624608564717, - "sentence_nr": 3 + "score": 0.5084057058209687, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.5642761727828352, - "sentence_nr": 3 + "score": 0.26459538953931094, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6181373706707737, - "sentence_nr": 3 + "score": 0.5272178908335121, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.4440750605884706, - "sentence_nr": 3 + "score": 0.012201453805310429, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.5402588602256685, - "sentence_nr": 3 + "score": 0.063050817196087, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.6458552885189878, - "sentence_nr": 3 + "score": 0.25848476545940924, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.7468283944111381, - "sentence_nr": 3 + "score": 0.5525933856866961, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.4272870063962341, - "sentence_nr": 3 + "score": 0.13019082899297843, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.5170917334956868, - "sentence_nr": 3 + "score": 0.40512126305429846, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.43310177167002284, - "sentence_nr": 3 + "score": 0.24071298960902482, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.534533410927948, - "sentence_nr": 3 + "score": 0.5438509851618877, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.2063890416514164, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.26481979271706185, - "sentence_nr": 3 + "score": 0.38567678850872256, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.3212854967972961, - "sentence_nr": 3 + "score": 0.3295566054952435, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.47171327621770304, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_from", - "metric": "bleu", - "score": 0.6256538561604215, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_from", - "metric": "chrf", - "score": 0.6943310521668014, - "sentence_nr": 3 + "score": 0.5816133441895466, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.4577275269488853, - "sentence_nr": 3 + "score": 0.3574583793293068, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6747054474171109, - "sentence_nr": 3 + "score": 0.5924115119819969, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.25383339228798274, - "sentence_nr": 3 + "score": 0.37994652561206577, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.45896379476820603, - "sentence_nr": 3 + "score": 0.6464467277069994, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.3508739523842563, - "sentence_nr": 3 + "score": 0.4206507730319955, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.5533976153694653, - "sentence_nr": 3 + "score": 0.678851303587664, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.23705266435224473, - "sentence_nr": 3 + "score": 0.35367180741660353, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.44716007458096513, - "sentence_nr": 3 + "score": 0.6344846206551544, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.2615858282579583, - "sentence_nr": 3 + "score": 0.3563758622144919, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.35447530946908884, - "sentence_nr": 3 + "score": 0.6037023613177924, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.25530635525095574, - "sentence_nr": 3 + "score": 0.2632018059331281, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.4224404198283467, - "sentence_nr": 3 + "score": 0.501302719796297, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.44392090655418587, - "sentence_nr": 3 + "score": 0.20586736678432452, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.5678926447384061, - "sentence_nr": 3 + "score": 0.5693016623172978, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.27075075499555246, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5374328610523021, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.40891568776497583, - "sentence_nr": 3 + "score": 0.1455399826828606, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.46522329223142805, - "sentence_nr": 3 + "score": 0.4504825146558032, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.11436433361427001, - "sentence_nr": 3 + "score": 0.175866555062937, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.23221971735799607, - "sentence_nr": 3 + "score": 0.2758687846643748, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.18580985894574314, - "sentence_nr": 3 + "score": 0.2948978498692003, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3347249292100999, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_from", - "metric": "bleu", - "score": 0.6244631487487835, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_from", - "metric": "chrf", - "score": 0.6931369519059803, - "sentence_nr": 3 + "score": 0.5529453973837751, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.27668736912821895, - "sentence_nr": 3 + "score": 0.22292726306270316, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4414406760568898, - "sentence_nr": 3 + "score": 0.5653789747970112, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.17200767571780612, - "sentence_nr": 3 + "score": 0.09362261118571368, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3723150838362789, - "sentence_nr": 3 + "score": 0.3452056942265759, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.2465659486053858, - "sentence_nr": 3 + "score": 0.26930937054323245, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.5689069160047179, - "sentence_nr": 3 + "score": 0.5410704185827219, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.24634765861867908, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.30391153783979835, - "sentence_nr": 3 + "score": 0.55968513851572, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.15138514598766048, - "sentence_nr": 3 + "score": 0.2158914621804855, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3237497764315872, - "sentence_nr": 3 + "score": 0.5448184155666022, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.1544458227548897, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3343587266874694, - "sentence_nr": 3 + "score": 0.39152357647177133, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.23817261442630488, - "sentence_nr": 3 + "score": 0.08175340974854195, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.448286611717823, - "sentence_nr": 3 + "score": 0.4308342322390109, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3185785286756486, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.20679845323803403, - "sentence_nr": 3 + "score": 0.3268233487541633, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.47636494608150104, - "sentence_nr": 3 + "score": 0.6084114123608597, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -14950,575 +14395,527 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.04759937639788563, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.09147827112247602, - "sentence_nr": 3 + "score": 0.2500653935141143, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3258762519783793, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_from", - "metric": "bleu", - "score": 0.19107912313367556, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_from", - "metric": "chrf", - "score": 0.49367395128894914, - "sentence_nr": 3 + "score": 0.585528867886047, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.4751132438608344, - "sentence_nr": 3 + "score": 0.21403222128228389, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6159319815107203, - "sentence_nr": 3 + "score": 0.563121432204311, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.1477219991186121, - "sentence_nr": 3 + "score": 0.18917620656425485, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.28685201698226354, - "sentence_nr": 3 + "score": 0.4346170232980484, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.1477219991186121, - "sentence_nr": 3 + "score": 0.18505378795140082, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.2391308148553106, - "sentence_nr": 3 + "score": 0.47051087423292237, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.18180608220159192, - "sentence_nr": 3 + "score": 0.09807167131529582, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.27307753334479423, - "sentence_nr": 3 + "score": 0.4646043403137081, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.15604242268653643, - "sentence_nr": 3 + "score": 0.18031307339768174, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.2255928425212252, - "sentence_nr": 3 + "score": 0.522164454804456, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.14965975078050625, - "sentence_nr": 3 + "score": 0.037874984245935134, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.22213502776474325, - "sentence_nr": 3 + "score": 0.13083094614009624, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.3160946016179871, - "sentence_nr": 3 + "score": 0.20064110494011925, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.407876439044591, - "sentence_nr": 3 + "score": 0.5205761630334527, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.0684792839692368, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.025108530586642898, - "sentence_nr": 3 + "score": 0.3138898863773231, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.17466240109087192, - "sentence_nr": 3 + "score": 0.1573857459340795, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.2719194508460068, - "sentence_nr": 3 + "score": 0.5347526444819753, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.008180069062416927, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.12316365460790003, - "sentence_nr": 3 + "score": 0.04605877529742035, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.2615858282579583, - "sentence_nr": 3 + "score": 0.13904320686250593, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.35862918415512257, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_from", - "metric": "bleu", - "score": 0.4185938787651429, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_from", - "metric": "chrf", - "score": 0.5087816327099641, - "sentence_nr": 3 + "score": 0.47169365083525167, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.7281051247089317, - "sentence_nr": 3 + "score": 0.4054983797456263, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.78479833664205, - "sentence_nr": 3 + "score": 0.6264774230839022, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.3254455687469726, - "sentence_nr": 3 + "score": 0.420450507904553, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.4474512036484817, - "sentence_nr": 3 + "score": 0.6503146347305717, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.6230832293767097, - "sentence_nr": 3 + "score": 0.43870712112271204, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.702540870003671, - "sentence_nr": 3 + "score": 0.6525926696001584, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.3267294026204632, - "sentence_nr": 3 + "score": 0.4207445490015154, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.4510525482602028, - "sentence_nr": 3 + "score": 0.6496192656497308, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.24728515687112834, - "sentence_nr": 3 + "score": 0.2999092588227898, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.3088155734423375, - "sentence_nr": 3 + "score": 0.5505916495384416, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.2772655014585435, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.27718461611705486, - "sentence_nr": 3 + "score": 0.4799723286048352, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.5365920629514802, - "sentence_nr": 3 + "score": 0.2516768028374535, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.6274039030337838, - "sentence_nr": 3 + "score": 0.49572209766846287, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.2958351954606211, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5202221091638364, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.4578226095312774, - "sentence_nr": 3 + "score": 0.3471036105446511, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.5406295999835291, - "sentence_nr": 3 + "score": 0.5663019495273462, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.009070964338765818, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.002054231717337716, - "sentence_nr": 3 + "score": 0.06852404470758497, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.12286996020967837, - "sentence_nr": 3 + "score": 0.3212066202235163, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.31567668741706395, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_from", - "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_from", - "metric": "chrf", - "score": 0.6834390596430621, - "sentence_nr": 3 + "score": 0.5836558214123343, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.4815092081725061, - "sentence_nr": 3 + "score": 0.20051119758906127, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5820265218174012, - "sentence_nr": 3 + "score": 0.5334791309401924, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.23887527917609022, - "sentence_nr": 3 + "score": 0.24894072982768842, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.4120359948636439, - "sentence_nr": 3 + "score": 0.5212235893093335, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.36210097004176117, - "sentence_nr": 3 + "score": 0.23724642034775328, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.408098151133905, - "sentence_nr": 3 + "score": 0.5175129869169551, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.3165014630070639, - "sentence_nr": 3 + "score": 0.1849419409628554, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.42516173623967946, - "sentence_nr": 3 + "score": 0.5067677916637257, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.353203510510529, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.4910213297498164, - "sentence_nr": 3 + "score": 0.4263684749347053, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.15820362165931962, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.2249046365436241, - "sentence_nr": 3 + "score": 0.4228574070038002, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.4753167451887016, - "sentence_nr": 3 + "score": 0.11126509848873964, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.6372909532389948, - "sentence_nr": 3 + "score": 0.4338923576538663, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.31311320826536454, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.38317923930200504, - "sentence_nr": 3 + "score": 0.17236491061326006, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.47975624978837655, - "sentence_nr": 3 + "score": 0.5548663878579595, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -15526,351 +14923,319 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.2054194471318506, - "sentence_nr": 3 + "score": 0.3540506408782035, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.25678404806291744, - "sentence_nr": 3 + "score": 0.08906092883748383, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.37045149029437513, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_from", - "metric": "bleu", - "score": 0.5617848264135781, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_from", - "metric": "chrf", - "score": 0.706027620990306, - "sentence_nr": 3 + "score": 0.4317746285352776, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.3535002370419364, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7246473808162345, - "sentence_nr": 3 + "score": 0.5959879218348465, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.393613605227227, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.728208634600343, - "sentence_nr": 3 + "score": 0.6492198447661237, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.5793367580502561, - "sentence_nr": 3 + "score": 0.393613605227227, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6502428441722727, - "sentence_nr": 3 + "score": 0.6492198447661237, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.1243018504102695, - "sentence_nr": 3 + "score": 0.38333108639273095, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.32950116238735283, - "sentence_nr": 3 + "score": 0.6252821653079126, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.23660362391696813, - "sentence_nr": 3 + "score": 0.2562849004088193, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.34152697838249696, - "sentence_nr": 3 + "score": 0.5767019342009202, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.369345079296433, - "sentence_nr": 3 + "score": 0.19851743023355672, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.5103516764863386, - "sentence_nr": 3 + "score": 0.49793621556542356, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.5357110024227318, - "sentence_nr": 3 + "score": 0.10954782904363085, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6365941772753647, - "sentence_nr": 3 + "score": 0.5090382887002297, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.1614809742656655, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.4145218112165384, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.36763082847636347, - "sentence_nr": 3 + "score": 0.2834484329788497, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.45637140510576385, - "sentence_nr": 3 + "score": 0.5201572704778937, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.006569332862878646, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.16935976352352106, - "sentence_nr": 3 + "score": 0.060864196135666904, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.31268514922728713, - "sentence_nr": 3 + "score": 0.2756885721075884, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.41990725085948355, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_from", - "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_from", - "metric": "chrf", - "score": 0.6869404628233521, - "sentence_nr": 3 + "score": 0.5867077870431389, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.8482942955247808, - "sentence_nr": 3 + "score": 0.35983766090218355, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 3 + "score": 0.5862251404739759, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.21147734744561483, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.41020178654369294, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.3563982585943877, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.5378970484635915, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.1510722413165652, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.43592329727028295, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.2465888500427759, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.5221084445696768, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.13240628161243978, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.3347576434758551, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.8363600587440573, - "sentence_nr": 3 + "score": 0.31372333533981844, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.9912737182609732, - "sentence_nr": 3 + "score": 0.5741396495481692, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", @@ -15878,7 +15243,7 @@ "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 3 + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", @@ -15886,3927 +15251,3591 @@ "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.4583603882613907, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.671355324267905, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.6018154975998465, - "sentence_nr": 3 + "score": 0.10077062063331403, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.7669980679050217, - "sentence_nr": 3 + "score": 0.09760482860544632, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.8482942955247808, - "sentence_nr": 3 + "score": 0.2600884210903425, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 3 + "score": 0.531430106996609, + "sentence_nr": 2 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.2971752224486841, + "sentence_nr": 2 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.605133664481872, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_from", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_from", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.2329856851831642, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.5405751250637106, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.2563564295134795, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.5499025328773104, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2500653935141143, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.2246029757863831, - "sentence_nr": 4 + "score": 0.5098952451698188, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.22150370805587954, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 + "score": 0.5463488388082953, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.14574402656519908, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 + "score": 0.4275543759804943, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.37709297891717664, - "sentence_nr": 4 + "score": 0.24750028117795922, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.6881502501430368, - "sentence_nr": 4 + "score": 0.5894646098566614, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.2687379663485886, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.5826619907747026, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.15897333608001968, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.4496168003395693, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.15089318423122544, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_from", - "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_from", - "metric": "bleu", - "score": 0.37184214350816, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.7949498209605872, - "sentence_nr": 4 + "score": 0.4042478943311393, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2786312783602775, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.4836796407825139, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.41756686236967944, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.5616829345739638, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4205004825822372, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.5674537639314233, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.40974323819644953, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5581982021478125, - "sentence_nr": 4 + "score": 0.5368112087257564, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.39461811323775403, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.5655204109921267, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.219672574669477, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.37413906702142435, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3116520879159789, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.47517792402030584, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3627923367798331, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.5255399246733422, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12870376210497989, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.22151451171035633, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.30181468526956173, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_from", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_from", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.5261802780475523, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.39579112101105834, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.43795381992037963, - "sentence_nr": 4 + "score": 0.6431490866428237, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.38189567401226293, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5881561248602009, - "sentence_nr": 4 + "score": 0.6154314825900052, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3436153961225413, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.46670957224939175, - "sentence_nr": 4 + "score": 0.5954254642696512, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4339219137216798, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.37544324742239676, - "sentence_nr": 4 + "score": 0.6338401824373191, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.25564177137418986, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.49546288984677567, - "sentence_nr": 4 + "score": 0.49870011615602194, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27946415227589155, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.41602211217571683, - "sentence_nr": 4 + "score": 0.4938296655037709, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.26925601229087914, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.49713060327965375, - "sentence_nr": 4 + "score": 0.6050395148484196, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27336087678628246, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.440129802760994, - "sentence_nr": 4 + "score": 0.4919779927233182, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.16195570128532405, - "sentence_nr": 4 + "score": 0.43235877156651625, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.581645267684411, - "sentence_nr": 4 + "score": 0.614485867381761, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.012951112459987979, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.429292711066547, - "sentence_nr": 4 + "score": 0.11882277038397698, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 + "score": 0.3272963527043486, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_from", - "metric": "chrf", - "score": 0.5848202846227532, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_from", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.41734150775835166, - "sentence_nr": 4 + "score": 0.5715613564297359, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.35253338922743144, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.41213231348812146, - "sentence_nr": 4 + "score": 0.6487975154557831, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2126707920684064, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.40435987083533204, - "sentence_nr": 4 + "score": 0.4659908460634765, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2914880531303981, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.34256683873776383, - "sentence_nr": 4 + "score": 0.5962886968213414, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23944666570758283, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.41477028165511615, - "sentence_nr": 4 + "score": 0.5106509239874657, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15985840708020788, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.40854152133685306, - "sentence_nr": 4 + "score": 0.44951053332729884, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.17920531400657588, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.4164061298971701, - "sentence_nr": 4 + "score": 0.4522763055702811, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.26199400535088346, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5709936728721758, - "sentence_nr": 4 + "score": 0.5804827870380099, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13442725522288548, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.40435987083533204, - "sentence_nr": 4 + "score": 0.39067504005337655, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20533250289138671, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.40562290854898025, - "sentence_nr": 4 + "score": 0.49681810344665644, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0950330051810703, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.33546955366063214, - "sentence_nr": 4 + "score": 0.2535554509913635, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20378172261136207, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_from", - "metric": "chrf", - "score": 0.40319099863003527, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_from", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.4628457025650974, - "sentence_nr": 4 + "score": 0.48600008237332104, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.24513414885202045, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.39618802899930716, - "sentence_nr": 4 + "score": 0.5476647609559218, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23240102389974368, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.39858613265631837, - "sentence_nr": 4 + "score": 0.4973274282641141, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2516768028374535, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.3818534926571001, - "sentence_nr": 4 + "score": 0.47249781871556595, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.16105265992626083, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.3644112480028862, - "sentence_nr": 4 + "score": 0.404377371664668, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27217589854489177, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.39618802899930716, - "sentence_nr": 4 + "score": 0.5756343666825848, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15813859795767055, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.393379300802006, - "sentence_nr": 4 + "score": 0.44607340294350173, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20533250289138671, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.3892064098781075, - "sentence_nr": 4 + "score": 0.45975635079501215, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0698714799763323, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.3066682918799934, - "sentence_nr": 4 + "score": 0.18217918401705574, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3077422016953529, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.3010381621698183, - "sentence_nr": 4 + "score": 0.5943673820353285, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09564571510780719, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.28783297914763095, - "sentence_nr": 4 + "score": 0.1907009110214351, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10353153556093725, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_from", - "metric": "chrf", - "score": 0.3958941272081701, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_from", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.3113686002796155, - "sentence_nr": 4 + "score": 0.40215410362634535, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.7281051247089317, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5042211795038526, - "sentence_nr": 4 + "score": 0.7882997401328445, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5806197937310393, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.4425973012069069, - "sentence_nr": 4 + "score": 0.7346706700987636, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5793367580502561, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.511876122662448, - "sentence_nr": 4 + "score": 0.6502428441722727, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4855332614117322, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.511876122662448, - "sentence_nr": 4 + "score": 0.5299556742893647, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.38249626297768063, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5948724602646328, - "sentence_nr": 4 + "score": 0.40976234193505356, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.369345079296433, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5049375875723539, - "sentence_nr": 4 + "score": 0.5103516764863386, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5357110024227318, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5582360999449585, - "sentence_nr": 4 + "score": 0.6365941772753647, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.48375513642780327, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6960917409740967, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.4915933923809756, - "sentence_nr": 4 + "score": 0.8209757784637755, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14790264259417688, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.41469341972645324, - "sentence_nr": 4 + "score": 0.27159767590045303, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4751132438608344, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_from", - "metric": "chrf", - "score": 0.39451521279220947, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_from", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.47825370157575003, - "sentence_nr": 4 + "score": 0.6849386986272349, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.25552199116069907, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5516607622642397, - "sentence_nr": 4 + "score": 0.3799133205289109, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23386786214190372, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.47160616105623426, - "sentence_nr": 4 + "score": 0.3682311523733465, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11739521786077453, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5256353512715748, - "sentence_nr": 4 + "score": 0.22090491782919655, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1892240568795935, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.3765697091436241, - "sentence_nr": 4 + "score": 0.280413108453108, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.27447938256311044, - "sentence_nr": 4 + "score": 0.08635800047213174, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.615291848344044, - "sentence_nr": 4 + "score": 0.218109371254876, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.31573558123189943, - "sentence_nr": 4 + "score": 0.139800134566647, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6989238098201116, - "sentence_nr": 4 + "score": 0.2510112235832054, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0925329498915617, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6245952145297528, - "sentence_nr": 4 + "score": 0.2110486160692096, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.44995700110278536, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.2680165156355779, - "sentence_nr": 4 + "score": 0.2887308472548599, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5989264158576341, - "sentence_nr": 4 + "score": 0.41654484827391225, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.4425650919372919, - "sentence_nr": 4 + "score": 0.12453389344594705, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.32594818888335836, - "sentence_nr": 4 + "score": 0.141543757252386, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_from", - "metric": "chrf", - "score": 0.6263180162489238, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_from", - "metric": "bleu", - "score": 0.32594818888335836, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6263180162489238, - "sentence_nr": 4 + "score": 0.2594145364221844, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4093301993048525, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.4576529535952892, - "sentence_nr": 4 + "score": 0.512762518189388, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6244631487487835, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.5309982646782259, - "sentence_nr": 4 + "score": 0.6931369519059803, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.581972638479957, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.4726395749383864, - "sentence_nr": 4 + "score": 0.6970914528585833, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.44120063733294235, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.40052428191473877, - "sentence_nr": 4 + "score": 0.5296624608564717, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5642761727828352, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.33762297226992255, - "sentence_nr": 4 + "score": 0.6181373706707737, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4440750605884706, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.39336600752225864, - "sentence_nr": 4 + "score": 0.5402588602256685, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6458552885189878, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.4101715667811344, - "sentence_nr": 4 + "score": 0.7468283944111381, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.4272870063962341, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5170917334956868, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.43310177167002284, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.45834841871997833, - "sentence_nr": 4 + "score": 0.534533410927948, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.31754227193241025, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_from", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.26481979271706185, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_from", - "metric": "chrf", - "score": 0.3974726419025883, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3212854967972961, + "sentence_nr": 3 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.43106863786661676, - "sentence_nr": 4 + "score": 0.47171327621770304, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.24601372576927547, - "sentence_nr": 4 + "score": 0.4577275269488853, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6374693500772332, - "sentence_nr": 4 + "score": 0.6747054474171109, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 4 + "score": 0.25383339228798274, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6151179643430991, - "sentence_nr": 4 + "score": 0.45896379476820603, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.24601372576927547, - "sentence_nr": 4 + "score": 0.3508739523842563, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.686947433675709, - "sentence_nr": 4 + "score": 0.5533976153694653, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23705266435224473, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.4746119151171374, - "sentence_nr": 4 + "score": 0.44716007458096513, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.2658483576665877, - "sentence_nr": 4 + "score": 0.2615858282579583, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6410540990527072, - "sentence_nr": 4 + "score": 0.35447530946908884, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.25530635525095574, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.5639241776831634, - "sentence_nr": 4 + "score": 0.4224404198283467, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.44392090655418587, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.5281061979991509, - "sentence_nr": 4 + "score": 0.5678926447384061, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6668099404219522, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.40891568776497583, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.5554602680850725, - "sentence_nr": 4 + "score": 0.46522329223142805, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11436433361427001, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.41291750111233794, - "sentence_nr": 4 + "score": 0.23221971735799607, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.17181529671327242, - "sentence_nr": 4 + "score": 0.18580985894574314, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_from", - "metric": "chrf", - "score": 0.6053635787005981, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_from", - "metric": "bleu", - "score": 0.2658483576665877, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6798749495422826, - "sentence_nr": 4 + "score": 0.3347249292100999, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27668736912821895, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.46426595961938383, - "sentence_nr": 4 + "score": 0.4414406760568898, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.17200767571780612, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.41238100267720657, - "sentence_nr": 4 + "score": 0.3723150838362789, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2465659486053858, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.41238100267720657, - "sentence_nr": 4 + "score": 0.5689069160047179, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4806367958084579, - "sentence_nr": 4 + "score": 0.30391153783979835, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15138514598766048, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.6562641136790542, - "sentence_nr": 4 + "score": 0.3237497764315872, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1544458227548897, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.5013632657267051, - "sentence_nr": 4 + "score": 0.3343587266874694, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23817261442630488, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4224991954993499, - "sentence_nr": 4 + "score": 0.448286611717823, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.5191362758854317, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20679845323803403, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.5379068753129642, - "sentence_nr": 4 + "score": 0.47636494608150104, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3348758882377771, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_from", - "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_from", - "metric": "chrf", - "score": 0.4601349893675622, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09147827112247602, + "sentence_nr": 3 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.5066311799500233, - "sentence_nr": 4 + "score": 0.3258762519783793, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.2887138086538547, - "sentence_nr": 5 + "score": 0.4751132438608344, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6342291345998248, - "sentence_nr": 5 + "score": 0.6159319815107203, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.1477219991186121, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.28685201698226354, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.7013062757071812, - "sentence_nr": 5 + "score": 0.1477219991186121, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.9303769449292738, - "sentence_nr": 5 + "score": 0.2391308148553106, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.18180608220159192, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.2381658499765768, - "sentence_nr": 5 + "score": 0.27307753334479423, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.15604242268653643, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.2255928425212252, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 + "score": 0.14965975078050625, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.9063898435384111, - "sentence_nr": 5 + "score": 0.22213502776474325, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.8522456714074852, - "sentence_nr": 5 + "score": 0.3160946016179871, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.9096914044088521, - "sentence_nr": 5 + "score": 0.407876439044591, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.025108530586642898, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.17466240109087192, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.2719194508460068, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.12316365460790003, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.9457416090031758, - "sentence_nr": 5 + "score": 0.2615858282579583, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_from", - "metric": "chrf", - "score": 0.9892952933418456, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_from", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.35862918415512257, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 + "score": 0.7281051247089317, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 + "score": 0.78479833664205, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 + "score": 0.3254455687469726, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 + "score": 0.4474512036484817, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 + "score": 0.6230832293767097, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 + "score": 0.702540870003671, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.3267294026204632, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.4510525482602028, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.24728515687112834, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.3088155734423375, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.4234885228074744, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7410180114887145, - "sentence_nr": 5 + "score": 0.27718461611705486, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.5365920629514802, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.6274039030337838, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.4578226095312774, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.5406295999835291, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.4234885228074744, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7410180114887145, - "sentence_nr": 5 + "score": 0.002054231717337716, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.47375069012411286, - "sentence_nr": 5 + "score": 0.12286996020967837, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_from", - "metric": "chrf", - "score": 0.7543919667018285, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_from", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.31567668741706395, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 + "score": 0.4815092081725061, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 + "score": 0.5820265218174012, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 + "score": 0.23887527917609022, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 + "score": 0.4120359948636439, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 + "score": 0.36210097004176117, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 + "score": 0.408098151133905, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.6626129614342791, - "sentence_nr": 5 + "score": 0.3165014630070639, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.8597893117683423, - "sentence_nr": 5 + "score": 0.42516173623967946, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.353203510510529, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.2828367156737383, - "sentence_nr": 5 + "score": 0.4910213297498164, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.44543578807748957, - "sentence_nr": 5 + "score": 0.15820362165931962, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7513336773729535, - "sentence_nr": 5 + "score": 0.2249046365436241, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 + "score": 0.4753167451887016, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 + "score": 0.6372909532389948, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.4625957988586645, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7341375356694393, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.5461499540157965, - "sentence_nr": 5 + "score": 0.38317923930200504, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7954823723658209, - "sentence_nr": 5 + "score": 0.47975624978837655, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.2404315522172745, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.49155714102395526, - "sentence_nr": 5 + "score": 0.2054194471318506, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.3477250470582593, - "sentence_nr": 5 + "score": 0.25678404806291744, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_from", - "metric": "chrf", - "score": 0.7188419868243952, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_from", - "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 + "score": 0.37045149029437513, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "score": 0.580451128369423, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6681898017773897, - "sentence_nr": 5 + "score": 0.7246473808162345, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.40276720463657734, - "sentence_nr": 5 + "score": 0.580451128369423, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6529271690805427, - "sentence_nr": 5 + "score": 0.728208634600343, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "score": 0.5793367580502561, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6681898017773897, - "sentence_nr": 5 + "score": 0.6502428441722727, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "score": 0.1243018504102695, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6681898017773897, - "sentence_nr": 5 + "score": 0.32950116238735283, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "score": 0.23660362391696813, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "score": 0.34152697838249696, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "score": 0.369345079296433, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6392900613840917, - "sentence_nr": 5 + "score": 0.5103516764863386, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "score": 0.5357110024227318, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "score": 0.6365941772753647, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6392900613840917, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "score": 0.36763082847636347, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "score": 0.45637140510576385, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 5 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.4386229919587297, - "sentence_nr": 5 + "score": 0.16935976352352106, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "score": 0.31268514922728713, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_from", - "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_from", - "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "score": 0.41990725085948355, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.5379348324975908, - "sentence_nr": 5 + "score": 0.8482942955247808, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.7703766110349561, - "sentence_nr": 5 + "score": 0.9256238040654331, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.30188353873287377, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6086565367747951, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.22391522968021457, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6087618281135659, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 + "score": 0.8363600587440573, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 0.9912737182609732, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.6018154975998465, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.4621757041594117, - "sentence_nr": 5 + "score": 0.7669980679050217, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.22067731046885494, - "sentence_nr": 5 + "score": 0.8482942955247808, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.5635661737033422, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_from", - "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_from", - "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 0.9256238040654331, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 + "score": 0.47320724783393625, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 + "score": 0.5833006006517599, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.6026286934891149, - "sentence_nr": 5 + "score": 0.3556521383601747, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.8025775976044891, - "sentence_nr": 5 + "score": 0.594830811413066, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.6626129614342791, - "sentence_nr": 5 + "score": 0.5406964703993759, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.8597893117683423, - "sentence_nr": 5 + "score": 0.5964595329953364, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.6626129614342791, - "sentence_nr": 5 + "score": 0.2575863752355164, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.8597893117683423, - "sentence_nr": 5 + "score": 0.3717184743596148, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 + "score": 0.34589895849033114, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 + "score": 0.44792042673107413, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.08197539732074254, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.7689532399280165, - "sentence_nr": 5 + "score": 0.2552663483401067, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 + "score": 0.3730786950813075, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 + "score": 0.47401660085208147, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.4625957988586645, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.7338978299765546, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.5461499540157965, - "sentence_nr": 5 + "score": 0.38223593598574, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.7954823723658209, - "sentence_nr": 5 + "score": 0.5729676575997464, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 5 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.24011079455637607, - "sentence_nr": 5 + "score": 0.19328966457045355, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.19920494035049138, - "sentence_nr": 5 + "score": 0.20477156411200437, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_from", - "metric": "chrf", - "score": 0.614209720001149, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_from", - "metric": "bleu", - "score": 0.6626129614342791, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.8597893117683423, - "sentence_nr": 5 + "score": 0.3371728179865314, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.5896613549548209, - "sentence_nr": 5 + "score": 0.08939270118279458, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.7528914749586836, - "sentence_nr": 5 + "score": 0.2952752522340665, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 + "score": 0.21629114799587432, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 + "score": 0.3542320138389837, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.5300714512917181, - "sentence_nr": 5 + "score": 0.21993356630819796, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.7461630750708693, - "sentence_nr": 5 + "score": 0.3822901360655399, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 + "score": 0.12212865548711085, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 + "score": 0.27604929504751197, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 + "score": 0.19984607356962125, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 + "score": 0.29326031481052006, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.33359103227594633, - "sentence_nr": 5 + "score": 0.08939270118279458, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.701102363286568, - "sentence_nr": 5 + "score": 0.2952752522340665, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.5271017464925504, - "sentence_nr": 5 + "score": 0.17095864413061523, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.7749613594649343, - "sentence_nr": 5 + "score": 0.2805749649536233, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 5 + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 5 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 + "score": 0.13952118378975725, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 + "score": 0.2962794525145751, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.4335364472118335, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.6878319610579101, - "sentence_nr": 5 + "score": 0.028735632183908046, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.480771131185851, - "sentence_nr": 5 + "score": 0.17670087745185423, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_from", - "metric": "chrf", - "score": 0.7032048786770096, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_from", - "metric": "bleu", - "score": 0.5300714512917181, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.7461630750708693, - "sentence_nr": 5 + "score": 0.3136010782144669, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.4801289744823913, - "sentence_nr": 5 + "score": 0.580451128369423, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.6766690087429765, - "sentence_nr": 5 + "score": 0.7246473808162345, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.3272712268138726, - "sentence_nr": 5 + "score": 0.580451128369423, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.6272846474183881, - "sentence_nr": 5 + "score": 0.728208634600343, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.3272712268138726, - "sentence_nr": 5 + "score": 0.5793367580502561, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.6272846474183881, - "sentence_nr": 5 + "score": 0.6502428441722727, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.30421485886156485, - "sentence_nr": 5 + "score": 0.17662903260733673, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.566236392445952, - "sentence_nr": 5 + "score": 0.359573626731952, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.27274191069381915, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.24706467963183681, - "sentence_nr": 5 + "score": 0.37436438971100644, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.32965129549221617, - "sentence_nr": 5 + "score": 0.526589137558171, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.623436907204599, - "sentence_nr": 5 + "score": 0.5667866238125795, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.32078739729528816, - "sentence_nr": 5 + "score": 0.6244631487487835, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5817366082116868, - "sentence_nr": 5 + "score": 0.7155411017347171, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.3231203125477008, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5812275690118908, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.3231203125477008, - "sentence_nr": 5 + "score": 0.6960917409740967, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5812275690118908, - "sentence_nr": 5 + "score": 0.8209757784637755, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 5 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.44332438338421004, - "sentence_nr": 5 + "score": 0.019516573752972968, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.3231203125477008, - "sentence_nr": 5 + "score": 0.22419056820298167, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_from", - "metric": "chrf", - "score": 0.5812275690118908, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_from", - "metric": "bleu", - "score": 0.3272712268138726, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.6272846474183881, - "sentence_nr": 5 + "score": 0.3577306040313533, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.28489318277723963, - "sentence_nr": 5 + "score": 0.583526016818016, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5764325110247531, - "sentence_nr": 5 + "score": 0.6994652193905146, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.2981792160679168, - "sentence_nr": 5 + "score": 0.27405612859390877, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5788026000794341, - "sentence_nr": 5 + "score": 0.4639958592456083, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.3942058093215873, - "sentence_nr": 5 + "score": 0.4390960897971484, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5878575558111695, - "sentence_nr": 5 + "score": 0.541742178821102, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.2981792160679168, - "sentence_nr": 5 + "score": 0.13232291594986312, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5788026000794341, - "sentence_nr": 5 + "score": 0.301901669683193, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.3665134361137304, - "sentence_nr": 5 + "score": 0.2434330428491034, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.6118771029352303, - "sentence_nr": 5 + "score": 0.31858900384957733, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.3485799122645514, - "sentence_nr": 5 + "score": 0.15084825228964133, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.6090575371936678, - "sentence_nr": 5 + "score": 0.3284886849880412, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.3485799122645514, - "sentence_nr": 5 + "score": 0.4452652851854937, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.6090575371936678, - "sentence_nr": 5 + "score": 0.5889782977654896, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.06088829927112382, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.4100134571476398, - "sentence_nr": 5 + "score": 0.2677353447271197, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5856608401367807, - "sentence_nr": 5 + "score": 0.3932141708916282, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.17098323692758396, - "sentence_nr": 5 + "score": 0.1536690667279411, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5216877937894046, - "sentence_nr": 5 + "score": 0.23373462830676886, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.3527295712700594, - "sentence_nr": 5 + "score": 0.16800102974369996, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.6062826429226292, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_from", - "metric": "bleu", - "score": 0.3665134361137304, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_from", - "metric": "chrf", - "score": 0.6118771029352303, - "sentence_nr": 5 + "score": 0.3319781987745275, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.6471892368478446, - "sentence_nr": 5 + "score": 0.6052987576779449, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.8142499721936278, - "sentence_nr": 5 + "score": 0.643602170728296, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.7012294787544179, - "sentence_nr": 5 + "score": 0.13004800471424346, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.8478115719875968, - "sentence_nr": 5 + "score": 0.28217142159025543, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 + "score": 0.3924259174695316, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 + "score": 0.45050557152077386, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.40202477345336673, - "sentence_nr": 5 + "score": 0.10601317434781207, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.7469480084357536, - "sentence_nr": 5 + "score": 0.2344095627038401, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.24007528246707907, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.2799331151961311, - "sentence_nr": 5 + "score": 0.31084467045503017, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.40157733283424196, - "sentence_nr": 5 + "score": 0.12769027061800275, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.7133166401137868, - "sentence_nr": 5 + "score": 0.21844360831325868, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.4625957988586645, - "sentence_nr": 5 + "score": 0.23705266435224473, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.7494665344743727, - "sentence_nr": 5 + "score": 0.3838188339168412, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.42612283570374254, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.7185121839177114, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 + "score": 0.1852972751417938, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 + "score": 0.36660412101424933, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.17729842264695017, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.5199388279318895, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.23141570376732995, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_from", - "metric": "chrf", - "score": 0.5938624587877649, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_from", - "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.7830281347135482, - "sentence_nr": 5 + "score": 0.2653698485201136, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -19814,7 +18843,7 @@ "task": "translation_from", "metric": "bleu", "score": 1.0, - "sentence_nr": 6 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -19822,7 +18851,7 @@ "task": "translation_from", "metric": "chrf", "score": 1.0, - "sentence_nr": 6 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -19830,7 +18859,7 @@ "task": "translation_from", "metric": "bleu", "score": 1.0, - "sentence_nr": 6 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -19838,7 +18867,7 @@ "task": "translation_from", "metric": "chrf", "score": 1.0, - "sentence_nr": 6 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -19846,95 +18875,95 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 6 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3263040636562357, - "sentence_nr": 6 + "score": 0.2246029757863831, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.37709297891717664, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.6881502501430368, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -19942,7 +18971,7 @@ "task": "translation_from", "metric": "bleu", "score": 1.0, - "sentence_nr": 6 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -19950,151 +18979,135 @@ "task": "translation_from", "metric": "chrf", "score": 1.0, - "sentence_nr": 6 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_from", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_from", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.3390387389794623, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6170420596680538, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.3142665434344143, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6466526067220029, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.3751840463233443, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6279894552667558, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.19268479640608693, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.551397074868541, - "sentence_nr": 6 + "score": 0.5581982021478125, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5838790966762375, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.17470942957770763, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5403400891349619, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5763410052067085, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", @@ -20102,1183 +19115,1087 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 6 + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5460240376042262, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5838790966762375, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.24343304284910333, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6275577931282961, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.28571962561926445, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6431872581462166, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_from", - "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_from", - "metric": "chrf", - "score": 0.5763410052067085, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.5014756677893482, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.7958858211784339, - "sentence_nr": 6 + "score": 0.43795381992037963, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.6255340042200862, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.8724783049357475, - "sentence_nr": 6 + "score": 0.5881561248602009, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.5014756677893482, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.7958858211784339, - "sentence_nr": 6 + "score": 0.46670957224939175, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.3083012995502152, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6589376390020449, - "sentence_nr": 6 + "score": 0.37544324742239676, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.4216890913810254, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6885217194158456, - "sentence_nr": 6 + "score": 0.49546288984677567, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.3083012995502152, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6589376390020449, - "sentence_nr": 6 + "score": 0.41602211217571683, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.29176300840900793, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6143650111703199, - "sentence_nr": 6 + "score": 0.49713060327965375, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.43021236941942204, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.7142896582178452, - "sentence_nr": 6 + "score": 0.440129802760994, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.5014756677893482, - "sentence_nr": 6 + "score": 0.16195570128532405, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.7958858211784339, - "sentence_nr": 6 + "score": 0.581645267684411, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.24090844358935917, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.5468852870478801, - "sentence_nr": 6 + "score": 0.429292711066547, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.6255340042200862, - "sentence_nr": 6 + "score": 0.1667955161379731, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.8724783049357475, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_from", - "metric": "bleu", - "score": 0.43021236941942204, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_from", - "metric": "chrf", - "score": 0.7142896582178452, - "sentence_nr": 6 + "score": 0.5848202846227532, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.7411155087367244, - "sentence_nr": 6 + "score": 0.41213231348812146, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.7411155087367244, - "sentence_nr": 6 + "score": 0.40435987083533204, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 + "score": 0.34256683873776383, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 + "score": 0.41477028165511615, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 + "score": 0.40854152133685306, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 + "score": 0.4164061298971701, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.5709936728721758, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 + "score": 0.40435987083533204, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.7411155087367244, - "sentence_nr": 6 + "score": 0.40562290854898025, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_from", - "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.33546955366063214, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_from", - "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "anthropic/claude-3.5-sonnet", + "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 + "score": 0.40319099863003527, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.39618802899930716, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.39858613265631837, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.3818534926571001, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.3644112480028862, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.39618802899930716, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.393379300802006, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.3892064098781075, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.3066682918799934, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.3010381621698183, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.39545121937832856, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.6963801389253689, - "sentence_nr": 6 + "score": 0.28783297914763095, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_from", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_from", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.3958941272081701, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.4101479464529936, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.7041976254287654, - "sentence_nr": 6 + "score": 0.5042211795038526, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.4547900039222725, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6541971428810075, - "sentence_nr": 6 + "score": 0.4425973012069069, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.2919394073770869, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5957961314949175, - "sentence_nr": 6 + "score": 0.511876122662448, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.17537670874647399, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.4800889669735933, - "sentence_nr": 6 + "score": 0.511876122662448, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.5088645484558708, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6991726442472661, - "sentence_nr": 6 + "score": 0.5948724602646328, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.22845493240080628, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.584996891148118, - "sentence_nr": 6 + "score": 0.5049375875723539, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.5088645484558708, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6991726442472661, - "sentence_nr": 6 + "score": 0.5582360999449585, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.23272696712467975, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5794868721814046, - "sentence_nr": 6 + "score": 0.48375513642780327, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.38785611216800814, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6673259967761724, - "sentence_nr": 6 + "score": 0.4915933923809756, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.16331948281960493, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.350650198151987, - "sentence_nr": 6 + "score": 0.41469341972645324, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.8056920633274978, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.8391519966182309, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_from", - "metric": "bleu", - "score": 0.22640935662631664, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_from", - "metric": "chrf", - "score": 0.6263363844543545, - "sentence_nr": 6 + "score": 0.39451521279220947, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7164026439677106, - "sentence_nr": 6 + "score": 0.5516607622642397, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.47160616105623426, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.5256353512715748, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.3765697091436241, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.27447938256311044, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.615291848344044, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.31573558123189943, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.6989238098201116, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.6245952145297528, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.44995700110278536, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.2680165156355779, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.5989264158576341, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.4425650919372919, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.32594818888335836, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_from", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_from", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.6263180162489238, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.595092211343687, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7971172820981081, - "sentence_nr": 6 + "score": 0.4576529535952892, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.4831233610237384, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7122562458056777, - "sentence_nr": 6 + "score": 0.5309982646782259, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.4831233610237384, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7122562458056777, - "sentence_nr": 6 + "score": 0.4726395749383864, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.4207937380724192, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.6985308026285912, - "sentence_nr": 6 + "score": 0.40052428191473877, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.8020845125558708, - "sentence_nr": 6 + "score": 0.33762297226992255, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7369844404912368, - "sentence_nr": 6 + "score": 0.39336600752225864, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.3675667565747676, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5700185304500285, - "sentence_nr": 6 + "score": 0.4101715667811344, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.595092211343687, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7945212279546889, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.43011383006801057, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7140577175386648, - "sentence_nr": 6 + "score": 0.45834841871997833, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -21286,4319 +20203,3951 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 6 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.3843363395779093, - "sentence_nr": 6 + "score": 0.31754227193241025, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7369844404912368, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_from", - "metric": "bleu", - "score": 0.651158213392685, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_from", - "metric": "chrf", - "score": 0.860002370506267, - "sentence_nr": 6 + "score": 0.3974726419025883, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.24601372576927547, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.6374693500772332, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 + "score": 0.1892240568795935, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6824395076981005, - "sentence_nr": 6 + "score": 0.6151179643430991, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.4896430866960958, - "sentence_nr": 6 + "score": 0.24601372576927547, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7719180936906627, - "sentence_nr": 6 + "score": 0.686947433675709, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6590438071804039, - "sentence_nr": 6 + "score": 0.4746119151171374, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.2658483576665877, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.6410540990527072, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.5639241776831634, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.5281061979991509, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 + "score": 0.1892240568795935, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6824395076981005, - "sentence_nr": 6 + "score": 0.6668099404219522, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.5554602680850725, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.4481489512240194, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7745649676018984, - "sentence_nr": 6 + "score": 0.41291750111233794, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.17181529671327242, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_from", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_from", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.6053635787005981, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 + "score": 0.46426595961938383, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.38305978177479755, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6061131723054572, - "sentence_nr": 6 + "score": 0.41238100267720657, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.34636800712900173, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.5167955767158704, - "sentence_nr": 6 + "score": 0.41238100267720657, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.3675667565747676, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.5397693417183738, - "sentence_nr": 6 + "score": 0.4806367958084579, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 + "score": 0.6562641136790542, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.425143650778693, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6674242019044293, - "sentence_nr": 6 + "score": 0.5013632657267051, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.421151249507493, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6938674571170766, - "sentence_nr": 6 + "score": 0.4224991954993499, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 + "score": 0.5191362758854317, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 + "score": 0.5379068753129642, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.41843795218458035, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6316283876832989, - "sentence_nr": 6 + "score": 0.3348758882377771, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.4803501444747088, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.7417101158248365, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_from", - "metric": "bleu", - "score": 0.4803501444747088, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_from", - "metric": "chrf", - "score": 0.7417101158248365, - "sentence_nr": 6 + "score": 0.4601349893675622, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.17905278399134197, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.37257295447029826, - "sentence_nr": 7 + "score": 0.410846945789476, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.15521606028436608, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.37645329404497957, - "sentence_nr": 7 + "score": 0.39909989628767284, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.12620429887108936, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.35580703793872603, - "sentence_nr": 7 + "score": 0.39913709020460375, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.12872220631084524, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.33602633953270183, - "sentence_nr": 7 + "score": 0.40443357144012176, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.1582866049832572, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.34487142413575794, - "sentence_nr": 7 + "score": 0.5705717737418762, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.03037224815656603, - "sentence_nr": 7 + "score": 0.4121946181418776, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.10203846572325131, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.33381153680096753, - "sentence_nr": 7 + "score": 0.42422145417131013, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.39909989628767284, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.1685643537060726, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.36926449644166065, - "sentence_nr": 7 + "score": 0.5606044053771457, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.014935758919429663, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.08106107745254391, - "sentence_nr": 7 + "score": 0.40443357144012176, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.044304867337633724, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_from", - "metric": "chrf", - "score": 0.20806974344498103, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_from", - "metric": "bleu", - "score": 0.23889245981479848, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.4543632408556633, - "sentence_nr": 7 + "score": 0.4121946181418776, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.1418524086391329, - "sentence_nr": 7 + "score": 0.3113878808075066, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.38295770773758747, - "sentence_nr": 7 + "score": 0.6758978744760765, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.15268019045355535, - "sentence_nr": 7 + "score": 0.17181529671327242, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.41028757620299977, - "sentence_nr": 7 + "score": 0.5293474685884572, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.030860166165309233, - "sentence_nr": 7 + "score": 0.23578316044531808, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.1100250143829584, - "sentence_nr": 7 + "score": 0.5821373704411671, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.21255327712152144, - "sentence_nr": 7 + "score": 0.14528679532351443, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.43272151570555034, - "sentence_nr": 7 + "score": 0.514952316880994, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.08860973467526746, - "sentence_nr": 7 + "score": 0.3113878808075066, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.3178004360288637, - "sentence_nr": 7 + "score": 0.6728506998168392, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.05918530850500025, - "sentence_nr": 7 + "score": 0.3128496839849598, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.1438459189500836, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.30693371625402605, - "sentence_nr": 7 + "score": 0.6371798394308665, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0979038733644086, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.30211704738953993, - "sentence_nr": 7 + "score": 0.30941048637024005, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.2288990188897003, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.48933901443699584, - "sentence_nr": 7 + "score": 0.6423124418413864, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.009624974244068071, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.07318255686027669, - "sentence_nr": 7 + "score": 0.2970314818988727, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.043420474648595074, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_from", - "metric": "chrf", - "score": 0.2884095690753619, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_from", - "metric": "bleu", - "score": 0.24725159675471015, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5112120334550363, - "sentence_nr": 7 + "score": 0.3292499962917628, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.17382347640129553, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.4061580777885601, - "sentence_nr": 7 + "score": 0.4410492519530161, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.13868172938464635, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.3094469764260441, - "sentence_nr": 7 + "score": 0.4429196299668147, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.10361854845420869, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.32774802711076473, - "sentence_nr": 7 + "score": 0.4425973012069069, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.15186969315425305, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.3458120002305796, - "sentence_nr": 7 + "score": 0.47465074831919213, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.19074380068002203, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.40566585096277824, - "sentence_nr": 7 + "score": 0.4425973012069069, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.20031726728306523, - "sentence_nr": 7 + "score": 0.4425973012069069, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.20485833586704885, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.468735805943922, - "sentence_nr": 7 + "score": 0.47465074831919213, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.4373156210032521, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.192481383169461, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.3799051443349615, - "sentence_nr": 7 + "score": 0.44830378475308, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.01252735726099625, - "sentence_nr": 7 + "score": 0.2623399284064729, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.273148644463442, - "sentence_nr": 7 + "score": 0.4373156210032521, + "sentence_nr": 4 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.16260451093454215, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.3759026836547276, - "sentence_nr": 7 + "score": 0.3844263765000694, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.09431297723472011, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_from", - "metric": "chrf", - "score": 0.3616856339096348, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_from", - "metric": "bleu", - "score": 0.11091252683001185, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.26607634610445896, - "sentence_nr": 7 + "score": 0.3830425592586042, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.189717083187238, - "sentence_nr": 7 + "score": 0.3805770883173698, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.10266747466754884, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.3364703638684802, - "sentence_nr": 7 + "score": 0.3830425592586042, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.22381487678101888, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5249370100068887, - "sentence_nr": 7 + "score": 0.3830425592586042, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.28912109037408523, - "sentence_nr": 7 + "score": 0.3830425592586042, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.2144604484498437, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.48894052224175993, - "sentence_nr": 7 + "score": 0.3844263765000694, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.3830425592586042, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.21001173689943997, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.423493931076046, - "sentence_nr": 7 + "score": 0.3830425592586042, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.006232910970143225, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.06317168666869727, - "sentence_nr": 7 + "score": 0.3830425592586042, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.06938388878349923, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_from", - "metric": "chrf", - "score": 0.3541078046399395, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_from", - "metric": "bleu", - "score": 0.20485833586704885, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.48732945706336717, - "sentence_nr": 7 + "score": 0.39818525322365445, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.14557808399334188, - "sentence_nr": 7 + "score": 0.23693055763743093, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.36598346755702993, - "sentence_nr": 7 + "score": 0.6474126202050918, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.18154954789336694, - "sentence_nr": 7 + "score": 0.1667955161379731, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.4557483776072868, - "sentence_nr": 7 + "score": 0.5802683403568892, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.06897533888461813, - "sentence_nr": 7 + "score": 0.1667955161379731, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.2776666563000344, - "sentence_nr": 7 + "score": 0.5802683403568892, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.1665765483402476, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.4017968725013381, - "sentence_nr": 7 + "score": 0.5521590062829653, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.10666682719585797, - "sentence_nr": 7 + "score": 0.1667955161379731, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.33462901494141756, - "sentence_nr": 7 + "score": 0.5802683403568892, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.04151505758906764, - "sentence_nr": 7 + "score": 0.6131017059052001, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.12189363728567917, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.37595660827287636, - "sentence_nr": 7 + "score": 0.6131017059052001, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.35870004213153, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.697021248528644, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.18926971577178767, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.4931453714148122, - "sentence_nr": 7 + "score": 0.6131017059052001, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.004663531624960091, - "sentence_nr": 7 + "score": 0.1423071532720465, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.07262533604330305, - "sentence_nr": 7 + "score": 0.5673078468780355, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.037401300306846526, - "sentence_nr": 7 + "score": 0.19923405658137924, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_from", - "metric": "chrf", - "score": 0.27395881217705964, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_from", - "metric": "bleu", - "score": 0.26000287375180825, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.47354107832122266, - "sentence_nr": 7 + "score": 0.6211036406023237, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.16780109158842918, - "sentence_nr": 7 + "score": 0.2887138086538547, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3968694014697679, - "sentence_nr": 7 + "score": 0.6342291345998248, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.1381751568911733, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3121557499162649, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.1579497466001673, - "sentence_nr": 7 + "score": 0.7013062757071812, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5092928545844059, - "sentence_nr": 7 + "score": 0.9303769449292738, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.16286876096900815, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3422914837190449, - "sentence_nr": 7 + "score": 0.2381658499765768, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.20198948917565754, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.34858221035657466, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.06888992790640074, - "sentence_nr": 7 + "score": 0.8492326635760689, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.2874483621307283, - "sentence_nr": 7 + "score": 0.9063898435384111, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.1631196072688366, - "sentence_nr": 7 + "score": 0.8522456714074852, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3502730667074754, - "sentence_nr": 7 + "score": 0.9096914044088521, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.21286836557101563, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.45055232014427626, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.031126201157905466, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.015970144454664378, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.06929847827527827, - "sentence_nr": 7 + "score": 0.9457416090031758, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_from", - "metric": "chrf", - "score": 0.30185194035792856, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_from", - "metric": "bleu", - "score": 0.1723692524265489, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.349893200245233, - "sentence_nr": 7 + "score": 0.9892952933418456, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.11697642623186386, - "sentence_nr": 7 + "score": 0.5087473540251254, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.37117753637984835, - "sentence_nr": 7 + "score": 0.7647955332172516, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.20065115069964384, - "sentence_nr": 7 + "score": 0.5087473540251254, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.4084885616013531, - "sentence_nr": 7 + "score": 0.7647955332172516, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.5087473540251254, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.17621963873521423, - "sentence_nr": 7 + "score": 0.7647955332172516, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.09916146090364127, - "sentence_nr": 7 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.3121110160693956, - "sentence_nr": 7 + "score": 0.7538467008030766, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.22669486951066523, - "sentence_nr": 7 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.4484451941575473, - "sentence_nr": 7 + "score": 0.7538467008030766, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.4234885228074744, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.011560595536104562, - "sentence_nr": 7 + "score": 0.7410180114887145, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.07368089078790738, - "sentence_nr": 7 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.41452613113710224, - "sentence_nr": 7 + "score": 0.7538467008030766, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.7538467008030766, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.17892846390928677, - "sentence_nr": 7 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.47088195615067674, - "sentence_nr": 7 + "score": 0.7538467008030766, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.012370537823050053, - "sentence_nr": 7 + "score": 0.4234885228074744, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.06660321132654005, - "sentence_nr": 7 + "score": 0.7410180114887145, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.09831093939330879, - "sentence_nr": 7 + "score": 0.47375069012411286, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_from", - "metric": "chrf", - "score": 0.33203866499974327, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_from", - "metric": "bleu", - "score": 0.22013459885748218, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.43986532876547135, - "sentence_nr": 7 + "score": 0.7543919667018285, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.13805615693046389, - "sentence_nr": 7 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.40787998733941394, - "sentence_nr": 7 + "score": 0.7675828789334244, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.12291219097556666, - "sentence_nr": 7 + "score": 0.5091224918749461, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3448002180666873, - "sentence_nr": 7 + "score": 0.7829685247145245, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.17643078314788999, - "sentence_nr": 7 + "score": 0.5091224918749461, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.40757584786696294, - "sentence_nr": 7 + "score": 0.7829685247145245, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.1989414239237112, - "sentence_nr": 7 + "score": 0.6626129614342791, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3791567776918788, - "sentence_nr": 7 + "score": 0.8597893117683423, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.09916009482330297, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3032928217006101, - "sentence_nr": 7 + "score": 0.2828367156737383, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.44543578807748957, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.09453698369211004, - "sentence_nr": 7 + "score": 0.7513336773729535, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.1995980198896431, - "sentence_nr": 7 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.4244503391142409, - "sentence_nr": 7 + "score": 0.7675828789334244, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.4625957988586645, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.7341375356694393, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.18216362398065106, - "sentence_nr": 7 + "score": 0.5461499540157965, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.36524832602306334, - "sentence_nr": 7 + "score": 0.7954823723658209, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.2404315522172745, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.08319287955437346, - "sentence_nr": 7 + "score": 0.49155714102395526, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.08383676689911676, - "sentence_nr": 7 + "score": 0.3477250470582593, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_from", - "metric": "chrf", - "score": 0.2855329690010324, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_from", - "metric": "bleu", - "score": 0.20126232208711145, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.39698346457743144, - "sentence_nr": 7 + "score": 0.7188419868243952, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.19148282873929853, - "sentence_nr": 7 + "score": 0.404727200247809, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.4707949702068854, - "sentence_nr": 7 + "score": 0.6681898017773897, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.20608572305725564, - "sentence_nr": 7 + "score": 0.40276720463657734, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.4704943905570542, - "sentence_nr": 7 + "score": 0.6529271690805427, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.08183353655679478, - "sentence_nr": 7 + "score": 0.404727200247809, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.25007633393249695, - "sentence_nr": 7 + "score": 0.6681898017773897, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0981642545874085, - "sentence_nr": 7 + "score": 0.404727200247809, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.31793222329793575, - "sentence_nr": 7 + "score": 0.6681898017773897, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.09478705591775652, - "sentence_nr": 7 + "score": 0.44897710722021167, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.33293232395887284, - "sentence_nr": 7 + "score": 0.6862249089515978, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.11976209355757551, - "sentence_nr": 7 + "score": 0.404727200247809, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.27004759126600675, - "sentence_nr": 7 + "score": 0.6392900613840917, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.09142555538569784, - "sentence_nr": 7 + "score": 0.44897710722021167, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.31371707771405133, - "sentence_nr": 7 + "score": 0.6862249089515978, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.05438497632520132, - "sentence_nr": 7 + "score": 0.404727200247809, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.26123506271154656, - "sentence_nr": 7 + "score": 0.6392900613840917, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.1490232164900303, - "sentence_nr": 7 + "score": 0.44897710722021167, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.43745835724045856, - "sentence_nr": 7 + "score": 0.6862249089515978, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.005606294971348417, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.06662245090541388, - "sentence_nr": 7 + "score": 0.4386229919587297, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.44897710722021167, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_from", - "metric": "chrf", - "score": 0.12752236829255797, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_from", - "metric": "bleu", - "score": 0.13462044240543036, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.39535559458710795, - "sentence_nr": 7 + "score": 0.6862249089515978, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.12157241570357182, - "sentence_nr": 7 + "score": 0.5379348324975908, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4080990097991491, - "sentence_nr": 7 + "score": 0.7703766110349561, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.085416483900781, - "sentence_nr": 7 + "score": 0.30188353873287377, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.2825804066750608, - "sentence_nr": 7 + "score": 0.6086565367747951, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.10415298161056984, - "sentence_nr": 7 + "score": 0.22391522968021457, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.33452632923050557, - "sentence_nr": 7 + "score": 0.6087618281135659, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.21204239268527586, - "sentence_nr": 7 + "score": 0.2704091953828695, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3846197304420823, - "sentence_nr": 7 + "score": 0.6207272323003366, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.14057105892389254, - "sentence_nr": 7 + "score": 0.2704091953828695, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3028381427383384, - "sentence_nr": 7 + "score": 0.6207272323003366, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.004763623056487517, - "sentence_nr": 7 + "score": 0.2704091953828695, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.07485928007606017, - "sentence_nr": 7 + "score": 0.6207272323003366, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.11689600237805012, - "sentence_nr": 7 + "score": 0.2704091953828695, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.38258301195690664, - "sentence_nr": 7 + "score": 0.6207272323003366, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.2704091953828695, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.6207272323003366, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.19809535837880818, - "sentence_nr": 7 + "score": 0.2704091953828695, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.447539350421338, - "sentence_nr": 7 + "score": 0.6207272323003366, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.004718557257042585, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.08019304349523304, - "sentence_nr": 7 + "score": 0.4621757041594117, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.05614653993259943, - "sentence_nr": 7 + "score": 0.22067731046885494, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_from", - "metric": "chrf", - "score": 0.26485323792360876, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_from", - "metric": "bleu", - "score": 0.23158048156321728, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.47580042760181485, - "sentence_nr": 7 + "score": 0.5635661737033422, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.598931508663349, - "sentence_nr": 8 + "score": 0.5091224918749461, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.7353063745802827, - "sentence_nr": 8 + "score": 0.7829685247145245, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.30677064886592076, - "sentence_nr": 8 + "score": 0.6026286934891149, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5308555945242818, - "sentence_nr": 8 + "score": 0.8025775976044891, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.1327526847508867, - "sentence_nr": 8 + "score": 0.6626129614342791, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.37850602486495205, - "sentence_nr": 8 + "score": 0.8597893117683423, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.18405035438430847, - "sentence_nr": 8 + "score": 0.6626129614342791, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.4142901090120915, - "sentence_nr": 8 + "score": 0.8597893117683423, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.13410301071131794, - "sentence_nr": 8 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.3942932268034351, - "sentence_nr": 8 + "score": 0.7675828789334244, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.24239458593560292, - "sentence_nr": 8 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.32069132319909655, - "sentence_nr": 8 + "score": 0.7689532399280165, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.22478613858269392, - "sentence_nr": 8 + "score": 0.6917901740466924, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.44348101018104913, - "sentence_nr": 8 + "score": 0.8479928839177578, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.4625957988586645, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.7338978299765546, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.4026159305424288, - "sentence_nr": 8 + "score": 0.5461499540157965, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5712560131047175, - "sentence_nr": 8 + "score": 0.7954823723658209, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.183687049781416, - "sentence_nr": 8 + "score": 0.24011079455637607, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.351911486970854, - "sentence_nr": 8 + "score": 0.19920494035049138, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_from", - "metric": "chrf", - "score": 0.5181825846579515, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_from", - "metric": "bleu", - "score": 0.6225705543415939, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.7106977638931217, - "sentence_nr": 8 + "score": 0.614209720001149, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.4262221594184117, - "sentence_nr": 8 + "score": 0.5896613549548209, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.5886657414856064, - "sentence_nr": 8 + "score": 0.7528914749586836, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.286608441075188, - "sentence_nr": 8 + "score": 0.4596980088392874, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.4579283646292802, - "sentence_nr": 8 + "score": 0.713787745993602, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.25861130592298187, - "sentence_nr": 8 + "score": 0.5300714512917181, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.39452644092432093, - "sentence_nr": 8 + "score": 0.7461630750708693, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.20379250618355427, - "sentence_nr": 8 + "score": 0.4596980088392874, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.41085414309816914, - "sentence_nr": 8 + "score": 0.713787745993602, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.17328174803055044, - "sentence_nr": 8 + "score": 0.4596980088392874, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.3178268797869574, - "sentence_nr": 8 + "score": 0.713787745993602, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.2990226215771518, - "sentence_nr": 8 + "score": 0.33359103227594633, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.4323734152924571, - "sentence_nr": 8 + "score": 0.701102363286568, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.10434360980785336, - "sentence_nr": 8 + "score": 0.5271017464925504, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.3012789660952507, - "sentence_nr": 8 + "score": 0.7749613594649343, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.17248715680799764, - "sentence_nr": 8 + "score": 0.4596980088392874, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.40043565243219187, - "sentence_nr": 8 + "score": 0.713787745993602, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.4335364472118335, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.13835317113453516, - "sentence_nr": 8 + "score": 0.6878319610579101, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.16343842313572918, - "sentence_nr": 8 + "score": 0.480771131185851, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_from", - "metric": "chrf", - "score": 0.3986641525285075, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_from", - "metric": "bleu", - "score": 0.417372155782838, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.507980317618041, - "sentence_nr": 8 + "score": 0.7032048786770096, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.43186481103649477, - "sentence_nr": 8 + "score": 0.4801289744823913, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5792139686527714, - "sentence_nr": 8 + "score": 0.6766690087429765, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 + "score": 0.3272712268138726, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5553909583113487, - "sentence_nr": 8 + "score": 0.6272846474183881, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 + "score": 0.3272712268138726, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5553909583113487, - "sentence_nr": 8 + "score": 0.6272846474183881, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.25798723088167685, - "sentence_nr": 8 + "score": 0.30421485886156485, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5244854229988815, - "sentence_nr": 8 + "score": 0.566236392445952, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.3315037521841549, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.468197879470805, - "sentence_nr": 8 + "score": 0.24706467963183681, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.460474309246715, - "sentence_nr": 8 + "score": 0.32965129549221617, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.6213537794704693, - "sentence_nr": 8 + "score": 0.623436907204599, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 + "score": 0.32078739729528816, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5676965183365866, - "sentence_nr": 8 + "score": 0.5817366082116868, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.3231203125477008, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.5812275690118908, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.3231203125477008, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.5812275690118908, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.003172770121174655, - "sentence_nr": 8 + "score": 0.44332438338421004, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.43186481103649477, - "sentence_nr": 8 + "score": 0.3231203125477008, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_from", - "metric": "chrf", - "score": 0.5834549494301647, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_from", - "metric": "bleu", - "score": 0.43186481103649477, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5834549494301647, - "sentence_nr": 8 + "score": 0.5812275690118908, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.46092611919700416, - "sentence_nr": 8 + "score": 0.28489318277723963, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6365915338629015, - "sentence_nr": 8 + "score": 0.5764325110247531, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.1690979933029136, - "sentence_nr": 8 + "score": 0.2981792160679168, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.3751861276375209, - "sentence_nr": 8 + "score": 0.5788026000794341, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.3942058093215873, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.3344305108778801, - "sentence_nr": 8 + "score": 0.5878575558111695, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.2981792160679168, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.37017501464955627, - "sentence_nr": 8 + "score": 0.5788026000794341, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.22218130727359342, - "sentence_nr": 8 + "score": 0.3665134361137304, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.39929356245904674, - "sentence_nr": 8 + "score": 0.6118771029352303, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.3194331635465395, - "sentence_nr": 8 + "score": 0.3485799122645514, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.5031092445628172, - "sentence_nr": 8 + "score": 0.6090575371936678, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.334422418242443, - "sentence_nr": 8 + "score": 0.3485799122645514, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.47577086062579566, - "sentence_nr": 8 + "score": 0.6090575371936678, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.06088829927112382, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.4482907809719588, - "sentence_nr": 8 + "score": 0.4100134571476398, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.5498272118133005, - "sentence_nr": 8 + "score": 0.5856608401367807, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.17098323692758396, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.02467424260792568, - "sentence_nr": 8 + "score": 0.5216877937894046, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.21902340561392236, - "sentence_nr": 8 + "score": 0.3527295712700594, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.43906671679239717, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_from", - "metric": "bleu", - "score": 0.5104779149627351, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_from", - "metric": "chrf", - "score": 0.6198275970742451, - "sentence_nr": 8 + "score": 0.6062826429226292, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.5234484809182233, - "sentence_nr": 8 + "score": 0.6471892368478446, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6658297773613274, - "sentence_nr": 8 + "score": 0.8142499721936278, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.2840563956846642, - "sentence_nr": 8 + "score": 0.7012294787544179, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.5110250591004448, - "sentence_nr": 8 + "score": 0.8478115719875968, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.30007504691018483, - "sentence_nr": 8 + "score": 0.6917901740466924, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.5153810823423555, - "sentence_nr": 8 + "score": 0.8479928839177578, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.14074957769288798, - "sentence_nr": 8 + "score": 0.40202477345336673, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3750035199199742, - "sentence_nr": 8 + "score": 0.7469480084357536, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.26538706048179084, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.4982627378595717, - "sentence_nr": 8 + "score": 0.2799331151961311, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.232738415750697, - "sentence_nr": 8 + "score": 0.40157733283424196, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.49618971681248764, - "sentence_nr": 8 + "score": 0.7133166401137868, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.3488611533620711, - "sentence_nr": 8 + "score": 0.4625957988586645, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.5550499651473632, - "sentence_nr": 8 + "score": 0.7494665344743727, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.42612283570374254, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.7185121839177114, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.31883477089875656, - "sentence_nr": 8 + "score": 0.6917901740466924, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.5510450101159524, - "sentence_nr": 8 + "score": 0.8479928839177578, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.17729842264695017, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.038236956722392024, - "sentence_nr": 8 + "score": 0.5199388279318895, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.23141570376732995, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_from", - "metric": "chrf", - "score": 0.31998097041178836, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_from", - "metric": "bleu", - "score": 0.4220833561341287, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.5956810507017879, - "sentence_nr": 8 + "score": 0.5938624587877649, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.3815250264738168, - "sentence_nr": 8 + "score": 0.23713320246552005, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.6516314751979607, - "sentence_nr": 8 + "score": 0.6106842970161642, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.19920413481788912, - "sentence_nr": 8 + "score": 0.21690365808279138, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.42537796926163113, - "sentence_nr": 8 + "score": 0.5384773678665918, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.189902924205034, - "sentence_nr": 8 + "score": 0.23114663823833642, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.4072184389907138, - "sentence_nr": 8 + "score": 0.5786592584609213, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.18710260593933364, - "sentence_nr": 8 + "score": 0.22128776529156546, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.44334313717706003, - "sentence_nr": 8 + "score": 0.5609439249510223, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.15161074985415177, - "sentence_nr": 8 + "score": 0.23114663823833642, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.3796830006266126, - "sentence_nr": 8 + "score": 0.5786592584609213, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.07757069009917116, - "sentence_nr": 8 + "score": 0.1998573974138024, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.3253161209971999, - "sentence_nr": 8 + "score": 0.540043957078071, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.22168992033645996, - "sentence_nr": 8 + "score": 0.30752616970214336, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.523689661176845, - "sentence_nr": 8 + "score": 0.6051452460471443, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.2722704374402053, - "sentence_nr": 8 + "score": 0.310441435588881, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.596004654894533, - "sentence_nr": 8 + "score": 0.6413164971104282, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.23114663823833642, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.01008902035184167, - "sentence_nr": 8 + "score": 0.5814841210741494, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.10279947040838337, - "sentence_nr": 8 + "score": 0.22656720908801994, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_from", - "metric": "chrf", - "score": 0.3569840483632983, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_from", - "metric": "bleu", - "score": 0.4653583721345133, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.6142658860525915, - "sentence_nr": 8 + "score": 0.5465750236858569, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.40003810431098236, - "sentence_nr": 8 + "score": 0.6888365053466561, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5899097408105687, - "sentence_nr": 8 + "score": 0.8656273480576243, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.20401796878756984, - "sentence_nr": 8 + "score": 0.25711386542134795, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.43317630453631556, - "sentence_nr": 8 + "score": 0.6088853751738869, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.2097387761551816, - "sentence_nr": 8 + "score": 0.25711386542134795, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.49663301508497226, - "sentence_nr": 8 + "score": 0.6088853751738869, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.27067168022307464, - "sentence_nr": 8 + "score": 0.3416581331218724, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5439625482235064, - "sentence_nr": 8 + "score": 0.6578570934289981, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.18679710353734788, - "sentence_nr": 8 + "score": 0.3060368950930089, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.3876457319870774, - "sentence_nr": 8 + "score": 0.6736142284622013, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.2826204057042236, - "sentence_nr": 8 + "score": 0.3423591961656694, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5043062352893725, - "sentence_nr": 8 + "score": 0.6570214418399444, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.29588994069727786, - "sentence_nr": 8 + "score": 0.24456656109396324, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5527117669081858, - "sentence_nr": 8 + "score": 0.629934465484704, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.46732353406180216, - "sentence_nr": 8 + "score": 0.3060368950930089, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.6059276585345114, - "sentence_nr": 8 + "score": 0.6736142284622013, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.06266083709457643, - "sentence_nr": 8 + "score": 0.46965980060137014, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.36565527196849945, - "sentence_nr": 8 + "score": 0.24456656109396324, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_from", - "metric": "chrf", - "score": 0.4882803186347697, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_from", - "metric": "bleu", - "score": 0.33661284377001893, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5885351264299764, - "sentence_nr": 8 + "score": 0.629934465484704, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.38047531731529327, - "sentence_nr": 8 + "score": 0.5069487414732323, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.49485723102957346, - "sentence_nr": 8 + "score": 0.7801245319017357, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.2044887070217883, - "sentence_nr": 8 + "score": 0.5695988432761473, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.38471585132587544, - "sentence_nr": 8 + "score": 0.7516103467926585, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.2309552734743087, - "sentence_nr": 8 + "score": 0.6358974376699329, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.4672309378181727, - "sentence_nr": 8 + "score": 0.736661937085844, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.5695988432761473, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.29886658673327365, - "sentence_nr": 8 + "score": 0.7516103467926585, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.21812881407613688, - "sentence_nr": 8 + "score": 0.45307778036928104, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.3598346059855135, - "sentence_nr": 8 + "score": 0.6935397252637394, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.1367498402979849, - "sentence_nr": 8 + "score": 0.5695988432761473, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.3943841419148219, - "sentence_nr": 8 + "score": 0.7516103467926585, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.5676006714726635, - "sentence_nr": 8 + "score": 0.8522456714074852, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.6880701448812352, - "sentence_nr": 8 + "score": 0.9096914044088521, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.45307778036928104, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.6935397252637394, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.2974074484950165, - "sentence_nr": 8 + "score": 0.8492326635760689, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5121581247515657, - "sentence_nr": 8 + "score": 0.9027320255916917, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.30614023358320086, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.010162846529607748, - "sentence_nr": 8 + "score": 0.5870676308171808, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.21947959999379651, - "sentence_nr": 8 + "score": 0.2281399713503153, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.3226457008913864, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_from", - "metric": "bleu", - "score": 0.4265506545827786, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_from", - "metric": "chrf", - "score": 0.5487572224993423, - "sentence_nr": 8 + "score": 0.6211104268881504, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.4489235959690452, - "sentence_nr": 8 + "score": 0.7196315267102845, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5934678825154104, - "sentence_nr": 8 + "score": 0.8835331636515565, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.2980504190448601, - "sentence_nr": 8 + "score": 0.5072784644062104, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5101268920225042, - "sentence_nr": 8 + "score": 0.7361065921505279, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.28800869328515505, - "sentence_nr": 8 + "score": 0.5072784644062104, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.49348678623542436, - "sentence_nr": 8 + "score": 0.7361065921505279, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.2429163097293302, - "sentence_nr": 8 + "score": 0.5072784644062104, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5044329486461447, - "sentence_nr": 8 + "score": 0.7361065921505279, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.24586918158076287, - "sentence_nr": 8 + "score": 0.5072784644062104, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.4658595745396681, - "sentence_nr": 8 + "score": 0.7361065921505279, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.23073085454808062, - "sentence_nr": 8 + "score": 0.5072784644062104, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.44142087654422146, - "sentence_nr": 8 + "score": 0.7361065921505279, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.4406612884550454, - "sentence_nr": 8 + "score": 0.7196315267102845, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5676112112992767, - "sentence_nr": 8 + "score": 0.8835331636515565, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.6004981752197522, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.7667541011433795, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.4476950425126913, - "sentence_nr": 8 + "score": 0.7196315267102845, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5932980209045412, - "sentence_nr": 8 + "score": 0.8835331636515565, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.480771131185851, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.1483315516064897, - "sentence_nr": 8 + "score": 0.705252762035012, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.25249051585915977, - "sentence_nr": 8 + "score": 0.445107576642247, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_from", - "metric": "chrf", - "score": 0.38558450790399557, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_from", - "metric": "bleu", - "score": 0.4631700687380434, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.59196914119751, - "sentence_nr": 8 + "score": 0.6955301378913092, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.46832763312452297, - "sentence_nr": 8 + "score": 0.40157733283424196, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.8176110134774669, - "sentence_nr": 8 + "score": 0.6532350818978572, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.9436043261706615, - "sentence_nr": 8 + "score": 0.38091370416670794, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.9880191679951993, - "sentence_nr": 8 + "score": 0.6438225861756911, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.31374450602681464, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.6422405832556486, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.4924584878270648, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.0067104198717751464, - "sentence_nr": 8 + "score": 0.7062510642584722, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.30752616970214336, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.5976254557718147, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.9025232868361638, - "sentence_nr": 8 + "score": 0.36227557436010244, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.9169897590736298, - "sentence_nr": 8 + "score": 0.6470050797908481, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.9709835434146469, - "sentence_nr": 8 + "score": 0.419468515826214, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.9951728990866464, - "sentence_nr": 8 + "score": 0.6664000694648706, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.010321080079207262, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.06492787287290114, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.9154051169199643, - "sentence_nr": 8 + "score": 0.4938015541936678, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.9757471794927451, - "sentence_nr": 8 + "score": 0.7820348786317745, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.8935248372106969, - "sentence_nr": 8 + "score": 0.12858902882463452, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.9404428602061264, - "sentence_nr": 8 + "score": 0.35477908164501704, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.3942058093215873, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_from", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_from", - "metric": "bleu", - "score": 0.9154051169199643, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.9757471794927451, - "sentence_nr": 8 + "score": 0.6316031412228033, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.18559542135951204, - "sentence_nr": 9 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3804842882867387, - "sentence_nr": 9 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.35369375385786006, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.13087682931309413, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.19462952976787054, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -25606,1439 +24155,1327 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.013538497707846785, - "sentence_nr": 9 + "score": 0.3263040636562357, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.20972571494011877, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.395894071208527, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.16678872216161894, - "sentence_nr": 9 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.38156158663679846, - "sentence_nr": 9 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.21940429389247643, - "sentence_nr": 9 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.4343280866601455, - "sentence_nr": 9 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.3300025916068812, - "sentence_nr": 9 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5052501972629104, - "sentence_nr": 9 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.1824401863423467, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.36709433185688595, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.3377854698776805, - "sentence_nr": 9 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.521201229892482, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_from", - "metric": "bleu", - "score": 0.2992694690475121, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_from", - "metric": "chrf", - "score": 0.521871374038439, - "sentence_nr": 9 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.15122189206102096, - "sentence_nr": 9 + "score": 0.3390387389794623, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.26750110507308866, - "sentence_nr": 9 + "score": 0.6170420596680538, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.10759927692349745, - "sentence_nr": 9 + "score": 0.3142665434344143, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.21065794536310511, - "sentence_nr": 9 + "score": 0.6466526067220029, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.07843772989359644, - "sentence_nr": 9 + "score": 0.3751840463233443, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.1324578891826276, - "sentence_nr": 9 + "score": 0.6279894552667558, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.19268479640608693, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.08163977068875294, - "sentence_nr": 9 + "score": 0.551397074868541, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.12475846123062707, - "sentence_nr": 9 + "score": 0.19464521962073492, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.27823340731817514, - "sentence_nr": 9 + "score": 0.5838790966762375, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.14134641571854575, - "sentence_nr": 9 + "score": 0.17470942957770763, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.22948919855739472, - "sentence_nr": 9 + "score": 0.5403400891349619, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.1475503033983142, - "sentence_nr": 9 + "score": 0.19464521962073492, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.22104108935973044, - "sentence_nr": 9 + "score": 0.5763410052067085, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.16434349396840395, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.28582614857210975, - "sentence_nr": 9 + "score": 0.5460240376042262, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.24911274612875411, - "sentence_nr": 9 + "score": 0.19464521962073492, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.3603818786794888, - "sentence_nr": 9 + "score": 0.5838790966762375, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.10085167559661873, - "sentence_nr": 9 + "score": 0.24343304284910333, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.23831215045289575, - "sentence_nr": 9 + "score": 0.6275577931282961, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.17543744527808774, - "sentence_nr": 9 + "score": 0.28571962561926445, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.28201016956553354, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_from", - "metric": "bleu", - "score": 0.17083255863912036, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_from", - "metric": "chrf", - "score": 0.2797958336163538, - "sentence_nr": 9 + "score": 0.6431872581462166, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.21685485833927476, - "sentence_nr": 9 + "score": 0.5014756677893482, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3714219747170047, - "sentence_nr": 9 + "score": 0.7958858211784339, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.12274092982883021, - "sentence_nr": 9 + "score": 0.6255340042200862, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3385513651938691, - "sentence_nr": 9 + "score": 0.8724783049357475, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.1463197333291977, - "sentence_nr": 9 + "score": 0.5014756677893482, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.366137273378509, - "sentence_nr": 9 + "score": 0.7958858211784339, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.12656494026948834, - "sentence_nr": 9 + "score": 0.3083012995502152, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3156355830822428, - "sentence_nr": 9 + "score": 0.6589376390020449, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.30327872414714485, - "sentence_nr": 9 + "score": 0.4216890913810254, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.49804213541579834, - "sentence_nr": 9 + "score": 0.6885217194158456, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.13237645860785527, - "sentence_nr": 9 + "score": 0.3083012995502152, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3818322535970043, - "sentence_nr": 9 + "score": 0.6589376390020449, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.18154235663145316, - "sentence_nr": 9 + "score": 0.29176300840900793, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3906877817743504, - "sentence_nr": 9 + "score": 0.6143650111703199, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.12487405142186064, - "sentence_nr": 9 + "score": 0.43021236941942204, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.32817291858267583, - "sentence_nr": 9 + "score": 0.7142896582178452, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.16701570871784516, - "sentence_nr": 9 + "score": 0.5014756677893482, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.4021286881032558, - "sentence_nr": 9 + "score": 0.7958858211784339, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.24090844358935917, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.29383139922210444, - "sentence_nr": 9 + "score": 0.5468852870478801, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.15799783604363904, - "sentence_nr": 9 + "score": 0.6255340042200862, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3949243937510492, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_from", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_from", - "metric": "chrf", - "score": 0.40100810859537644, - "sentence_nr": 9 + "score": 0.8724783049357475, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.112289032173749, - "sentence_nr": 9 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.17726100052085036, - "sentence_nr": 9 + "score": 0.7411155087367244, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.31017716089889963, - "sentence_nr": 9 + "score": 0.7411155087367244, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.14276716121505195, - "sentence_nr": 9 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3191375424862687, - "sentence_nr": 9 + "score": 0.6509517796070665, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.022303919896869945, - "sentence_nr": 9 + "score": 0.6509517796070665, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.12787395553510186, - "sentence_nr": 9 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.33752742535974617, - "sentence_nr": 9 + "score": 0.6509517796070665, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.13181313433495553, - "sentence_nr": 9 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.31758120882708796, - "sentence_nr": 9 + "score": 0.6509517796070665, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.33753843688529356, - "sentence_nr": 9 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.6509517796070665, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.1463197333291977, - "sentence_nr": 9 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3593717322097392, - "sentence_nr": 9 + "score": 0.7411155087367244, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.010176705289341573, - "sentence_nr": 9 + "score": 0.6509517796070665, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.13628770358024436, - "sentence_nr": 9 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3124983184732695, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_from", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_from", - "metric": "chrf", - "score": 0.3227044640287027, - "sentence_nr": 9 + "score": 0.6509517796070665, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3710595252626966, - "sentence_nr": 9 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.21688283061839067, - "sentence_nr": 9 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.41775824162589076, - "sentence_nr": 9 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.18235247300784824, - "sentence_nr": 9 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.40779523977234755, - "sentence_nr": 9 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.013915288440632284, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.39962545473912425, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.39112369376374106, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.2202248274013358, - "sentence_nr": 9 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.48474965676300186, - "sentence_nr": 9 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.45813938111627356, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.39545121937832856, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.1593344703029041, - "sentence_nr": 9 + "score": 0.6963801389253689, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.22494952618128455, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4760660341798742, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_from", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_from", - "metric": "chrf", - "score": 0.3963858306295727, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.12666372160329223, - "sentence_nr": 9 + "score": 0.4101479464529936, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.2650373529479294, - "sentence_nr": 9 + "score": 0.7041976254287654, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.12162779391619735, - "sentence_nr": 9 + "score": 0.4547900039222725, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.3228288840559658, - "sentence_nr": 9 + "score": 0.6541971428810075, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.1649662542496744, - "sentence_nr": 9 + "score": 0.2919394073770869, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.3466546857451185, - "sentence_nr": 9 + "score": 0.5957961314949175, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.17537670874647399, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.01536966738773372, - "sentence_nr": 9 + "score": 0.4800889669735933, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.14326513489612383, - "sentence_nr": 9 + "score": 0.5088645484558708, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.4034278533385552, - "sentence_nr": 9 + "score": 0.6991726442472661, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.13829446068705525, - "sentence_nr": 9 + "score": 0.22845493240080628, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.32059338352121075, - "sentence_nr": 9 + "score": 0.584996891148118, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.16521691795932783, - "sentence_nr": 9 + "score": 0.5088645484558708, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.4134512022176617, - "sentence_nr": 9 + "score": 0.6991726442472661, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.203264842568494, - "sentence_nr": 9 + "score": 0.23272696712467975, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.3435867188688158, - "sentence_nr": 9 + "score": 0.5794868721814046, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.12366644075037489, - "sentence_nr": 9 + "score": 0.38785611216800814, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.37651342775995167, - "sentence_nr": 9 + "score": 0.6673259967761724, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.16331948281960493, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.2962222000049211, - "sentence_nr": 9 + "score": 0.350650198151987, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.1971903602140518, - "sentence_nr": 9 + "score": 0.8056920633274978, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.36269646528997446, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_from", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_from", - "metric": "chrf", - "score": 0.3473154676483541, - "sentence_nr": 9 + "score": 0.8391519966182309, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.203264842568494, - "sentence_nr": 9 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.2922087191170089, - "sentence_nr": 9 + "score": 0.7164026439677106, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.18237599479708327, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.3740403511567824, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.2244748716483542, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.007281906895508523, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.18294117097472648, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.4383387744769579, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.17092467746295725, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.4340281226634826, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.22860414459682069, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.47331131010100724, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.17200673466668953, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.39948318545775324, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.10553225565626573, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.1763116500850642, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.17730543118229922, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.4201842844735916, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_from", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_from", - "metric": "chrf", - "score": 0.3744383822869251, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.1740044679403827, - "sentence_nr": 9 + "score": 0.595092211343687, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.36375152376157177, - "sentence_nr": 9 + "score": 0.7971172820981081, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.12876689524369925, - "sentence_nr": 9 + "score": 0.4831233610237384, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.3253153379449275, - "sentence_nr": 9 + "score": 0.7122562458056777, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.13269353024089545, - "sentence_nr": 9 + "score": 0.4831233610237384, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.28998089836851504, - "sentence_nr": 9 + "score": 0.7122562458056777, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.4207937380724192, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.01357525601063516, - "sentence_nr": 9 + "score": 0.6985308026285912, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.20298407172594946, - "sentence_nr": 9 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.427376330935813, - "sentence_nr": 9 + "score": 0.8020845125558708, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.42984824697674956, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.382987159925022, - "sentence_nr": 9 + "score": 0.7369844404912368, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.17558199612672082, - "sentence_nr": 9 + "score": 0.3675667565747676, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.41334979014850587, - "sentence_nr": 9 + "score": 0.5700185304500285, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.20947801521367798, - "sentence_nr": 9 + "score": 0.595092211343687, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.37699245483283905, - "sentence_nr": 9 + "score": 0.7945212279546889, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.24318848592140954, - "sentence_nr": 9 + "score": 0.43011383006801057, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.501343318078065, - "sentence_nr": 9 + "score": 0.7140577175386648, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -27046,10335 +25483,9471 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.3843363395779093, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.13784906211485343, - "sentence_nr": 9 + "score": 0.42984824697674956, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.3161105981607342, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_from", - "metric": "bleu", - "score": 0.28433291815307693, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_from", - "metric": "chrf", - "score": 0.4589827303637465, - "sentence_nr": 9 + "score": 0.7369844404912368, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.322788951728102, - "sentence_nr": 9 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.40263021320001785, - "sentence_nr": 9 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.119159749312327, - "sentence_nr": 9 + "score": 0.32263864160302524, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.21297942664093145, - "sentence_nr": 9 + "score": 0.6824395076981005, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.1405026510197826, - "sentence_nr": 9 + "score": 0.4896430866960958, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.24785258181936404, - "sentence_nr": 9 + "score": 0.7719180936906627, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.022303919896869945, - "sentence_nr": 9 + "score": 0.6590438071804039, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.15626231814206226, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.2918712789926548, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.15325316503089068, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.2756316951639811, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.3210853623565359, - "sentence_nr": 9 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.009559007108143848, - "sentence_nr": 9 + "score": 0.32263864160302524, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.05937666456658802, - "sentence_nr": 9 + "score": 0.6824395076981005, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.28306950244125495, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.14063630555225284, - "sentence_nr": 9 + "score": 0.4481489512240194, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.24531520458611372, - "sentence_nr": 9 + "score": 0.7745649676018984, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.3264287329357334, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.41662443172249786, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_from", - "metric": "bleu", - "score": 0.2855471341725443, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_from", - "metric": "chrf", - "score": 0.40969820391967565, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.16925466459550803, - "sentence_nr": 9 + "score": 0.5383680940297331, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.35912398848424326, - "sentence_nr": 9 + "score": 0.786096406361039, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.2036348471340078, - "sentence_nr": 9 + "score": 0.38305978177479755, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3472831655579266, - "sentence_nr": 9 + "score": 0.6061131723054572, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.34636800712900173, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.21547697432588886, - "sentence_nr": 9 + "score": 0.5167955767158704, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.3675667565747676, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.18039960295364865, - "sentence_nr": 9 + "score": 0.5397693417183738, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.21397099133614067, - "sentence_nr": 9 + "score": 0.5383680940297331, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3568171392601981, - "sentence_nr": 9 + "score": 0.786096406361039, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.14134641571854575, - "sentence_nr": 9 + "score": 0.425143650778693, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3078571099929154, - "sentence_nr": 9 + "score": 0.6674242019044293, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.15658994837053716, - "sentence_nr": 9 + "score": 0.421151249507493, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3084004707364603, - "sentence_nr": 9 + "score": 0.6938674571170766, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.20215771603666896, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.34483322672745376, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.16165057948216605, - "sentence_nr": 9 + "score": 0.5383680940297331, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.35172210628524053, - "sentence_nr": 9 + "score": 0.786096406361039, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.41843795218458035, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.012458960343878354, - "sentence_nr": 9 + "score": 0.6316283876832989, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.20053583653512705, - "sentence_nr": 9 + "score": 0.4803501444747088, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3585550644386862, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_from", - "metric": "bleu", - "score": 0.22665851162885023, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_from", - "metric": "chrf", - "score": 0.4402646729409968, - "sentence_nr": 9 + "score": 0.7417101158248365, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.6358921902612438, - "sentence_nr": 0 + "score": 0.42221847853238736, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.8041899227402122, - "sentence_nr": 0 + "score": 0.6656008733100179, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.6299285159340671, - "sentence_nr": 0 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.7993134129243716, - "sentence_nr": 0 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.710159574003633, - "sentence_nr": 0 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.8462481747979111, - "sentence_nr": 0 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.5487830136896633, - "sentence_nr": 0 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.77238965036654, - "sentence_nr": 0 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.5745954681260859, - "sentence_nr": 0 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.7920051188244848, - "sentence_nr": 0 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.6012475603804444, - "sentence_nr": 0 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.7990339788905771, - "sentence_nr": 0 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.6660677740125452, - "sentence_nr": 0 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.8293798371335214, - "sentence_nr": 0 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.6012475603804444, - "sentence_nr": 0 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.8102198011293434, - "sentence_nr": 0 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.5718247506430171, - "sentence_nr": 0 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.7570613392550647, - "sentence_nr": 0 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.6241924127610678, - "sentence_nr": 0 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.8031006153647919, - "sentence_nr": 0 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.6372502110149713, - "sentence_nr": 0 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", - "metric": "chrf", - "score": 0.8187019874664503, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", - "metric": "bleu", - "score": 0.5745954681260859, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.8001969096241068, - "sentence_nr": 0 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.35059076445515835, - "sentence_nr": 0 + "score": 0.4481489512240194, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.40219803477483124, - "sentence_nr": 0 + "score": 0.7994721822064033, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.41316127706749806, - "sentence_nr": 0 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.4430321339435623, - "sentence_nr": 0 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.3993284843242707, - "sentence_nr": 0 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.4224738565076288, - "sentence_nr": 0 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.2908087026261561, - "sentence_nr": 0 + "score": 0.38754077501151757, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.3411361400094189, - "sentence_nr": 0 + "score": 0.598503332887995, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.3572514590810421, - "sentence_nr": 0 + "score": 0.5124776602965491, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.40312319760122833, - "sentence_nr": 0 + "score": 0.7722874800637285, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.2996868226086902, - "sentence_nr": 0 + "score": 0.3291598889023262, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.3355531727847081, - "sentence_nr": 0 + "score": 0.6085546680624175, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.4199243020508202, - "sentence_nr": 0 + "score": 0.6173766800527999, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.4310330650643179, - "sentence_nr": 0 + "score": 0.857390040146912, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.38146085172952343, - "sentence_nr": 0 + "score": 0.6173766800527999, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.40030269579783606, - "sentence_nr": 0 + "score": 0.857390040146912, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.2613520653232399, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.292974388325607, - "sentence_nr": 0 + "score": 0.3470839302425112, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.38876512474558916, - "sentence_nr": 0 + "score": 0.4845766087853281, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", - "metric": "chrf", - "score": 0.41342876789412997, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", - "metric": "bleu", - "score": 0.36631135849378577, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.3947683748805251, - "sentence_nr": 0 + "score": 0.7138566289355139, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.8780634320789833, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.926946700115022, - "sentence_nr": 0 + "score": 0.5512324461754572, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.7964573357809173, - "sentence_nr": 0 + "score": 0.42984824697674956, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.8458636471716781, - "sentence_nr": 0 + "score": 0.7289444696770301, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 0 + "score": 0.3737098172408067, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 + "score": 0.6832201170000932, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.9452996322890763, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.9463396364218181, - "sentence_nr": 0 + "score": 0.5582775802710993, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.9878765474230741, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.9958930217841712, - "sentence_nr": 0 + "score": 0.5582775802710993, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.9878765474230741, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.9958930217841712, - "sentence_nr": 0 + "score": 0.5582775802710993, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.6537803976048806, - "sentence_nr": 0 + "score": 0.22436571657855092, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.7742226743967544, - "sentence_nr": 0 + "score": 0.61166969974579, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.5582775802710993, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.9878765474230741, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.9958930217841712, - "sentence_nr": 0 + "score": 0.5582775802710993, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.738238064391125, - "sentence_nr": 0 + "score": 0.22423870508323301, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.8637738769684485, - "sentence_nr": 0 + "score": 0.6366515193698862, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", - "metric": "bleu", - "score": 0.9878765474230741, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.9958930217841712, - "sentence_nr": 0 + "score": 0.5582775802710993, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.40673971192998765, - "sentence_nr": 0 + "score": 0.6507561416639396, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6897190926100627, - "sentence_nr": 0 + "score": 0.8215788698315908, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.3707525915417785, - "sentence_nr": 0 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6481906761834414, - "sentence_nr": 0 + "score": 0.7317734491561229, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.4405434565828979, - "sentence_nr": 0 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6872423435487918, - "sentence_nr": 0 + "score": 0.7317734491561229, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.34070519401434163, - "sentence_nr": 0 + "score": 0.6507561416639396, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6376396416993303, - "sentence_nr": 0 + "score": 0.8215788698315908, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.35601247064914876, - "sentence_nr": 0 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6528728847159075, - "sentence_nr": 0 + "score": 0.7317734491561229, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.3535276144718208, - "sentence_nr": 0 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6399338911163, - "sentence_nr": 0 + "score": 0.7317734491561229, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.29793763405666984, - "sentence_nr": 0 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.5878658443031616, - "sentence_nr": 0 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.7317734491561229, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.2737856702715042, - "sentence_nr": 0 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6091441790112126, - "sentence_nr": 0 + "score": 0.7317734491561229, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.2060740184460064, - "sentence_nr": 0 + "score": 0.251696695878184, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.5566122985381202, - "sentence_nr": 0 + "score": 0.6180491939580447, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.3764145740138264, - "sentence_nr": 0 + "score": 0.5967384019266717, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", - "metric": "chrf", - "score": 0.660406350984819, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", - "metric": "bleu", - "score": 0.3552824817180132, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6650963330720984, - "sentence_nr": 0 + "score": 0.8544348080833218, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.703373719677874, - "sentence_nr": 0 + "score": 0.2719326877457978, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.7784050705257474, - "sentence_nr": 0 + "score": 0.6002086362682414, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.469958733898233, - "sentence_nr": 0 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.5843756060033074, - "sentence_nr": 0 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.6034601376302852, - "sentence_nr": 0 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.7074074363255227, - "sentence_nr": 0 + "score": 0.7246227738353674, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.5200692650497809, - "sentence_nr": 0 + "score": 0.29170205300854224, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.6586847274336591, - "sentence_nr": 0 + "score": 0.6498499527552988, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.5724622291345857, - "sentence_nr": 0 + "score": 0.29170205300854224, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.6818279156433621, - "sentence_nr": 0 + "score": 0.6498499527552988, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.5439803529976158, - "sentence_nr": 0 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.657598922173703, - "sentence_nr": 0 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.5447800851151646, - "sentence_nr": 0 + "score": 0.28592291256793106, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.6845859707632784, - "sentence_nr": 0 + "score": 0.6102727682426059, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", - "metric": "bleu", - "score": 0.5073374020380702, - "sentence_nr": 0 + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.2774290545068997, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.6713451965832894, - "sentence_nr": 0 + "score": 0.6397454944654261, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.63457045351243, - "sentence_nr": 0 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.760139991277541, - "sentence_nr": 0 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.2747017431249852, - "sentence_nr": 0 + "score": 0.2748202507307579, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.4456826256200505, - "sentence_nr": 0 + "score": 0.5810363959809548, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.4834220366915352, - "sentence_nr": 0 + "score": 0.28571962561926445, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", - "metric": "chrf", - "score": 0.678862671476654, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", - "metric": "bleu", - "score": 0.5941142117182071, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.6930129129388155, - "sentence_nr": 0 + "score": 0.693456244639743, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.3489926819498492, - "sentence_nr": 0 + "score": 0.17905278399134197, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5715668842319502, - "sentence_nr": 0 + "score": 0.37257295447029826, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2786169604662155, - "sentence_nr": 0 + "score": 0.15521606028436608, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5267252236203236, - "sentence_nr": 0 + "score": 0.37645329404497957, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.35446322216812387, - "sentence_nr": 0 + "score": 0.12620429887108936, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5208748527454148, - "sentence_nr": 0 + "score": 0.35580703793872603, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2679728611808951, - "sentence_nr": 0 + "score": 0.12872220631084524, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.466691372759197, - "sentence_nr": 0 + "score": 0.33602633953270183, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2676232320051144, - "sentence_nr": 0 + "score": 0.1582866049832572, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5440246804235981, - "sentence_nr": 0 + "score": 0.34487142413575794, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2328598163544389, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.46604753989124215, - "sentence_nr": 0 + "score": 0.03037224815656603, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.3249989390135794, - "sentence_nr": 0 + "score": 0.10203846572325131, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5532261012182782, - "sentence_nr": 0 + "score": 0.33381153680096753, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.04043358226234485, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.178130317890244, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.31400830186120793, - "sentence_nr": 0 + "score": 0.1685643537060726, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5436299115609682, - "sentence_nr": 0 + "score": 0.36926449644166065, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.180038135256147, - "sentence_nr": 0 + "score": 0.014935758919429663, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.42760668286140896, - "sentence_nr": 0 + "score": 0.08106107745254391, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2702404890575711, - "sentence_nr": 0 + "score": 0.044304867337633724, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", - "metric": "chrf", - "score": 0.505948742808373, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", - "metric": "bleu", - "score": 0.3127320650917403, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5820474024058695, - "sentence_nr": 0 + "score": 0.20806974344498103, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.3410244689880313, - "sentence_nr": 0 + "score": 0.1418524086391329, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5816669416914216, - "sentence_nr": 0 + "score": 0.38295770773758747, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.4234343012313773, - "sentence_nr": 0 + "score": 0.15268019045355535, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.6625289905598352, - "sentence_nr": 0 + "score": 0.41028757620299977, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.3885765192359091, - "sentence_nr": 0 + "score": 0.030860166165309233, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.6554470157301392, - "sentence_nr": 0 + "score": 0.1100250143829584, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.38108864298853723, - "sentence_nr": 0 + "score": 0.21255327712152144, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.6485553379227472, - "sentence_nr": 0 + "score": 0.43272151570555034, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.3765213224289163, - "sentence_nr": 0 + "score": 0.08860973467526746, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.6469521424555786, - "sentence_nr": 0 + "score": 0.3178004360288637, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.36247466608675993, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.6011484151165629, - "sentence_nr": 0 + "score": 0.05918530850500025, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.4079926989572759, - "sentence_nr": 0 + "score": 0.1438459189500836, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.6592699047005666, - "sentence_nr": 0 + "score": 0.30693371625402605, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.30494536158123264, - "sentence_nr": 0 + "score": 0.0979038733644086, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5857538582551342, - "sentence_nr": 0 + "score": 0.30211704738953993, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.26075652499067425, - "sentence_nr": 0 + "score": 0.2288990188897003, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5605305670545515, - "sentence_nr": 0 + "score": 0.48933901443699584, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.13339786348528015, - "sentence_nr": 0 + "score": 0.009624974244068071, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.46778689835182324, - "sentence_nr": 0 + "score": 0.07318255686027669, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.2602768294269028, - "sentence_nr": 0 + "score": 0.043420474648595074, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", - "metric": "chrf", - "score": 0.5310567541651178, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", - "metric": "bleu", - "score": 0.32577646359654405, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.6034691061493307, - "sentence_nr": 0 + "score": 0.2884095690753619, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.4324680011853555, - "sentence_nr": 0 + "score": 0.17382347640129553, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.5877600878871951, - "sentence_nr": 0 + "score": 0.4061580777885601, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.4493940083619696, - "sentence_nr": 0 + "score": 0.13868172938464635, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.6230960824462234, - "sentence_nr": 0 + "score": 0.3094469764260441, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.4116575552858724, - "sentence_nr": 0 + "score": 0.10361854845420869, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.5665759692366567, - "sentence_nr": 0 + "score": 0.32774802711076473, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.3533147318401534, - "sentence_nr": 0 + "score": 0.15186969315425305, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.5972951640947346, - "sentence_nr": 0 + "score": 0.3458120002305796, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.44219732271776674, - "sentence_nr": 0 + "score": 0.19074380068002203, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.6193429426274062, - "sentence_nr": 0 + "score": 0.40566585096277824, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.41852674506584964, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.6035836275599532, - "sentence_nr": 0 + "score": 0.20031726728306523, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.433056028408153, - "sentence_nr": 0 + "score": 0.20485833586704885, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.6068335862669254, - "sentence_nr": 0 + "score": 0.468735805943922, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 0 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.37696437834356655, - "sentence_nr": 0 + "score": 0.192481383169461, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.5617832488367239, - "sentence_nr": 0 + "score": 0.3799051443349615, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.1810501938660849, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.4154005351684647, - "sentence_nr": 0 + "score": 0.01252735726099625, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.5018386916018573, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", - "metric": "chrf", - "score": 0.6673891538739279, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", - "metric": "bleu", - "score": 0.43320553917029947, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.6050369991278077, - "sentence_nr": 0 + "score": 0.273148644463442, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.6947677373756656, - "sentence_nr": 0 + "score": 0.09431297723472011, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.7941300666655116, - "sentence_nr": 0 + "score": 0.3616856339096348, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.6412098671661826, - "sentence_nr": 0 + "score": 0.11091252683001185, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.7665040244283648, - "sentence_nr": 0 + "score": 0.26607634610445896, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.6045639360711837, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.7576570567798335, - "sentence_nr": 0 + "score": 0.189717083187238, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.5438238038060724, - "sentence_nr": 0 + "score": 0.10266747466754884, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.7060850657954441, - "sentence_nr": 0 + "score": 0.3364703638684802, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.6638859619095425, - "sentence_nr": 0 + "score": 0.22381487678101888, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.7874224590682172, - "sentence_nr": 0 + "score": 0.5249370100068887, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.6543739381048754, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.7768522458527362, - "sentence_nr": 0 + "score": 0.28912109037408523, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.5886489119980793, - "sentence_nr": 0 + "score": 0.2144604484498437, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.7611944709376643, - "sentence_nr": 0 + "score": 0.48894052224175993, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.5162974106233954, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.748545216109632, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.6561309661336588, - "sentence_nr": 0 + "score": 0.21001173689943997, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.7849652413082676, - "sentence_nr": 0 + "score": 0.423493931076046, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.24311976929452217, - "sentence_nr": 0 + "score": 0.006232910970143225, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.5332455436874994, - "sentence_nr": 0 + "score": 0.06317168666869727, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.5821415139431849, - "sentence_nr": 0 + "score": 0.06938388878349923, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", - "metric": "chrf", - "score": 0.7329539842616807, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", - "metric": "bleu", - "score": 0.6665468808142623, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.8018370160729217, - "sentence_nr": 0 + "score": 0.3541078046399395, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.3861375213265022, - "sentence_nr": 0 + "score": 0.14557808399334188, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5122109329134508, - "sentence_nr": 0 + "score": 0.36598346755702993, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.32539921259497445, - "sentence_nr": 0 + "score": 0.18154954789336694, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5133457276293165, - "sentence_nr": 0 + "score": 0.4557483776072868, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.375079512706724, - "sentence_nr": 0 + "score": 0.06897533888461813, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5371301483272257, - "sentence_nr": 0 + "score": 0.2776666563000344, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.48456463733283883, - "sentence_nr": 0 + "score": 0.1665765483402476, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5906105668854662, - "sentence_nr": 0 + "score": 0.4017968725013381, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.4845227999608418, - "sentence_nr": 0 + "score": 0.10666682719585797, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5968050469845498, - "sentence_nr": 0 + "score": 0.33462901494141756, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.4494703452336724, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5696298539086213, - "sentence_nr": 0 + "score": 0.04151505758906764, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.4295014616287586, - "sentence_nr": 0 + "score": 0.12189363728567917, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5957510678657648, - "sentence_nr": 0 + "score": 0.37595660827287636, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 0 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.47727404239076743, - "sentence_nr": 0 + "score": 0.18926971577178767, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.6081867525552255, - "sentence_nr": 0 + "score": 0.4931453714148122, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.01656048993031311, - "sentence_nr": 0 + "score": 0.004663531624960091, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.11323797713183678, - "sentence_nr": 0 + "score": 0.07262533604330305, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.4124136266900752, - "sentence_nr": 0 + "score": 0.037401300306846526, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.5714981155807188, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.461887670717865, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.594188645494074, - "sentence_nr": 0 + "score": 0.27395881217705964, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.3880515884750121, - "sentence_nr": 1 + "score": 0.16780109158842918, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6587916715823183, - "sentence_nr": 1 + "score": 0.3968694014697679, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.5142726846179982, - "sentence_nr": 1 + "score": 0.1381751568911733, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.7344716263345912, - "sentence_nr": 1 + "score": 0.3121557499162649, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.6066498620510337, - "sentence_nr": 1 + "score": 0.1579497466001673, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.7812137754227463, - "sentence_nr": 1 + "score": 0.5092928545844059, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.4342750764549485, - "sentence_nr": 1 + "score": 0.16286876096900815, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.7115011221714777, - "sentence_nr": 1 + "score": 0.3422914837190449, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.28822910320599077, - "sentence_nr": 1 + "score": 0.20198948917565754, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6087031937056202, - "sentence_nr": 1 + "score": 0.34858221035657466, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.5011893046413795, - "sentence_nr": 1 + "score": 0.06888992790640074, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.7089203664957927, - "sentence_nr": 1 + "score": 0.2874483621307283, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.3406014428030703, - "sentence_nr": 1 + "score": 0.1631196072688366, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6514548680180557, - "sentence_nr": 1 + "score": 0.3502730667074754, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.4738611152748619, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.7293997939434749, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.4492327786840591, - "sentence_nr": 1 + "score": 0.21286836557101563, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6917786880624969, - "sentence_nr": 1 + "score": 0.45055232014427626, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.031126201157905466, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.5105553787243322, - "sentence_nr": 1 + "score": 0.015970144454664378, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.44571331402556874, - "sentence_nr": 1 + "score": 0.06929847827527827, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", - "metric": "chrf", - "score": 0.67235059873138, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", - "metric": "bleu", - "score": 0.4241047637225085, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6844709246396142, - "sentence_nr": 1 + "score": 0.30185194035792856, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.4487746167679644, - "sentence_nr": 1 + "score": 0.11697642623186386, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.4476730201191672, - "sentence_nr": 1 + "score": 0.37117753637984835, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.2836623400057614, - "sentence_nr": 1 + "score": 0.20065115069964384, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.29147337237183046, - "sentence_nr": 1 + "score": 0.4084885616013531, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.2775905064108025, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.3165767280260291, - "sentence_nr": 1 + "score": 0.17621963873521423, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.28912432952036243, - "sentence_nr": 1 + "score": 0.09916146090364127, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.31119603942667584, - "sentence_nr": 1 + "score": 0.3121110160693956, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.4094748015187699, - "sentence_nr": 1 + "score": 0.22669486951066523, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.4288513205758089, - "sentence_nr": 1 + "score": 0.4484451941575473, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.35430370029300495, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.3864890531682498, - "sentence_nr": 1 + "score": 0.011560595536104562, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.37405604379521823, - "sentence_nr": 1 + "score": 0.07368089078790738, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.409758558051675, - "sentence_nr": 1 + "score": 0.41452613113710224, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.4082186610925126, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.4042514356445265, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.3835611536417376, - "sentence_nr": 1 + "score": 0.17892846390928677, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.41360439536029553, - "sentence_nr": 1 + "score": 0.47088195615067674, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.19333361726926898, - "sentence_nr": 1 + "score": 0.012370537823050053, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.28056620588920506, - "sentence_nr": 1 + "score": 0.06660321132654005, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.3541652369790141, - "sentence_nr": 1 + "score": 0.09831093939330879, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", - "metric": "chrf", - "score": 0.38739546241623046, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", - "metric": "bleu", - "score": 0.5554441727233942, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5410106254032345, - "sentence_nr": 1 + "score": 0.33203866499974327, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.2113054108348111, - "sentence_nr": 1 + "score": 0.13805615693046389, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.44238229987470284, - "sentence_nr": 1 + "score": 0.40787998733941394, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.26207903587847736, - "sentence_nr": 1 + "score": 0.12291219097556666, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.50073123223194, - "sentence_nr": 1 + "score": 0.3448002180666873, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.16098073041469485, - "sentence_nr": 1 + "score": 0.17643078314788999, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.39710375075643284, - "sentence_nr": 1 + "score": 0.40757584786696294, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.11465623153412556, - "sentence_nr": 1 + "score": 0.1989414239237112, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.4497512968651573, - "sentence_nr": 1 + "score": 0.3791567776918788, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.16950698451288215, - "sentence_nr": 1 + "score": 0.09916009482330297, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.48668984177868246, - "sentence_nr": 1 + "score": 0.3032928217006101, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.23516650478671175, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.4885052730214997, - "sentence_nr": 1 + "score": 0.09453698369211004, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.23477037244978113, - "sentence_nr": 1 + "score": 0.1995980198896431, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5165217514090542, - "sentence_nr": 1 + "score": 0.4244503391142409, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.21585895003952446, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.48140875917864023, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.2711981710401392, - "sentence_nr": 1 + "score": 0.18216362398065106, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5555651822168547, - "sentence_nr": 1 + "score": 0.36524832602306334, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.168777027092081, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.4368481165562445, - "sentence_nr": 1 + "score": 0.08319287955437346, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.22415590998535484, - "sentence_nr": 1 + "score": 0.08383676689911676, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", - "metric": "chrf", - "score": 0.49981791926190994, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", - "metric": "bleu", - "score": 0.3231139066663432, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.6056704743332197, - "sentence_nr": 1 + "score": 0.2855329690010324, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.4556160153884204, - "sentence_nr": 1 + "score": 0.19148282873929853, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6661994452325181, - "sentence_nr": 1 + "score": 0.4707949702068854, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3216756020053242, - "sentence_nr": 1 + "score": 0.20608572305725564, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6141241026166391, - "sentence_nr": 1 + "score": 0.4704943905570542, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.43369048469848437, - "sentence_nr": 1 + "score": 0.08183353655679478, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6586872889176818, - "sentence_nr": 1 + "score": 0.25007633393249695, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.4098419224543478, - "sentence_nr": 1 + "score": 0.0981642545874085, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6358736384460296, - "sentence_nr": 1 + "score": 0.31793222329793575, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.4527112325797497, - "sentence_nr": 1 + "score": 0.09478705591775652, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6708989870027865, - "sentence_nr": 1 + "score": 0.33293232395887284, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.403282335120862, - "sentence_nr": 1 + "score": 0.11976209355757551, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6319223068216205, - "sentence_nr": 1 + "score": 0.27004759126600675, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.44234482870142466, - "sentence_nr": 1 + "score": 0.09142555538569784, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6617260327319175, - "sentence_nr": 1 + "score": 0.31371707771405133, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3870043562676652, - "sentence_nr": 1 + "score": 0.05438497632520132, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.631536050216449, - "sentence_nr": 1 + "score": 0.26123506271154656, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.465541200947692, - "sentence_nr": 1 + "score": 0.1490232164900303, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6735988737803571, - "sentence_nr": 1 + "score": 0.43745835724045856, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.37462930793644134, - "sentence_nr": 1 + "score": 0.005606294971348417, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6297969107438809, - "sentence_nr": 1 + "score": 0.06662245090541388, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.45236333724230443, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", - "metric": "chrf", - "score": 0.6557435747309683, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", - "metric": "bleu", - "score": 0.4197376433963966, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6581729857740523, - "sentence_nr": 1 + "score": 0.12752236829255797, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.37570809340937233, - "sentence_nr": 1 + "score": 0.12157241570357182, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6339141734561076, - "sentence_nr": 1 + "score": 0.4080990097991491, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.559332422592187, - "sentence_nr": 1 + "score": 0.085416483900781, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.733291190094771, - "sentence_nr": 1 + "score": 0.2825804066750608, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.48457382450313924, - "sentence_nr": 1 + "score": 0.10415298161056984, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.7144409873446065, - "sentence_nr": 1 + "score": 0.33452632923050557, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.4881942815467274, - "sentence_nr": 1 + "score": 0.21204239268527586, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6662053431593723, - "sentence_nr": 1 + "score": 0.3846197304420823, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.3977038258772401, - "sentence_nr": 1 + "score": 0.14057105892389254, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6202897864314184, - "sentence_nr": 1 + "score": 0.3028381427383384, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.5024073848733999, - "sentence_nr": 1 + "score": 0.004763623056487517, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6805608953669952, - "sentence_nr": 1 + "score": 0.07485928007606017, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.4198435178617755, - "sentence_nr": 1 + "score": 0.11689600237805012, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6615330486958915, - "sentence_nr": 1 + "score": 0.38258301195690664, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.4423392581565186, - "sentence_nr": 1 + "score": 0.19809535837880818, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.663370348519268, - "sentence_nr": 1 + "score": 0.447539350421338, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.42298863290550076, - "sentence_nr": 1 + "score": 0.004718557257042585, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6260112466527037, - "sentence_nr": 1 + "score": 0.08019304349523304, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.422714343026006, - "sentence_nr": 1 + "score": 0.05614653993259943, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", - "metric": "chrf", - "score": 0.6266965858252854, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", - "metric": "bleu", - "score": 0.3887113653056583, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6222111159250625, - "sentence_nr": 1 + "score": 0.26485323792360876, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.3446592076818278, - "sentence_nr": 1 + "score": 0.16431887969160053, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.5819912583909785, - "sentence_nr": 1 + "score": 0.4088971379214799, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.23270938096152352, - "sentence_nr": 1 + "score": 0.11452508920842025, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.4490269267329941, - "sentence_nr": 1 + "score": 0.3212742401272785, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.08218359452575877, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.11634468327243708, - "sentence_nr": 1 + "score": 0.23905391762860753, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.1766119944524977, - "sentence_nr": 1 + "score": 0.17673835621668263, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.3986479587107995, - "sentence_nr": 1 + "score": 0.3902085179927465, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.3167585643537871, - "sentence_nr": 1 + "score": 0.13635319583999642, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.5076869840147092, - "sentence_nr": 1 + "score": 0.2850432830231861, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.051272222858601425, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.3292454551002283, - "sentence_nr": 1 + "score": 0.21925629669878902, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.25751023494151143, - "sentence_nr": 1 + "score": 0.20362195873137665, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.4697665795408892, - "sentence_nr": 1 + "score": 0.4504603915919526, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.3212983212315964, - "sentence_nr": 1 + "score": 0.20927351091825444, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.5599573621112933, - "sentence_nr": 1 + "score": 0.41232284529686536, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.006488743008712295, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.2560040742784669, - "sentence_nr": 1 + "score": 0.056679733231823716, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.09236883467211593, - "sentence_nr": 1 + "score": 0.04209313835422283, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", - "metric": "chrf", - "score": 0.3472719365557752, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", - "metric": "bleu", - "score": 0.392653200684027, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.6103547064240303, - "sentence_nr": 1 + "score": 0.26913406771501547, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.529527758323629, - "sentence_nr": 1 + "score": 0.15184278721506198, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.6540432510655854, - "sentence_nr": 1 + "score": 0.4093399937921707, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.49704232910799745, - "sentence_nr": 1 + "score": 0.15478222669012726, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.6453248294274054, - "sentence_nr": 1 + "score": 0.3550584759508654, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.49704232910799745, - "sentence_nr": 1 + "score": 0.06244445123318812, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.6453248294274054, - "sentence_nr": 1 + "score": 0.28239834932587327, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.5494410974163585, - "sentence_nr": 1 + "score": 0.15685632649880807, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.6853937472090788, - "sentence_nr": 1 + "score": 0.34378295878971765, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.44114781827798216, - "sentence_nr": 1 + "score": 0.06070088845782673, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.6241365710582877, - "sentence_nr": 1 + "score": 0.2584364364927186, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.4286794450695727, - "sentence_nr": 1 + "score": 0.06467646497347093, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.6365023289177463, - "sentence_nr": 1 + "score": 0.2374647159547877, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.4744991305294048, - "sentence_nr": 1 + "score": 0.05675489168243481, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.6720481841701565, - "sentence_nr": 1 + "score": 0.3098329822024127, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.4946489712934811, - "sentence_nr": 1 + "score": 0.14459834065375157, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.6635756951391838, - "sentence_nr": 1 + "score": 0.4652483976219767, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.32345422777393923, - "sentence_nr": 1 + "score": 0.056417721736162135, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.5652905380017423, - "sentence_nr": 1 + "score": 0.14707146406788849, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.473424955479643, - "sentence_nr": 1 + "score": 0.050577564370191244, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", - "metric": "chrf", - "score": 0.6791725069180572, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", - "metric": "bleu", - "score": 0.4946489712934811, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.6642621312047408, - "sentence_nr": 1 + "score": 0.26455598459911367, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.3479698393875884, - "sentence_nr": 1 + "score": 0.16758563722627876, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.5760833125751785, - "sentence_nr": 1 + "score": 0.4598125962895632, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.24373253714463095, - "sentence_nr": 1 + "score": 0.07875433150726119, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.49482039214573803, - "sentence_nr": 1 + "score": 0.2638954513805452, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.312050635062637, - "sentence_nr": 1 + "score": 0.12334630141873701, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.5390444512132623, - "sentence_nr": 1 + "score": 0.3570869171580578, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.24229889794871173, - "sentence_nr": 1 + "score": 0.19153195331287226, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.4853505495636382, - "sentence_nr": 1 + "score": 0.4035796398628449, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.3542266508664836, - "sentence_nr": 1 + "score": 0.1054433514098504, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.5643413028542406, - "sentence_nr": 1 + "score": 0.2840946641780818, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.18282456123768265, - "sentence_nr": 1 + "score": 0.005649824351905227, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.47540661243586124, - "sentence_nr": 1 + "score": 0.09384599631616997, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.2921982022041547, - "sentence_nr": 1 + "score": 0.1738582449442553, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.5264166199754001, - "sentence_nr": 1 + "score": 0.40161714405254456, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.3142825719425009, - "sentence_nr": 1 + "score": 0.16120676251405475, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.584353897647861, - "sentence_nr": 1 + "score": 0.3934823211441987, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.006102253115653432, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.08614490649176082, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.17473028966988555, - "sentence_nr": 1 + "score": 0.05052791122570277, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", - "metric": "chrf", - "score": 0.400425072418037, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", - "metric": "bleu", - "score": 0.2902817248447081, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.5778883542136447, - "sentence_nr": 1 + "score": 0.25244788085139286, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.2988697040013311, - "sentence_nr": 1 + "score": 0.18041700926694673, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.5442522660489195, - "sentence_nr": 1 + "score": 0.43852448917973136, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.500703635659656, - "sentence_nr": 1 + "score": 0.10734088848154077, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6501904887399698, - "sentence_nr": 1 + "score": 0.33946796348247366, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.4876463179677598, - "sentence_nr": 1 + "score": 0.12499287263993265, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6113405963585182, - "sentence_nr": 1 + "score": 0.3031531068573407, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.5199813503697857, - "sentence_nr": 1 + "score": 0.14318317227039934, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6584629522606407, - "sentence_nr": 1 + "score": 0.356756117753337, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.3995439803178399, - "sentence_nr": 1 + "score": 0.12768613576122964, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6021193793256325, - "sentence_nr": 1 + "score": 0.3279857505284436, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.40656183899584336, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.5890799945028116, - "sentence_nr": 1 + "score": 0.05915285533036862, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.528547004876945, - "sentence_nr": 1 + "score": 0.15720527174368754, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6459593469343872, - "sentence_nr": 1 + "score": 0.4715103005986015, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.45002572171222577, - "sentence_nr": 1 + "score": 0.1853793533058344, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.620458099259989, - "sentence_nr": 1 + "score": 0.47839321418703307, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.27875207406965286, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.5095968928696253, - "sentence_nr": 1 + "score": 0.21349841283886073, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.5576102993622991, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", - "metric": "chrf", - "score": 0.6640761861237344, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", - "metric": "bleu", - "score": 0.512463054128702, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6527479377010996, - "sentence_nr": 1 + "score": 0.27204846616025496, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.43330223254789785, - "sentence_nr": 1 + "score": 0.15197436941722972, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.5564499529933307, - "sentence_nr": 1 + "score": 0.37271000364127155, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.1466607445607986, - "sentence_nr": 1 + "score": 0.17795920517030017, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.36552963821230766, - "sentence_nr": 1 + "score": 0.41862955401967455, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.20527494029659898, - "sentence_nr": 1 + "score": 0.17060644184287996, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.43586475049009993, - "sentence_nr": 1 + "score": 0.4054584763100862, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.3047577636054668, - "sentence_nr": 1 + "score": 0.17670199390439656, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.48318512703629857, - "sentence_nr": 1 + "score": 0.36682227371085463, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.2993081268625724, - "sentence_nr": 1 + "score": 0.16136987880724096, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.47777429598730525, - "sentence_nr": 1 + "score": 0.33626920748765377, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.12340057804403023, - "sentence_nr": 1 + "score": 0.05468777721214362, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.3331532512757645, - "sentence_nr": 1 + "score": 0.2495519218392036, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.3196191720459511, - "sentence_nr": 1 + "score": 0.2195372587354865, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.4758634857690128, - "sentence_nr": 1 + "score": 0.3664303672465512, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.06692436199443168, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.23947877713211682, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.3095674062940522, - "sentence_nr": 1 + "score": 0.17524367912943578, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.49847201920427264, - "sentence_nr": 1 + "score": 0.3908643084796051, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.018807992767181335, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.08748671768279999, - "sentence_nr": 1 + "score": 0.0887797545718027, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.01250047619586174, - "sentence_nr": 1 + "score": 0.06437840881729344, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.12383271014582256, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.38609988647757243, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.5480418778026874, - "sentence_nr": 1 + "score": 0.26576141148273813, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.7017829861193574, - "sentence_nr": 2 + "score": 0.598931508663349, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7743327021667388, - "sentence_nr": 2 + "score": 0.7353063745802827, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6961795371760597, - "sentence_nr": 2 + "score": 0.30677064886592076, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7859480663394858, - "sentence_nr": 2 + "score": 0.5308555945242818, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5643442092080923, - "sentence_nr": 2 + "score": 0.1327526847508867, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7005543453411931, - "sentence_nr": 2 + "score": 0.37850602486495205, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5432312750246535, - "sentence_nr": 2 + "score": 0.18405035438430847, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6677259864784132, - "sentence_nr": 2 + "score": 0.4142901090120915, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6837528314895732, - "sentence_nr": 2 + "score": 0.13410301071131794, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7968789890147058, - "sentence_nr": 2 + "score": 0.3942932268034351, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4391684160269219, - "sentence_nr": 2 + "score": 0.24239458593560292, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6371098202414471, - "sentence_nr": 2 + "score": 0.32069132319909655, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.7555875294328935, - "sentence_nr": 2 + "score": 0.22478613858269392, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8049022687045564, - "sentence_nr": 2 + "score": 0.44348101018104913, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6443411340522405, - "sentence_nr": 2 + "score": 0.4026159305424288, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7335999563315522, - "sentence_nr": 2 + "score": 0.5712560131047175, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2285369650225378, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4750387664265888, - "sentence_nr": 2 + "score": 0.183687049781416, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5975003598259766, - "sentence_nr": 2 + "score": 0.351911486970854, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", - "task": "translation_to", - "metric": "chrf", - "score": 0.7683913390959731, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", - "metric": "bleu", - "score": 0.7108527311307847, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8209448175222175, - "sentence_nr": 2 + "score": 0.5181825846579515, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.14118350058219528, - "sentence_nr": 2 + "score": 0.4262221594184117, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.20431837779877604, - "sentence_nr": 2 + "score": 0.5886657414856064, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1811004938014804, - "sentence_nr": 2 + "score": 0.286608441075188, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2649993136544717, - "sentence_nr": 2 + "score": 0.4579283646292802, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.14089011087858522, - "sentence_nr": 2 + "score": 0.25861130592298187, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.21944603811527294, - "sentence_nr": 2 + "score": 0.39452644092432093, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.20379250618355427, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.12501819027374758, - "sentence_nr": 2 + "score": 0.41085414309816914, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2360941227140328, - "sentence_nr": 2 + "score": 0.17328174803055044, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.35939098278145853, - "sentence_nr": 2 + "score": 0.3178268797869574, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1273192735797341, - "sentence_nr": 2 + "score": 0.2990226215771518, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.22231961416584312, - "sentence_nr": 2 + "score": 0.4323734152924571, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3360376952328008, - "sentence_nr": 2 + "score": 0.10434360980785336, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.35297640449956286, - "sentence_nr": 2 + "score": 0.3012789660952507, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.13680836462007476, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.24537888283181183, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.21687218788036394, - "sentence_nr": 2 + "score": 0.17248715680799764, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.35659125027777805, - "sentence_nr": 2 + "score": 0.40043565243219187, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.1017839169529136, - "sentence_nr": 2 + "score": 0.13835317113453516, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.14549060082020032, - "sentence_nr": 2 + "score": 0.16343842313572918, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "translation_to", - "metric": "chrf", - "score": 0.22054620758680943, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", - "metric": "bleu", - "score": 0.2085590894856562, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3200949564949597, - "sentence_nr": 2 + "score": 0.3986641525285075, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.20721924345714232, - "sentence_nr": 2 + "score": 0.43186481103649477, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.36475932190367044, - "sentence_nr": 2 + "score": 0.5792139686527714, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.11386607947762988, - "sentence_nr": 2 + "score": 0.30890092021323623, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.33564583347921473, - "sentence_nr": 2 + "score": 0.5553909583113487, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.16862356321891248, - "sentence_nr": 2 + "score": 0.30890092021323623, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3637462812267946, - "sentence_nr": 2 + "score": 0.5553909583113487, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.25798723088167685, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.1485432117087218, - "sentence_nr": 2 + "score": 0.5244854229988815, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.23649053182388327, - "sentence_nr": 2 + "score": 0.3315037521841549, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4127382174759535, - "sentence_nr": 2 + "score": 0.468197879470805, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2392792151449317, - "sentence_nr": 2 + "score": 0.460474309246715, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.388678103641788, - "sentence_nr": 2 + "score": 0.6213537794704693, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.34101364633474157, - "sentence_nr": 2 + "score": 0.30890092021323623, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5758572581135913, - "sentence_nr": 2 + "score": 0.5676965183365866, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2026639468552004, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4471011187469559, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.11622323415479685, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.10826694406224016, - "sentence_nr": 2 + "score": 0.003172770121174655, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.180048782148418, - "sentence_nr": 2 + "score": 0.43186481103649477, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", - "task": "translation_to", - "metric": "chrf", - "score": 0.3772586334343914, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", - "metric": "bleu", - "score": 0.28939737284723716, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.44550999966826343, - "sentence_nr": 2 + "score": 0.5834549494301647, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.46442643702863534, - "sentence_nr": 2 + "score": 0.46092611919700416, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5519480629125156, - "sentence_nr": 2 + "score": 0.6365915338629015, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6268941789647348, - "sentence_nr": 2 + "score": 0.1690979933029136, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6958291103494518, - "sentence_nr": 2 + "score": 0.3751861276375209, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4554740717077828, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5498766350188072, - "sentence_nr": 2 + "score": 0.3344305108778801, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.35817810808590844, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5012707040525209, - "sentence_nr": 2 + "score": 0.37017501464955627, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.570135897056151, - "sentence_nr": 2 + "score": 0.22218130727359342, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6801332690579707, - "sentence_nr": 2 + "score": 0.39929356245904674, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.349335635815966, - "sentence_nr": 2 + "score": 0.3194331635465395, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4827709277987172, - "sentence_nr": 2 + "score": 0.5031092445628172, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4692880637764782, - "sentence_nr": 2 + "score": 0.334422418242443, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5773610754678101, - "sentence_nr": 2 + "score": 0.47577086062579566, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.34182319563232233, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5052410644804232, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5796814083647206, - "sentence_nr": 2 + "score": 0.4482907809719588, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6364369549208913, - "sentence_nr": 2 + "score": 0.5498272118133005, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2985280444159845, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5072627289039213, - "sentence_nr": 2 + "score": 0.02467424260792568, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.49402195020645817, - "sentence_nr": 2 + "score": 0.21902340561392236, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", - "task": "translation_to", - "metric": "chrf", - "score": 0.583821485566765, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", - "metric": "bleu", - "score": 0.6297960258710876, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7117676662366008, - "sentence_nr": 2 + "score": 0.43906671679239717, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.22837680015088951, - "sentence_nr": 2 + "score": 0.5234484809182233, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.44164180234500505, - "sentence_nr": 2 + "score": 0.6658297773613274, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4151474543103342, - "sentence_nr": 2 + "score": 0.2840563956846642, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.638952468710771, - "sentence_nr": 2 + "score": 0.5110250591004448, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4355097603079957, - "sentence_nr": 2 + "score": 0.30007504691018483, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6511365998081735, - "sentence_nr": 2 + "score": 0.5153810823423555, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2468185992183292, - "sentence_nr": 2 + "score": 0.14074957769288798, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.46792167630295967, - "sentence_nr": 2 + "score": 0.3750035199199742, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.21305368975019265, - "sentence_nr": 2 + "score": 0.26538706048179084, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4371748197696026, - "sentence_nr": 2 + "score": 0.4982627378595717, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.08919951949408464, - "sentence_nr": 2 + "score": 0.232738415750697, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2986174009048306, - "sentence_nr": 2 + "score": 0.49618971681248764, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.38791552573256816, - "sentence_nr": 2 + "score": 0.3488611533620711, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5723637874192081, - "sentence_nr": 2 + "score": 0.5550499651473632, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4305675865000082, - "sentence_nr": 2 + "score": 0.31883477089875656, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6312508299648723, - "sentence_nr": 2 + "score": 0.5510450101159524, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.011973456545827533, - "sentence_nr": 2 + "score": 0.038236956722392024, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3002149853465536, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", - "task": "translation_to", - "metric": "chrf", - "score": 0.5378189160780977, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", - "metric": "bleu", - "score": 0.42866719142206977, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6045654191304047, - "sentence_nr": 2 + "score": 0.31998097041178836, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.11634129390828839, - "sentence_nr": 2 + "score": 0.3815250264738168, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.31530902302000635, - "sentence_nr": 2 + "score": 0.6516314751979607, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.19544795798162903, - "sentence_nr": 2 + "score": 0.19920413481788912, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3835451743665027, - "sentence_nr": 2 + "score": 0.42537796926163113, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.19889333501994313, - "sentence_nr": 2 + "score": 0.189902924205034, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3885583772632557, - "sentence_nr": 2 + "score": 0.4072184389907138, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.18710260593933364, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.24480102898506534, - "sentence_nr": 2 + "score": 0.44334313717706003, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.20876900081884944, - "sentence_nr": 2 + "score": 0.15161074985415177, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3981381071356935, - "sentence_nr": 2 + "score": 0.3796830006266126, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.07757069009917116, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.1582263258709324, - "sentence_nr": 2 + "score": 0.3253161209971999, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3530704022752377, - "sentence_nr": 2 + "score": 0.22168992033645996, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.521530381948501, - "sentence_nr": 2 + "score": 0.523689661176845, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.21574854574751035, - "sentence_nr": 2 + "score": 0.2722704374402053, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.41940323708656974, - "sentence_nr": 2 + "score": 0.596004654894533, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.058854097785805734, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.17240019222052141, - "sentence_nr": 2 + "score": 0.01008902035184167, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.10279947040838337, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", - "task": "translation_to", - "metric": "chrf", - "score": 0.23425891587078498, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", - "metric": "bleu", - "score": 0.3268258845598709, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4863358380144881, - "sentence_nr": 2 + "score": 0.3569840483632983, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.359355103997122, - "sentence_nr": 2 + "score": 0.40003810431098236, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5589602235417395, - "sentence_nr": 2 + "score": 0.5899097408105687, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4267520229161, - "sentence_nr": 2 + "score": 0.20401796878756984, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5518115366540288, - "sentence_nr": 2 + "score": 0.43317630453631556, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4248870612387681, - "sentence_nr": 2 + "score": 0.2097387761551816, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5641041633033193, - "sentence_nr": 2 + "score": 0.49663301508497226, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.40429429626811253, - "sentence_nr": 2 + "score": 0.27067168022307464, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.575799986766, - "sentence_nr": 2 + "score": 0.5439625482235064, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.37917766663411384, - "sentence_nr": 2 + "score": 0.18679710353734788, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5365794450039074, - "sentence_nr": 2 + "score": 0.3876457319870774, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.23329145933277767, - "sentence_nr": 2 + "score": 0.2826204057042236, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.44291475401588093, - "sentence_nr": 2 + "score": 0.5043062352893725, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.30626379803308257, - "sentence_nr": 2 + "score": 0.29588994069727786, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5077543267123376, - "sentence_nr": 2 + "score": 0.5527117669081858, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.39963516628793516, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5372822043426468, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.38861707449775285, - "sentence_nr": 2 + "score": 0.46732353406180216, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5432656354167995, - "sentence_nr": 2 + "score": 0.6059276585345114, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.17706333085447226, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4047932836379997, - "sentence_nr": 2 + "score": 0.06266083709457643, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.30904104300309865, - "sentence_nr": 2 + "score": 0.36565527196849945, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", - "task": "translation_to", - "metric": "chrf", - "score": 0.48677056338263186, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", - "metric": "bleu", - "score": 0.5565087025816967, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7009254382359046, - "sentence_nr": 2 + "score": 0.4882803186347697, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.35551034193127495, - "sentence_nr": 2 + "score": 0.38047531731529327, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5627284645723449, - "sentence_nr": 2 + "score": 0.49485723102957346, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5039752490702457, - "sentence_nr": 2 + "score": 0.2044887070217883, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.613669501327356, - "sentence_nr": 2 + "score": 0.38471585132587544, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.36932295883897953, - "sentence_nr": 2 + "score": 0.2309552734743087, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5524455184773474, - "sentence_nr": 2 + "score": 0.4672309378181727, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.080331199191236, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.23021641289829473, - "sentence_nr": 2 + "score": 0.29886658673327365, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3221305290185444, - "sentence_nr": 2 + "score": 0.21812881407613688, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4866081657424789, - "sentence_nr": 2 + "score": 0.3598346059855135, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.10563809356628297, - "sentence_nr": 2 + "score": 0.1367498402979849, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2323275601638909, - "sentence_nr": 2 + "score": 0.3943841419148219, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.37818447598700816, - "sentence_nr": 2 + "score": 0.5676006714726635, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5516941276443429, - "sentence_nr": 2 + "score": 0.6880701448812352, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.34591973979258805, - "sentence_nr": 2 + "score": 0.2974074484950165, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5197016245837053, - "sentence_nr": 2 + "score": 0.5121581247515657, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.10020997712284248, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.16327778043310373, - "sentence_nr": 2 + "score": 0.010162846529607748, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.24470192769722524, - "sentence_nr": 2 + "score": 0.21947959999379651, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", - "task": "translation_to", - "metric": "chrf", - "score": 0.4583472827584427, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", - "metric": "bleu", - "score": 0.4247248638956501, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5970793788386907, - "sentence_nr": 2 + "score": 0.3226457008913864, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5088535943352446, - "sentence_nr": 2 + "score": 0.4489235959690452, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.625202596789752, - "sentence_nr": 2 + "score": 0.5934678825154104, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.562048819850726, - "sentence_nr": 2 + "score": 0.2980504190448601, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7192054483864224, - "sentence_nr": 2 + "score": 0.5101268920225042, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5550041554031738, - "sentence_nr": 2 + "score": 0.28800869328515505, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6554946147279708, - "sentence_nr": 2 + "score": 0.49348678623542436, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4406896260480816, - "sentence_nr": 2 + "score": 0.2429163097293302, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.571328063702761, - "sentence_nr": 2 + "score": 0.5044329486461447, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6260375038358343, - "sentence_nr": 2 + "score": 0.24586918158076287, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7803415401430737, - "sentence_nr": 2 + "score": 0.4658595745396681, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2961648173595504, - "sentence_nr": 2 + "score": 0.23073085454808062, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5022745285039809, - "sentence_nr": 2 + "score": 0.44142087654422146, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.37446819995007063, - "sentence_nr": 2 + "score": 0.4406612884550454, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5965995710194948, - "sentence_nr": 2 + "score": 0.5676112112992767, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.41110950985436373, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6710923400142267, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.47237086893932345, - "sentence_nr": 2 + "score": 0.4476950425126913, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6521003933528818, - "sentence_nr": 2 + "score": 0.5932980209045412, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.31867018346252723, - "sentence_nr": 2 + "score": 0.1483315516064897, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5261433842307197, - "sentence_nr": 2 + "score": 0.25249051585915977, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", - "task": "translation_to", - "metric": "chrf", - "score": 0.709255033821849, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", - "metric": "bleu", - "score": 0.5512181178347816, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7607059998582948, - "sentence_nr": 2 + "score": 0.38558450790399557, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3941975148525721, - "sentence_nr": 2 + "score": 0.46832763312452297, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5191046479503385, - "sentence_nr": 2 + "score": 0.8176110134774669, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.262633940062176, - "sentence_nr": 2 + "score": 0.9436043261706615, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.41923206553744197, - "sentence_nr": 2 + "score": 0.9880191679951993, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3850172427136058, - "sentence_nr": 2 + "score": 1.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5264633431241114, - "sentence_nr": 2 + "score": 1.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.15363234192450648, - "sentence_nr": 2 + "score": 0.0067104198717751464, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3146726146646545, - "sentence_nr": 2 + "score": 1.0, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4709531555683, - "sentence_nr": 2 + "score": 1.0, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.9025232868361638, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.13978782442553714, - "sentence_nr": 2 + "score": 0.9169897590736298, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3223419048219805, - "sentence_nr": 2 + "score": 0.9709835434146469, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5205977846006183, - "sentence_nr": 2 + "score": 0.9951728990866464, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1764046491640527, - "sentence_nr": 2 + "score": 0.9154051169199643, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3464061249457313, - "sentence_nr": 2 + "score": 0.9757471794927451, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.8935248372106969, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.18856799944599728, - "sentence_nr": 2 + "score": 0.9404428602061264, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.20466701735848536, - "sentence_nr": 2 + "score": 1.0, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.3025868321081519, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.27417618121875437, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.42108960466757744, - "sentence_nr": 2 + "score": 1.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.8761560783209453, - "sentence_nr": 3 + "score": 0.3642482472579296, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.9484564543183253, - "sentence_nr": 3 + "score": 0.5342538783335161, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.7505336182671021, - "sentence_nr": 3 + "score": 0.18831933500600306, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.8401910628269498, - "sentence_nr": 3 + "score": 0.4318025704181776, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.8761560783209453, - "sentence_nr": 3 + "score": 0.4439623527529193, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.9289416300153619, - "sentence_nr": 3 + "score": 0.5309137918519957, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.4450050658086207, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.7558874882119336, - "sentence_nr": 3 + "score": 0.2628849077177109, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.8107492451395732, - "sentence_nr": 3 + "score": 0.21039673882735752, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.900032747778274, - "sentence_nr": 3 + "score": 0.3872019296036794, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.8761560783209453, - "sentence_nr": 3 + "score": 0.17879309995151985, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.9484564543183253, - "sentence_nr": 3 + "score": 0.2568045428196672, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.8761560783209453, - "sentence_nr": 3 + "score": 0.6099084961389527, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.9484564543183253, - "sentence_nr": 3 + "score": 0.658015760514539, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.7406375008540003, - "sentence_nr": 3 + "score": 0.2464380578618272, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.9160988509714175, - "sentence_nr": 3 + "score": 0.4278399263644655, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.3132252321342574, - "sentence_nr": 3 + "score": 0.1879604201975219, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.35025412310639736, - "sentence_nr": 3 + "score": 0.11956615218925931, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", - "metric": "chrf", - "score": 0.6825372617659788, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", - "metric": "bleu", - "score": 0.8761560783209453, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.9484564543183253, - "sentence_nr": 3 + "score": 0.3305337714496588, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.13453927150397377, - "sentence_nr": 3 + "score": 0.28406136898728457, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.10522974272748564, - "sentence_nr": 3 + "score": 0.5649283064490618, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.22055493694673897, - "sentence_nr": 3 + "score": 0.21544027588567594, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.3931965048763613, - "sentence_nr": 3 + "score": 0.5040038440508637, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.13755274871304535, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.10397715306705207, - "sentence_nr": 3 + "score": 0.28552127890094825, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.13899941210887606, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.10947303419437356, - "sentence_nr": 3 + "score": 0.16448947606185552, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.13725861056573663, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.11147384852362276, - "sentence_nr": 3 + "score": 0.3395693620772222, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.1350501875730652, - "sentence_nr": 3 + "score": 0.11823053204772466, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.11147384852362276, - "sentence_nr": 3 + "score": 0.24615921057796505, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3491726680217181, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.09408024740752835, - "sentence_nr": 3 + "score": 0.4946434087697324, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.054674609450212665, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.22916123454514536, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.09895358918308976, - "sentence_nr": 3 + "score": 0.4192305796685782, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.03586767012087445, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.11538184104597694, - "sentence_nr": 3 + "score": 0.10393938326032184, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", - "metric": "chrf", - "score": 0.09204268041910899, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", - "metric": "bleu", - "score": 0.1350501875730652, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.11147384852362276, - "sentence_nr": 3 + "score": 0.33864898055191395, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.6885326214539055, - "sentence_nr": 3 + "score": 0.23843418577408987, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.8229812189228393, - "sentence_nr": 3 + "score": 0.4082320855803597, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.839587623092576, - "sentence_nr": 3 + "score": 0.26970223719007375, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.9096086668952811, - "sentence_nr": 3 + "score": 0.5172978597562362, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.7267072830982378, - "sentence_nr": 3 + "score": 0.2372622545962587, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.8396959977515368, - "sentence_nr": 3 + "score": 0.49004864454711367, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.6374950652411382, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.6643984252563968, - "sentence_nr": 3 + "score": 0.190140357671548, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.839587623092576, - "sentence_nr": 3 + "score": 0.22556860731509948, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.9096086668952811, - "sentence_nr": 3 + "score": 0.4747086049005634, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.5821747317554493, - "sentence_nr": 3 + "score": 0.23647235972003527, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.7539119883011114, - "sentence_nr": 3 + "score": 0.41650969469918997, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.6885326214539055, - "sentence_nr": 3 + "score": 0.42662911848025076, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.8328652216139806, - "sentence_nr": 3 + "score": 0.5800596652250789, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.839587623092576, - "sentence_nr": 3 + "score": 0.39420326688847324, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.9096086668952811, - "sentence_nr": 3 + "score": 0.6371076304605184, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.251696695878184, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.5710821658681214, - "sentence_nr": 3 + "score": 0.030501743754356173, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.7498810286408993, - "sentence_nr": 3 + "score": 0.19352792845274666, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", - "metric": "chrf", - "score": 0.7886148242134857, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", - "metric": "bleu", - "score": 0.760856626273165, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.8852144067617798, - "sentence_nr": 3 + "score": 0.3925864519770825, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.3480442076026084, - "sentence_nr": 3 + "score": 0.49458876622696707, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6142483232997242, - "sentence_nr": 3 + "score": 0.6179893617801274, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.2861853478258715, - "sentence_nr": 3 + "score": 0.30630098078522544, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6401604432917332, - "sentence_nr": 3 + "score": 0.5439056051092116, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.2861853478258715, - "sentence_nr": 3 + "score": 0.3059872016765634, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6401604432917332, - "sentence_nr": 3 + "score": 0.5499457869553984, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.2852636439147137, - "sentence_nr": 3 + "score": 0.20076347441707354, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6732018003142922, - "sentence_nr": 3 + "score": 0.49625515445592083, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.412295470431275, - "sentence_nr": 3 + "score": 0.25449674462950855, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.705800771033924, - "sentence_nr": 3 + "score": 0.44805409822643144, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.2104783778565715, - "sentence_nr": 3 + "score": 0.18665948437666813, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6117499551501043, - "sentence_nr": 3 + "score": 0.4437597552815582, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.412295470431275, - "sentence_nr": 3 + "score": 0.48670274592792, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.705800771033924, - "sentence_nr": 3 + "score": 0.6717971788322309, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.18814785746917081, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.5692328972915052, - "sentence_nr": 3 + "score": 0.08002354055277362, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.417372155782838, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.589811312024197, - "sentence_nr": 3 + "score": 0.5043502592801646, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.31072931460421827, - "sentence_nr": 3 + "score": 0.06680433144407034, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.2919280798407827, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", - "metric": "chrf", - "score": 0.4306285422638574, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", - "metric": "bleu", - "score": 0.412295470431275, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6879589052239306, - "sentence_nr": 3 + "score": 0.4076170046499833, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.17401517708317762, - "sentence_nr": 3 + "score": 0.41520313827696, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.45006261596496794, - "sentence_nr": 3 + "score": 0.6485212540886613, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.151240443751577, - "sentence_nr": 3 + "score": 0.19850842371858787, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.4224869587588239, - "sentence_nr": 3 + "score": 0.43584341835040474, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.151240443751577, - "sentence_nr": 3 + "score": 0.284161309400485, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.4224869587588239, - "sentence_nr": 3 + "score": 0.5096201523229312, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.1134451991138546, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.26860011657329247, - "sentence_nr": 3 + "score": 0.4186167762559285, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.14172292406325543, - "sentence_nr": 3 + "score": 0.3312570339636223, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.4762857001428092, - "sentence_nr": 3 + "score": 0.45442661484375735, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.24088562704853508, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.4800955244005148, - "sentence_nr": 3 + "score": 0.3796021685415706, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.16234676720992364, - "sentence_nr": 3 + "score": 0.43874832905672956, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.48137970077362496, - "sentence_nr": 3 + "score": 0.5882858748700781, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.26652403565303173, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.16853790965501372, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.5242065098084487, - "sentence_nr": 3 + "score": 0.3270207865532903, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.07810235385630719, - "sentence_nr": 3 + "score": 0.006769280526888359, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.10401577613691954, - "sentence_nr": 3 + "score": 0.35465713644381464, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", - "metric": "chrf", - "score": 0.32252336426814965, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", - "metric": "bleu", - "score": 0.15094813209726435, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.41808466373264913, - "sentence_nr": 3 + "score": 0.4911561718424494, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2028736642487601, - "sentence_nr": 3 + "score": 0.18559542135951204, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.3614856639698008, - "sentence_nr": 3 + "score": 0.3804842882867387, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2255489037266197, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.3954925749722234, - "sentence_nr": 3 + "score": 0.35369375385786006, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2927057121559396, - "sentence_nr": 3 + "score": 0.13087682931309413, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.4330945753016968, - "sentence_nr": 3 + "score": 0.19462952976787054, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.18038302998635977, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.320678468026793, - "sentence_nr": 3 + "score": 0.013538497707846785, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.21850594525107195, - "sentence_nr": 3 + "score": 0.20972571494011877, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.4049269026117245, - "sentence_nr": 3 + "score": 0.395894071208527, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.21233470585998818, - "sentence_nr": 3 + "score": 0.16678872216161894, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.3630016390465325, - "sentence_nr": 3 + "score": 0.38156158663679846, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.22325877055095214, - "sentence_nr": 3 + "score": 0.21940429389247643, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.38760873730223866, - "sentence_nr": 3 + "score": 0.4343280866601455, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 3 + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2741229265391949, - "sentence_nr": 3 + "score": 0.3300025916068812, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.3721657350281369, - "sentence_nr": 3 + "score": 0.5052501972629104, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.16170596160446446, - "sentence_nr": 3 + "score": 0.1824401863423467, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.33805023952655533, - "sentence_nr": 3 + "score": 0.36709433185688595, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3377854698776805, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", - "metric": "chrf", - "score": 0.23398197530631124, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", - "metric": "bleu", - "score": 0.272143800067929, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.4262772266504184, - "sentence_nr": 3 + "score": 0.521201229892482, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.2929807168354841, - "sentence_nr": 3 + "score": 0.15122189206102096, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5975595069845072, - "sentence_nr": 3 + "score": 0.26750110507308866, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.6689604664235209, - "sentence_nr": 3 + "score": 0.10759927692349745, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.8010329764520807, - "sentence_nr": 3 + "score": 0.21065794536310511, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.6689604664235209, - "sentence_nr": 3 + "score": 0.07843772989359644, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.8010329764520807, - "sentence_nr": 3 + "score": 0.1324578891826276, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.14908960803395838, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.4761746966391582, - "sentence_nr": 3 + "score": 0.08163977068875294, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.12475846123062707, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.2466674257522263, - "sentence_nr": 3 + "score": 0.27823340731817514, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.5677534942306638, - "sentence_nr": 3 + "score": 0.14134641571854575, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.684329671666446, - "sentence_nr": 3 + "score": 0.22948919855739472, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.6666935927206881, - "sentence_nr": 3 + "score": 0.1475503033983142, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.7886059879769752, - "sentence_nr": 3 + "score": 0.22104108935973044, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.16434349396840395, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.28582614857210975, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.3854501214118697, - "sentence_nr": 3 + "score": 0.24911274612875411, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.595779023757305, - "sentence_nr": 3 + "score": 0.3603818786794888, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.10085167559661873, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.1999934463074552, - "sentence_nr": 3 + "score": 0.23831215045289575, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.17543744527808774, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", - "metric": "chrf", - "score": 0.30520457148036917, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", - "metric": "bleu", - "score": 0.6141797522526763, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.8112468273360185, - "sentence_nr": 3 + "score": 0.28201016956553354, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.4113125177363443, - "sentence_nr": 3 + "score": 0.21685485833927476, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.42808075762838727, - "sentence_nr": 3 + "score": 0.3714219747170047, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.32685141385924577, - "sentence_nr": 3 + "score": 0.12274092982883021, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.3758692873615971, - "sentence_nr": 3 + "score": 0.3385513651938691, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.3734832062562986, - "sentence_nr": 3 + "score": 0.1463197333291977, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.383916695249631, - "sentence_nr": 3 + "score": 0.366137273378509, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.3804672236690253, - "sentence_nr": 3 + "score": 0.12656494026948834, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.433708341935832, - "sentence_nr": 3 + "score": 0.3156355830822428, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.4678134833959513, - "sentence_nr": 3 + "score": 0.30327872414714485, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.5051480556620123, - "sentence_nr": 3 + "score": 0.49804213541579834, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.4028998029112093, - "sentence_nr": 3 + "score": 0.13237645860785527, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.43422338821405304, - "sentence_nr": 3 + "score": 0.3818322535970043, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.46360731056064436, - "sentence_nr": 3 + "score": 0.18154235663145316, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.5726015901952585, - "sentence_nr": 3 + "score": 0.3906877817743504, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.12487405142186064, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.32817291858267583, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.3734832062562986, - "sentence_nr": 3 + "score": 0.16701570871784516, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.43771936994910393, - "sentence_nr": 3 + "score": 0.4021286881032558, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.2041405149858879, - "sentence_nr": 3 + "score": 0.29383139922210444, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.2728627798814474, - "sentence_nr": 3 + "score": 0.15799783604363904, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", - "metric": "chrf", - "score": 0.36592034784584504, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", - "metric": "bleu", - "score": 0.4836940239497908, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.5186946866114049, - "sentence_nr": 3 + "score": 0.3949243937510492, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.6237774736059616, - "sentence_nr": 3 + "score": 0.112289032173749, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.8500131524897436, - "sentence_nr": 3 + "score": 0.17726100052085036, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.573764722928549, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.817979859532479, - "sentence_nr": 3 + "score": 0.31017716089889963, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.2320305803246989, - "sentence_nr": 3 + "score": 0.14276716121505195, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.6224956012824276, - "sentence_nr": 3 + "score": 0.3191375424862687, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.21449459478473423, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.6528501353073614, - "sentence_nr": 3 - }, + "score": 0.022303919896869945, + "sentence_nr": 9 + }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.27341185048222727, - "sentence_nr": 3 + "score": 0.12787395553510186, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.6411651849711889, - "sentence_nr": 3 + "score": 0.33752742535974617, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.4041187386794465, - "sentence_nr": 3 + "score": 0.13181313433495553, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.6888233111124319, - "sentence_nr": 3 + "score": 0.31758120882708796, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.5645815242299279, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.8151453923340255, - "sentence_nr": 3 + "score": 0.33753843688529356, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 3 + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.1463197333291977, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.5972046851135996, - "sentence_nr": 3 + "score": 0.3593717322097392, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.24570408832734913, - "sentence_nr": 3 + "score": 0.010176705289341573, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.17470942957770763, - "sentence_nr": 3 + "score": 0.13628770358024436, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", - "metric": "chrf", - "score": 0.5699365673055954, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", - "metric": "bleu", - "score": 0.7743810851655712, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.8921783635360989, - "sentence_nr": 3 + "score": 0.3124983184732695, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.8482942955247808, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 3 + "score": 0.3710595252626966, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.21688283061839067, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.41775824162589076, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.18235247300784824, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.40779523977234755, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.013915288440632284, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.39962545473912425, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.39112369376374106, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.8363600587440573, - "sentence_nr": 3 + "score": 0.2202248274013358, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.9912737182609732, - "sentence_nr": 3 + "score": 0.48474965676300186, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 3 + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.45813938111627356, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.6018154975998465, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.7669980679050217, - "sentence_nr": 3 + "score": 0.1593344703029041, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.8482942955247808, - "sentence_nr": 3 + "score": 0.22494952618128455, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.4760660341798742, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.12666372160329223, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.2650373529479294, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.12162779391619735, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.3228288840559658, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.1649662542496744, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.3466546857451185, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.2246029757863831, - "sentence_nr": 4 + "score": 0.01536966738773372, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.14326513489612383, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 + "score": 0.4034278533385552, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.13829446068705525, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 + "score": 0.32059338352121075, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.37709297891717664, - "sentence_nr": 4 + "score": 0.16521691795932783, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6881502501430368, - "sentence_nr": 4 + "score": 0.4134512022176617, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.203264842568494, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3435867188688158, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.12366644075037489, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.37651342775995167, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.2962222000049211, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.1971903602140518, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", - "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.36269646528997446, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.4765874091118851, - "sentence_nr": 4 + "score": 0.203264842568494, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.45911557772276623, - "sentence_nr": 4 + "score": 0.2922087191170089, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", - "metric": "bleu", - "score": 0.45022125383821326, - "sentence_nr": 4 + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.18237599479708327, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.46874267375238576, - "sentence_nr": 4 + "score": 0.3740403511567824, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.6350593429017282, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.6022395694696409, - "sentence_nr": 4 + "score": 0.2244748716483542, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.3407065041529668, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.3482814151315599, - "sentence_nr": 4 + "score": 0.007281906895508523, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.7215691881328408, - "sentence_nr": 4 + "score": 0.18294117097472648, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.735100789804592, - "sentence_nr": 4 + "score": 0.4383387744769579, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.700487718300918, - "sentence_nr": 4 + "score": 0.17092467746295725, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.7205373993220106, - "sentence_nr": 4 + "score": 0.4340281226634826, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.5894567062209923, - "sentence_nr": 4 + "score": 0.22860414459682069, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.6051783687131701, - "sentence_nr": 4 + "score": 0.47331131010100724, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.5614660831213585, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5781117871636209, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.5907010930652489, - "sentence_nr": 4 + "score": 0.17200673466668953, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5995581839975431, - "sentence_nr": 4 + "score": 0.39948318545775324, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.33500599401126563, - "sentence_nr": 4 + "score": 0.10553225565626573, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.34371117385240735, - "sentence_nr": 4 + "score": 0.1763116500850642, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.630923553986829, - "sentence_nr": 4 + "score": 0.17730543118229922, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", - "metric": "chrf", - "score": 0.6458808155334796, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", - "metric": "bleu", - "score": 0.4687776643329939, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.4435402570986094, - "sentence_nr": 4 + "score": 0.4201842844735916, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1740044679403827, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.3013901676230198, - "sentence_nr": 4 + "score": 0.36375152376157177, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12876689524369925, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.38106012955734714, - "sentence_nr": 4 + "score": 0.3253153379449275, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13269353024089545, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.35187745073108273, - "sentence_nr": 4 + "score": 0.28998089836851504, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.2850647115160651, - "sentence_nr": 4 + "score": 0.01357525601063516, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.2028736642487601, - "sentence_nr": 4 + "score": 0.20298407172594946, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.43458947791319813, - "sentence_nr": 4 + "score": 0.427376330935813, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.4033902612785559, - "sentence_nr": 4 + "score": 0.382987159925022, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.17558199612672082, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.3101159279982649, - "sentence_nr": 4 + "score": 0.41334979014850587, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.20947801521367798, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.37699245483283905, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.24318848592140954, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.3909330178955319, - "sentence_nr": 4 + "score": 0.501343318078065, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.217295409663537, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.12173115521158184, - "sentence_nr": 4 + "score": 0.13784906211485343, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", - "metric": "chrf", - "score": 0.3904544509639755, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.3411846484329862, - "sentence_nr": 4 + "score": 0.3161105981607342, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3820562306791339, - "sentence_nr": 4 + "score": 0.322788951728102, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5488313413379253, - "sentence_nr": 4 + "score": 0.40263021320001785, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.38091370416670794, - "sentence_nr": 4 + "score": 0.119159749312327, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.540550443602966, - "sentence_nr": 4 + "score": 0.21297942664093145, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3820562306791339, - "sentence_nr": 4 + "score": 0.1405026510197826, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5488313413379253, - "sentence_nr": 4 + "score": 0.24785258181936404, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.36314748337164254, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5392658386159207, - "sentence_nr": 4 + "score": 0.022303919896869945, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3820562306791339, - "sentence_nr": 4 + "score": 0.15626231814206226, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5488313413379253, - "sentence_nr": 4 + "score": 0.2918712789926548, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3820562306791339, - "sentence_nr": 4 + "score": 0.15325316503089068, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5488313413379253, - "sentence_nr": 4 + "score": 0.2756316951639811, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.35459684529390034, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5519360558961294, - "sentence_nr": 4 + "score": 0.3210853623565359, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.23610158425430544, - "sentence_nr": 4 + "score": 0.009559007108143848, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.46869487580371916, - "sentence_nr": 4 + "score": 0.05937666456658802, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.35459684529390034, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5499993547125768, - "sentence_nr": 4 + "score": 0.28306950244125495, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.27062395495883934, - "sentence_nr": 4 + "score": 0.14063630555225284, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.4407436716645838, - "sentence_nr": 4 + "score": 0.24531520458611372, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3711335186021823, - "sentence_nr": 4 + "score": 0.3264287329357334, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", - "metric": "chrf", - "score": 0.5595427509161435, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", - "metric": "bleu", - "score": 0.35459684529390034, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5499993547125768, - "sentence_nr": 4 + "score": 0.41662443172249786, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.2294068720558097, - "sentence_nr": 4 + "score": 0.16925466459550803, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.40458364050078693, - "sentence_nr": 4 + "score": 0.35912398848424326, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2036348471340078, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.32588643749980295, - "sentence_nr": 4 + "score": 0.3472831655579266, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.32003170276441123, - "sentence_nr": 4 + "score": 0.21547697432588886, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.23088247483586974, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.4093450185788297, - "sentence_nr": 4 + "score": 0.18039960295364865, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.23189835231884592, - "sentence_nr": 4 + "score": 0.21397099133614067, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.44157797833899437, - "sentence_nr": 4 + "score": 0.3568171392601981, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.11917756990194882, - "sentence_nr": 4 + "score": 0.14134641571854575, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.37850093315889116, - "sentence_nr": 4 + "score": 0.3078571099929154, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15658994837053716, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.3330732444230803, - "sentence_nr": 4 + "score": 0.3084004707364603, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.2550184675066243, - "sentence_nr": 4 + "score": 0.20215771603666896, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.4392529322675216, - "sentence_nr": 4 + "score": 0.34483322672745376, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.23721317187079113, - "sentence_nr": 4 + "score": 0.16165057948216605, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.4319465813689286, - "sentence_nr": 4 + "score": 0.35172210628524053, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.29417113956364643, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.4653698220842079, - "sentence_nr": 4 + "score": 0.012458960343878354, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.23088247483586974, - "sentence_nr": 4 + "score": 0.20053583653512705, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", - "metric": "chrf", - "score": 0.42828303349678104, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", - "metric": "bleu", - "score": 0.23251355381714656, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.43190915325898727, - "sentence_nr": 4 + "score": 0.3585550644386862, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.45237912327122276, - "sentence_nr": 4 + "score": 0.2327080490816513, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.6299071573751139, - "sentence_nr": 4 + "score": 0.4213315211213489, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.23736810439041953, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.4004852416401387, - "sentence_nr": 4 + "score": 0.24362353508932386, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.2916261378761629, - "sentence_nr": 4 + "score": 0.14005830765988142, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.4106520926894174, - "sentence_nr": 4 + "score": 0.28271314565258726, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.282764733088686, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.3743678965131091, - "sentence_nr": 4 + "score": 0.01943377856541192, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.31430120091187586, - "sentence_nr": 4 + "score": 0.2298971389591186, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.5013155459452984, - "sentence_nr": 4 + "score": 0.45764667682340326, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.16451929399933107, - "sentence_nr": 4 + "score": 0.22183437291807073, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.2957279302594959, - "sentence_nr": 4 + "score": 0.4690517750319636, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.11401282249739858, - "sentence_nr": 4 + "score": 0.15089318423122547, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.3117911565455793, - "sentence_nr": 4 + "score": 0.4354703980715437, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.34019506273883837, - "sentence_nr": 4 + "score": 0.3931382365355541, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.48708558391259515, - "sentence_nr": 4 + "score": 0.5429286385993002, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.33184166448858593, - "sentence_nr": 4 + "score": 0.007237155276460672, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.1676136890247661, - "sentence_nr": 4 + "score": 0.22233922818300378, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", - "metric": "chrf", - "score": 0.38914692664434314, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.3579553000756425, - "sentence_nr": 4 + "score": 0.46750271079023087, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.5605065818946205, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.586853267829013, - "sentence_nr": 4 + "score": 0.2568191876426829, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.36300296341860155, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.5816676674074003, - "sentence_nr": 4 + "score": 0.28135849152758385, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.3525399760372503, - "sentence_nr": 4 + "score": 0.13585608692428647, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.5798116969849163, - "sentence_nr": 4 + "score": 0.24602093467402117, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.3927053212677373, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.5108598154804425, - "sentence_nr": 4 + "score": 0.014749122939855126, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.3722001929300059, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.5252698638532942, - "sentence_nr": 4 + "score": 0.2769725060346048, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.3514475288270508, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.5361569875660316, - "sentence_nr": 4 + "score": 0.22585782564798598, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.27914759735007616, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.3958350231734361, - "sentence_nr": 4 + "score": 0.2462954618610128, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.20974733068050955, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.41455868084196934, - "sentence_nr": 4 + "score": 0.25512324153300714, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.16086531618356015, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.2797876941198672, - "sentence_nr": 4 + "score": 0.006249447069096045, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.3277803741755935, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", - "metric": "chrf", - "score": 0.4267708983045122, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", - "metric": "bleu", - "score": 0.275187053569825, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.40511706867577885, - "sentence_nr": 4 + "score": 0.17131793456589922, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.28765408533715414, - "sentence_nr": 4 + "score": 0.12832055613623328, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.4046608868073569, - "sentence_nr": 4 + "score": 0.19433944404681203, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.1819722649161304, - "sentence_nr": 4 + "score": 0.14482189302397735, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.44254730215235283, - "sentence_nr": 4 + "score": 0.2913876815877049, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.25291831689404154, - "sentence_nr": 4 + "score": 0.1217802106941195, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.4482360279074225, - "sentence_nr": 4 + "score": 0.19314598726036322, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.279600269133294, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.48172049854477195, - "sentence_nr": 4 + "score": 0.022303919896869945, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.1258907882951215, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.34143648068854054, - "sentence_nr": 4 + "score": 0.3238973846683935, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.11986809949741643, - "sentence_nr": 4 + "score": 0.1361658548186748, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.34629467658248214, - "sentence_nr": 4 + "score": 0.3295167855876769, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.32666181171942305, - "sentence_nr": 4 + "score": 0.12819825042984195, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.5670457942911707, - "sentence_nr": 4 + "score": 0.36515328991507745, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.29175929784144866, - "sentence_nr": 4 + "score": 0.21743769222637532, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.45299010750030405, - "sentence_nr": 4 + "score": 0.4131100936190792, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11914562165195522, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.24463910693302512, - "sentence_nr": 4 + "score": 0.36399317085467314, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1302352098354987, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", - "metric": "chrf", - "score": 0.2908660541001102, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", - "metric": "bleu", - "score": 0.17432585713050458, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.435271111238395, - "sentence_nr": 4 + "score": 0.326035134708999, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.13305199541830684, - "sentence_nr": 4 + "score": 0.14410670132605607, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.43244987270004115, - "sentence_nr": 4 + "score": 0.24025207593480963, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.3377385620641691, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.5256128450453542, - "sentence_nr": 4 + "score": 0.16306957103469613, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.44776047557667586, - "sentence_nr": 4 + "score": 0.14219389639501667, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.5801193947715436, - "sentence_nr": 4 + "score": 0.3550945020345845, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.2875583820017638, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6087635830564418, - "sentence_nr": 4 + "score": 0.013501937941345124, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.28295596283263513, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6067794553589253, - "sentence_nr": 4 + "score": 0.37693028676849333, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.20863283213455547, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.5109257435313587, - "sentence_nr": 4 + "score": 0.3791177761741048, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.2810551683573811, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6477048453606161, - "sentence_nr": 4 + "score": 0.3284649068899757, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.20988785322505515, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4323069807401831, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.473265686519562, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.7021422985630228, - "sentence_nr": 4 + "score": 0.3397876134677058, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.11810019511256618, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.3708545152745943, - "sentence_nr": 4 + "score": 0.009628007582726738, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.3423375720396189, - "sentence_nr": 4 + "score": 0.20247469739337648, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", - "metric": "chrf", - "score": 0.5343801172775681, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", - "metric": "bleu", - "score": 0.4190284595730208, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6790829001019099, - "sentence_nr": 4 + "score": 0.4418847146430419, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.12546912767038895, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.2651343523961406, - "sentence_nr": 4 + "score": 0.9199349282509897, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.12407216162020399, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.2664864612493293, - "sentence_nr": 4 + "score": 0.9199349282509897, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.11261597894135422, - "sentence_nr": 4 + "score": 0.8827916928185874, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.25234827342962907, - "sentence_nr": 4 + "score": 0.9278293769424701, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.12757855945289526, - "sentence_nr": 4 + "score": 0.5919743410620021, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.3385577201847465, - "sentence_nr": 4 + "score": 0.8142101616656354, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.1175771442804648, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.3103572690939351, - "sentence_nr": 4 + "score": 0.24942094354139677, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.12629279972753293, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.2933944065312711, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.12363251371327445, - "sentence_nr": 4 + "score": 0.6642718379939968, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.2692822154793075, - "sentence_nr": 4 + "score": 0.7768492311706325, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.24470651147480013, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.09410612421964877, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.13696035837771334, - "sentence_nr": 4 + "score": 0.7660237942267061, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.3335388002918436, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.24296155543954379, - "sentence_nr": 4 + "score": 0.8523393041110139, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.2887138086538547, - "sentence_nr": 5 + "score": 0.6358921902612438, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.6342291345998248, - "sentence_nr": 5 + "score": 0.8041899227402122, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.6299285159340671, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.7993134129243716, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.7013062757071812, - "sentence_nr": 5 + "score": 0.710159574003633, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.9303769449292738, - "sentence_nr": 5 + "score": 0.8462481747979111, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.5487830136896633, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.2381658499765768, - "sentence_nr": 5 + "score": 0.77238965036654, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.5745954681260859, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.7920051188244848, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 + "score": 0.6012475603804444, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.9063898435384111, - "sentence_nr": 5 + "score": 0.7990339788905771, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.8522456714074852, - "sentence_nr": 5 + "score": 0.6660677740125452, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.9096914044088521, - "sentence_nr": 5 + "score": 0.8293798371335214, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.6012475603804444, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.8102198011293434, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.5718247506430171, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.7570613392550647, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.6241924127610678, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.8031006153647919, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.9457416090031758, - "sentence_nr": 5 + "score": 0.6372502110149713, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.9892952933418456, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.8187019874664503, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.37589902061551017, - "sentence_nr": 5 + "score": 0.35059076445515835, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.42554151277542873, - "sentence_nr": 5 + "score": 0.40219803477483124, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.20748131961458333, - "sentence_nr": 5 + "score": 0.41316127706749806, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.2716205232346228, - "sentence_nr": 5 + "score": 0.4430321339435623, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.20748131961458333, - "sentence_nr": 5 + "score": 0.3993284843242707, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.2716205232346228, - "sentence_nr": 5 + "score": 0.4224738565076288, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.19272923456045185, - "sentence_nr": 5 + "score": 0.2908087026261561, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.314589204347422, - "sentence_nr": 5 + "score": 0.3411361400094189, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.20748131961458333, - "sentence_nr": 5 + "score": 0.3572514590810421, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.2716205232346228, - "sentence_nr": 5 + "score": 0.40312319760122833, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.2562402498959597, - "sentence_nr": 5 + "score": 0.2996868226086902, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.3462132320098601, - "sentence_nr": 5 + "score": 0.3355531727847081, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.30613574556266654, - "sentence_nr": 5 + "score": 0.4199243020508202, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.36162356523761796, - "sentence_nr": 5 + "score": 0.4310330650643179, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", @@ -37382,7 +34955,7 @@ "task": "translation_to", "metric": "bleu", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", @@ -37390,1527 +34963,1399 @@ "task": "translation_to", "metric": "chrf", "score": 0.0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.30613574556266654, - "sentence_nr": 5 + "score": 0.38146085172952343, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.36162356523761796, - "sentence_nr": 5 + "score": 0.40030269579783606, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.1277700534498365, - "sentence_nr": 5 + "score": 0.2613520653232399, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.20846991452438368, - "sentence_nr": 5 + "score": 0.292974388325607, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.25985341959039815, - "sentence_nr": 5 + "score": 0.38876512474558916, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.3462132320098601, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", - "metric": "bleu", - "score": 0.3645334083305707, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", - "metric": "chrf", - "score": 0.4348353905978472, - "sentence_nr": 5 + "score": 0.41342876789412997, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.42988105429544615, - "sentence_nr": 5 + "score": 0.8780634320789833, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.7577244658187771, - "sentence_nr": 5 + "score": 0.926946700115022, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.5366411241731205, - "sentence_nr": 5 + "score": 0.7964573357809173, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.825566494253596, - "sentence_nr": 5 + "score": 0.8458636471716781, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.617939643800199, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.8665162960307256, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.4360038791211645, - "sentence_nr": 5 + "score": 0.9452996322890763, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.7669087484597642, - "sentence_nr": 5 + "score": 0.9463396364218181, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.4135171000263379, - "sentence_nr": 5 + "score": 0.9878765474230741, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.7050151549073953, - "sentence_nr": 5 + "score": 0.9958930217841712, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.3345794609803645, - "sentence_nr": 5 + "score": 0.9878765474230741, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.7523344918083558, - "sentence_nr": 5 + "score": 0.9958930217841712, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.46997395980026974, - "sentence_nr": 5 + "score": 0.6537803976048806, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.8114935753258365, - "sentence_nr": 5 + "score": 0.7742226743967544, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.3751840463233443, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.690216773228096, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.4547722460981925, - "sentence_nr": 5 + "score": 0.9878765474230741, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.793631811653261, - "sentence_nr": 5 + "score": 0.9958930217841712, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.738238064391125, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.44338575968779337, - "sentence_nr": 5 + "score": 0.8637738769684485, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.34537865578685034, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.6736450219247083, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", - "metric": "bleu", - "score": 0.617939643800199, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", - "metric": "chrf", - "score": 0.8665162960307256, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.4165530720734658, - "sentence_nr": 5 + "score": 0.40673971192998765, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.7027805129995731, - "sentence_nr": 5 + "score": 0.6897190926100627, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.4027788021844849, - "sentence_nr": 5 + "score": 0.3707525915417785, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.6872835607174038, - "sentence_nr": 5 + "score": 0.6481906761834414, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.40245827940445855, - "sentence_nr": 5 + "score": 0.4405434565828979, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.6664090181705107, - "sentence_nr": 5 + "score": 0.6872423435487918, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.34070519401434163, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.5552412314880962, - "sentence_nr": 5 + "score": 0.6376396416993303, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.6976333495952621, - "sentence_nr": 5 + "score": 0.35601247064914876, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.8331572107884448, - "sentence_nr": 5 + "score": 0.6528728847159075, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.40245827940445855, - "sentence_nr": 5 + "score": 0.3535276144718208, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.6664090181705107, - "sentence_nr": 5 + "score": 0.6399338911163, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.5440766840557734, - "sentence_nr": 5 + "score": 0.29793763405666984, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.7530101164980872, - "sentence_nr": 5 + "score": 0.5878658443031616, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.482878209362615, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.7054264546871626, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.22447836580911282, - "sentence_nr": 5 + "score": 0.2737856702715042, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.635962708232662, - "sentence_nr": 5 + "score": 0.6091441790112126, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.3964122180109575, - "sentence_nr": 5 + "score": 0.2060740184460064, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.584540734626554, - "sentence_nr": 5 + "score": 0.5566122985381202, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.6537813760269277, - "sentence_nr": 5 + "score": 0.3764145740138264, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.7617489761353242, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", - "metric": "bleu", - "score": 0.5440766840557734, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", - "metric": "chrf", - "score": 0.7530101164980872, - "sentence_nr": 5 + "score": 0.660406350984819, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.41307323705325416, - "sentence_nr": 5 + "score": 0.703373719677874, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5785653391533346, - "sentence_nr": 5 + "score": 0.7784050705257474, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.5248587176134882, - "sentence_nr": 5 + "score": 0.469958733898233, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.6664855309004869, - "sentence_nr": 5 + "score": 0.5843756060033074, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.5248587176134882, - "sentence_nr": 5 + "score": 0.6034601376302852, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.6664855309004869, - "sentence_nr": 5 + "score": 0.7074074363255227, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.32797138117025904, - "sentence_nr": 5 + "score": 0.5200692650497809, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.48645628248697975, - "sentence_nr": 5 + "score": 0.6586847274336591, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.498704623570665, - "sentence_nr": 5 + "score": 0.5724622291345857, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.6478746389895599, - "sentence_nr": 5 + "score": 0.6818279156433621, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.2280299254440877, - "sentence_nr": 5 + "score": 0.5439803529976158, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.4447177675003817, - "sentence_nr": 5 + "score": 0.657598922173703, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.4549681528678131, - "sentence_nr": 5 + "score": 0.5447800851151646, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.6164314607426773, - "sentence_nr": 5 + "score": 0.6845859707632784, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.5073374020380702, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.05173101600908794, - "sentence_nr": 5 - }, + "score": 0.6713451965832894, + "sentence_nr": 0 + }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.498704623570665, - "sentence_nr": 5 + "score": 0.63457045351243, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.6478746389895599, - "sentence_nr": 5 + "score": 0.760139991277541, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.2747017431249852, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.062313574266204104, - "sentence_nr": 5 + "score": 0.4456826256200505, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.35012358768277246, - "sentence_nr": 5 + "score": 0.4834220366915352, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5816664251371266, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", - "metric": "bleu", - "score": 0.6412236038065892, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", - "metric": "chrf", - "score": 0.7394688674718397, - "sentence_nr": 5 + "score": 0.678862671476654, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.17729842264695017, - "sentence_nr": 5 + "score": 0.3489926819498492, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.4103582047611184, - "sentence_nr": 5 + "score": 0.5715668842319502, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.34895836374229405, - "sentence_nr": 5 + "score": 0.2786169604662155, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.4767378358574124, - "sentence_nr": 5 + "score": 0.5267252236203236, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.4010889714538991, - "sentence_nr": 5 + "score": 0.35446322216812387, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.5642546048162433, - "sentence_nr": 5 + "score": 0.5208748527454148, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.30145280436636923, - "sentence_nr": 5 + "score": 0.2679728611808951, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.4729753929525169, - "sentence_nr": 5 + "score": 0.466691372759197, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.35210829264331733, - "sentence_nr": 5 + "score": 0.2676232320051144, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.5239651686730163, - "sentence_nr": 5 + "score": 0.5440246804235981, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.2328598163544389, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.38732841080078323, - "sentence_nr": 5 + "score": 0.46604753989124215, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 5 + "score": 0.3249989390135794, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.38584042605633057, - "sentence_nr": 5 + "score": 0.5532261012182782, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 5 + "score": 0.04043358226234485, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 + "score": 0.178130317890244, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.35548377438423956, - "sentence_nr": 5 + "score": 0.31400830186120793, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.5150536106864393, - "sentence_nr": 5 + "score": 0.5436299115609682, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.180038135256147, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.35098096867859657, - "sentence_nr": 5 + "score": 0.42760668286140896, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.2702404890575711, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.38561859819475125, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", - "metric": "bleu", - "score": 0.21992062963866632, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", - "metric": "chrf", - "score": 0.42619491111236635, - "sentence_nr": 5 + "score": 0.505948742808373, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.3410244689880313, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.5816669416914216, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.8958039312312598, - "sentence_nr": 5 + "score": 0.4234343012313773, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.9382091007325469, - "sentence_nr": 5 + "score": 0.6625289905598352, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.6433799261824519, - "sentence_nr": 5 + "score": 0.3885765192359091, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.81037697367602, - "sentence_nr": 5 + "score": 0.6554470157301392, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.5403356450597102, - "sentence_nr": 5 + "score": 0.38108864298853723, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.7639130574395125, - "sentence_nr": 5 + "score": 0.6485553379227472, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.833078701050083, - "sentence_nr": 5 + "score": 0.3765213224289163, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.9482515348146272, - "sentence_nr": 5 + "score": 0.6469521424555786, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.5815699184831468, - "sentence_nr": 5 + "score": 0.36247466608675993, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.8158797976578578, - "sentence_nr": 5 + "score": 0.6011484151165629, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.833078701050083, - "sentence_nr": 5 + "score": 0.4079926989572759, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.9482515348146272, - "sentence_nr": 5 + "score": 0.6592699047005666, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 5 + "score": 0.30494536158123264, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 + "score": 0.5857538582551342, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.8097013849965253, - "sentence_nr": 5 + "score": 0.26075652499067425, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.8896806148658662, - "sentence_nr": 5 + "score": 0.5605305670545515, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.6161420984415483, - "sentence_nr": 5 + "score": 0.13339786348528015, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.6945809713247855, - "sentence_nr": 5 + "score": 0.46778689835182324, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.833078701050083, - "sentence_nr": 5 + "score": 0.2602768294269028, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.9482515348146272, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", - "metric": "bleu", - "score": 0.833078701050083, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", - "metric": "chrf", - "score": 0.9482515348146272, - "sentence_nr": 5 + "score": 0.5310567541651178, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.4500531895417844, - "sentence_nr": 5 + "score": 0.4324680011853555, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.43027065541050147, - "sentence_nr": 5 + "score": 0.5877600878871951, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.4933292241270431, - "sentence_nr": 5 + "score": 0.4493940083619696, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.5225247297523148, - "sentence_nr": 5 + "score": 0.6230960824462234, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.23487811400114963, - "sentence_nr": 5 + "score": 0.4116575552858724, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.4062284746604391, - "sentence_nr": 5 + "score": 0.5665759692366567, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.3763743474188506, - "sentence_nr": 5 + "score": 0.3533147318401534, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.4017565065239436, - "sentence_nr": 5 + "score": 0.5972951640947346, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.5805399561362194, - "sentence_nr": 5 + "score": 0.44219732271776674, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.4810464260105228, - "sentence_nr": 5 + "score": 0.6193429426274062, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.21510618470971102, - "sentence_nr": 5 + "score": 0.41852674506584964, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.41380245501613677, - "sentence_nr": 5 + "score": 0.6035836275599532, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.3489214645008508, - "sentence_nr": 5 + "score": 0.433056028408153, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.4519496200669607, - "sentence_nr": 5 + "score": 0.6068335862669254, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.15415064977510756, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.48192435154139673, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.49539605131242165, - "sentence_nr": 5 + "score": 0.37696437834356655, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.44728880966754114, - "sentence_nr": 5 + "score": 0.5617832488367239, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.1810501938660849, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.4825434542324755, - "sentence_nr": 5 + "score": 0.4154005351684647, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.5617848264135781, - "sentence_nr": 5 + "score": 0.5018386916018573, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.5570886750436929, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", - "metric": "bleu", - "score": 0.49539605131242165, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", - "metric": "chrf", - "score": 0.44728880966754114, - "sentence_nr": 5 + "score": 0.6673891538739279, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.6947677373756656, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.5212982931053122, - "sentence_nr": 5 + "score": 0.7941300666655116, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.35319015092357736, - "sentence_nr": 5 + "score": 0.6412098671661826, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.5822934956325967, - "sentence_nr": 5 + "score": 0.7665040244283648, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.35319015092357736, - "sentence_nr": 5 + "score": 0.6045639360711837, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.5822934956325967, - "sentence_nr": 5 + "score": 0.7576570567798335, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.5438238038060724, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.5095895501997145, - "sentence_nr": 5 + "score": 0.7060850657954441, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.5064127215831256, - "sentence_nr": 5 + "score": 0.6638859619095425, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.6516332048338376, - "sentence_nr": 5 + "score": 0.7874224590682172, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.35319015092357736, - "sentence_nr": 5 + "score": 0.6543739381048754, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.5822934956325967, - "sentence_nr": 5 + "score": 0.7768522458527362, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.5064127215831256, - "sentence_nr": 5 + "score": 0.5886489119980793, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.6516332048338376, - "sentence_nr": 5 + "score": 0.7611944709376643, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 5 + "score": 0.5162974106233954, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 + "score": 0.748545216109632, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.6561309661336588, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.5212982931053122, - "sentence_nr": 5 + "score": 0.7849652413082676, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.24311976929452217, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.4647137781420131, - "sentence_nr": 5 + "score": 0.5332455436874994, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.5064127215831256, - "sentence_nr": 5 + "score": 0.5821415139431849, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.6516332048338376, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", - "metric": "bleu", - "score": 0.5062835959915031, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", - "metric": "chrf", - "score": 0.649790991083579, - "sentence_nr": 5 + "score": 0.7329539842616807, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.3855522725905196, - "sentence_nr": 5 + "score": 0.3861375213265022, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.587260566914102, - "sentence_nr": 5 + "score": 0.5122109329134508, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.4426623526629488, - "sentence_nr": 5 + "score": 0.32539921259497445, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.6368371029698285, - "sentence_nr": 5 + "score": 0.5133457276293165, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.3666340989897011, - "sentence_nr": 5 + "score": 0.375079512706724, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.5782960278998768, - "sentence_nr": 5 + "score": 0.5371301483272257, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.32892676518285585, - "sentence_nr": 5 + "score": 0.48456463733283883, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.5800761309604682, - "sentence_nr": 5 + "score": 0.5906105668854662, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.4845227999608418, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.4592978565863154, - "sentence_nr": 5 + "score": 0.5968050469845498, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.29456425448249246, - "sentence_nr": 5 + "score": 0.4494703452336724, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.5691358329649412, - "sentence_nr": 5 + "score": 0.5696298539086213, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.15083364266523736, - "sentence_nr": 5 + "score": 0.4295014616287586, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.4714472446464193, - "sentence_nr": 5 + "score": 0.5957510678657648, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", @@ -38918,7 +36363,7 @@ "task": "translation_to", "metric": "bleu", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", @@ -38926,4151 +36371,3799 @@ "task": "translation_to", "metric": "chrf", "score": 0.0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.25376192011637994, - "sentence_nr": 5 + "score": 0.47727404239076743, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.47199515498282607, - "sentence_nr": 5 + "score": 0.6081867525552255, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.01656048993031311, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.26349889713915725, - "sentence_nr": 5 + "score": 0.11323797713183678, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.12514328743841557, - "sentence_nr": 5 + "score": 0.4124136266900752, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.34961836061490087, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.20280903279060938, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.49374400062508916, - "sentence_nr": 5 + "score": 0.5714981155807188, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.4354194543126476, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.6656175329857803, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.5941815558294462, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.7983203558832153, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.4524360012660941, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.6715594086896963, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.3355428780074198, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.3263040636562357, - "sentence_nr": 6 + "score": 0.6348737822745005, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.47269414327373943, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.7451099574206652, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.4216321717480383, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.6897801155729197, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.43249073282965117, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.6444897357478733, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.28607621833944535, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.6463343859934777, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.415412929081021, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.6437233280372863, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.27979942810555614, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.5926090214839685, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.2995846558655927, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.599404823793189, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.5561195823338172, - "sentence_nr": 6 + "score": 0.3983045920261205, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.5362935676066722, - "sentence_nr": 6 + "score": 0.6521777108605036, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.5803515898273521, - "sentence_nr": 6 + "score": 0.268203877206376, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.5422220468910552, - "sentence_nr": 6 + "score": 0.5791297455379081, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.4204739940979302, - "sentence_nr": 6 + "score": 0.22766536738739604, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.5173824078732066, - "sentence_nr": 6 + "score": 0.5321260120854782, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.4177866849157374, - "sentence_nr": 6 + "score": 0.14981855747310632, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.5243375045345786, - "sentence_nr": 6 + "score": 0.5073561650857479, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.3450219162509876, - "sentence_nr": 6 + "score": 0.4143449478847806, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.3993348853061597, - "sentence_nr": 6 + "score": 0.6736569430464404, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.30978068501889056, - "sentence_nr": 6 + "score": 0.20430195455630867, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.28295274449167956, - "sentence_nr": 6 + "score": 0.5336103118914343, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.33498389276277546, - "sentence_nr": 6 + "score": 0.38187621052323667, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.3296536654279081, - "sentence_nr": 6 + "score": 0.6423162801762098, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.42670493571995677, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.4251985835808586, - "sentence_nr": 6 + "score": 0.2550511802903305, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.2368693821608258, - "sentence_nr": 6 + "score": 0.3491792142373769, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.320909989176825, - "sentence_nr": 6 + "score": 0.6341002242155772, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.3223833286593516, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.39475158383309167, - "sentence_nr": 6 + "score": 0.001753155680224404, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.46146548771819573, - "sentence_nr": 6 + "score": 0.19348048287912908, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", - "metric": "chrf", - "score": 0.4019452398054806, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", - "metric": "bleu", - "score": 0.3973758238312869, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.3937751818988156, - "sentence_nr": 6 + "score": 0.5498004622015639, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.33438299066966715, - "sentence_nr": 6 + "score": 0.17329990217896798, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.5409759573191787, - "sentence_nr": 6 + "score": 0.583781848253705, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.37854068916316835, - "sentence_nr": 6 + "score": 0.3373556859342653, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.5743796566387722, - "sentence_nr": 6 + "score": 0.6738789170291255, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.37854068916316835, - "sentence_nr": 6 + "score": 0.4059531821242849, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.5768306472334509, - "sentence_nr": 6 + "score": 0.7392702727394752, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.32134504358579785, - "sentence_nr": 6 + "score": 0.4658089028058827, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.5076725973953424, - "sentence_nr": 6 + "score": 0.7584810978753719, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.2562150245540302, - "sentence_nr": 6 + "score": 0.3720001389308944, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.47046477830594896, - "sentence_nr": 6 + "score": 0.7013845085492982, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.27182849679730653, - "sentence_nr": 6 + "score": 0.3649036594689345, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.5497265770945076, - "sentence_nr": 6 + "score": 0.6887265942100023, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.3509258729305825, - "sentence_nr": 6 + "score": 0.33490167163730483, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.5379703355059909, - "sentence_nr": 6 + "score": 0.6738850345838133, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.19135220621724439, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.4724042181215377, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.38936263771250235, - "sentence_nr": 6 + "score": 0.45475498440082013, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.5443518219250745, - "sentence_nr": 6 + "score": 0.7890162768101745, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.28555753499459907, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.3416445560351976, - "sentence_nr": 6 + "score": 0.001410039481105471, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.4049402235047407, - "sentence_nr": 6 + "score": 0.33415579274035306, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", - "metric": "chrf", - "score": 0.5871644977560334, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", - "metric": "bleu", - "score": 0.3060614307377341, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.4885853123228743, - "sentence_nr": 6 + "score": 0.676792081658235, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.45506803308128024, - "sentence_nr": 6 + "score": 0.4446730260276365, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.6477506541284608, - "sentence_nr": 6 + "score": 0.755908468739292, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.592313615748771, - "sentence_nr": 6 + "score": 0.43478132178539325, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.7382416555842614, - "sentence_nr": 6 + "score": 0.7018196083546635, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.592313615748771, - "sentence_nr": 6 + "score": 0.3877278798081724, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.7568286018427376, - "sentence_nr": 6 + "score": 0.6833268596614586, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.5521710658453207, - "sentence_nr": 6 + "score": 0.37529811264444257, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.7317828775912516, - "sentence_nr": 6 + "score": 0.6378039240510625, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.5521710658453207, - "sentence_nr": 6 + "score": 0.4494597917400064, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.7317828775912516, - "sentence_nr": 6 + "score": 0.7073438158390085, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.5521710658453207, - "sentence_nr": 6 + "score": 0.35036401492739216, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.7317828775912516, - "sentence_nr": 6 + "score": 0.6426566938482169, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.4226108216696222, - "sentence_nr": 6 + "score": 0.46625439194767143, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.6331414171574684, - "sentence_nr": 6 + "score": 0.7058896647604742, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.5521710658453207, - "sentence_nr": 6 + "score": 0.05246151962748318, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.7317828775912516, - "sentence_nr": 6 + "score": 0.3468871001472823, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.4226108216696222, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.6331414171574684, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.602867050301643, - "sentence_nr": 6 + "score": 0.17314327152587822, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.7367363357155757, - "sentence_nr": 6 + "score": 0.5307830562814513, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.5521710658453207, - "sentence_nr": 6 + "score": 0.42907605083910527, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.7317828775912516, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", - "metric": "bleu", - "score": 0.45506803308128024, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", - "metric": "chrf", - "score": 0.6608670586710113, - "sentence_nr": 6 + "score": 0.6942504376084577, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.4018202851356865, - "sentence_nr": 6 + "score": 0.28038937103419465, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.6003256951549871, - "sentence_nr": 6 + "score": 0.5038494750471553, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 6 + "score": 0.25718495991757767, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.7542976177437886, - "sentence_nr": 6 + "score": 0.4369258127692308, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.4018202851356865, - "sentence_nr": 6 + "score": 0.3035527425754751, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.6003256951549871, - "sentence_nr": 6 + "score": 0.47611727660210634, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.526357446896968, - "sentence_nr": 6 + "score": 0.003762227238525207, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.4018202851356865, - "sentence_nr": 6 + "score": 0.3268845394941929, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.6003256951549871, - "sentence_nr": 6 + "score": 0.5177343510524726, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.6666823117022298, - "sentence_nr": 6 + "score": 0.3293368889413212, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.7571125338649978, - "sentence_nr": 6 + "score": 0.4868090313482157, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.3864572432237816, - "sentence_nr": 6 + "score": 0.3597572114060291, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.5849342936087653, - "sentence_nr": 6 + "score": 0.5404525494108054, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.5406438522344627, - "sentence_nr": 6 + "score": 0.06607086144978286, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.6520694800788391, - "sentence_nr": 6 + "score": 0.2742600239887939, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.6977240390484037, - "sentence_nr": 6 + "score": 0.3276986983391062, + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.818984467219358, - "sentence_nr": 6 + "score": 0.47017897902052863, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.7019499719108448, - "sentence_nr": 6 + "score": 0.24830299714202062, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.8450280883390384, - "sentence_nr": 6 + "score": 0.39688820338442954, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.7397087417978795, - "sentence_nr": 6 + "score": 0.31950891970955725, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", - "metric": "chrf", - "score": 0.8865031414920428, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", - "metric": "bleu", - "score": 0.5364361872901348, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.7164775580022767, - "sentence_nr": 6 + "score": 0.48966432562692086, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.37717457428685847, - "sentence_nr": 6 + "score": 0.3880515884750121, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.5554130492458337, - "sentence_nr": 6 + "score": 0.6587916715823183, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.5142726846179982, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.31598923484911084, - "sentence_nr": 6 + "score": 0.7344716263345912, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.18953162992336403, - "sentence_nr": 6 + "score": 0.6066498620510337, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.45876745950873354, - "sentence_nr": 6 + "score": 0.7812137754227463, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.2383770504614087, - "sentence_nr": 6 + "score": 0.4342750764549485, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.466645869611307, - "sentence_nr": 6 + "score": 0.7115011221714777, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.18207052811092134, - "sentence_nr": 6 + "score": 0.28822910320599077, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.4504432021668592, - "sentence_nr": 6 + "score": 0.6087031937056202, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.14291173574075158, - "sentence_nr": 6 + "score": 0.5011893046413795, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.45184360988354105, - "sentence_nr": 6 + "score": 0.7089203664957927, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.3406014428030703, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.34419514726440925, - "sentence_nr": 6 + "score": 0.6514548680180557, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 6 + "score": 0.4738611152748619, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 + "score": 0.7293997939434749, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.2128497674847141, - "sentence_nr": 6 + "score": 0.4492327786840591, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.438591227628555, - "sentence_nr": 6 + "score": 0.6917786880624969, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.16170596160446446, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.27743662258385243, - "sentence_nr": 6 + "score": 0.5105553787243322, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.21081851067789198, - "sentence_nr": 6 + "score": 0.44571331402556874, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", - "metric": "chrf", - "score": 0.4042801758173556, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", - "metric": "bleu", - "score": 0.2603965252496297, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.5017583605709452, - "sentence_nr": 6 + "score": 0.67235059873138, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.7629273292796576, - "sentence_nr": 6 + "score": 0.4487746167679644, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.8510385544954956, - "sentence_nr": 6 + "score": 0.4476730201191672, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.8725129388059689, - "sentence_nr": 6 + "score": 0.2836623400057614, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.9495292423959529, - "sentence_nr": 6 + "score": 0.29147337237183046, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.8725129388059689, - "sentence_nr": 6 + "score": 0.2775905064108025, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.90941532255964, - "sentence_nr": 6 + "score": 0.3165767280260291, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.8725129388059689, - "sentence_nr": 6 + "score": 0.28912432952036243, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.9495292423959529, - "sentence_nr": 6 + "score": 0.31119603942667584, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.6425503166524515, - "sentence_nr": 6 + "score": 0.4094748015187699, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.8078891929749037, - "sentence_nr": 6 + "score": 0.4288513205758089, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.6425503166524515, - "sentence_nr": 6 + "score": 0.35430370029300495, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.8078891929749037, - "sentence_nr": 6 + "score": 0.3864890531682498, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.6888074582865503, - "sentence_nr": 6 + "score": 0.37405604379521823, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.8055061207769505, - "sentence_nr": 6 + "score": 0.409758558051675, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.6888074582865503, - "sentence_nr": 6 + "score": 0.4082186610925126, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.8151715541788959, - "sentence_nr": 6 + "score": 0.4042514356445265, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.8725129388059689, - "sentence_nr": 6 + "score": 0.3835611536417376, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.9495292423959529, - "sentence_nr": 6 + "score": 0.41360439536029553, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.6978429290017016, - "sentence_nr": 6 + "score": 0.19333361726926898, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.7717858931341154, - "sentence_nr": 6 + "score": 0.28056620588920506, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.3541652369790141, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", - "metric": "bleu", - "score": 0.8725129388059689, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.9495292423959529, - "sentence_nr": 6 + "score": 0.38739546241623046, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.49349163706233623, - "sentence_nr": 6 + "score": 0.2113054108348111, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.694445271037971, - "sentence_nr": 6 + "score": 0.44238229987470284, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.3037643089519314, - "sentence_nr": 6 + "score": 0.26207903587847736, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.5183662698462751, - "sentence_nr": 6 + "score": 0.50073123223194, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.18376711147874328, - "sentence_nr": 6 + "score": 0.16098073041469485, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.3981272326046884, - "sentence_nr": 6 + "score": 0.39710375075643284, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.340960560695735, - "sentence_nr": 6 + "score": 0.11465623153412556, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.5217663812589132, - "sentence_nr": 6 + "score": 0.4497512968651573, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.2147607499133801, - "sentence_nr": 6 + "score": 0.16950698451288215, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.3976144917079093, - "sentence_nr": 6 + "score": 0.48668984177868246, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.3941175366175992, - "sentence_nr": 6 + "score": 0.23516650478671175, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.5506555496793699, - "sentence_nr": 6 + "score": 0.4885052730214997, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.3339087646492816, - "sentence_nr": 6 + "score": 0.23477037244978113, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.5995623358499859, - "sentence_nr": 6 + "score": 0.5165217514090542, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 6 + "score": 0.21585895003952446, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 + "score": 0.48140875917864023, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.4122974402951816, - "sentence_nr": 6 + "score": 0.2711981710401392, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.6170911690364487, - "sentence_nr": 6 + "score": 0.5555651822168547, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.168777027092081, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.2643854378698732, - "sentence_nr": 6 + "score": 0.4368481165562445, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.4066689638009577, - "sentence_nr": 6 + "score": 0.22415590998535484, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", - "metric": "chrf", - "score": 0.5267604642487788, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", - "metric": "bleu", - "score": 0.18485450668488082, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.46452791098932883, - "sentence_nr": 6 + "score": 0.49981791926190994, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.8253498772794055, - "sentence_nr": 6 + "score": 0.4556160153884204, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.8529564805429163, - "sentence_nr": 6 + "score": 0.6661994452325181, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.7944837206494969, - "sentence_nr": 6 + "score": 0.3216756020053242, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.8784531740275225, - "sentence_nr": 6 + "score": 0.6141241026166391, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.7944837206494969, - "sentence_nr": 6 + "score": 0.43369048469848437, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.8784531740275225, - "sentence_nr": 6 + "score": 0.6586872889176818, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.6941268297866866, - "sentence_nr": 6 + "score": 0.4098419224543478, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.7679844670813416, - "sentence_nr": 6 + "score": 0.6358736384460296, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.4527112325797497, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.6708989870027865, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.403282335120862, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.6319223068216205, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.7072172847953276, - "sentence_nr": 6 + "score": 0.44234482870142466, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.7914639887327892, - "sentence_nr": 6 + "score": 0.6617260327319175, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.5642761727828352, - "sentence_nr": 6 + "score": 0.3870043562676652, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.7668993520558344, - "sentence_nr": 6 + "score": 0.631536050216449, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.465541200947692, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.6735988737803571, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.175538121835486, - "sentence_nr": 6 + "score": 0.37462930793644134, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.44197441533246407, - "sentence_nr": 6 + "score": 0.6297969107438809, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.45236333724230443, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", - "metric": "bleu", - "score": 0.7072172847953276, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.7914639887327892, - "sentence_nr": 6 + "score": 0.6557435747309683, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.5828833474188783, - "sentence_nr": 6 + "score": 0.37570809340937233, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.7908226509294533, - "sentence_nr": 6 + "score": 0.6339141734561076, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.38694317759010316, - "sentence_nr": 6 + "score": 0.559332422592187, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5953878513137957, - "sentence_nr": 6 + "score": 0.733291190094771, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.2620499195763038, - "sentence_nr": 6 + "score": 0.48457382450313924, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.48937240022909234, - "sentence_nr": 6 + "score": 0.7144409873446065, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.22128776529156546, - "sentence_nr": 6 + "score": 0.4881942815467274, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.4999323991212311, - "sentence_nr": 6 + "score": 0.6662053431593723, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.23887527917609022, - "sentence_nr": 6 + "score": 0.3977038258772401, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5924993690004501, - "sentence_nr": 6 + "score": 0.6202897864314184, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.2745762486209681, - "sentence_nr": 6 + "score": 0.5024073848733999, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5730023382770898, - "sentence_nr": 6 + "score": 0.6805608953669952, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.3070898761263382, - "sentence_nr": 6 + "score": 0.4198435178617755, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5791648909423264, - "sentence_nr": 6 + "score": 0.6615330486958915, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", "score": 0, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", "score": 0.0, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.31600229153053044, - "sentence_nr": 6 + "score": 0.4423392581565186, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5374439094267343, - "sentence_nr": 6 + "score": 0.663370348519268, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.42298863290550076, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.27733310601709266, - "sentence_nr": 6 + "score": 0.6260112466527037, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.4703077247331959, - "sentence_nr": 6 + "score": 0.422714343026006, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.7203673717155472, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.3029928206533524, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5600962993297164, - "sentence_nr": 6 + "score": 0.6266965858252854, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.49023502313124495, - "sentence_nr": 7 + "score": 0.3446592076818278, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.7638414724136195, - "sentence_nr": 7 + "score": 0.5819912583909785, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.4424906782646928, - "sentence_nr": 7 + "score": 0.23270938096152352, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.705507971295129, - "sentence_nr": 7 + "score": 0.4490269267329941, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.41452787844405115, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.6973605663974715, - "sentence_nr": 7 + "score": 0.11634468327243708, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.41032302768839235, - "sentence_nr": 7 + "score": 0.1766119944524977, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.6634154486532953, - "sentence_nr": 7 + "score": 0.3986479587107995, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.44711013370113256, - "sentence_nr": 7 + "score": 0.3167585643537871, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.7319347493436125, - "sentence_nr": 7 + "score": 0.5076869840147092, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.4275810014748856, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.6481070648129139, - "sentence_nr": 7 + "score": 0.3292454551002283, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.46409619603227925, - "sentence_nr": 7 + "score": 0.25751023494151143, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.7474126325188408, - "sentence_nr": 7 + "score": 0.4697665795408892, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.0422060018445322, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.27278456488226854, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.42803425515420807, - "sentence_nr": 7 + "score": 0.3212983212315964, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.7260183442795153, - "sentence_nr": 7 + "score": 0.5599573621112933, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.23962966980870534, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.5138361143222901, - "sentence_nr": 7 + "score": 0.2560040742784669, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.3597862823053843, - "sentence_nr": 7 + "score": 0.09236883467211593, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", - "metric": "chrf", - "score": 0.7212767938301806, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", - "metric": "bleu", - "score": 0.73702431000915, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.8608238485042174, - "sentence_nr": 7 + "score": 0.3472719365557752, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.07860105393900486, - "sentence_nr": 7 + "score": 0.529527758323629, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.09678377693633947, - "sentence_nr": 7 + "score": 0.6540432510655854, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.11601141307045003, - "sentence_nr": 7 + "score": 0.49704232910799745, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.21671187566850864, - "sentence_nr": 7 + "score": 0.6453248294274054, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.2390076354901812, - "sentence_nr": 7 + "score": 0.49704232910799745, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.33570154125476054, - "sentence_nr": 7 + "score": 0.6453248294274054, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.1371661844308428, - "sentence_nr": 7 + "score": 0.5494410974163585, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.23455679137513727, - "sentence_nr": 7 + "score": 0.6853937472090788, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.21326369102393236, - "sentence_nr": 7 + "score": 0.44114781827798216, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.24781828193168487, - "sentence_nr": 7 + "score": 0.6241365710582877, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.12394460940540938, - "sentence_nr": 7 + "score": 0.4286794450695727, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.26662620996190534, - "sentence_nr": 7 + "score": 0.6365023289177463, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.14891504773093184, - "sentence_nr": 7 + "score": 0.4744991305294048, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.2618919111168516, - "sentence_nr": 7 + "score": 0.6720481841701565, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "bleu", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "chrf", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.1406879778177777, - "sentence_nr": 7 + "score": 0.4946489712934811, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.24227488458492952, - "sentence_nr": 7 + "score": 0.6635756951391838, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.04114212836378985, - "sentence_nr": 7 + "score": 0.32345422777393923, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.1070604518443882, - "sentence_nr": 7 + "score": 0.5652905380017423, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.26411327741267115, - "sentence_nr": 7 + "score": 0.473424955479643, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", - "metric": "chrf", - "score": 0.2898946819245943, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", - "metric": "bleu", - "score": 0.24248913939867353, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.26831514794764233, - "sentence_nr": 7 + "score": 0.6791725069180572, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.35015224715252113, - "sentence_nr": 7 + "score": 0.3479698393875884, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.5701648579139658, - "sentence_nr": 7 + "score": 0.5760833125751785, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.3349252032650068, - "sentence_nr": 7 + "score": 0.24373253714463095, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.5908087431574293, - "sentence_nr": 7 + "score": 0.49482039214573803, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.3258812297722265, - "sentence_nr": 7 + "score": 0.312050635062637, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.5753985304712377, - "sentence_nr": 7 + "score": 0.5390444512132623, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.2883113322808919, - "sentence_nr": 7 + "score": 0.24229889794871173, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.5835478395499368, - "sentence_nr": 7 + "score": 0.4853505495636382, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.2577716972449781, - "sentence_nr": 7 + "score": 0.3542266508664836, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.5171901208397282, - "sentence_nr": 7 + "score": 0.5643413028542406, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.04631732527976412, - "sentence_nr": 7 + "score": 0.18282456123768265, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.21558480215297515, - "sentence_nr": 7 + "score": 0.47540661243586124, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.23287896954139942, - "sentence_nr": 7 + "score": 0.2921982022041547, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.5168980964497457, - "sentence_nr": 7 + "score": 0.5264166199754001, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "bleu", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "chrf", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.24643585808835486, - "sentence_nr": 7 + "score": 0.3142825719425009, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.5459613462641708, - "sentence_nr": 7 + "score": 0.584353897647861, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.004597701149425286, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.23325505861671614, - "sentence_nr": 7 + "score": 0.17473028966988555, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", - "metric": "chrf", - "score": 0.503948422566616, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", - "metric": "bleu", - "score": 0.29496488310853664, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.6102579121305785, - "sentence_nr": 7 + "score": 0.400425072418037, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.3449058130015412, - "sentence_nr": 7 + "score": 0.2988697040013311, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.5365619830343804, - "sentence_nr": 7 + "score": 0.5442522660489195, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.29688845677442144, - "sentence_nr": 7 + "score": 0.500703635659656, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.5494319015457763, - "sentence_nr": 7 + "score": 0.6501904887399698, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.3257602417321556, - "sentence_nr": 7 + "score": 0.4876463179677598, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.5666596539835803, - "sentence_nr": 7 + "score": 0.6113405963585182, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.22511140285349446, - "sentence_nr": 7 + "score": 0.5199813503697857, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.553839023223762, - "sentence_nr": 7 + "score": 0.6584629522606407, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.3274016883618531, - "sentence_nr": 7 + "score": 0.3995439803178399, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.5570399656004248, - "sentence_nr": 7 + "score": 0.6021193793256325, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.2211880505010663, - "sentence_nr": 7 + "score": 0.40656183899584336, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.4573855767208229, - "sentence_nr": 7 + "score": 0.5890799945028116, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.399477857457097, - "sentence_nr": 7 + "score": 0.528547004876945, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.5617218895807364, - "sentence_nr": 7 + "score": 0.6459593469343872, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.10125638619893, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.3191349966700777, - "sentence_nr": 7 + "score": 0.45002572171222577, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.5329750656706205, - "sentence_nr": 7 + "score": 0.620458099259989, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.09175663647957763, - "sentence_nr": 7 + "score": 0.27875207406965286, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.32499940569388225, - "sentence_nr": 7 + "score": 0.5095968928696253, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.162496560019558, - "sentence_nr": 7 + "score": 0.5576102993622991, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", - "metric": "chrf", - "score": 0.4637542439867255, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", - "metric": "bleu", - "score": 0.3764579689992314, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.5818293561882879, - "sentence_nr": 7 + "score": 0.6640761861237344, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.25376032254696296, - "sentence_nr": 7 + "score": 0.43330223254789785, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.5334329403985332, - "sentence_nr": 7 + "score": 0.5564499529933307, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.20039141607873007, - "sentence_nr": 7 + "score": 0.1466607445607986, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.36123312088832493, - "sentence_nr": 7 + "score": 0.36552963821230766, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.20527494029659898, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.0564437248458207, - "sentence_nr": 7 + "score": 0.43586475049009993, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.15026037463138217, - "sentence_nr": 7 + "score": 0.3047577636054668, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.4303467795130825, - "sentence_nr": 7 + "score": 0.48318512703629857, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.207314191412716, - "sentence_nr": 7 + "score": 0.2993081268625724, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.4360555836773355, - "sentence_nr": 7 + "score": 0.47777429598730525, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.08070632004040007, - "sentence_nr": 7 + "score": 0.12340057804403023, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.35911678207067443, - "sentence_nr": 7 + "score": 0.3331532512757645, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.4398690431123469, - "sentence_nr": 7 + "score": 0.3196191720459511, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.6046405925677363, - "sentence_nr": 7 + "score": 0.4758634857690128, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.30594422683254774, - "sentence_nr": 7 + "score": 0.06692436199443168, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.5439400651386468, - "sentence_nr": 7 + "score": 0.23947877713211682, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.24968557018529272, - "sentence_nr": 7 + "score": 0.3095674062940522, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.5511430757077329, - "sentence_nr": 7 + "score": 0.49847201920427264, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.036093834539820895, - "sentence_nr": 7 + "score": 0.08748671768279999, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.1939545119098376, - "sentence_nr": 7 + "score": 0.01250047619586174, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", - "metric": "chrf", - "score": 0.48320036215224016, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", - "metric": "bleu", - "score": 0.4686201168430013, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.6206226468359836, - "sentence_nr": 7 + "score": 0.12383271014582256, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.32707695373369694, - "sentence_nr": 7 + "score": 0.41807822202441103, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.5166643606783462, - "sentence_nr": 7 + "score": 0.577545891208518, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.3069937936246452, - "sentence_nr": 7 + "score": 0.3475258894340562, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.5024648105961349, - "sentence_nr": 7 + "score": 0.5729813197277963, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.3575909322256676, - "sentence_nr": 7 + "score": 0.4610791064938662, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.5409483829147745, - "sentence_nr": 7 + "score": 0.5982792041883009, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.2228729825024992, - "sentence_nr": 7 + "score": 0.4642320266834861, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.4466759653076362, - "sentence_nr": 7 + "score": 0.6555816107178817, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.3235473265529593, - "sentence_nr": 7 + "score": 0.35312894221988256, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.5441122251341168, - "sentence_nr": 7 + "score": 0.5483853808672988, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.2781578586520005, - "sentence_nr": 7 + "score": 0.5148124488217735, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.3796663901127053, - "sentence_nr": 7 + "score": 0.6443063241500353, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.3288143137394372, - "sentence_nr": 7 + "score": 0.6427952895393818, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.5596092732231619, - "sentence_nr": 7 + "score": 0.7611062226622591, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.5772497332582994, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.6924288924783911, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.4122335241726334, - "sentence_nr": 7 + "score": 0.5908781325191875, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.6323888082640657, - "sentence_nr": 7 + "score": 0.7135251491959551, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.12858902882463447, - "sentence_nr": 7 + "score": 0.3202249300680136, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.3148709023566568, - "sentence_nr": 7 + "score": 0.5240877863757325, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.17140863043800483, - "sentence_nr": 7 + "score": 0.3021474642420404, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", - "metric": "chrf", - "score": 0.39681418211766745, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", - "metric": "bleu", - "score": 0.3722876193273297, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.5622245182354383, - "sentence_nr": 7 + "score": 0.49488718577709084, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.3927237741677927, - "sentence_nr": 7 + "score": 0.20238796310390209, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.7451438087039315, - "sentence_nr": 7 + "score": 0.5665467522687606, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.5570357635362685, - "sentence_nr": 7 + "score": 0.4612469192468151, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.8116469942298856, - "sentence_nr": 7 + "score": 0.689540484203802, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.3508597296865219, - "sentence_nr": 7 + "score": 0.4688149931788708, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.6833592152043626, - "sentence_nr": 7 + "score": 0.6935921004770637, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.21259470439331316, - "sentence_nr": 7 + "score": 0.31907001507985117, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.5863866793721222, - "sentence_nr": 7 + "score": 0.6396787125445289, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.26513488970168847, - "sentence_nr": 7 + "score": 0.3473313422920779, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.6029932145447834, - "sentence_nr": 7 + "score": 0.6495220842154038, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.4432782054917686, - "sentence_nr": 7 + "score": 0.15605718228191343, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.7181569025811343, - "sentence_nr": 7 + "score": 0.48559902973042135, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.5112867162620864, - "sentence_nr": 7 + "score": 0.48361343491637904, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.7497537018148864, - "sentence_nr": 7 + "score": 0.7036055457806847, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.32547291366749675, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.6738469931497133, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.4667782254569818, - "sentence_nr": 7 + "score": 0.38248883198762607, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.7754094279644977, - "sentence_nr": 7 + "score": 0.6879676788132258, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.10177931989613292, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.3335479382455017, - "sentence_nr": 7 + "score": 0.13828520206051664, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.3459167762620119, - "sentence_nr": 7 + "score": 0.1943642066325126, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", - "metric": "chrf", - "score": 0.7155724078484401, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", - "metric": "bleu", - "score": 0.8588886238396082, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.9454872546266168, - "sentence_nr": 7 + "score": 0.5437845506934148, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.3826576187198625, - "sentence_nr": 7 + "score": 0.45718638941364104, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.6071841372061269, - "sentence_nr": 7 + "score": 0.6765316874457515, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.3447241447679157, - "sentence_nr": 7 + "score": 0.2842437601270078, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.5531085140985558, - "sentence_nr": 7 + "score": 0.616554183160495, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.3178743908080705, - "sentence_nr": 7 + "score": 0.2790392444035496, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.5513949312034092, - "sentence_nr": 7 + "score": 0.61628786229813, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.1883251048230039, - "sentence_nr": 7 + "score": 0.1969296745448077, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.45303225382772006, - "sentence_nr": 7 + "score": 0.5972281991369082, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.3860973950960897, - "sentence_nr": 7 + "score": 0.3319181496626261, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.6271680934322363, - "sentence_nr": 7 + "score": 0.6296213700542458, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.24967756802190116, - "sentence_nr": 7 + "score": 0.3239007562192641, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.559682285505658, - "sentence_nr": 7 + "score": 0.632048088218684, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.38048895490051765, - "sentence_nr": 7 + "score": 0.3668099116926436, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.6349497388372479, - "sentence_nr": 7 + "score": 0.6472192230833502, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.3437020087720264, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.6253594148257299, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.4324371049196428, - "sentence_nr": 7 + "score": 0.2912353795089198, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.6305851137521162, - "sentence_nr": 7 + "score": 0.6107975281071784, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.2493518765085485, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.01937817581496422, - "sentence_nr": 7 + "score": 0.5378176060849199, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.1805414152287055, - "sentence_nr": 7 + "score": 0.3199241796546606, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", - "metric": "chrf", - "score": 0.4894585255537274, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", - "metric": "bleu", - "score": 0.391005181589246, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.6383641960193629, - "sentence_nr": 7 + "score": 0.6151079711025308, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.31128635710849173, - "sentence_nr": 7 + "score": 0.6260866791475674, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.6304411194127884, - "sentence_nr": 7 + "score": 0.7122695616091047, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.13308561809919006, - "sentence_nr": 7 + "score": 0.3856140451435003, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.5312476702183977, - "sentence_nr": 7 + "score": 0.5942230347389459, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.2579124920342433, - "sentence_nr": 7 + "score": 0.47809357926196877, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.5545120254366757, - "sentence_nr": 7 + "score": 0.6515239202890919, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.1595487507830045, - "sentence_nr": 7 + "score": 0.45010740912530395, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.45111566089364774, - "sentence_nr": 7 + "score": 0.6258219368613708, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.27710310401156996, - "sentence_nr": 7 + "score": 0.5145500336945869, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.5216248191624099, - "sentence_nr": 7 + "score": 0.6710712793486331, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.303998162324503, - "sentence_nr": 7 + "score": 0.6152720493266995, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.5931856951819833, - "sentence_nr": 7 + "score": 0.7135410085152943, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.39631066492420963, - "sentence_nr": 7 + "score": 0.6505785903453039, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.6670602127484115, - "sentence_nr": 7 + "score": 0.7498687328904513, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.2625805454451497, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.5886806140244891, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.06797010899515823, - "sentence_nr": 7 + "score": 0.32481728488530576, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.27154181329396565, - "sentence_nr": 7 + "score": 0.5086455217662933, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.19568007857684672, - "sentence_nr": 7 + "score": 0.5609895346624986, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.5228407307909605, - "sentence_nr": 7 + "score": 0.681370247009325, + "sentence_nr": 1 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.49680276687617775, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", - "metric": "chrf", - "score": 0.7190025833795584, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.2879556779114461, - "sentence_nr": 7 + "score": 0.025127088788317715, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.4554184077174173, - "sentence_nr": 7 + "score": 0.21031980892802613, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.09578921953028982, - "sentence_nr": 7 + "score": 0.14721260533033206, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.40472887922389433, - "sentence_nr": 7 + "score": 0.2721389124032325, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.22965669823067916, - "sentence_nr": 7 + "score": 0.31315822356102974, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.46247819390492995, - "sentence_nr": 7 + "score": 0.37596627611906025, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.08920952468433085, - "sentence_nr": 7 + "score": 0.16777402882140335, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.32241875701400735, - "sentence_nr": 7 + "score": 0.2760884586584135, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.20475739007221866, - "sentence_nr": 7 + "score": 0.29427156769985635, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.3934874462686164, - "sentence_nr": 7 + "score": 0.38674923884011136, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.1719646079342664, - "sentence_nr": 7 + "score": 0.2923799210557074, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.308102700736633, - "sentence_nr": 7 + "score": 0.37275082068184157, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.11684343186914438, - "sentence_nr": 7 + "score": 0.28256258503905557, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.40293579310759836, - "sentence_nr": 7 + "score": 0.4403352184603733, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.26468987958610807, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.3655402609394103, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.28255079601170635, - "sentence_nr": 7 + "score": 0.08149835562288019, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.4828223682720399, - "sentence_nr": 7 + "score": 0.22869808598813696, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.12394169238067233, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.2023651649328507, - "sentence_nr": 7 + "score": 0.19833625372206998, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.1689706894436884, - "sentence_nr": 7 + "score": 0.17671397515361123, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.32609144958957464, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.3881647386960232, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.5417015630917802, - "sentence_nr": 7 + "score": 0.2956531697676844, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.348007986647201, - "sentence_nr": 8 + "score": 0.7017829861193574, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.6148736550683231, - "sentence_nr": 8 + "score": 0.7743327021667388, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.31222258402876674, - "sentence_nr": 8 + "score": 0.6961795371760597, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.5549937870516303, - "sentence_nr": 8 + "score": 0.7859480663394858, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.2706573913259733, - "sentence_nr": 8 + "score": 0.5643442092080923, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.5619563043714905, - "sentence_nr": 8 + "score": 0.7005543453411931, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.21331098311931576, - "sentence_nr": 8 + "score": 0.5432312750246535, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.47660259733052845, - "sentence_nr": 8 + "score": 0.6677259864784132, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.22816849039973935, - "sentence_nr": 8 + "score": 0.6837528314895732, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.5295534280606148, - "sentence_nr": 8 + "score": 0.7968789890147058, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.2320305803246989, - "sentence_nr": 8 + "score": 0.4391684160269219, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.5719371199531044, - "sentence_nr": 8 + "score": 0.6371098202414471, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.7555875294328935, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.5196627001050362, - "sentence_nr": 8 + "score": 0.8049022687045564, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.18627639656696823, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.44356601067804086, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.27048170758554296, - "sentence_nr": 8 + "score": 0.6443411340522405, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.5452157067944216, - "sentence_nr": 8 + "score": 0.7335999563315522, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.2285369650225378, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.3324437360240581, - "sentence_nr": 8 + "score": 0.4750387664265888, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.5975003598259766, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.3472164938104332, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", - "metric": "bleu", - "score": 0.33464494273746426, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", - "metric": "chrf", - "score": 0.6019539804372768, - "sentence_nr": 8 + "score": 0.7683913390959731, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.23683075175361493, - "sentence_nr": 8 + "score": 0.14118350058219528, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.2631328190836655, - "sentence_nr": 8 + "score": 0.20431837779877604, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.1811004938014804, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.16455392433653304, - "sentence_nr": 8 + "score": 0.2649993136544717, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.14089011087858522, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.13673885815184886, - "sentence_nr": 8 + "score": 0.21944603811527294, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -43078,95 +40171,95 @@ "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.052821402483564636, - "sentence_nr": 8 + "score": 0.12501819027374758, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.2360941227140328, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.10721126066665879, - "sentence_nr": 8 + "score": 0.35939098278145853, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.1273192735797341, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.20388486867467934, - "sentence_nr": 8 + "score": 0.22231961416584312, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.3360376952328008, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.14973178994918127, - "sentence_nr": 8 + "score": 0.35297640449956286, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.13680836462007476, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.1337840368142243, - "sentence_nr": 8 + "score": 0.24537888283181183, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.21687218788036394, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.2143764616947716, - "sentence_nr": 8 + "score": 0.35659125027777805, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -43174,159 +40267,143 @@ "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.014262006975939606, - "sentence_nr": 8 + "score": 0.1017839169529136, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.14549060082020032, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.11557977235371186, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", - "metric": "bleu", - "score": 0.3291598889023262, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", - "metric": "chrf", - "score": 0.31026575785653826, - "sentence_nr": 8 + "score": 0.22054620758680943, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.461597801606675, - "sentence_nr": 8 + "score": 0.20721924345714232, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.6280777654467244, - "sentence_nr": 8 + "score": 0.36475932190367044, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.4224298950114519, - "sentence_nr": 8 + "score": 0.11386607947762988, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.60823085524287, - "sentence_nr": 8 + "score": 0.33564583347921473, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.3916177035633811, - "sentence_nr": 8 + "score": 0.16862356321891248, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.6068458202737596, - "sentence_nr": 8 + "score": 0.3637462812267946, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.30451258861070496, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.4983778740634126, - "sentence_nr": 8 + "score": 0.1485432117087218, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.36033217429111203, - "sentence_nr": 8 + "score": 0.23649053182388327, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.5707860320039717, - "sentence_nr": 8 + "score": 0.4127382174759535, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.3916177035633811, - "sentence_nr": 8 + "score": 0.2392792151449317, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.6068458202737596, - "sentence_nr": 8 + "score": 0.388678103641788, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.45886678012586496, - "sentence_nr": 8 + "score": 0.34101364633474157, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.6234514801756209, - "sentence_nr": 8 + "score": 0.5758572581135913, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", @@ -43334,7 +40411,7 @@ "task": "translation_to", "metric": "bleu", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", @@ -43342,375 +40419,343 @@ "task": "translation_to", "metric": "chrf", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.3916177035633811, - "sentence_nr": 8 + "score": 0.2026639468552004, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.6068458202737596, - "sentence_nr": 8 + "score": 0.4471011187469559, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.15465401249808575, - "sentence_nr": 8 + "score": 0.11622323415479685, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.42501995363729067, - "sentence_nr": 8 + "score": 0.10826694406224016, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.30004556274899286, - "sentence_nr": 8 + "score": 0.180048782148418, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.561482333900969, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", - "metric": "bleu", - "score": 0.4501609222100726, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", - "metric": "chrf", - "score": 0.6301404717605862, - "sentence_nr": 8 + "score": 0.3772586334343914, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.46442643702863534, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.36769040719718776, - "sentence_nr": 8 + "score": 0.5519480629125156, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.6268941789647348, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.4064141882459388, - "sentence_nr": 8 + "score": 0.6958291103494518, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.4554740717077828, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.34722897369611144, - "sentence_nr": 8 + "score": 0.5498766350188072, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.35817810808590844, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.4103553163121394, - "sentence_nr": 8 + "score": 0.5012707040525209, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.570135897056151, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.4141871474340027, - "sentence_nr": 8 + "score": 0.6801332690579707, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.349335635815966, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.36586001924521905, - "sentence_nr": 8 + "score": 0.4827709277987172, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.4692880637764782, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.44328515185259987, - "sentence_nr": 8 + "score": 0.5773610754678101, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.34182319563232233, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.2870169689559038, - "sentence_nr": 8 + "score": 0.5052410644804232, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.5796814083647206, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.37150797394258683, - "sentence_nr": 8 + "score": 0.6364369549208913, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.2985280444159845, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.006130367300589213, - "sentence_nr": 8 + "score": 0.5072627289039213, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.49402195020645817, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.29038853710161877, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", - "metric": "chrf", - "score": 0.35427389686155986, - "sentence_nr": 8 + "score": 0.583821485566765, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.3286711939680359, - "sentence_nr": 8 + "score": 0.22837680015088951, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5944310794747374, - "sentence_nr": 8 + "score": 0.44164180234500505, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.13547277341758465, - "sentence_nr": 8 + "score": 0.4151474543103342, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.4830189619506113, - "sentence_nr": 8 + "score": 0.638952468710771, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.22970092088416938, - "sentence_nr": 8 + "score": 0.4355097603079957, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5537467826528029, - "sentence_nr": 8 + "score": 0.6511365998081735, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.13547277341758465, - "sentence_nr": 8 + "score": 0.2468185992183292, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.4685134392551311, - "sentence_nr": 8 + "score": 0.46792167630295967, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.3096036988813059, - "sentence_nr": 8 + "score": 0.21305368975019265, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5894510883198948, - "sentence_nr": 8 + "score": 0.4371748197696026, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.14957644445778928, - "sentence_nr": 8 + "score": 0.08919951949408464, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.4378856092523028, - "sentence_nr": 8 + "score": 0.2986174009048306, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.22481074167380632, - "sentence_nr": 8 + "score": 0.38791552573256816, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.49840634234674935, - "sentence_nr": 8 + "score": 0.5723637874192081, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", @@ -43718,7 +40763,7 @@ "task": "translation_to", "metric": "bleu", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", @@ -43726,23 +40771,23 @@ "task": "translation_to", "metric": "chrf", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.26751157705127454, - "sentence_nr": 8 + "score": 0.4305675865000082, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5494472552960327, - "sentence_nr": 8 + "score": 0.6312508299648723, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -43750,207 +40795,191 @@ "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.0066610108556241394, - "sentence_nr": 8 + "score": 0.011973456545827533, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.22536453058221606, - "sentence_nr": 8 + "score": 0.3002149853465536, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.4365811373563711, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", - "metric": "bleu", - "score": 0.5354135417523515, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", - "metric": "chrf", - "score": 0.6499871908570826, - "sentence_nr": 8 + "score": 0.5378189160780977, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.2822535302220024, - "sentence_nr": 8 + "score": 0.11634129390828839, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.3874773378787974, - "sentence_nr": 8 + "score": 0.31530902302000635, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.31747697264511426, - "sentence_nr": 8 + "score": 0.19544795798162903, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.40797778663955364, - "sentence_nr": 8 + "score": 0.3835451743665027, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.2822535302220024, - "sentence_nr": 8 + "score": 0.19889333501994313, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.3818556455365969, - "sentence_nr": 8 + "score": 0.3885583772632557, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.2822535302220024, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.3785761836985817, - "sentence_nr": 8 + "score": 0.24480102898506534, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.23841754841770157, - "sentence_nr": 8 + "score": 0.20876900081884944, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.34481325534410395, - "sentence_nr": 8 + "score": 0.3981381071356935, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.2822535302220024, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.3818556455365969, - "sentence_nr": 8 + "score": 0.1582263258709324, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.24796413807329218, - "sentence_nr": 8 + "score": 0.3530704022752377, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.3530186228211094, - "sentence_nr": 8 + "score": 0.521530381948501, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.09821019441701705, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.274825378700542, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.2807763229912453, - "sentence_nr": 8 + "score": 0.21574854574751035, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.38395145132718883, - "sentence_nr": 8 + "score": 0.41940323708656974, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.07218766113019179, - "sentence_nr": 8 + "score": 0.058854097785805734, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.24018250025773352, - "sentence_nr": 8 + "score": 0.17240019222052141, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", @@ -43958,559 +40987,511 @@ "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.2096419313570871, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", - "metric": "bleu", - "score": 0.2822535302220024, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", - "metric": "chrf", - "score": 0.3673119644292626, - "sentence_nr": 8 + "score": 0.23425891587078498, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.46935933364934335, - "sentence_nr": 8 + "score": 0.359355103997122, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.773055573548356, - "sentence_nr": 8 + "score": 0.5589602235417395, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.43368945552925614, - "sentence_nr": 8 + "score": 0.4267520229161, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.640995178057518, - "sentence_nr": 8 + "score": 0.5518115366540288, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.3681829215408091, - "sentence_nr": 8 + "score": 0.4248870612387681, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.6273930299436508, - "sentence_nr": 8 + "score": 0.5641041633033193, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.4389321784429702, - "sentence_nr": 8 + "score": 0.40429429626811253, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.646847036932526, - "sentence_nr": 8 + "score": 0.575799986766, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.43368945552925614, - "sentence_nr": 8 + "score": 0.37917766663411384, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.6793717376740783, - "sentence_nr": 8 + "score": 0.5365794450039074, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.3595137194874952, - "sentence_nr": 8 + "score": 0.23329145933277767, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.5619162673780028, - "sentence_nr": 8 + "score": 0.44291475401588093, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.43368945552925614, - "sentence_nr": 8 + "score": 0.30626379803308257, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.6793717376740783, - "sentence_nr": 8 + "score": 0.5077543267123376, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.39963516628793516, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.2927181624015055, - "sentence_nr": 8 + "score": 0.5372822043426468, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.43383878173729606, - "sentence_nr": 8 + "score": 0.38861707449775285, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.721993849834018, - "sentence_nr": 8 + "score": 0.5432656354167995, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.17706333085447226, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.007378883018336222, - "sentence_nr": 8 + "score": 0.4047932836379997, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.30904104300309865, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "translation_to", "metric": "chrf", - "score": 0.16440791304482247, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", - "metric": "bleu", - "score": 0.4027271257521195, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", - "metric": "chrf", - "score": 0.7244660400837248, - "sentence_nr": 8 + "score": 0.48677056338263186, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.3160213610127146, - "sentence_nr": 8 + "score": 0.35551034193127495, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.5165614670038283, - "sentence_nr": 8 + "score": 0.5627284645723449, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.30758744700466467, - "sentence_nr": 8 + "score": 0.5039752490702457, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.4684197705189288, - "sentence_nr": 8 + "score": 0.613669501327356, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.37169237058440824, - "sentence_nr": 8 + "score": 0.36932295883897953, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.5383668331525606, - "sentence_nr": 8 + "score": 0.5524455184773474, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.18655267161524258, - "sentence_nr": 8 + "score": 0.080331199191236, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.3640275543948514, - "sentence_nr": 8 + "score": 0.23021641289829473, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.20050320605789015, - "sentence_nr": 8 + "score": 0.3221305290185444, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.4046291070099031, - "sentence_nr": 8 + "score": 0.4866081657424789, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.14579837024705408, - "sentence_nr": 8 + "score": 0.10563809356628297, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.3459916112351503, - "sentence_nr": 8 + "score": 0.2323275601638909, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.2761603007895394, - "sentence_nr": 8 + "score": 0.37818447598700816, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.49207696507318593, - "sentence_nr": 8 + "score": 0.5516941276443429, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.20630721151497294, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.31518520840312125, - "sentence_nr": 8 + "score": 0.34591973979258805, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.5072004558983904, - "sentence_nr": 8 + "score": 0.5197016245837053, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.10020997712284248, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.004516711833785005, - "sentence_nr": 8 + "score": 0.16327778043310373, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "translation_to", "metric": "bleu", - "score": 0.16951909200513385, - "sentence_nr": 8 + "score": 0.24470192769722524, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "translation_to", "metric": "chrf", - "score": 0.3380125247643079, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", - "metric": "bleu", - "score": 0.3163330109126403, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "translation_to", - "metric": "chrf", - "score": 0.4797767411663947, - "sentence_nr": 8 + "score": 0.4583472827584427, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.2961516536011624, - "sentence_nr": 8 + "score": 0.5088535943352446, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.49803924348035766, - "sentence_nr": 8 + "score": 0.625202596789752, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.3459667618766101, - "sentence_nr": 8 + "score": 0.562048819850726, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.6170810606402402, - "sentence_nr": 8 + "score": 0.7192054483864224, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.23623790626704147, - "sentence_nr": 8 + "score": 0.5550041554031738, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.5441149448679464, - "sentence_nr": 8 + "score": 0.6554946147279708, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.35936994872479583, - "sentence_nr": 8 + "score": 0.4406896260480816, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.6492026440953677, - "sentence_nr": 8 + "score": 0.571328063702761, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.4161791450287817, - "sentence_nr": 8 + "score": 0.6260375038358343, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.7054426787013603, - "sentence_nr": 8 + "score": 0.7803415401430737, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.3254455687469726, - "sentence_nr": 8 + "score": 0.2961648173595504, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.577852219465442, - "sentence_nr": 8 + "score": 0.5022745285039809, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.39688965270008814, - "sentence_nr": 8 + "score": 0.37446819995007063, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.646373332434726, - "sentence_nr": 8 + "score": 0.5965995710194948, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.41110950985436373, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.6710923400142267, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.4272870063962341, - "sentence_nr": 8 + "score": 0.47237086893932345, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.6682855797405902, - "sentence_nr": 8 + "score": 0.6521003933528818, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -44518,95 +41499,79 @@ "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.008777992747819234, - "sentence_nr": 8 + "score": 0.31867018346252723, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_to", "metric": "bleu", - "score": 0.12288887055424895, - "sentence_nr": 8 + "score": 0.5261433842307197, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_to", "metric": "chrf", - "score": 0.4148619356639114, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", - "metric": "bleu", - "score": 0.5640009831507545, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", - "metric": "chrf", - "score": 0.793311073470687, - "sentence_nr": 8 + "score": 0.709255033821849, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.46832763312452297, - "sentence_nr": 8 + "score": 0.3941975148525721, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.8176110134774669, - "sentence_nr": 8 + "score": 0.5191046479503385, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.9436043261706615, - "sentence_nr": 8 + "score": 0.262633940062176, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.9880191679951993, - "sentence_nr": 8 + "score": 0.41923206553744197, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.3850172427136058, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.5264633431241114, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -44614,63 +41579,63 @@ "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.0067104198717751464, - "sentence_nr": 8 + "score": 0.15363234192450648, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.3146726146646545, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.4709531555683, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.9025232868361638, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.9169897590736298, - "sentence_nr": 8 + "score": 0.13978782442553714, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.9709835434146469, - "sentence_nr": 8 + "score": 0.3223419048219805, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.9951728990866464, - "sentence_nr": 8 + "score": 0.5205977846006183, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", @@ -44678,7 +41643,7 @@ "task": "translation_to", "metric": "bleu", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", @@ -44686,2951 +41651,28655 @@ "task": "translation_to", "metric": "chrf", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.9154051169199643, - "sentence_nr": 8 + "score": 0.1764046491640527, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.9757471794927451, - "sentence_nr": 8 + "score": 0.3464061249457313, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 0.8935248372106969, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 0.9404428602061264, - "sentence_nr": 8 + "score": 0.18856799944599728, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.20466701735848536, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.9154051169199643, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.9757471794927451, - "sentence_nr": 8 + "score": 0.3025868321081519, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.6031612036218008, - "sentence_nr": 9 + "score": 0.2618161850312308, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.736286703381354, - "sentence_nr": 9 + "score": 0.46946589430056646, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.39432344823662835, - "sentence_nr": 9 + "score": 0.4395347891601966, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.5943452555220106, - "sentence_nr": 9 + "score": 0.6124294442602769, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.34437686643287496, - "sentence_nr": 9 + "score": 0.33711507396378565, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.6090402109312658, - "sentence_nr": 9 + "score": 0.568383173179082, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.39205580893266934, - "sentence_nr": 9 + "score": 0.3222423455530638, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.6772940233934857, - "sentence_nr": 9 + "score": 0.5643748237802169, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.43103580001357805, - "sentence_nr": 9 + "score": 0.44898438516407524, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.6690742226623104, - "sentence_nr": 9 + "score": 0.6143783254714975, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.478854281434795, - "sentence_nr": 9 + "score": 0.3688091032179454, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.6734455797843703, - "sentence_nr": 9 + "score": 0.5618587771651018, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.2981426768485538, - "sentence_nr": 9 + "score": 0.4918904748281632, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.5882799317365235, - "sentence_nr": 9 + "score": 0.6551168488426827, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.22739562220830442, - "sentence_nr": 9 + "score": 0.44411177326152307, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.49805301036023364, - "sentence_nr": 9 + "score": 0.6264654386006935, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.4263005628892719, - "sentence_nr": 9 + "score": 0.1456085160245154, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.6688425476017256, - "sentence_nr": 9 + "score": 0.4152567008092657, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.4579102348988084, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.6417119032346416, - "sentence_nr": 9 + "score": 0.30284835181827113, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "bleu", - "score": 0.6252078221435556, - "sentence_nr": 9 + "score": 0.3991070933698779, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", - "metric": "chrf", - "score": 0.7406162627381982, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "translation_to", - "metric": "bleu", - "score": 0.509851600045062, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "ru", "task": "translation_to", "metric": "chrf", - "score": 0.6689059150119564, - "sentence_nr": 9 + "score": 0.5925781167136664, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.11254397891886614, - "sentence_nr": 9 + "score": 0.13139413594401378, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.20623288988983426, - "sentence_nr": 9 + "score": 0.2490406851204271, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.1100081929352474, - "sentence_nr": 9 + "score": 0.3112317271723676, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.18967061672400035, - "sentence_nr": 9 + "score": 0.4361597730424806, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.10772332006118607, - "sentence_nr": 9 + "score": 0.36717349445307196, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.23609036869909603, - "sentence_nr": 9 + "score": 0.4938275160496472, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.1498435848533153, - "sentence_nr": 9 + "score": 0.32679491753274487, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.21051700087939107, - "sentence_nr": 9 + "score": 0.5061357551531296, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.10772332006118607, - "sentence_nr": 9 + "score": 0.4850978822371748, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.17652714369664665, - "sentence_nr": 9 + "score": 0.6352541213631081, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.1864036495127383, - "sentence_nr": 9 + "score": 0.1597896899620504, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.28188465375440136, - "sentence_nr": 9 + "score": 0.49086962788909555, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.1445047538382198, - "sentence_nr": 9 + "score": 0.5420104447784688, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.2737322242154943, - "sentence_nr": 9 + "score": 0.6785083781968722, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.10686832559533661, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.20609270360853799, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.1957899789117337, - "sentence_nr": 9 + "score": 0.36033300506928556, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.32253417440653254, - "sentence_nr": 9 + "score": 0.5012711439020835, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.137248043368656, - "sentence_nr": 9 + "score": 0.1975941306622024, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.22329074990170197, - "sentence_nr": 9 + "score": 0.27520597189594015, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "bleu", - "score": 0.14969363386531168, - "sentence_nr": 9 + "score": 0.2983588344542972, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", - "metric": "chrf", - "score": 0.27820986095394096, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "translation_to", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "sw", "task": "translation_to", "metric": "chrf", - "score": 0.21051971584146587, - "sentence_nr": 9 + "score": 0.5041326432925124, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.24493390281390082, - "sentence_nr": 9 + "score": 0.39997687282627975, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.48113625107113883, - "sentence_nr": 9 + "score": 0.5906362815628093, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.19476681308252697, - "sentence_nr": 9 + "score": 0.5208833700498166, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.42030407727741037, - "sentence_nr": 9 + "score": 0.6368157603637512, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.33600502687041833, - "sentence_nr": 9 + "score": 0.5248317090186142, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.5162346121569341, - "sentence_nr": 9 + "score": 0.6599904808886127, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.10336049249219333, - "sentence_nr": 9 + "score": 0.3727105527986878, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.3088863284587533, - "sentence_nr": 9 + "score": 0.5475770179024447, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.27190910124573536, - "sentence_nr": 9 + "score": 0.49612267717096975, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.5173567851798608, - "sentence_nr": 9 + "score": 0.6218353723304708, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.09851325694216304, - "sentence_nr": 9 + "score": 0.425433767253164, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.3616605984753398, - "sentence_nr": 9 + "score": 0.5818873909634904, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.34854547753540127, - "sentence_nr": 9 + "score": 0.5781534325005774, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.5565027260893921, - "sentence_nr": 9 + "score": 0.744306267545875, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.11215313654295675, - "sentence_nr": 9 + "score": 0.5567668766994894, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.36001328873605765, - "sentence_nr": 9 + "score": 0.7126872503504181, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.19593487880196195, - "sentence_nr": 9 + "score": 0.5809669142768361, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.4136765523891332, - "sentence_nr": 9 + "score": 0.7168714328499641, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.21025696416672812, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.08839512340686698, - "sentence_nr": 9 + "score": 0.39080193524562357, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "bleu", - "score": 0.21177781620127928, - "sentence_nr": 9 + "score": 0.43929751176084064, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", - "metric": "chrf", - "score": 0.4460741740050364, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "translation_to", - "metric": "bleu", - "score": 0.19202937871014814, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "bcp_47": "id", "task": "translation_to", "metric": "chrf", - "score": 0.4392268366970299, - "sentence_nr": 9 + "score": 0.5889147220911218, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.3459789902390003, - "sentence_nr": 9 + "score": 0.1743988338080954, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.5620330456296532, - "sentence_nr": 9 + "score": 0.4684683280769817, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.37825713491091884, - "sentence_nr": 9 + "score": 0.3335763231736967, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.5584414289480568, - "sentence_nr": 9 + "score": 0.4822714438205533, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.3695375029926146, - "sentence_nr": 9 + "score": 0.24628203589181794, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.556875129479421, - "sentence_nr": 9 + "score": 0.47490747232624714, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.47923168144435746, - "sentence_nr": 9 + "score": 0.2865635502271963, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.6534660189132082, - "sentence_nr": 9 + "score": 0.43207538722163397, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.429512074830509, - "sentence_nr": 9 + "score": 0.3501847839621347, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.6066779955199886, - "sentence_nr": 9 + "score": 0.5180344374850399, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.4257605183794877, - "sentence_nr": 9 + "score": 0.21195371406845798, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.6026940597371309, - "sentence_nr": 9 + "score": 0.4061072764676573, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.40518022025671885, - "sentence_nr": 9 + "score": 0.3483718729405163, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.5707666164180741, - "sentence_nr": 9 + "score": 0.4980927055323402, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.27460305577138294, - "sentence_nr": 9 + "score": 0.4522982965509296, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.5251472574042976, - "sentence_nr": 9 + "score": 0.5291817462027207, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.4257605183794877, - "sentence_nr": 9 + "score": 0.32488958976180393, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.6072620760408021, - "sentence_nr": 9 + "score": 0.5442618574958996, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.24287220388451114, - "sentence_nr": 9 + "score": 0.11781301843777481, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.4829182994799567, - "sentence_nr": 9 + "score": 0.20219186255257193, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "bleu", - "score": 0.27309322054464596, - "sentence_nr": 9 + "score": 0.35111125142401484, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", - "metric": "chrf", - "score": 0.5162255850430824, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "translation_to", - "metric": "bleu", - "score": 0.370828716498988, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "de", "task": "translation_to", "metric": "chrf", - "score": 0.5662656160148991, - "sentence_nr": 9 + "score": 0.47360088422177105, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.1845747513433909, - "sentence_nr": 9 + "score": 0.30485765641951534, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.44379971518505973, - "sentence_nr": 9 + "score": 0.4770499606054267, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.18212463619188357, - "sentence_nr": 9 + "score": 0.33469420519942356, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.469592540371137, - "sentence_nr": 9 + "score": 0.40802446160905737, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.20734616999079872, - "sentence_nr": 9 + "score": 0.4747315561546192, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.5036833880605232, - "sentence_nr": 9 + "score": 0.5626366882998202, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.2817686971402115, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.5170853673805775, - "sentence_nr": 9 + "score": 0.13084917716157846, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.09596136927307748, - "sentence_nr": 9 + "score": 0.41618377742781326, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.40849147213099996, - "sentence_nr": 9 + "score": 0.5456804815374756, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.216062485604554, - "sentence_nr": 9 + "score": 0.23360210277843085, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.4780977009860418, - "sentence_nr": 9 + "score": 0.3931394603219493, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.23502778906204924, - "sentence_nr": 9 + "score": 0.22256734278223791, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.5533644883224328, - "sentence_nr": 9 + "score": 0.3765773373060594, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.23288432092807593, - "sentence_nr": 9 + "score": 0.16910165945030708, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.4777685664632553, - "sentence_nr": 9 + "score": 0.2980085795836986, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.3288562544630599, - "sentence_nr": 9 + "score": 0.4575089222077589, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.5875530351959068, - "sentence_nr": 9 + "score": 0.5374740662953226, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.05780839041831641, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.024449792954766115, - "sentence_nr": 9 + "score": 0.09190080998528566, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "bleu", - "score": 0.25748397762867226, - "sentence_nr": 9 + "score": 0.1704823945286264, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", - "metric": "chrf", - "score": 0.49702079004924316, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "translation_to", - "metric": "bleu", - "score": 0.33663600853613573, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation_to", "metric": "chrf", - "score": 0.5875950506541368, - "sentence_nr": 9 + "score": 0.33422996692133256, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.2517176762753373, - "sentence_nr": 9 + "score": 0.8761560783209453, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.45137344500317134, - "sentence_nr": 9 + "score": 0.9484564543183253, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.3128384316903283, - "sentence_nr": 9 + "score": 0.7505336182671021, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.48016279207050283, - "sentence_nr": 9 + "score": 0.8401910628269498, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.3508847643803501, - "sentence_nr": 9 + "score": 0.8761560783209453, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.529198044527105, - "sentence_nr": 9 + "score": 0.9289416300153619, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.2674628639054191, - "sentence_nr": 9 + "score": 0.4450050658086207, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.4784292149775752, - "sentence_nr": 9 + "score": 0.7558874882119336, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.22972631482860506, - "sentence_nr": 9 + "score": 0.8107492451395732, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.436102988762466, - "sentence_nr": 9 + "score": 0.900032747778274, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.1587543502252646, - "sentence_nr": 9 + "score": 0.8761560783209453, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.4114443619817223, - "sentence_nr": 9 + "score": 0.9484564543183253, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.267457541157426, - "sentence_nr": 9 + "score": 0.8761560783209453, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.4673846703066711, - "sentence_nr": 9 + "score": 0.9484564543183253, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.21808070471467408, - "sentence_nr": 9 + "score": 0.7406375008540003, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.3966492622645894, - "sentence_nr": 9 + "score": 0.9160988509714175, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.26116607863611285, - "sentence_nr": 9 + "score": 0.3132252321342574, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "bleu", - "score": 0.09196922936475649, - "sentence_nr": 9 + "score": 0.35025412310639736, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", - "metric": "chrf", - "score": 0.35950194744727476, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "translation_to", - "metric": "bleu", - "score": 0.2661882195219029, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", + "bcp_47": "en", "task": "translation_to", "metric": "chrf", - "score": 0.3867524502755999, - "sentence_nr": 9 + "score": 0.6825372617659788, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.43200638115383627, - "sentence_nr": 9 + "score": 0.13453927150397377, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.6892273787708799, - "sentence_nr": 9 + "score": 0.10522974272748564, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.341195158470265, - "sentence_nr": 9 + "score": 0.22055493694673897, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.6539473951166187, - "sentence_nr": 9 + "score": 0.3931965048763613, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.47372467075851415, - "sentence_nr": 9 + "score": 0.13755274871304535, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.734800469477975, - "sentence_nr": 9 + "score": 0.10397715306705207, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.5582838437615822, - "sentence_nr": 9 + "score": 0.13899941210887606, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.7625459507115938, - "sentence_nr": 9 + "score": 0.10947303419437356, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.45026965676007474, - "sentence_nr": 9 + "score": 0.13725861056573663, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.6668256174353906, - "sentence_nr": 9 + "score": 0.11147384852362276, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.310668922100995, - "sentence_nr": 9 + "score": 0.1350501875730652, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.58212864821275, - "sentence_nr": 9 + "score": 0.11147384852362276, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.5585674160229753, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.791505922278621, - "sentence_nr": 9 + "score": 0.09408024740752835, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.5592126620745396, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.6961094171330644, - "sentence_nr": 9 + "score": 0.054674609450212665, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.29504037076486817, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.6837809127705262, - "sentence_nr": 9 + "score": 0.09895358918308976, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.44680913024590146, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "bleu", - "score": 0.648473971864945, - "sentence_nr": 9 + "score": 0.11538184104597694, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", - "metric": "chrf", - "score": 0.8247818102038394, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "translation_to", - "metric": "bleu", - "score": 0.6040638744786117, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "bcp_47": "zh", "task": "translation_to", "metric": "chrf", - "score": 0.7781313195018753, - "sentence_nr": 9 + "score": 0.09204268041910899, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.6885326214539055, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.40475700826319555, - "sentence_nr": 9 + "score": 0.8229812189228393, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.839587623092576, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.4349871720911447, - "sentence_nr": 9 + "score": 0.9096086668952811, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.7267072830982378, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.47497024539412314, - "sentence_nr": 9 + "score": 0.8396959977515368, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.6374950652411382, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.3805666011451541, - "sentence_nr": 9 + "score": 0.6643984252563968, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.839587623092576, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.4710260495003035, - "sentence_nr": 9 + "score": 0.9096086668952811, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.5821747317554493, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.4677317890018283, - "sentence_nr": 9 + "score": 0.7539119883011114, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.6885326214539055, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.354353831625583, - "sentence_nr": 9 + "score": 0.8328652216139806, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.10235881838919027, - "sentence_nr": 9 + "score": 0.839587623092576, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.42794399630326124, - "sentence_nr": 9 + "score": 0.9096086668952811, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.251696695878184, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.09649622940465846, - "sentence_nr": 9 + "score": 0.5710821658681214, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.7498810286408993, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "hi", "task": "translation_to", "metric": "chrf", - "score": 0.29275810079464665, - "sentence_nr": 9 + "score": 0.7886148242134857, + "sentence_nr": 3 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3480442076026084, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6142483232997242, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2861853478258715, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6401604432917332, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2861853478258715, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6401604432917332, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2852636439147137, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6732018003142922, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.412295470431275, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.705800771033924, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2104783778565715, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6117499551501043, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.412295470431275, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.705800771033924, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.18814785746917081, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5692328972915052, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", "task": "translation_to", "metric": "chrf", - "score": 0.33453241597890554, - "sentence_nr": 9 + "score": 0.589811312024197, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.31072931460421827, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4306285422638574, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.5244380103905697, - "sentence_nr": 9 + "score": 0.17401517708317762, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.6914581279144536, - "sentence_nr": 9 + "score": 0.45006261596496794, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.4472834999328078, - "sentence_nr": 9 + "score": 0.151240443751577, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.6457130269652316, - "sentence_nr": 9 + "score": 0.4224869587588239, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.403469748891042, - "sentence_nr": 9 + "score": 0.151240443751577, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5836273992135024, - "sentence_nr": 9 + "score": 0.4224869587588239, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.4521209970489246, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.6307076431103672, - "sentence_nr": 9 + "score": 0.26860011657329247, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.310186302993101, - "sentence_nr": 9 + "score": 0.14172292406325543, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5434540129901786, - "sentence_nr": 9 + "score": 0.4762857001428092, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.5201565256464291, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.6663170490872967, - "sentence_nr": 9 + "score": 0.4800955244005148, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.5950978682255068, - "sentence_nr": 9 + "score": 0.16234676720992364, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.7209575532500453, - "sentence_nr": 9 + "score": 0.48137970077362496, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.32679491753274487, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5592874366443522, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.36634140441362645, - "sentence_nr": 9 + "score": 0.16853790965501372, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5925773491774018, - "sentence_nr": 9 + "score": 0.5242065098084487, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.2672991324984635, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.5378982230702222, - "sentence_nr": 9 + "score": 0.07810235385630719, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "bleu", - "score": 0.4880149105083363, - "sentence_nr": 9 + "score": 0.10401577613691954, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", - "metric": "chrf", - "score": 0.7177464929662396, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "translation_to", - "metric": "bleu", - "score": 0.6095141355358055, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "bcp_47": "ar", "task": "translation_to", "metric": "chrf", - "score": 0.7260733102028687, - "sentence_nr": 9 + "score": 0.32252336426814965, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.504154287515855, - "sentence_nr": 9 + "score": 0.2028736642487601, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.6074467585243234, - "sentence_nr": 9 + "score": 0.3614856639698008, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.18771816026273827, - "sentence_nr": 9 + "score": 0.2255489037266197, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.37594160796244835, - "sentence_nr": 9 + "score": 0.3954925749722234, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.2828480467326008, - "sentence_nr": 9 + "score": 0.2927057121559396, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.4330386622117487, - "sentence_nr": 9 + "score": 0.4330945753016968, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.26314173809974317, - "sentence_nr": 9 + "score": 0.18038302998635977, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.41943156806161835, - "sentence_nr": 9 + "score": 0.320678468026793, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ur", "task": "translation_to", "metric": "bleu", - "score": 0.17207258849758605, - "sentence_nr": 9 + "score": 0.21850594525107195, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ur", "task": "translation_to", "metric": "chrf", - "score": 0.3052503498954155, - "sentence_nr": 9 + "score": 0.4049269026117245, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21233470585998818, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3630016390465325, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.22325877055095214, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38760873730223866, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2741229265391949, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3721657350281369, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.16170596160446446, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.33805023952655533, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.23398197530631124, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2929807168354841, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5975595069845072, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6689604664235209, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8010329764520807, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6689604664235209, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8010329764520807, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.14908960803395838, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4761746966391582, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2466674257522263, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5677534942306638, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.684329671666446, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6666935927206881, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7886059879769752, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3854501214118697, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.595779023757305, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.1999934463074552, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.30520457148036917, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4113125177363443, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.42808075762838727, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.32685141385924577, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3758692873615971, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3734832062562986, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.383916695249631, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3804672236690253, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.433708341935832, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4678134833959513, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5051480556620123, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4028998029112093, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43422338821405304, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.46360731056064436, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5726015901952585, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3734832062562986, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43771936994910393, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2041405149858879, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2728627798814474, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.36592034784584504, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6237774736059616, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8500131524897436, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.573764722928549, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.817979859532479, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2320305803246989, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6224956012824276, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.21449459478473423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6528501353073614, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.27341185048222727, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6411651849711889, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4041187386794465, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6888233111124319, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5645815242299279, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8151453923340255, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5972046851135996, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.24570408832734913, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.17470942957770763, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5699365673055954, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8482942955247808, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8363600587440573, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9912737182609732, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.6018154975998465, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.7669980679050217, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8482942955247808, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.20164065196183215, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.3923533979663226, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4212555584968603, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.44264089366400194, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.2750774388281557, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.34712156908889796, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.31322885062380607, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.3821582738832969, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.27483211854002193, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3332411632883488, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4039853784752083, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.392022469660947, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.48059504328652813, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4081354056739722, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4731088237118851, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.20642179908801722, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.19178500195247952, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.3514233824120371, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3471790743028735, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5354826964964929, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.14528679532351443, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.45626264176882697, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.21146239923180532, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3672220683588613, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.11365352023191169, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5427832684043266, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.25034600728678114, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.14025775160081475, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.37306669253790053, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.41682189465797687, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7155178722189985, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.37420316460821246, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6713980677832108, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.09520646862489263, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4231985179035766, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.47631009147745074, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6668706097750393, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3312076918041707, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5907489769215882, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5419642316694008, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8375813242343603, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.45069082245075975, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6399673599980337, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.19496249079519765, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.3404780052299898, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.648844691127488, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.7708186875078075, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8284786787443572, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.08603520723426224, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.41785071883461133, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.175658807429611, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.47157573411560544, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.20217803037339238, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4743589083194767, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.9048530940348648, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.9226314544302758, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4267826722481737, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.1481394578697113, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4346232049071254, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.26104909033290696, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5956068369645927, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.9226314544302758, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.1481394578697113, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4278722851826281, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.10008350737965103, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.3399171525144127, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.3267507236200133, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.14710052131359536, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2322531742374544, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.19427446513842178, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2821379316874468, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.30793937214153166, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.39271105518755994, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.1928576545653753, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.27080894796384963, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.20828838183973028, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3002418280717453, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2234473632117264, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3517185856118227, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.20536337741589905, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.23621003955621192, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.17004486924634224, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.14102929105825548, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.16702356077048272, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.32184315197242147, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.2246029757863831, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.37709297891717664, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6881502501430368, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4765874091118851, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.45911557772276623, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.45022125383821326, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.46874267375238576, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.6350593429017282, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.6022395694696409, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3407065041529668, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3482814151315599, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.7215691881328408, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.735100789804592, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.700487718300918, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.7205373993220106, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5894567062209923, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.6051783687131701, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5614660831213585, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5781117871636209, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5907010930652489, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5995581839975431, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.33500599401126563, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.34371117385240735, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.630923553986829, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.6458808155334796, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3013901676230198, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.38106012955734714, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.35187745073108273, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.2850647115160651, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2028736642487601, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.43458947791319813, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4033902612785559, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3101159279982649, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3909330178955319, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.217295409663537, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.12173115521158184, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3904544509639755, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.38091370416670794, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.540550443602966, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.36314748337164254, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5392658386159207, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.35459684529390034, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5519360558961294, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.23610158425430544, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.46869487580371916, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.35459684529390034, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5499993547125768, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.27062395495883934, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4407436716645838, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3711335186021823, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5595427509161435, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2294068720558097, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.40458364050078693, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.32588643749980295, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.32003170276441123, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23088247483586974, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4093450185788297, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23189835231884592, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.44157797833899437, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.11917756990194882, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.37850093315889116, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.3330732444230803, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2550184675066243, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4392529322675216, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23721317187079113, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4319465813689286, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.29417113956364643, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4653698220842079, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23088247483586974, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.42828303349678104, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.45237912327122276, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.6299071573751139, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.23736810439041953, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4004852416401387, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2916261378761629, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4106520926894174, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.282764733088686, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3743678965131091, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.31430120091187586, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5013155459452984, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.16451929399933107, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.2957279302594959, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.11401282249739858, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3117911565455793, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.34019506273883837, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.48708558391259515, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.33184166448858593, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.1676136890247661, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38914692664434314, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5605065818946205, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.586853267829013, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.36300296341860155, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5816676674074003, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3525399760372503, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5798116969849163, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3927053212677373, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5108598154804425, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3722001929300059, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5252698638532942, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3514475288270508, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5361569875660316, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.27914759735007616, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3958350231734361, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.20974733068050955, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.41455868084196934, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.16086531618356015, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2797876941198672, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3277803741755935, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4267708983045122, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.28765408533715414, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4046608868073569, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1819722649161304, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.44254730215235283, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.25291831689404154, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4482360279074225, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.279600269133294, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.48172049854477195, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1258907882951215, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.34143648068854054, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.11986809949741643, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.34629467658248214, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.32666181171942305, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5670457942911707, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.29175929784144866, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.45299010750030405, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.24463910693302512, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2908660541001102, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.13305199541830684, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.43244987270004115, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3377385620641691, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5256128450453542, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.44776047557667586, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5801193947715436, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2875583820017638, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6087635830564418, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.28295596283263513, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6067794553589253, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.20863283213455547, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5109257435313587, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2810551683573811, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6477048453606161, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.473265686519562, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7021422985630228, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.11810019511256618, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.3708545152745943, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3423375720396189, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5343801172775681, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12546912767038895, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2651343523961406, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12407216162020399, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2664864612493293, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.11261597894135422, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.25234827342962907, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12757855945289526, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3385577201847465, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1175771442804648, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3103572690939351, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12629279972753293, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2933944065312711, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12363251371327445, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2692822154793075, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.24470651147480013, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.09410612421964877, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.13696035837771334, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3335388002918436, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5322520826224556, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.727131667480615, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5266403878479265, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6838125749299477, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5276151436342643, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6871514991080862, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5276151436342643, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6999124430022288, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5322520826224556, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.727131667480615, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5523722682139371, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7471196627888963, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5523722682139371, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7219229057874782, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4695966835778606, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6515328250192374, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5002744991426422, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6429454824803486, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.40961777715484393, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5773184063472755, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5331034421473965, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6806929097519565, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2092659579124333, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5354766759595367, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.1284866896836278, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.54959682211865, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3099627272480552, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5376391724348849, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.15742483335373852, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.43519517439687405, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.6144246566045058, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7550732449201221, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2238855010644693, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5288881528593262, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.473016146288238, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6981990328342826, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5023049672447087, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6885140390468562, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3649580097673384, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.45686283928900234, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7163619637625416, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5703017172567459, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.78509136371851, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5226605904538532, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5272140519221666, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.4692685009782657, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5470973834864862, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5563318425026342, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5478041897913022, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.18882437844970767, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5789754712947318, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.43427164452809086, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.526079635392936, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.32001589569502475, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5274343388526991, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.31869191523653845, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5973293882694002, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3128418715354195, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5961262622141211, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.302221525161365, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5926217012511299, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.289331164128846, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5798018459101258, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2994985311892038, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.611702219968759, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4259108629005092, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5685406243620383, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.41946746288765896, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3162277660168379, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.533113142157349, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.19037861963633804, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5872021106043722, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.289331164128846, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5970102524600497, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.6666467303030572, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.6471929785766445, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.23287896954139942, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.34835288582718865, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.23287896954139942, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3456033257148638, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.20038908500140973, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.38365854681342043, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.15071676257541072, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.28916309026824916, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.18605335292758288, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3760221461307777, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.1457684614972261, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.26433094519026357, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.20515691941627118, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.5398995684986874, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5601040209287937, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.2887138086538547, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6342291345998248, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7013062757071812, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9303769449292738, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.2381658499765768, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9063898435384111, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8522456714074852, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9096914044088521, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.9457416090031758, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9892952933418456, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.37589902061551017, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.42554151277542873, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.20748131961458333, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2716205232346228, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.20748131961458333, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2716205232346228, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.19272923456045185, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.314589204347422, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.20748131961458333, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2716205232346228, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2562402498959597, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3462132320098601, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.30613574556266654, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.36162356523761796, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.30613574556266654, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.36162356523761796, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1277700534498365, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.20846991452438368, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.25985341959039815, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3462132320098601, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.42988105429544615, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7577244658187771, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5366411241731205, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.825566494253596, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.617939643800199, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8665162960307256, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4360038791211645, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7669087484597642, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4135171000263379, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7050151549073953, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3345794609803645, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7523344918083558, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.46997395980026974, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8114935753258365, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3751840463233443, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.690216773228096, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4547722460981925, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.793631811653261, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.44338575968779337, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.34537865578685034, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6736450219247083, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4165530720734658, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7027805129995731, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4027788021844849, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6872835607174038, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.40245827940445855, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6664090181705107, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5552412314880962, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6976333495952621, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.8331572107884448, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.40245827940445855, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6664090181705107, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5440766840557734, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7530101164980872, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.482878209362615, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7054264546871626, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.22447836580911282, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.635962708232662, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3964122180109575, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.584540734626554, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6537813760269277, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7617489761353242, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.41307323705325416, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5785653391533346, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5248587176134882, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6664855309004869, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5248587176134882, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6664855309004869, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.32797138117025904, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.48645628248697975, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.498704623570665, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6478746389895599, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2280299254440877, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4447177675003817, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4549681528678131, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6164314607426773, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.05173101600908794, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.498704623570665, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6478746389895599, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.062313574266204104, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.35012358768277246, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5816664251371266, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.17729842264695017, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4103582047611184, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.34895836374229405, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4767378358574124, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.4010889714538991, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5642546048162433, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.30145280436636923, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4729753929525169, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.35210829264331733, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5239651686730163, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38732841080078323, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38584042605633057, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.35548377438423956, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5150536106864393, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.35098096867859657, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38561859819475125, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8958039312312598, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9382091007325469, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6433799261824519, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.81037697367602, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5403356450597102, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7639130574395125, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5815699184831468, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8158797976578578, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8097013849965253, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8896806148658662, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6161420984415483, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6945809713247855, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4500531895417844, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43027065541050147, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4933292241270431, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5225247297523148, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.23487811400114963, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4062284746604391, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3763743474188506, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4017565065239436, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5805399561362194, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4810464260105228, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.21510618470971102, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.41380245501613677, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3489214645008508, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4519496200669607, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.15415064977510756, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.48192435154139673, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.49539605131242165, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.44728880966754114, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4825434542324755, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5617848264135781, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5570886750436929, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5212982931053122, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.35319015092357736, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5822934956325967, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.35319015092357736, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5822934956325967, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5095895501997145, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5064127215831256, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6516332048338376, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.35319015092357736, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5822934956325967, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5064127215831256, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6516332048338376, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5212982931053122, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.4647137781420131, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5064127215831256, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6516332048338376, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3855522725905196, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.587260566914102, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4426623526629488, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6368371029698285, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3666340989897011, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5782960278998768, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.32892676518285585, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5800761309604682, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4592978565863154, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.29456425448249246, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5691358329649412, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.15083364266523736, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4714472446464193, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.25376192011637994, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.47199515498282607, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.26349889713915725, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12514328743841557, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.34961836061490087, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.19650854773882592, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5134302167765095, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.518761522736185, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5205634208063233, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.518761522736185, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.518761522736185, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4397936463531347, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.19650854773882592, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5134302167765095, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.507227991110909, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.1739898487873076, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5216901258730671, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.48994178177127756, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4916235564562672, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5106109398471469, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7688046995197549, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.44778459441351737, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7043336945393497, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.32214112487007024, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7794716829174484, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5564992960428438, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5129586382458503, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7857394056399366, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.48222455960294414, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7511716303980656, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3494188591554153, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7072510223788713, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4544489097785626, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.8155088724539601, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.39693478727333953, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4824766987096576, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7665355959167616, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.6158161554766717, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8568982835533138, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.6535194995338728, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8909391457425937, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.6535194995338728, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8909391457425937, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8116199676115453, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.6535194995338728, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8909391457425937, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8116199676115453, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.4062749424452353, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7717578180410056, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.35116777059394766, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7265247193057359, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8116199676115453, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.4373266725468241, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7265900332348232, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6810920170253699, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.8151678595510182, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.8873630455888943, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.8571061116877262, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.906026511295714, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.8151678595510182, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.8873630455888943, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5386933265263314, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6707532211471023, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5852187596735429, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7147018027438421, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5852187596735429, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7147018027438421, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.7483293841345244, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.8146526693270999, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5124776602965491, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6385609025659063, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.11392322187442314, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.39882161697649804, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5852187596735429, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7147018027438421, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.31684822717918226, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4984008175596484, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.31684822717918226, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.41779931059703573, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.31684822717918226, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4715336632468998, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.01834337391695103, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3682241310101735, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5606184355158915, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2765950320972588, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4432072463778114, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.20248027846537173, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.435318130545113, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.288122591812262, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.43296860754666744, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3682241310101735, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5450474312451057, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.17415784669090767, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3586870164339305, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3682241310101735, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5606184355158915, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.3263040636562357, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5561195823338172, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5362935676066722, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5803515898273521, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5422220468910552, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4204739940979302, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5173824078732066, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4177866849157374, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5243375045345786, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3450219162509876, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3993348853061597, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.30978068501889056, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.28295274449167956, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.33498389276277546, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3296536654279081, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.42670493571995677, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4251985835808586, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2368693821608258, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.320909989176825, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3223833286593516, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.39475158383309167, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.46146548771819573, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4019452398054806, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.33438299066966715, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5409759573191787, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.37854068916316835, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5743796566387722, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.37854068916316835, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5768306472334509, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.32134504358579785, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5076725973953424, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2562150245540302, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.47046477830594896, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.27182849679730653, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5497265770945076, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3509258729305825, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5379703355059909, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.19135220621724439, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4724042181215377, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.38936263771250235, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5443518219250745, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.28555753499459907, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3416445560351976, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4049402235047407, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5871644977560334, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.45506803308128024, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6477506541284608, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.592313615748771, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7382416555842614, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.592313615748771, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7568286018427376, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4226108216696222, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6331414171574684, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4226108216696222, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6331414171574684, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.602867050301643, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7367363357155757, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4018202851356865, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6003256951549871, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7542976177437886, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4018202851356865, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6003256951549871, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.526357446896968, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4018202851356865, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6003256951549871, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.6666823117022298, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7571125338649978, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3864572432237816, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5849342936087653, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5406438522344627, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6520694800788391, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.6977240390484037, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.818984467219358, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.7019499719108448, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.8450280883390384, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.7397087417978795, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.8865031414920428, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.37717457428685847, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5554130492458337, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.31598923484911084, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.18953162992336403, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.45876745950873354, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2383770504614087, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.466645869611307, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.18207052811092134, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4504432021668592, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.14291173574075158, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.45184360988354105, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.34419514726440925, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2128497674847141, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.438591227628555, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.16170596160446446, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.27743662258385243, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21081851067789198, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4042801758173556, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.7629273292796576, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8510385544954956, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8725129388059689, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9495292423959529, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8725129388059689, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.90941532255964, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8725129388059689, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9495292423959529, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6425503166524515, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8078891929749037, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6425503166524515, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8078891929749037, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6888074582865503, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8055061207769505, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6888074582865503, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8151715541788959, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8725129388059689, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9495292423959529, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6978429290017016, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7717858931341154, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.49349163706233623, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.694445271037971, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3037643089519314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5183662698462751, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.18376711147874328, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3981272326046884, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.340960560695735, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5217663812589132, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2147607499133801, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3976144917079093, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3941175366175992, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5506555496793699, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3339087646492816, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5995623358499859, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4122974402951816, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6170911690364487, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2643854378698732, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4066689638009577, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5267604642487788, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.8253498772794055, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8529564805429163, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.7944837206494969, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8784531740275225, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.7944837206494969, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8784531740275225, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6941268297866866, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7679844670813416, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.7072172847953276, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7914639887327892, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5642761727828352, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7668993520558344, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.175538121835486, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.44197441533246407, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.5828833474188783, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.7908226509294533, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.38694317759010316, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5953878513137957, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2620499195763038, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.48937240022909234, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.22128776529156546, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4999323991212311, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.23887527917609022, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5924993690004501, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2745762486209681, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5730023382770898, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3070898761263382, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5791648909423264, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.31600229153053044, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5374439094267343, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.27733310601709266, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4703077247331959, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.7203673717155472, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5040673596100225, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6469962279041276, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.22816849039973935, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.49849908693271183, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5106109398471469, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6635467152827634, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.325909498033977, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5501364764829885, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3009687072297843, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5341810386314462, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3140382293917749, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5631437828635808, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.7498810286408993, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7726337964681356, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.22816849039973935, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.49849908693271183, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4167743222652789, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4806216298219478, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6311658995293531, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.7186969683828063, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.8143071707828088, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.395494817172382, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.48210216762305635, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5029543425204815, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.22543108408457457, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5721164465661742, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5138104164912963, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.42099734580654347, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4773779562574767, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.22119423000583918, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5855963149167847, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.2772639581765057, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5194247346787363, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6585810035136251, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.16111212240349498, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5162765195160328, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.23109536367862135, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5693079918450474, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5052082359105701, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.17991078645928837, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5159811845433955, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.1794560313432444, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5236301264596329, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.45963072970927465, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.45963072970927465, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.4506310431662278, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.35504200505176187, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5693079918450474, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.4492950042617377, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.4608738248525917, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4252891537802403, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6269243845872724, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6486932415130529, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.788686710424071, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.49831162551286645, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6906494695103921, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6486932415130529, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7887116805325072, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6486932415130529, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.788686710424071, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5694299147290928, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7161974280320248, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.49831162551286645, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6906247423308508, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2819665911730608, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5919372748765395, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6486932415130529, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.788686710424071, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2624310277292268, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6025429011085721, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6486932415130529, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7887116805325072, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.7490853969372642, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.7869453805471358, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.4970449067437269, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5549084692917513, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.47136688868251947, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.532838700147956, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.43937095446369234, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.46181721677136944, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3246935344198473, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4118595729651108, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.4462203715133425, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5604772871598175, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.7490853969372642, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.8062091543413888, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.5054091115759235, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.6683122485502007, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.7378351342269067, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.7822638455166255, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.49023502313124495, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7638414724136195, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4424906782646928, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.705507971295129, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.41452787844405115, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6973605663974715, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.41032302768839235, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6634154486532953, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.44711013370113256, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7319347493436125, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4275810014748856, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6481070648129139, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.46409619603227925, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7474126325188408, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0422060018445322, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.27278456488226854, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.42803425515420807, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7260183442795153, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.23962966980870534, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5138361143222901, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.3597862823053843, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7212767938301806, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.07860105393900486, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.09678377693633947, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.11601141307045003, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.21671187566850864, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2390076354901812, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.33570154125476054, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1371661844308428, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.23455679137513727, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.21326369102393236, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.24781828193168487, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.12394460940540938, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.26662620996190534, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.14891504773093184, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2618919111168516, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1406879778177777, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.24227488458492952, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.04114212836378985, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.1070604518443882, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.26411327741267115, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2898946819245943, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.35015224715252113, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5701648579139658, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3349252032650068, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5908087431574293, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3258812297722265, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5753985304712377, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2883113322808919, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5835478395499368, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2577716972449781, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5171901208397282, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.04631732527976412, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.21558480215297515, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.23287896954139942, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5168980964497457, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.24643585808835486, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5459613462641708, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.004597701149425286, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.23325505861671614, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.503948422566616, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3449058130015412, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5365619830343804, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.29688845677442144, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5494319015457763, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3257602417321556, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5666596539835803, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.22511140285349446, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.553839023223762, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3274016883618531, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5570399656004248, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2211880505010663, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4573855767208229, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.399477857457097, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5617218895807364, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.10125638619893, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3191349966700777, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5329750656706205, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.09175663647957763, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.32499940569388225, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.162496560019558, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4637542439867255, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.25376032254696296, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5334329403985332, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.20039141607873007, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.36123312088832493, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.0564437248458207, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.15026037463138217, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4303467795130825, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.207314191412716, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4360555836773355, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.08070632004040007, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.35911678207067443, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4398690431123469, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6046405925677363, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.30594422683254774, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5439400651386468, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.24968557018529272, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5511430757077329, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.036093834539820895, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.1939545119098376, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.48320036215224016, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.32707695373369694, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5166643606783462, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3069937936246452, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5024648105961349, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3575909322256676, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5409483829147745, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2228729825024992, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4466759653076362, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3235473265529593, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5441122251341168, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2781578586520005, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3796663901127053, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3288143137394372, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5596092732231619, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.4122335241726334, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.6323888082640657, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.12858902882463447, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3148709023566568, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.17140863043800483, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.39681418211766745, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3927237741677927, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7451438087039315, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5570357635362685, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8116469942298856, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3508597296865219, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6833592152043626, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.21259470439331316, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5863866793721222, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.26513488970168847, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6029932145447834, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4432782054917686, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7181569025811343, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5112867162620864, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7497537018148864, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.32547291366749675, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6738469931497133, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4667782254569818, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7754094279644977, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.10177931989613292, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3335479382455017, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3459167762620119, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7155724078484401, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3826576187198625, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6071841372061269, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3447241447679157, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5531085140985558, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3178743908080705, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5513949312034092, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1883251048230039, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.45303225382772006, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3860973950960897, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6271680934322363, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.24967756802190116, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.559682285505658, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.38048895490051765, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6349497388372479, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4324371049196428, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6305851137521162, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.01937817581496422, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1805414152287055, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4894585255537274, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.31128635710849173, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6304411194127884, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.13308561809919006, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5312476702183977, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2579124920342433, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545120254366757, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.1595487507830045, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.45111566089364774, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.27710310401156996, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5216248191624099, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.303998162324503, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5931856951819833, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.39631066492420963, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6670602127484115, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2625805454451497, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5886806140244891, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.06797010899515823, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.27154181329396565, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.19568007857684672, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5228407307909605, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2879556779114461, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4554184077174173, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.09578921953028982, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.40472887922389433, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.22965669823067916, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.46247819390492995, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.08920952468433085, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.32241875701400735, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20475739007221866, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3934874462686164, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1719646079342664, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.308102700736633, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.11684343186914438, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.40293579310759836, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.28255079601170635, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4828223682720399, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2023651649328507, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1689706894436884, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.32609144958957464, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.38729516708438194, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6571482446395243, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.32995628251235876, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5982616321404195, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4381454708258676, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6163746220282033, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3614922712385951, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5698489012763526, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4730742700342366, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6716818492415609, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.41278042192714015, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6060858750149657, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.568128598260769, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7443891530963911, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3915774240356112, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6438987110697019, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.17827215716412181, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.31830622503514655, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3173241691310352, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5910464434099775, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3665528144045068, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5966563047685359, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2587297749908005, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5132023401682766, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2914897522509679, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5714769597200869, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.11150937707712508, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4170915413269471, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.20057225201358211, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.47576367606491715, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.1078517242048809, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3886262536746606, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2674907183014193, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5237435675958946, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.21024692077841572, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5403945194972577, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.026104354115338492, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.18449230121441001, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.40058346018376356, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.29894673648596126, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6322815922673689, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.18330256089173447, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5328391139635578, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3478966138007723, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.666271052510266, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.21697301406549346, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.4876777357531764, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3151295371556651, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6242597159052685, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2672991324984635, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6474323586139361, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5047460217572859, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6981561913726569, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2622053872435742, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6045462235214704, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.09841955325773799, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.3327723902928814, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2615651536220919, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5337436257798058, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.7416488036617811, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.9156308978596118, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2924496936199556, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6040522123603048, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.35611859459201994, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6591871481895288, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3092067388523221, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6104451101668408, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5295672450222603, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.715157413474444, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.33262718496001725, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6707552233208028, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6176355987862611, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7898758502538201, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4475966481812816, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6848118022736988, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.33625310520541907, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4899679589833683, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6996203149315261, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.23119301671666287, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.354782287640505, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.20419333453691463, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.39470297247688435, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.14599223028360678, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.28718685195806315, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.0680779227699037, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2995728063785384, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.41824297302824903, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.1948950171081147, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3579044902117876, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2409168844747761, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.34625648713313856, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.25170309939120067, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3899115496810816, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2646814749718951, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.38312949443875044, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.00882086689569064, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.1753792879326568, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3388058023792196, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.348007986647201, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6148736550683231, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.31222258402876674, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5549937870516303, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.2706573913259733, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5619563043714905, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.21331098311931576, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.47660259733052845, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.22816849039973935, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5295534280606148, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.2320305803246989, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5719371199531044, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5196627001050362, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.18627639656696823, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.44356601067804086, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.27048170758554296, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5452157067944216, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.3324437360240581, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.3472164938104332, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.23683075175361493, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2631328190836655, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.16455392433653304, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.13673885815184886, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.052821402483564636, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.10721126066665879, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.20388486867467934, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.14973178994918127, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.1337840368142243, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2143764616947716, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.014262006975939606, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.11557977235371186, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.461597801606675, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6280777654467244, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4224298950114519, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.60823085524287, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916177035633811, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068458202737596, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.30451258861070496, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4983778740634126, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.36033217429111203, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5707860320039717, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916177035633811, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068458202737596, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.45886678012586496, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6234514801756209, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916177035633811, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068458202737596, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.15465401249808575, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.42501995363729067, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.30004556274899286, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.561482333900969, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.36769040719718776, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4064141882459388, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.34722897369611144, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4103553163121394, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4141871474340027, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.36586001924521905, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.44328515185259987, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.2870169689559038, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.37150797394258683, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.006130367300589213, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.29038853710161877, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3286711939680359, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5944310794747374, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.13547277341758465, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4830189619506113, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.22970092088416938, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5537467826528029, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.13547277341758465, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4685134392551311, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3096036988813059, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5894510883198948, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.14957644445778928, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4378856092523028, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.22481074167380632, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.49840634234674935, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.26751157705127454, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5494472552960327, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.0066610108556241394, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.22536453058221606, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4365811373563711, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3874773378787974, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.31747697264511426, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.40797778663955364, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3818556455365969, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3785761836985817, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.23841754841770157, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.34481325534410395, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3818556455365969, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.24796413807329218, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3530186228211094, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.09821019441701705, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.274825378700542, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2807763229912453, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38395145132718883, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.07218766113019179, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.24018250025773352, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.2096419313570871, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.46935933364934335, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.773055573548356, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43368945552925614, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.640995178057518, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3681829215408091, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6273930299436508, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4389321784429702, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.646847036932526, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43368945552925614, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6793717376740783, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3595137194874952, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5619162673780028, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43368945552925614, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6793717376740783, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2927181624015055, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43383878173729606, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.721993849834018, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.007378883018336222, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.16440791304482247, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3160213610127146, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5165614670038283, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.30758744700466467, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4684197705189288, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.37169237058440824, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5383668331525606, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.18655267161524258, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3640275543948514, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.20050320605789015, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4046291070099031, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.14579837024705408, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3459916112351503, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2761603007895394, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.49207696507318593, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.20630721151497294, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.31518520840312125, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5072004558983904, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.004516711833785005, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.16951909200513385, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3380125247643079, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2961516536011624, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.49803924348035766, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3459667618766101, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6170810606402402, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.23623790626704147, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5441149448679464, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.35936994872479583, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6492026440953677, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4161791450287817, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7054426787013603, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3254455687469726, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.577852219465442, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.39688965270008814, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.646373332434726, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4272870063962341, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6682855797405902, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.008777992747819234, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.12288887055424895, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.4148619356639114, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.46832763312452297, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.8176110134774669, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9436043261706615, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9880191679951993, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0067104198717751464, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9025232868361638, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9169897590736298, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9709835434146469, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9951728990866464, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9154051169199643, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9757471794927451, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8935248372106969, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9404428602061264, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4593546097889176, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.608602146246901, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.23386786214190372, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.46662929903381617, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.16341242314728613, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.40072549318878165, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.13952118378975725, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4030284875466178, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.2830789070123405, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.41858897147271634, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.1958598294695433, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.43120286814245795, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4844328956731527, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6798474086331312, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.24305650182597577, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.46912278832283355, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.0064546295242688114, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.36092853787943247, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3397180516736864, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6494995648532881, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.23811989337799513, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4812080785035883, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.23168799483443045, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5252672120228886, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2889285495431631, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4536746865348185, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.42067720018268145, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6309653612961436, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.24821926635843994, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.40551062972908847, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.16310121952537132, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5625465668278802, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4126152034907945, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6344543163574141, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.0018234865061998542, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3722685688714949, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.255918614113723, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.44318862516624546, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.35285733014385007, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5945202859296662, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.34940338846112967, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2506708132952771, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5592183664602846, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.22166358657237664, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.45834104234305023, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.22478920073209205, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.4500155517039222, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.35285733014385007, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5945202859296662, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2297888516430291, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.44601363908967323, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.22478920073209205, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.46169108941910525, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.005304235332926387, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.3330162771465545, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2809009542151822, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5819888906713027, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4573889291137309, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6974989991762017, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5409314026600619, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.8023475129738281, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.11372027710077005, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.43980817368282343, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2460137257692754, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5397894338370378, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2745762486209681, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5421002898382512, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4717991357336539, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7489646628366208, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5170969057682974, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7999241778608444, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.006260653272080335, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.17193972960972626, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.50925856841751, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3487575221722675, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.15824382329465247, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2847034639706718, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.1307655887510901, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2688265704976335, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.010715460821011002, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.27185330211646, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2690830377349408, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.28534353976384025, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.19271102520768202, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3383777404070013, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.010275038134729863, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.29676390087816046, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6031612036218008, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.736286703381354, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.39432344823662835, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5943452555220106, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.34437686643287496, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6090402109312658, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.39205580893266934, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6772940233934857, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.43103580001357805, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6690742226623104, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.478854281434795, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6734455797843703, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.2981426768485538, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5882799317365235, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.22739562220830442, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.49805301036023364, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4263005628892719, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6688425476017256, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4579102348988084, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6417119032346416, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6252078221435556, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7406162627381982, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.11254397891886614, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.20623288988983426, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1100081929352474, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.18967061672400035, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.10772332006118607, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.23609036869909603, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1498435848533153, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.21051700087939107, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.10772332006118607, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.17652714369664665, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1864036495127383, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.28188465375440136, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1445047538382198, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2737322242154943, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.10686832559533661, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.20609270360853799, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1957899789117337, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.32253417440653254, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.137248043368656, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.22329074990170197, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.14969363386531168, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.27820986095394096, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.24493390281390082, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.48113625107113883, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.19476681308252697, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.42030407727741037, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.33600502687041833, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5162346121569341, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.10336049249219333, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3088863284587533, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.27190910124573536, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5173567851798608, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.09851325694216304, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3616605984753398, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.34854547753540127, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5565027260893921, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.11215313654295675, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.36001328873605765, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.19593487880196195, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4136765523891332, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.08839512340686698, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.21177781620127928, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4460741740050364, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3459789902390003, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5620330456296532, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.37825713491091884, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5584414289480568, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3695375029926146, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.556875129479421, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.47923168144435746, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6534660189132082, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.429512074830509, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6066779955199886, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4257605183794877, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6026940597371309, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.40518022025671885, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5707666164180741, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.27460305577138294, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5251472574042976, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4257605183794877, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6072620760408021, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.24287220388451114, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4829182994799567, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.27309322054464596, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5162255850430824, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.1845747513433909, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.44379971518505973, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.18212463619188357, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.469592540371137, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.20734616999079872, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5036833880605232, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2817686971402115, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5170853673805775, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.09596136927307748, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.40849147213099996, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.216062485604554, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4780977009860418, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23502778906204924, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5533644883224328, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23288432092807593, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4777685664632553, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3288562544630599, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5875530351959068, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.024449792954766115, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.25748397762867226, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.49702079004924316, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2517176762753373, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.45137344500317134, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3128384316903283, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.48016279207050283, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3508847643803501, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.529198044527105, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2674628639054191, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4784292149775752, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.22972631482860506, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.436102988762466, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.1587543502252646, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4114443619817223, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.267457541157426, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4673846703066711, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21808070471467408, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3966492622645894, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.26116607863611285, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.09196922936475649, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.35950194744727476, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43200638115383627, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6892273787708799, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.341195158470265, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6539473951166187, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.47372467075851415, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.734800469477975, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5582838437615822, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7625459507115938, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.45026965676007474, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6668256174353906, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.310668922100995, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.58212864821275, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5585674160229753, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.791505922278621, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5592126620745396, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6961094171330644, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.29504037076486817, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6837809127705262, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.44680913024590146, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.648473971864945, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8247818102038394, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.40475700826319555, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4349871720911447, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.47497024539412314, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3805666011451541, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4710260495003035, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4677317890018283, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.354353831625583, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.10235881838919027, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.42794399630326124, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.09649622940465846, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.29275810079464665, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5244380103905697, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6914581279144536, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4472834999328078, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6457130269652316, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.403469748891042, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5836273992135024, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4521209970489246, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6307076431103672, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.310186302993101, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5434540129901786, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5201565256464291, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6663170490872967, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5950978682255068, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7209575532500453, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.32679491753274487, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5592874366443522, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.36634140441362645, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5925773491774018, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2672991324984635, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5378982230702222, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4880149105083363, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7177464929662396, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.504154287515855, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6074467585243234, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.18771816026273827, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.37594160796244835, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2828480467326008, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4330386622117487, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.26314173809974317, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.41943156806161835, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.17207258849758605, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3052503498954155, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1377448219106278, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.35651447515721807, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3653634812607, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5239315135469935, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.30019266689543556, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.520168227007293, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.022925118914031796, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.10793991565723801, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3418311350990793, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.33684416564135483, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6173496967095872, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.21054588509072256, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5020237474009813, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.21083122707088572, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.49504056885829906, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.22593581165006588, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.47811810874873667, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.377949467106015, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6288808546806746, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3335574881036169, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6172751686457948, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.47194552522795125, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6449793729895639, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.027321912102901323, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.24481610134231654, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.10436839690765871, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4723697955467262, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.10480708799994727, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4347737895846244, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3712375815038101, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6205370332736169, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.11283678603002038, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5027560731174364, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.29432909534200313, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5637465580755235, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.20665163138245418, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5424961081814776, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3720123244240524, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.28341626687166926, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5869314876429665, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.34496242859007625, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.19771661626342427, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5115994004182517, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.38973727109769035, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4457322258249424, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.008086388726125911, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.17781916046116683, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5216356191979474, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.30894994002746395, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6495798576994254, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.35806497640912766, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5522671396375264, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3391824705480895, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5823536571792293, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3369582032493922, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.604856884215657, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.23399485663908418, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5631067041333725, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.15084681924900642, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5314568462829651, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.28809973780460224, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5952046679740143, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.01955520210672138, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.21292283406490206, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3315037521841549, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.588148042382191, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.01890321292509088, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.1721831215207535, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5841092375226741, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.25325897139464854, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.511461689033225, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3730973285213212, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5926422939507472, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.21478093144117116, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5322186790358018, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.27204984763557305, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5456825634559386, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.1793410088328766, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5126133936832279, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.24328420398524073, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4502822427440237, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.41505282193631027, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5557512735595823, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.20911971038029412, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5234684603685517, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3369338116697911, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.593905704810687, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.07367272607925157, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.37211692202201907, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2571225648472028, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.48544337623818506, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.9199349282509897, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.9199349282509897, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.8827916928185874, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.9278293769424701, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.5919743410620021, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.8142101616656354, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.24942094354139677, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.6642718379939968, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.7768492311706325, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.7660237942267061, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.8523393041110139, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.1377448219106278, - "sentence_nr": 9 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.35651447515721807, - "sentence_nr": 9 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.3653634812607, - "sentence_nr": 9 + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.5239315135469935, - "sentence_nr": 9 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.30019266689543556, - "sentence_nr": 9 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.520168227007293, - "sentence_nr": 9 + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.022925118914031796, - "sentence_nr": 9 + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.10793991565723801, - "sentence_nr": 9 + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.3418311350990793, - "sentence_nr": 9 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.30359559780163287, - "sentence_nr": 9 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.4685200908441382, - "sentence_nr": 9 + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 6 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", @@ -47638,23 +70307,23 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -47662,15 +70331,15 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -47678,15 +70347,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", @@ -47694,39 +70363,31 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", @@ -47734,47 +70395,47 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", @@ -47782,7 +70443,7 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", @@ -47790,7 +70451,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -47798,7 +70459,7 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -47806,23 +70467,15 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", @@ -47830,15 +70483,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -47846,7 +70499,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -47854,31 +70507,31 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", @@ -47886,15 +70539,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -47902,23 +70555,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", @@ -47926,39 +70571,39 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -47966,15 +70611,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", @@ -47982,39 +70627,31 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", @@ -48022,7 +70659,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -48030,7 +70667,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -48038,39 +70675,39 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", @@ -48078,7 +70715,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -48086,7 +70723,7 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -48094,79 +70731,71 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", @@ -48174,7 +70803,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -48182,15 +70811,15 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", @@ -48198,63 +70827,55 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", @@ -48262,7 +70883,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", @@ -48270,15 +70891,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -48286,23 +70907,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", @@ -48310,23 +70923,23 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -48334,7 +70947,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", @@ -48342,7 +70955,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -48350,15 +70963,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", @@ -48366,15 +70979,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -48382,79 +70995,71 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", @@ -48462,15 +71067,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -48478,47 +71083,39 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -48526,7 +71123,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", @@ -48534,7 +71131,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -48542,7 +71139,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", @@ -48550,7 +71147,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", @@ -48558,23 +71155,23 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", @@ -48582,1935 +71179,1767 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "model": "amazon/nova-micro-v1", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", @@ -50518,39 +72947,39 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -50558,15 +72987,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", @@ -50574,7 +73003,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -50582,7 +73011,7 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -50590,7 +73019,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", @@ -50598,55 +73027,47 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -50654,15 +73075,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", @@ -50670,7 +73091,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -50678,7 +73099,7 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -50686,23 +73107,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", @@ -50710,15 +73123,15 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -50726,23 +73139,23 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -50750,15 +73163,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", @@ -50766,7 +73179,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -50774,7 +73187,7 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -50782,7 +73195,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", @@ -50790,31 +73203,23 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -50822,23 +73227,23 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -50846,15 +73251,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", @@ -50862,7 +73267,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -50870,31 +73275,23 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", @@ -50902,7 +73299,7 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -50910,7 +73307,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -50918,23 +73315,23 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -50942,15 +73339,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", @@ -50958,7 +73355,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -50966,7 +73363,7 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -50974,7 +73371,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", @@ -50982,23 +73379,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -51006,47 +73395,47 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", @@ -51054,39 +73443,31 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", @@ -51094,15 +73475,15 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -51110,7 +73491,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -51118,15 +73499,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -51134,15 +73515,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", @@ -51150,47 +73531,39 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -51198,7 +73571,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -51206,23 +73579,23 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -51230,15 +73603,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", @@ -51246,15 +73619,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -51262,31 +73635,23 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -51294,7 +73659,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -51302,23 +73667,23 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -51326,15 +73691,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", @@ -51342,7 +73707,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -51350,7 +73715,7 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -51358,7 +73723,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", @@ -51366,23 +73731,15 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -51390,7 +73747,7 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -51398,23 +73755,23 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -51422,15 +73779,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", @@ -51438,15 +73795,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -51454,5591 +73811,5031 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 9 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 1 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 1 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "anthropic/claude-3.5-sonnet", + "model": "google/gemma-3-27b-it", "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 3 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", @@ -57046,7 +78843,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -57054,7 +78851,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -57062,7 +78859,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -57070,15 +78867,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -57086,7 +78883,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", @@ -57094,7 +78891,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", @@ -57102,39 +78899,31 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "mmlu", - "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", @@ -57142,7 +78931,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -57150,7 +78939,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -57158,7 +78947,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -57166,7 +78955,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", @@ -57174,7 +78963,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -57182,15 +78971,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", @@ -57198,39 +78987,31 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "mmlu", - "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", @@ -57238,23 +79019,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -57262,7 +79043,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", @@ -57270,7 +79051,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -57278,15 +79059,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", @@ -57294,7 +79075,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -57302,7 +79083,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -57310,7 +79091,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", @@ -57318,15 +79099,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", @@ -57334,7 +79107,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -57342,7 +79115,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -57350,7 +79123,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -57358,7 +79131,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", @@ -57366,7 +79139,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -57374,7 +79147,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", @@ -57382,7 +79155,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", @@ -57390,15 +79163,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -57406,23 +79179,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "mmlu", - "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", @@ -57430,23 +79195,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -57454,23 +79219,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", @@ -57478,7 +79243,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", @@ -57486,15 +79251,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -57502,7 +79267,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", @@ -57510,15 +79275,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", @@ -57526,7 +79283,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -57534,7 +79291,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -57542,7 +79299,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -57550,7 +79307,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", @@ -57558,7 +79315,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -57566,7 +79323,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", @@ -57574,7 +79331,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", @@ -57582,7 +79339,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -57590,7 +79347,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -57598,1751 +79355,1775 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 4 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", @@ -59350,47 +81131,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", @@ -59398,7 +81179,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", @@ -59406,7 +81187,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -59414,47 +81195,39 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -59462,31 +81235,31 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", @@ -59494,7 +81267,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", @@ -59502,7 +81275,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -59510,31 +81283,23 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", @@ -59542,7 +81307,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -59550,15 +81315,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -59566,7 +81331,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", @@ -59574,15 +81339,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", @@ -59590,7 +81355,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", @@ -59598,7 +81363,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -59606,7 +81371,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -59614,7 +81379,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", @@ -59622,15 +81387,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", @@ -59638,7 +81395,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -59646,15 +81403,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -59662,23 +81419,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", @@ -59686,7 +81443,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", @@ -59694,7 +81451,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -59702,7 +81459,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -59710,47 +81467,39 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -59758,15 +81507,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -59774,7 +81523,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", @@ -59782,7 +81531,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", @@ -59790,15 +81539,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -59806,7 +81555,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", @@ -59814,15 +81563,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", @@ -59830,7 +81571,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -59838,7 +81579,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -59846,7 +81587,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -59854,7 +81595,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", @@ -59862,23 +81603,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", @@ -59886,39 +81627,31 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", @@ -59926,31 +81659,31 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", @@ -59958,7 +81691,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -59966,15 +81699,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", @@ -59982,23 +81715,23 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", @@ -60006,15 +81739,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", @@ -60022,7 +81747,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -60030,7 +81755,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -60038,7 +81763,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -60046,23 +81771,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", @@ -60070,7 +81795,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", @@ -60078,7 +81803,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -60086,7 +81811,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -60094,1559 +81819,1599 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", @@ -61654,7 +83419,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -61662,7 +83427,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -61670,7 +83435,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -61678,7 +83443,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", @@ -61686,15 +83451,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", @@ -61702,7 +83467,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", @@ -61710,7 +83475,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -61718,7 +83483,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -61726,7 +83491,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", @@ -61734,15 +83499,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", @@ -61750,31 +83507,31 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", @@ -61782,23 +83539,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", @@ -61806,7 +83563,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -61814,31 +83571,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "anthropic/claude-3.5-sonnet", + "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", @@ -61846,7 +83595,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -61854,31 +83603,31 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -61886,15 +83635,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", @@ -61902,39 +83651,31 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "mmlu", - "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "anthropic/claude-3.5-sonnet", + "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", @@ -61942,31 +83683,31 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", @@ -61974,23 +83715,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", @@ -61998,7 +83739,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -62006,31 +83747,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "mmlu", - "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "anthropic/claude-3.5-sonnet", + "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", @@ -62038,7 +83771,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -62046,7 +83779,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -62054,7 +83787,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -62062,7 +83795,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", @@ -62070,7 +83803,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -62078,7 +83811,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", @@ -62086,7 +83819,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", @@ -62094,7 +83827,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -62102,7 +83835,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -62110,7 +83843,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", @@ -62118,15 +83851,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", @@ -62134,7 +83859,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -62142,7 +83867,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -62150,7 +83875,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -62158,7 +83883,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", @@ -62166,23 +83891,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", @@ -62190,7 +83915,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -62198,7 +83923,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -62206,7 +83931,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", @@ -62214,71 +83939,63 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", @@ -62286,7 +84003,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -62294,7 +84011,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -62302,23 +84019,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", @@ -62326,7 +84035,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -62334,47 +84043,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", @@ -62382,43 +84091,35 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "mmlu", - "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "anthropic/claude-3.5-sonnet", + "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62426,7 +84127,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62434,7 +84135,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62442,7 +84143,7 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62450,7 +84151,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62458,7 +84159,7 @@ }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62466,15 +84167,15 @@ }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -62482,15 +84183,15 @@ }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62498,15 +84199,95 @@ }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62514,7 +84295,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62522,7 +84303,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62530,7 +84311,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62538,7 +84319,7 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62546,7 +84327,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62554,7 +84335,7 @@ }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62562,7 +84343,7 @@ }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62570,7 +84351,7 @@ }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -62578,15 +84359,15 @@ }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62594,15 +84375,7 @@ }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62610,7 +84383,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62618,7 +84391,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62626,7 +84399,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62634,7 +84407,7 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62642,7 +84415,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62650,7 +84423,7 @@ }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62658,7 +84431,7 @@ }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -62666,7 +84439,7 @@ }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -62674,15 +84447,15 @@ }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -62690,15 +84463,7 @@ }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62706,7 +84471,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62714,7 +84479,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62722,7 +84487,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62730,7 +84495,7 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62738,7 +84503,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -62746,447 +84511,495 @@ }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63194,7 +85007,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63202,7 +85015,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63210,7 +85023,7 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63218,7 +85031,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63226,7 +85039,7 @@ }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63234,7 +85047,7 @@ }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -63242,7 +85055,7 @@ }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -63250,31 +85063,23 @@ }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63282,7 +85087,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63290,7 +85095,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63298,7 +85103,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63306,7 +85111,7 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63314,7 +85119,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63322,23 +85127,23 @@ }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -63346,39 +85151,31 @@ }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63386,7 +85183,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63394,7 +85191,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63402,7 +85199,7 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63410,7 +85207,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63418,7 +85215,7 @@ }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63426,7 +85223,7 @@ }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63434,7 +85231,7 @@ }, { "model": "qwen/qwq-32b", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -63442,7 +85239,7 @@ }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63450,7 +85247,7 @@ }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63458,15 +85255,7 @@ }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63474,7 +85263,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63482,7 +85271,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63490,7 +85279,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63498,7 +85287,7 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63506,7 +85295,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63514,7 +85303,7 @@ }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63522,7 +85311,7 @@ }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63530,7 +85319,7 @@ }, { "model": "qwen/qwq-32b", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -63538,7 +85327,7 @@ }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63546,23 +85335,15 @@ }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "es", + "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63570,7 +85351,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63578,7 +85359,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63586,7 +85367,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63594,7 +85375,7 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63602,7 +85383,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63610,23 +85391,23 @@ }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -63634,7 +85415,7 @@ }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -63642,7 +85423,7 @@ }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -63650,23 +85431,15 @@ }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "ar", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 9 - }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63674,7 +85447,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63682,7 +85455,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63690,7 +85463,7 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63698,7 +85471,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63706,7 +85479,7 @@ }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63714,15 +85487,15 @@ }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -63730,15 +85503,15 @@ }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -63746,15 +85519,7 @@ }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "fr", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63762,7 +85527,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63770,7 +85535,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63778,7 +85543,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63786,7 +85551,7 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63794,7 +85559,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63802,15 +85567,15 @@ }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -63818,7 +85583,7 @@ }, { "model": "qwen/qwq-32b", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -63826,15 +85591,15 @@ }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -63842,15 +85607,7 @@ }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "bn", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63858,7 +85615,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63866,7 +85623,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63874,7 +85631,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63882,7 +85639,7 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63890,7 +85647,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63898,7 +85655,7 @@ }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63906,7 +85663,7 @@ }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63914,7 +85671,7 @@ }, { "model": "qwen/qwq-32b", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -63922,7 +85679,7 @@ }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -63930,23 +85687,15 @@ }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 9 - }, - { - "model": "anthropic/claude-3.5-sonnet", - "bcp_47": "pt", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1,