{"Models": "gemini-2.0-flash", "Model Size(B)": "-", "Frames": 512, "Type": "Proprietary", "URL": "https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/", "LP_Open": 43.6, "LP_MCQ": 69.0, "LR_Open": 27.9, "LR_MCQ": 58.5, "HP_Open": 27.3, "HP_MCQ": 42.1, "HR_Open": 30.7, "HR_MCQ": 53.8, "Overall_Open": 37.6, "Overall_MCQ": 62.1} {"Models": "gemini-2.5-pro", "Model Size(B)": "-", "Frames": 512, "Type": "Proprietary", "URL": "https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/", "LP_Open": 47.2, "LP_MCQ": 73.3, "LR_Open": 35.4, "LR_MCQ": 69.4, "HP_Open": 41.3, "HP_MCQ": 46.3, "HR_Open": 42.0, "HR_MCQ": 67.4, "Overall_Open": 44.2, "Overall_MCQ": 69.1} {"Models": "GPT-4o", "Model Size(B)": "-", "Frames": 256, "Type": "Proprietary", "URL": "https://openai.com/index/hello-gpt-4o/", "LP_Open": 39.4, "LP_MCQ": 64.8, "LR_Open": 23.1, "LR_MCQ": 62.6, "HP_Open": 26.4, "HP_MCQ": 42.1, "HR_Open": 29.2, "HR_MCQ": 50.4, "Overall_Open": 34.2, "Overall_MCQ": 59.5} {"Models": "Gemini-1.5-Flash", "Model Size(B)": "-", "Frames": 512, "Type": "Proprietary", "URL": "https://storage.googleapis.com/deepmind-media/gemini/gemini_v1_5_report.pdf", "LP_Open": 41.5, "LP_MCQ": 65.5, "LR_Open": 25.9, "LR_MCQ": 63.9, "HP_Open": 27.3, "HP_MCQ": 36.4, "HR_Open": 25.8, "HR_MCQ": 55.7, "Overall_Open": 35.1, "Overall_MCQ": 60.6} {"Models": "Gemini-2.5-Flash", "Model Size(B)": "-", "Frames": 256, "Type": "Proprietary", "URL": "https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/", "LP_Open": 42.4, "LP_MCQ": 64.1, "LR_Open": 30.6, "LR_MCQ": 65.3, "HP_Open": 25.6, "HP_MCQ": 33.9, "HR_Open": 26.9, "HR_MCQ": 54.2, "Overall_Open": 36.3, "Overall_MCQ": 59.3} {"Models": "Gemini-1.5-Pro", "Model Size(B)": "-", "Frames": 512, "Type": "Proprietary", "URL": "https://storage.googleapis.com/deepmind-media/gemini/gemini_v1_5_report.pdf", "LP_Open": 43.7, "LP_MCQ": 66.7, "LR_Open": 32.7, "LR_MCQ": 69.4, "HP_Open": 35.5, "HP_MCQ": 40.5, "HR_Open": 31.8, "HR_MCQ": 61.0, "Overall_Open": 39.3, "Overall_MCQ": 63.4} {"Models": "GPT-4.1-mini", "Model Size(B)": "-", "Frames": 256, "Type": "Proprietary", "URL": "https://openai.com/index/gpt-4-1/", "LP_Open": 46.0, "LP_MCQ": 68.6, "LR_Open": 32.0, "LR_MCQ": 68.7, "HP_Open": 27.3, "HP_MCQ": 38.8, "HR_Open": 32.6, "HR_MCQ": 57.6, "Overall_Open": 39.9, "Overall_MCQ": 63.5} {"Models": "GPT-4.1", "Model Size(B)": "-", "Frames": 256, "Type": "Proprietary", "URL": "https://openai.com/index/gpt-4-1/", "LP_Open": 47.2, "LP_MCQ": 68.8, "LR_Open": 29.9, "LR_MCQ": 68.7, "HP_Open": 28.1, "HP_MCQ": 38.0, "HR_Open": 34.5, "HR_MCQ": 59.5, "Overall_Open": 40.8, "Overall_MCQ": 64.0} {"Models": "Video-LLaVA", "Model Size(B)": "8", "Frames": 8, "Type": "Open-source", "URL": "https://github.com/PKU-YuanGroup/Video-LLaVA", "LP_Open": 13.2, "LP_MCQ": 27.5, "LR_Open": 6.1, "LR_MCQ": 33.3, "HP_Open": 14.0, "HP_MCQ": 24.8, "HR_Open": 6.1, "HR_MCQ": 26.5, "Overall_Open": 11.0, "Overall_MCQ": 27.7} {"Models": "Mantis-Idefics2", "Model Size(B)": "8", "Frames": 24, "Type": "Open-source", "URL": "https://arxiv.org/abs/2405.01483", "LP_Open": 17.8, "LP_MCQ": 33.2, "LR_Open": 9.5, "LR_MCQ": 29.9, "HP_Open": 16.5, "HP_MCQ": 16.5, "HR_Open": 8.3, "HR_MCQ": 29.9, "Overall_Open": 14.8, "Overall_MCQ": 30.6} {"Models": "LongVA", "Model Size(B)": "7", "Frames": 64, "Type": "Open-source", "URL": "https://arxiv.org/abs/2406.16852", "LP_Open": 20.5, "LP_MCQ": 43.3, "LR_Open": 6.8, "LR_MCQ": 33.3, "HP_Open": 19.0, "HP_MCQ": 24.0, "HR_Open": 9.5, "HR_MCQ": 31.8, "Overall_Open": 16.5, "Overall_MCQ": 38.0} {"Models": "Phi-4-Mini", "Model Size(B)": "5.6", "Frames": 128, "Type": "Open-source", "URL": "https://arxiv.org/abs/2503.01743", "LP_Open": 19.2, "LP_MCQ": 46.4, "LR_Open": 12.9, "LR_MCQ": 47.6, "HP_Open": 18.2, "HP_MCQ": 30.6, "HR_Open": 10.2, "HR_MCQ": 31.4, "Overall_Open": 16.5, "Overall_MCQ": 42.0} {"Models": "LongLLaVA", "Model Size(B)": "9", "Frames": 512, "Type": "Open-source", "URL": "https://huggingface.co/aws-prototyping/long-llava-qwen2-7b", "LP_Open": 21.7, "LP_MCQ": 41.2, "LR_Open": 15.0, "LR_MCQ": 34.0, "HP_Open": 14.0, "HP_MCQ": 29.8, "HR_Open": 10.2, "HR_MCQ": 29.2, "Overall_Open": 17.8, "Overall_MCQ": 36.9} {"Models": "Video-XL", "Model Size(B)": "7", "Frames": 512, "Type": "Open-source", "URL": "https://github.com/VectorSpaceLab/Video-XL", "LP_Open": 22.3, "LP_MCQ": 41.9, "LR_Open": 15.0, "LR_MCQ": 34.0, "HP_Open": 18.2, "HP_MCQ": 28.1, "HR_Open": 10.2, "HR_MCQ": 29.2, "Overall_Open": 18.6, "Overall_MCQ": 38.2} {"Models": "LongVU", "Model Size(B)": "7", "Frames": 512, "Type": "Open-source", "URL": "https://arxiv.org/abs/2410.17434", "LP_Open": 25.9, "LP_MCQ": 45.6, "LR_Open": 12.9, "LR_MCQ": 38.8, "HP_Open": 19.8, "HP_MCQ": 24.0, "HR_Open": 17.4, "HR_MCQ": 37.1, "Overall_Open": 22.1, "Overall_MCQ": 41.0} {"Models": "Vamba", "Model Size(B)": "10", "Frames": 512, "Type": "Open-source", "URL": "https://arxiv.org/abs/2503.11579", "LP_Open": 28.1, "LP_MCQ": 52.4, "LR_Open": 10.9, "LR_MCQ": 40.8, "HP_Open": 21.5, "HP_MCQ": 26.4, "HR_Open": 12.5, "HR_MCQ": 37.9, "Overall_Open": 22.3, "Overall_MCQ": 45.7} {"Models": "LLaVA-Video", "Model Size(B)": "7", "Frames": 64, "Type": "Open-source", "URL": "https://huggingface.co/lmms-lab/LLaVA-NeXT-Video-72B-Qwen2", "LP_Open": 28.5, "LP_MCQ": 53.5, "LR_Open": 13.6, "LR_MCQ": 47.6, "HP_Open": 20.7, "HP_MCQ": 28.9, "HR_Open": 19.3, "HR_MCQ": 40.2, "Overall_Open": 24.2, "Overall_MCQ": 47.8} {"Models": "InternVL2.5", "Model Size(B)": "8", "Frames": 64, "Type": "Open-source", "URL": "https://internvl.github.io/blog/2024-12-05-InternVL-2.5/", "LP_Open": 28.8, "LP_MCQ": 54.3, "LR_Open": 19.7, "LR_MCQ": 46.3, "HP_Open": 21.5, "HP_MCQ": 35.5, "HR_Open": 16.7, "HR_MCQ": 39.0, "Overall_Open": 24.6, "Overall_MCQ": 48.5} {"Models": "InternVL3", "Model Size(B)": "8", "Frames": 64, "Type": "Open-source", "URL": "https://arxiv.org/abs/2504.10479", "LP_Open": 30.3, "LP_MCQ": 54.6, "LR_Open": 17.0, "LR_MCQ": 49.0, "HP_Open": 24.0, "HP_MCQ": 34.7, "HR_Open": 13.3, "HR_MCQ": 36.7, "Overall_Open": 24.7, "Overall_MCQ": 48.4} {"Models": "Qwen2-VL", "Model Size(B)": "7", "Frames": 512, "Type": "Open-source", "URL": "https://github.com/QwenLM/Qwen2-VL", "LP_Open": 31.7, "LP_MCQ": 59.3, "LR_Open": 14.3, "LR_MCQ": 51.7, "HP_Open": 21.5, "HP_MCQ": 28.1, "HR_Open": 20.5, "HR_MCQ": 39.0, "Overall_Open": 26.5, "Overall_MCQ": 48.2} {"Models": "InternVideo2.5", "Model Size(B)": "8", "Frames": 512, "Type": "Open-source", "URL": "https://arxiv.org/abs/2501.12386", "LP_Open": 33.6, "LP_MCQ": 59.8, "LR_Open": 17.0, "LR_MCQ": 47.6, "HP_Open": 19.8, "HP_MCQ": 34.7, "HR_Open": 18.2, "HR_MCQ": 45.8, "Overall_Open": 27.2, "Overall_MCQ": 53.2} {"Models": "VideoChat-Flash", "Model Size(B)": "7", "Frames": 512, "Type": "Open-source", "URL": "https://github.com/OpenGVLab/VideoChat-Flash", "LP_Open": 33.3, "LP_MCQ": 57.7, "LR_Open": 16.3, "LR_MCQ": 43.5, "HP_Open": 21.5, "HP_MCQ": 33.9, "HR_Open": 17.4, "HR_MCQ": 44.7, "Overall_Open": 27.0, "Overall_MCQ": 51.2} {"Models": "Qwen2.5-VL", "Model Size(B)": "7", "Frames": 512, "Type": "Open-source", "URL": "https://arxiv.org/abs/2502.13923", "LP_Open": 33.9, "LP_MCQ": 51.7, "LR_Open": 15.6, "LR_MCQ": 48.3, "HP_Open": 24.8, "HP_MCQ": 31.4, "HR_Open": 17.8, "HR_MCQ": 39.8, "Overall_Open": 27.7, "Overall_MCQ": 46.9} {"Models": "MiMo-VL-SFT", "Model Size(B)": "7", "Frames": 512, "Type": "Open-source", "URL": "https://github.com/XiaomiMiMo/MiMo-VL/blob/main/MiMo-VL-Technical-Report.pdf", "LP_Open": 34.7, "LP_MCQ": 57.7, "LR_Open": 19.0, "LR_MCQ": 55.8, "HP_Open": 26.4, "HP_MCQ": 36.4, "HR_Open": 19.7, "HR_MCQ": 41.7, "Overall_Open": 29.1, "Overall_MCQ": 52.2} {"Models": "MiMo-VL-RL", "Model Size(B)": "7", "Frames": 512, "Type": "Open-source", "URL": "https://github.com/XiaomiMiMo/MiMo-VL/blob/main/MiMo-VL-Technical-Report.pdf", "LP_Open": 35.5, "LP_MCQ": 57.5, "LR_Open": 18.4, "LR_MCQ": 55.8, "HP_Open": 28.1, "HP_MCQ": 33.1, "HR_Open": 18.9, "HR_MCQ": 42.8, "Overall_Open": 29.5, "Overall_MCQ": 52.0} {"Models": "Video-XL-2", "Model Size(B)": "8", "Frames": 512, "Type": "Open-source", "URL": "https://github.com/VectorSpaceLab/Video-XL/tree/main/Video-XL-2", "LP_Open": 33.3, "LP_MCQ": 57.6, "LR_Open": 25.2, "LR_MCQ": 55.1, "HP_Open": 21.5, "HP_MCQ": 38.8, "HR_Open": 20.5, "HR_MCQ": 45.1, "Overall_Open": 28.6, "Overall_MCQ": 53.0}