import importlib

import pandas as pd
import streamlit as st

from mlip_arena import PKG_DIR
from mlip_arena.models import REGISTRY as MODELS
from mlip_arena.tasks import REGISTRY as TASKS

# Read the per-family benchmark data
DATA_DIR = PKG_DIR / "tasks" / "diatomics"

dfs = []
for model in MODELS:
    fpath = DATA_DIR / MODELS[model].get("family") / "homonuclear-diatomics.json"
    if fpath.exists():
        dfs.append(pd.read_json(fpath))

df = pd.concat(dfs, ignore_index=True)

# Create the summary table: one row of registry metadata per model
rows = []
for model in MODELS:
    model_df = df[df["method"] == model]
    metadata = MODELS.get(model, {})
    rows.append(
        {
            "Model": model,
            "Element Coverage": len(model_df["name"].unique()),
            "Prediction": metadata.get("prediction"),
            "NVT": "✅" if metadata.get("nvt", False) else "❌",
            "NPT": "✅" if metadata.get("npt", False) else "❌",
            "Training Set": metadata.get("datasets", []),
            "Code": metadata.get("github"),
            "Paper": metadata.get("doi"),
            "Checkpoint": metadata.get("checkpoint"),
            "First Release": metadata.get("date"),
            "License": metadata.get("license"),
        }
    )

table = pd.DataFrame(
    rows,
    columns=[
        "Model",
        "Element Coverage",
        "Prediction",
        "NVT",
        "NPT",
        "Training Set",
        "Code",
        "Paper",
        "Checkpoint",
        "First Release",
        "License",
    ],
)
table.set_index("Model", inplace=True)

s = table.style.background_gradient(
    cmap="PuRd", subset=["Element Coverage"], vmin=0, vmax=120
)

# st.warning(
#     "MLIP Arena is currently in **pre-alpha**. The results are not stable. Please interpret them with care.",
#     icon="⚠️",
# )

st.info(
    "Contributions are welcome. For more information, visit https://github.com/atomind-ai/mlip-arena.",
    icon="🤗",
)
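# NOTE: the registry schema assumed by the table-building loop above, inferred
# from the keys it accesses (the authoritative schema lives in
# mlip_arena.models.REGISTRY); field values here are placeholders:
#
#     "<model-name>": {
#         "family": ...,       # subdirectory containing homonuclear-diatomics.json
#         "prediction": ...,   # predicted quantities
#         "nvt": ...,          # bool: NVT MD supported
#         "npt": ...,          # bool: NPT MD supported
#         "datasets": [...],   # training sets
#         "github": ...,       # code URL
#         "doi": ...,          # paper URL
#         "checkpoint": ...,
#         "date": ...,         # first release
#         "license": ...,
#     }
#
# Each homonuclear-diatomics.json is likewise assumed to provide at least a
# "method" column (matching the registry key) and a "name" column (the diatomic
# species), which the element-coverage count relies on.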
st.markdown(
    """
<h1 style="text-align: center;">⚔️ MLIP Arena Leaderboard ⚔️</h1>

<!-- Badges: Static Badge · Hugging Face · GitHub Actions Workflow Status · PyPI - Version · DOI -->
> MLIP Arena is a unified platform for evaluating foundation machine learning interatomic potentials (MLIPs) beyond conventional energy and force error metrics. It focuses on revealing the underlying physics and chemistry learned by these models. The platform's benchmarks are specifically designed to evaluate the readiness and reliability of open-source, open-weight models in accurately reproducing both qualitative and quantitative behaviors of atomic systems.

### :red[Introduction]

Foundation machine learning interatomic potentials (MLIPs), trained on extensive databases containing millions of density functional theory (DFT) calculations, have revolutionized molecular and materials modeling, but existing benchmarks suffer from data leakage, limited transferability, and an over-reliance on error-based metrics tied to specific DFT references.

We introduce MLIP Arena, a unified benchmark platform for evaluating foundation MLIP performance beyond conventional error metrics. It focuses on revealing the physical soundness learned by MLIPs and assessing their utilitarian performance in a manner agnostic to the underlying model architecture and training dataset. ***By moving beyond static DFT references and revealing the important failure modes*** of current foundation MLIPs in real-world settings, MLIP Arena provides a reproducible framework to guide next-generation MLIP development toward improved predictive accuracy and runtime efficiency while maintaining physical consistency.
""",
    unsafe_allow_html=True,
)

st.subheader(":red[Supported Models]")

st.dataframe(
    s,
    use_container_width=True,
    column_config={
        "Code": st.column_config.LinkColumn(
            # validate="^https://[a-z]+\.streamlit\.app$",
            width="medium",
            display_text="Link",
        ),
        "Paper": st.column_config.LinkColumn(
            # validate="^https://[a-z]+\.streamlit\.app$",
            width="medium",
            display_text="Link",
        ),
    },
)
# st.markdown(
#     """
#     <h2 style="text-align: center;">🏆 Task Ranks 🏆</h2>
#     """,
#     unsafe_allow_html=True,
# )
", unsafe_allow_html=True) st.subheader(":red[Task Ranks]") for task in TASKS: if TASKS[task]["rank-page"] is None: continue st.subheader(task, divider=True) task_module = importlib.import_module(f"ranks.{TASKS[task]['rank-page']}") if TASKS[task]['task-page'] is not None: st.page_link( f"tasks/{TASKS[task]['task-page']}.py", label="Go to the associated task page", icon=":material/link:", ) # Call the function from the imported module if hasattr(task_module, "render"): task_module.render() # if st.button(f"Go to task page"): # st.switch_page(f"tasks/{TASKS[task]['task-page']}.py") else: st.write( "Rank metrics are not available yet but the task has been implemented. Please see the task page for more information." )