from pathlib import Path

from loguru import logger
from tqdm.auto import tqdm

from mlip_arena.models import REGISTRY, MLIPEnum
from mlip_arena.tasks.stability.analysis import gather_results
from mlip_arena.tasks.stability.data import get_atoms_from_db

# One entry per export pass: (run_type handed to gather_results, how many of
# the leading compositions to keep, suffix of the parquet file that is written).
# The structure counts are tentative cut-offs.
EXPORT_TASKS = (
    ("nvt", 120, "heating"),  # tentatively we only take the first 120 structures
    ("npt", 80, "compression"),  # tentatively we only take the first 80 structures
)


def load_compositions(db_name="random-mixture.db"):
    """Return the chemical formula of every non-empty structure in *db_name*.

    Formulas are returned in the order ``get_atoms_from_db`` yields the
    structures; entries with zero atoms are skipped.
    """
    return [
        atoms.get_chemical_formula()
        for atoms in tqdm(get_atoms_from_db(db_name))
        if len(atoms) > 0
    ]


def filter_by_formula(df, formulas):
    """Return a copy of the rows of *df* whose ``"formula"`` is in *formulas*.

    Args:
        df: DataFrame with a ``"formula"`` column.
        formulas: List-like of formula strings to keep.

    Raises:
        ValueError: If no row survives the filter. This is an explicit raise
            rather than an ``assert`` so the check is still enforced when
            Python runs with ``-O``, and so the caller has a message to log.
    """
    selected = df[df["formula"].isin(formulas)].copy()
    if len(selected) == 0:
        raise ValueError(
            f"none of the {len(df)} gathered rows match "
            f"the {len(formulas)} selected formulas"
        )
    return selected


def export_results(model, run_type, formulas, suffix):
    """Gather one model's results, filter them, and write them to parquet.

    Results are read from, and the parquet file is written to, the directory
    next to this script that is named after the model's registry ``family``.
    The output file is ``<model.name>-<suffix>.parquet``.
    """
    run_dir = Path(__file__).parent / f"{REGISTRY[model.name]['family']}"
    df = gather_results(run_dir, prefix=model.name, run_type=run_type)
    df = filter_by_formula(df, formulas)
    df.to_parquet(run_dir / f"{model.name}-{suffix}.parquet", index=False)


def main():
    """Export filtered results for every model and every task in EXPORT_TASKS."""
    compositions = load_compositions()

    # Tasks form the outer loop so every model is processed for the first
    # task before any model is processed for the second one.
    for run_type, limit, suffix in EXPORT_TASKS:
        formulas = compositions[:limit]  # invariant across models
        for model in MLIPEnum:
            try:
                export_results(model, run_type, formulas, suffix)
            except Exception as e:
                # Best effort: a model with missing or unusable results must
                # not stop the remaining models from being exported.
                logger.warning(f"Error processing model {model.name}: {e}")


if __name__ == "__main__":
    main()