diff --git a/ml_peg/analysis/non_covalent_interactions/NCIA_HB300SPXx10/analyse_NCIA_HB300SPXx10.py b/ml_peg/analysis/non_covalent_interactions/NCIA_HB300SPXx10/analyse_NCIA_HB300SPXx10.py new file mode 100644 index 000000000..fc62b9669 --- /dev/null +++ b/ml_peg/analysis/non_covalent_interactions/NCIA_HB300SPXx10/analyse_NCIA_HB300SPXx10.py @@ -0,0 +1,177 @@ +"""Analyse NCIA HB300SPXx10 benchmark.""" + +from __future__ import annotations + +from pathlib import Path + +from ase import units +from ase.io import read, write +import pytest + +from ml_peg.analysis.utils.decorators import ( + build_table, + plot_density_scatter, +) +from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) +D3_MODEL_NAMES = build_d3_name_map(MODELS) + +EV_TO_KCAL = units.mol / units.kcal +CALC_PATH = CALCS_ROOT / "non_covalent_interactions" / "NCIA_HB300SPXx10" / "outputs" +OUT_PATH = APP_ROOT / "data" / "non_covalent_interactions" / "NCIA_HB300SPXx10" + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +def labels() -> list: + """ + Get list of system names. + + Returns + ------- + list + List of all system names. + """ + for model in MODELS: + labels_list = sorted([path.stem for path in (CALC_PATH / model).glob("*.xyz")]) + break + return labels_list + + +@pytest.fixture +def interaction_energies() -> dict[str, list]: + """ + Get interaction energies for all systems. + + Returns + ------- + dict[str, list] + Dictionary of all reference and predicted interaction energies. + """ + results = {"ref": []} | {mlip: [] for mlip in MODELS} + + ref_stored = False + + for model_name in MODELS: + for label in labels(): + atoms = read(CALC_PATH / model_name / f"{label}.xyz") + if not ref_stored: + results["ref"].append(atoms.info["ref_int_energy"] * EV_TO_KCAL) + + results[model_name].append(atoms.info["model_int_energy"] * EV_TO_KCAL) + + # Write structures for app + structs_dir = OUT_PATH / model_name + structs_dir.mkdir(parents=True, exist_ok=True) + write(structs_dir / f"{label}.xyz", atoms) + + ref_stored = True + return results + + +@pytest.fixture +@plot_density_scatter( + filename=OUT_PATH / "figure_ncia_hb300spxx10_density.json", + title="Interaction energy density plot", + x_label="Reference energy / kcal/mol", + y_label="Predicted energy / kcal/mol", + annotation_metadata={"system_count": "Systems"}, +) +def interaction_density(interaction_energies: dict[str, list]) -> dict[str, dict]: + """ + Build density scatter inputs for interaction energies. + + Parameters + ---------- + interaction_energies + Reference and predicted interaction energies per model. + + Returns + ------- + dict[str, dict] + Mapping of model names to density-plot payloads. + """ + ref_vals = interaction_energies["ref"] + density_inputs: dict[str, dict] = {} + for model_name in MODELS: + preds = interaction_energies.get(model_name, []) + density_inputs[model_name] = { + "ref": ref_vals, + "pred": preds, + "meta": {"system_count": len([val for val in preds if val is not None])}, + } + return density_inputs + + +@pytest.fixture +def get_mae(interaction_energies) -> dict[str, float]: + """ + Get mean absolute error for energies. + + Parameters + ---------- + interaction_energies + Dictionary of reference and predicted energies. + + Returns + ------- + dict[str, float] + Dictionary of predicted energy errors for all models. + """ + results = {} + for model_name in MODELS: + results[model_name] = mae( + interaction_energies["ref"], interaction_energies[model_name] + ) + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "ncia_hb300spxx10_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + mlip_name_map=D3_MODEL_NAMES, +) +def metrics(get_mae: dict[str, float]) -> dict[str, dict]: + """ + Get all metrics. + + Parameters + ---------- + get_mae + Mean absolute errors for all models. + + Returns + ------- + dict[str, dict] + Metric names and values for all models. + """ + return { + "MAE": get_mae, + } + + +def test_ncia_hb300spxx10( + metrics: dict[str, dict], + interaction_density: dict[str, dict], +) -> None: + """ + Run NCIA HB300SPXx10 test. + + Parameters + ---------- + metrics + All new benchmark metric names and dictionary of values for each model. + interaction_density + Density-scatter inputs for all models (drives saved plots). + """ + return diff --git a/ml_peg/analysis/non_covalent_interactions/NCIA_HB300SPXx10/metrics.yml b/ml_peg/analysis/non_covalent_interactions/NCIA_HB300SPXx10/metrics.yml new file mode 100644 index 000000000..45388b831 --- /dev/null +++ b/ml_peg/analysis/non_covalent_interactions/NCIA_HB300SPXx10/metrics.yml @@ -0,0 +1,7 @@ +metrics: + MAE: + good: 0.0 + bad: 10 + unit: kcal/mol + tooltip: Mean Absolute Error for all systems + level_of_theory: CCSD(T) diff --git a/ml_peg/app/non_covalent_interactions/NCIA_HB300SPXx10/app_NCIA_HB300SPXx10.py b/ml_peg/app/non_covalent_interactions/NCIA_HB300SPXx10/app_NCIA_HB300SPXx10.py new file mode 100644 index 000000000..2b3288dec --- /dev/null +++ b/ml_peg/app/non_covalent_interactions/NCIA_HB300SPXx10/app_NCIA_HB300SPXx10.py @@ -0,0 +1,80 @@ +"""Run NCIA HB300SPXx10 app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_cell +from ml_peg.app.utils.load import read_density_plot_for_model +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "NCIA HB300SPXx10" +DOCS_URL = ( + "https://ddmms.github.io/ml-peg/user_guide/benchmarks/" + "non_covalent_interactions.html#ncia-hb300spxx10" +) +DATA_PATH = APP_ROOT / "data" / "non_covalent_interactions" / "NCIA_HB300SPXx10" + + +class NCIANHB300SPXx10App(BaseApp): + """NCIA HB300SPXx10 benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + density_plots: dict[str, dict] = {} + for model in MODELS: + density_graph = read_density_plot_for_model( + filename=DATA_PATH / "figure_ncia_hb300spxx10_density.json", + model=model, + id=f"{BENCHMARK_NAME}-{model}-density", + ) + if density_graph is not None: + density_plots[model] = {"MAE": density_graph} + + plot_from_table_cell( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + cell_to_plot=density_plots, + ) + + +def get_app() -> NCIANHB300SPXx10App: + """ + Get NCIA HB300SPXx10 benchmark app layout and callback registration. + + Returns + ------- + NCIANHB300SPXx10App + Benchmark layout and callback registration. + """ + return NCIANHB300SPXx10App( + name=BENCHMARK_NAME, + description=( + "Performance in predicting ionic hydrogen bond interaction energies " + "for the NCIA HB300SPXx10 dataset (ionic dimers from HB300). " + "Reference data from CCSD(T) calculations." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "ncia_hb300spxx10_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + ], + ) + + +if __name__ == "__main__": + # Create Dash app + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + + # Construct layout and register callbacks + benchmark_app = get_app() + full_app.layout = benchmark_app.layout + benchmark_app.register_callbacks() + + # Run app + full_app.run(port=8057, debug=True) diff --git a/ml_peg/calcs/non_covalent_interactions/NCIA_HB300SPXx10/calc_NCIA_HB300SPXx10.py b/ml_peg/calcs/non_covalent_interactions/NCIA_HB300SPXx10/calc_NCIA_HB300SPXx10.py new file mode 100644 index 000000000..13ae44b02 --- /dev/null +++ b/ml_peg/calcs/non_covalent_interactions/NCIA_HB300SPXx10/calc_NCIA_HB300SPXx10.py @@ -0,0 +1,141 @@ +""" +Compute the NCIA HB300SPXx100. + +Journal of Chemical Theory and Computation 2020 16 (10), 6305-6316. +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from ase import Atoms, units +from ase.io import read, write +import pytest +from tqdm import tqdm + +from ml_peg.calcs.utils.utils import download_s3_data +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +KCAL_TO_EV = units.kcal / units.mol + +OUT_PATH = Path(__file__).parent / "outputs" + + +def get_ref_energies(data_path: Path) -> dict[str, float]: + """ + Get reference energies. + + Parameters + ---------- + data_path + Path to data. + + Returns + ------- + dict[str, float] + Loaded reference energies. + """ + ref_energies = {} + + with open(data_path / "NCIA_HB300SPXx10_benchmark.txt") as lines: + for i, line in enumerate(lines): + if i < 3: + continue + items = line.strip().split() + label = items[0] + ref_energy = float(items[1]) * KCAL_TO_EV + ref_energies[label] = ref_energy + + return ref_energies + + +def get_monomers(atoms: Atoms) -> tuple[Atoms, Atoms]: + """ + Get ASE atoms objects of the monomers. + + Parameters + ---------- + atoms + ASE atoms object of the structure. + + Returns + ------- + tuple[Atoms, Atoms] + Tuple containing the two monomers. + """ + if isinstance(atoms.info["selection_a"], str): + a_ids = [int(id) for id in atoms.info["selection_a"].split("-")] + a_ids[0] -= 1 + else: + a_ids = [int(atoms.info["selection_a"]) - 1, int(atoms.info["selection_a"])] + + if isinstance(atoms.info["selection_b"], str): + b_ids = [int(id) for id in atoms.info["selection_b"].split("-")] + b_ids[0] -= 1 + else: + b_ids = [int(atoms.info["selection_b"]) - 1, int(atoms.info["selection_b"])] + + atoms_a = atoms[a_ids[0] : a_ids[1]] + atoms_b = atoms[b_ids[0] : b_ids[1]] + assert len(atoms_a) + len(atoms_b) == len(atoms) + + atoms_a.info["charge"] = int(atoms.info["charge_a"]) + atoms_a.info["spin"] = 1 + + atoms_b.info["charge"] = int(atoms.info["charge_b"]) + atoms_b.info["spin"] = 1 + return (atoms_a, atoms_b) + + +@pytest.mark.parametrize("mlip", MODELS.items()) +def test_ncia_hb300spxx10(mlip: tuple[str, Any]) -> None: + """ + Run NCIA HB300SPXx10 barriers benchmark. + + Parameters + ---------- + mlip + Name of model use and model to get calculator. + """ + model_name, model = mlip + calc = model.get_calculator() + + # Read in data and attach calculator + data_path = ( + download_s3_data( + filename="NCIA_HB300SPXx10.zip", + key="inputs/non_covalent_interactions/NCIA_HB300SPXx10/NCIA_HB300SPXx10.zip", + ) + / "NCIA_HB300SPXx10" + ) + ref_energies = get_ref_energies(data_path) + + calc = model.get_calculator() + # Add D3 calculator for this test + calc = model.add_d3_calculator(calc) + + for label, ref_energy in tqdm(ref_energies.items()): + xyz_fname = f"{label}.xyz" + atoms = read(data_path / "geometries" / xyz_fname) + atoms_a, atoms_b = get_monomers(atoms) + atoms.info["spin"] = 1 + atoms.info["charge"] = int(atoms_a.info["charge"] + atoms_b.info["charge"]) + atoms.calc = calc + atoms_a.calc = calc + atoms_b.calc = calc + + atoms.info["model_int_energy"] = ( + atoms.get_potential_energy() + - atoms_a.get_potential_energy() + - atoms_b.get_potential_energy() + ) + atoms.info["ref_int_energy"] = ref_energy + atoms.calc = None + + write_dir = OUT_PATH / model_name + write_dir.mkdir(parents=True, exist_ok=True) + write(write_dir / f"{label}.xyz", atoms)