diff --git a/docs/source/user_guide/benchmarks/index.rst b/docs/source/user_guide/benchmarks/index.rst index 9f339f4d2..33e0d7ba0 100644 --- a/docs/source/user_guide/benchmarks/index.rst +++ b/docs/source/user_guide/benchmarks/index.rst @@ -12,3 +12,4 @@ Benchmarks molecular_crystal molecular bulk_crystal + lanthanides diff --git a/docs/source/user_guide/benchmarks/lanthanides.rst b/docs/source/user_guide/benchmarks/lanthanides.rst new file mode 100644 index 000000000..e3c5f751f --- /dev/null +++ b/docs/source/user_guide/benchmarks/lanthanides.rst @@ -0,0 +1,45 @@ +=========== +Lanthanides +=========== + +Isomer complexes +================ + +Summary +------- + +Performance in predicting relative isomer energies for lanthanide complexes +compared to r2SCAN-3c DFT reference data. + + +Metrics +------- + +1. Relative isomer energy MAE + +Accuracy of relative isomer energy predictions. + +For each complex, the relative isomer energies are computed with respect to the +lowest-energy isomer in the r2SCAN-3c reference set and compared to the r2SCAN-3c +relative energies reported in the reference dataset. + + +Computational cost +------------------ + +Low: tests are likely to take less than a minute to run on CPU. + + +Data availability +----------------- + +Input structures: + +* T. Rose, M. Bursch, J.-M. Mewes, and S. Grimme, Fast and Robust Modeling of + Lanthanide and Actinide Complexes, Biomolecules, and Molecular Crystals with + the Extended GFN-FF Model, Inorganic Chemistry 63 (2024) 19364-19374. + +Reference data: + +* Relative isomer energies from r2SCAN-3c (see Supporting Information of the + above reference). diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py new file mode 100644 index 000000000..72294fa22 --- /dev/null +++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py @@ -0,0 +1,225 @@ +"""Analyse lanthanide isomer complex benchmark.""" + +from __future__ import annotations + +from pathlib import Path + +from ase.io import read, write +import pytest + +from ml_peg.analysis.utils.decorators import build_table, plot_parity +from ml_peg.analysis.utils.utils import load_metrics_config, mae +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +CALC_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs" +OUT_PATH = APP_ROOT / "data" / "lanthanides" / "isomer_complexes" + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + +# r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only) +# These are relative energies (relative to lowest energy isomer for each system) +R2SCAN_REF: dict[str, dict[str, float]] = { + "Ac_f1a50d": {"iso1": 0.02, "iso2": 0.0, "iso3": 3.52}, + "Ce_1d271a": {"iso1": 0.0, "iso2": 2.2, "iso3": 1.67}, + "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.0, "iso4": 2.17}, + "Eu_ff6372": {"iso1": 0.0, "iso2": 6.74}, + "La_f1a50d": {"iso1": 0.23, "iso2": 0.0, "iso3": 3.11}, + "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.0, "iso4": 2.08}, + "Nd_c5f44a": {"iso1": 0.0, "iso2": 1.61, "iso3": 0.82}, + "Sm_ed79e8": {"iso1": 2.99, "iso2": 8.97, "iso3": 0.0}, + "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.0, "iso4": 1.23}, +} + + +def get_system_names() -> list[str]: + """ + Get sorted list of system names. + + Returns + ------- + list[str] + Sorted list of system names from R2SCAN_REF. + """ + return sorted(R2SCAN_REF.keys()) + + +def get_reference_keys() -> list[tuple[str, str]]: + """ + Get sorted list of (system, isomer) tuples for consistent ordering. + + Returns + ------- + list[tuple[str, str]] + List of (system, isomer) tuples sorted by system then isomer. + """ + system_names = get_system_names() + return [ + (system, isomer) + for system in system_names + for isomer in sorted(R2SCAN_REF[system].keys()) + ] + + +def get_reference_values() -> list[float]: + """ + Get reference relative energies in sorted order. + + Returns + ------- + list[float] + Reference relative energies matching the order of get_reference_keys(). + """ + reference_keys = get_reference_keys() + return [R2SCAN_REF[system][isomer] for system, isomer in reference_keys] + + +def build_hoverdata() -> dict[str, list[str]]: + """ + Build hoverdata dictionary for parity plot. + + Returns + ------- + dict[str, list[str]] + Dictionary with "System" and "Isomer" keys for hover information. + """ + reference_keys = get_reference_keys() + return { + "System": [system for system, _ in reference_keys], + "Isomer": [isomer for _, isomer in reference_keys], + } + + +@pytest.fixture +@plot_parity( + filename=OUT_PATH / "figure_isomer_complexes.json", + title="Lanthanide isomer relative energies", + x_label="Model Delta E (kcal/mol)", + y_label="r2SCAN-3c Delta E (kcal/mol)", + hoverdata=build_hoverdata(), +) +def isomer_relative_energies() -> dict[str, list]: + """ + Build parity data for lanthanide isomer complexes benchmark. + + Returns + ------- + dict[str, list] + Reference and per-model relative energies. + """ + results = {"ref": get_reference_values()} | {mlip: [] for mlip in MODELS} + + for model_name in MODELS: + model_dir = CALC_PATH / model_name + if not model_dir.exists(): + # Model directory doesn't exist, fill with None + results[model_name] = [None] * len(get_reference_keys()) + continue + + structs_dir = OUT_PATH / model_name + structs_dir.mkdir(parents=True, exist_ok=True) + + # Process each system separately to compute relative energies + preds: list[float | None] = [] + for system_name in get_system_names(): + # Collect all isomers for this system + isomer_data: dict[str, tuple[float, object]] = {} + for isomer in sorted(R2SCAN_REF[system_name].keys()): + xyz_path = model_dir / f"{system_name}_{isomer}.xyz" + if xyz_path.exists(): + atoms = read(xyz_path) + energy_kcal = atoms.info.get("energy_kcal") + if energy_kcal is not None: + isomer_data[isomer] = (energy_kcal, atoms) + + # Compute relative energies + min_energy = min(energy for energy, _ in isomer_data.values()) + + # Add predictions in sorted isomer order + for isomer in sorted(R2SCAN_REF[system_name].keys()): + if isomer in isomer_data: + energy_kcal, atoms = isomer_data[isomer] + rel_energy = energy_kcal - min_energy + preds.append(rel_energy) + + # Copy structure to app directory + write(structs_dir / f"{system_name}_{isomer}.xyz", atoms) + else: + preds.append(None) + + results[model_name] = preds + + return results + + +@pytest.fixture +def isomer_complex_errors(isomer_relative_energies) -> dict[str, float | None]: + """ + Get mean absolute error for relative energies. + + Parameters + ---------- + isomer_relative_energies + Dictionary of reference and predicted relative energies. + + Returns + ------- + dict[str, float] + Dictionary of predicted relative energy errors for all models. + """ + results: dict[str, float | None] = {} + for model_name in MODELS: + preds = isomer_relative_energies.get(model_name, []) + pairs = [ + (ref, pred) + for ref, pred in zip(isomer_relative_energies["ref"], preds, strict=True) + if pred is not None + ] + if not pairs: + results[model_name] = None + continue + ref_vals, pred_vals = zip(*pairs, strict=True) + results[model_name] = mae(list(ref_vals), list(pred_vals)) + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "isomer_complexes_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + weights=DEFAULT_WEIGHTS, +) +def metrics(isomer_complex_errors: dict[str, float | None]) -> dict[str, dict]: + """ + Collect metrics for lanthanide isomer complexes. + + Parameters + ---------- + isomer_complex_errors + Mean absolute errors for all models. + + Returns + ------- + dict[str, dict] + Metrics keyed by name for all models. + """ + return {"MAE": isomer_complex_errors} + + +def test_isomer_complexes(metrics: dict[str, dict]) -> None: + """ + Run lanthanide isomer complexes benchmark analysis. + + Parameters + ---------- + metrics + All lanthanide isomer complex metrics. + """ + return diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml b/ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml new file mode 100644 index 000000000..043a99279 --- /dev/null +++ b/ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml @@ -0,0 +1,7 @@ +metrics: + MAE: + good: 0.0 + bad: 10.0 + unit: kcal/mol + tooltip: Mean absolute error for relative isomer energies + level_of_theory: r2SCAN-3c diff --git a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py new file mode 100644 index 000000000..b77619ade --- /dev/null +++ b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py @@ -0,0 +1,89 @@ +"""Run lanthanide isomer complex benchmark app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_column, struct_from_scatter +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "Lanthanide Isomer Complexes" +DOCS_URL = ( + "https://ddmms.github.io/ml-peg/user_guide/benchmarks/lanthanides.html" + "#isomer-complexes" +) +DATA_PATH = APP_ROOT / "data" / "lanthanides" / "isomer_complexes" + + +class IsomerComplexesApp(BaseApp): + """Lanthanide isomer complex benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + scatter = read_plot( + DATA_PATH / "figure_isomer_complexes.json", + id=f"{BENCHMARK_NAME}-figure", + ) + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={"MAE": scatter}, + ) + + # Use first model's structures for visualization + if MODELS: + structs_dir = DATA_PATH / MODELS[0] + structs = [ + f"assets/lanthanides/isomer_complexes/{MODELS[0]}/{struct_file.stem}.xyz" + for struct_file in sorted(structs_dir.glob("*.xyz")) + ] + + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=structs, + mode="struct", + ) + + +def get_app() -> IsomerComplexesApp: + """ + Get lanthanide isomer complex benchmark app layout and callback registration. + + Returns + ------- + IsomerComplexesApp + Benchmark layout and callback registration. + """ + return IsomerComplexesApp( + name=BENCHMARK_NAME, + description=( + "Relative energies of lanthanide isomer complexes compared to r2SCAN-3c." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "isomer_complexes_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), + ], + ) + + +if __name__ == "__main__": + # Create Dash app + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + + # Construct layout and register callbacks + app_instance = get_app() + full_app.layout = app_instance.layout + app_instance.register_callbacks() + + # Run app + full_app.run(port=8061, debug=True) diff --git a/ml_peg/app/lanthanides/lanthanides.yml b/ml_peg/app/lanthanides/lanthanides.yml new file mode 100644 index 000000000..1ce5a83a7 --- /dev/null +++ b/ml_peg/app/lanthanides/lanthanides.yml @@ -0,0 +1,2 @@ +title: Lanthanides +description: Relative energies for lanthanide isomer complexes diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py new file mode 100644 index 000000000..691350752 --- /dev/null +++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py @@ -0,0 +1,126 @@ +"""Run lanthanide isomer complex energy calculations.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from ase import units +from ase.io import read, write +import pytest +from tqdm import tqdm + +from ml_peg.calcs import CALCS_ROOT +from ml_peg.calcs.utils.utils import download_s3_data +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +OUT_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs" +KCAL_PER_EV = units.mol / units.kcal + + +# r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only) +R2SCAN_REF: dict[str, dict[str, float]] = { + "Ac_f1a50d": {"iso1": 0.02, "iso2": 0.0, "iso3": 3.52}, + "Ce_1d271a": {"iso1": 0.0, "iso2": 2.2, "iso3": 1.67}, + "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.0, "iso4": 2.17}, + "Eu_ff6372": {"iso1": 0.0, "iso2": 6.74}, + "La_f1a50d": {"iso1": 0.23, "iso2": 0.0, "iso3": 3.11}, + "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.0, "iso4": 2.08}, + "Nd_c5f44a": {"iso1": 0.0, "iso2": 1.61, "iso3": 0.82}, + "Sm_ed79e8": {"iso1": 2.99, "iso2": 8.97, "iso3": 0.0}, + "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.0, "iso4": 1.23}, +} + + +def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]: + """ + Load isomer entries from the structure root. + + Parameters + ---------- + struct_root + Root directory containing system/iso*/orca.xyz and optional .CHRG/.UHF. + + Returns + ------- + list[dict[str, Any]] + Entry dictionaries with system, isomer, xyz path, charge, multiplicity. + """ + entries: list[dict[str, Any]] = [] + for system_dir in sorted(struct_root.glob("*")): + if not system_dir.is_dir(): + continue + if system_dir.name not in R2SCAN_REF: + continue + for iso_dir in sorted(system_dir.glob("iso*")): + xyz_path = iso_dir / "orca.xyz" + if not xyz_path.exists(): + continue + charge_path = iso_dir / ".CHRG" + uhf_path = iso_dir / ".UHF" + charge = int(charge_path.read_text().strip()) if charge_path.exists() else 0 + multiplicity = int(uhf_path.read_text().strip()) if uhf_path.exists() else 1 + entries.append( + { + "system": system_dir.name, + "isomer": iso_dir.name, + "xyz": xyz_path, + "charge": charge, + "multiplicity": multiplicity, + } + ) + return entries + + +@pytest.mark.parametrize("mlip", MODELS.items()) +def test_isomer_complexes(mlip: tuple[str, Any]) -> None: + """ + Run single-point energy calculations for lanthanide isomer complexes. + + Parameters + ---------- + mlip + Model name and MLIP calculator wrapper. + """ + # download lanthanide isomer complexes dataset + isomer_complexes_dir = ( + download_s3_data( + key="inputs/lanthanides/isomer_complexes/isomer_complexes.zip", + filename="isomer_complexes.zip", + ) + / "isomer_complexes" + ) + + entries = _load_isomer_entries(isomer_complexes_dir) + if not entries: + pytest.skip(f"No isomer structures found under {isomer_complexes_dir}.") + + model_name, model = mlip + calc = model.get_calculator() + + # results: list[dict[str, Any]] = [] + for entry in tqdm(entries, desc=f"Calculating energies for {model_name}"): + atoms = read(entry["xyz"]) + atoms.info["charge"] = entry["charge"] + atoms.info["spin_multiplicity"] = entry["multiplicity"] + atoms.info["spin"] = entry["multiplicity"] + atoms.calc = calc + energy_ev = float(atoms.get_potential_energy()) + energy_kcal = energy_ev * KCAL_PER_EV + + atoms.info["model"] = model_name + atoms.info["energy_ev"] = energy_ev + atoms.info["energy_kcal"] = energy_kcal + atoms.info["system"] = entry["system"] + atoms.info["isomer"] = entry["isomer"] + + atoms.info["ref_energy_kcal"] = R2SCAN_REF.get(entry["system"], {}).get( + entry["isomer"] + ) + + write_dir = OUT_PATH / model_name + write_dir.mkdir(parents=True, exist_ok=True) + write(write_dir / f"{entry['system']}_{entry['isomer']}.xyz", atoms)