From f460a6e9c5e8d862affdb458ce3c51d89d9da069 Mon Sep 17 00:00:00 2001 From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com> Date: Thu, 29 Jan 2026 14:06:45 +0000 Subject: [PATCH 01/14] add lanthanide benchmark --- docs/source/user_guide/benchmarks/index.rst | 1 + .../user_guide/benchmarks/lanthanides.rst | 48 ++++ .../analyse_isomer_complexes.py | 247 ++++++++++++++++++ .../lanthanides/isomer_complexes/metrics.yml | 7 + .../isomer_complexes/app_isomer_complexes.py | 98 +++++++ ml_peg/app/lanthanides/lanthanides.yml | 2 + .../isomer_complexes/calc_isomer_complexes.py | 43 +++ 7 files changed, 446 insertions(+) create mode 100644 docs/source/user_guide/benchmarks/lanthanides.rst create mode 100644 ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py create mode 100644 ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml create mode 100644 ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py create mode 100644 ml_peg/app/lanthanides/lanthanides.yml create mode 100644 ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py diff --git a/docs/source/user_guide/benchmarks/index.rst b/docs/source/user_guide/benchmarks/index.rst index 9f339f4d2..33e0d7ba0 100644 --- a/docs/source/user_guide/benchmarks/index.rst +++ b/docs/source/user_guide/benchmarks/index.rst @@ -12,3 +12,4 @@ Benchmarks molecular_crystal molecular bulk_crystal + lanthanides diff --git a/docs/source/user_guide/benchmarks/lanthanides.rst b/docs/source/user_guide/benchmarks/lanthanides.rst new file mode 100644 index 000000000..7c9317266 --- /dev/null +++ b/docs/source/user_guide/benchmarks/lanthanides.rst @@ -0,0 +1,48 @@ +=========== +Lanthanides +=========== + +Isomer complexes +================ + +Summary +------- + +Performance in predicting relative isomer energies for lanthanide complexes +compared to r2SCAN-3c DFT reference data. + + +Metrics +------- + +1. Relative isomer energy MAE + +Accuracy of relative isomer energy predictions. 
+ +For each complex, the relative isomer energies are computed with respect to the +lowest-energy isomer in the r2SCAN-3c reference set and compared to the r2SCAN-3c +relative energies reported in the reference dataset. The r2SCAN-3c geometries are +used, with wB97X-V/def2-mTZVPP single-point calculations reported for validation +in the source study. + + +Computational cost +------------------ + +Low: tests are likely to take less than a minute to run on CPU once model outputs +are available. + + +Data availability +----------------- + +Input structures: + +* T. Rose, M. Bursch, J.-M. Mewes, and S. Grimme, Fast and Robust Modeling of + Lanthanide and Actinide Complexes, Biomolecules, and Molecular Crystals with + the Extended GFN-FF Model, Inorganic Chemistry 63 (2024) 19364-19374. + +Reference data: + +* Relative isomer energies from r2SCAN-3c (see Supporting Information of the + above reference). diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py new file mode 100644 index 000000000..50f93e01d --- /dev/null +++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py @@ -0,0 +1,247 @@ +"""Analyse lanthanide isomer complex benchmark.""" + +from __future__ import annotations + +import json +import os +from pathlib import Path +import shutil + +from dash import dash_table +import pandas as pd +import plotly.graph_objects as go +import pytest + +from ml_peg.analysis.utils.utils import calc_table_scores, load_metrics_config, mae +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT + +CSV_ENV_VAR = "ML_PEG_LANTHANIDE_CSV" +STRUCT_ENV_VAR = "ML_PEG_LANTHANIDE_STRUCTURES" + +CALC_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs" +OUT_PATH = APP_ROOT / "data" / "lanthanides" / "isomer_complexes" +STRUCT_OUT_PATH = OUT_PATH / "structures" + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") 
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + +# r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only) +R2SCAN_REF: dict[str, dict[str, float]] = { + "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08}, + "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17}, + "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20}, + "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00}, + "La_f1a50d": {"iso1": 0.00, "iso2": 3.11}, + "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74}, + "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61}, +} + + +def _resolve_csv_path() -> Path | None: + env_path = os.environ.get(CSV_ENV_VAR) + if env_path: + return Path(env_path).expanduser() + csv_path = CALC_PATH / "isomer_energies.csv" + return csv_path if csv_path.exists() else None + + +def _build_reference_df() -> pd.DataFrame: + records = [] + for system, iso_map in R2SCAN_REF.items(): + for iso, ref in iso_map.items(): + records.append({"system": system, "isomer": iso, "ref": ref}) + return pd.DataFrame.from_records(records) + + +def _copy_structures(struct_root: Path, reference_df: pd.DataFrame) -> dict[tuple, str]: + struct_map: dict[tuple, str] = {} + for _, row in reference_df.iterrows(): + system = row["system"] + iso = row["isomer"] + src = struct_root / system / iso / "orca.xyz" + if not src.exists(): + continue + dest_dir = STRUCT_OUT_PATH / system + dest_dir.mkdir(parents=True, exist_ok=True) + dest = dest_dir / f"{iso}.xyz" + shutil.copyfile(src, dest) + struct_map[(system, iso)] = ( + f"assets/lanthanides/isomer_complexes/structures/{system}/{iso}.xyz" + ) + return struct_map + + +def _build_table( + mae_by_model: dict[str, float | None], + model_order: list[str], +) -> None: + metrics_data = [] + for model in model_order: + metrics_data.append({"MLIP": model, "MAE": mae_by_model.get(model), "id": model}) + + metrics_data = calc_table_scores( + metrics_data, + thresholds=DEFAULT_THRESHOLDS, + weights=DEFAULT_WEIGHTS, 
+ ) + + metrics_columns = ( + {"name": "MLIP", "id": "MLIP"}, + {"name": "MAE", "id": "MAE"}, + {"name": "Score", "id": "Score"}, + ) + + summary_tooltips = { + "MLIP": "Model identifier, hover for configuration details.", + "Score": "Weighted score across metrics, Higher is better (normalised 0 to 1).", + } + tooltip_header = DEFAULT_TOOLTIPS | summary_tooltips + + model_configs = {model: {} for model in model_order} + model_levels = {model: None for model in model_order} + metric_levels = { + metric_name: DEFAULT_THRESHOLDS.get(metric_name, {}).get("level_of_theory") + for metric_name in DEFAULT_THRESHOLDS + } + + model_name_map = {model: model for model in model_order} + + table = dash_table.DataTable( + metrics_data, + list(metrics_columns), + id="metrics", + tooltip_header=tooltip_header, + ) + + OUT_PATH.mkdir(parents=True, exist_ok=True) + with open(OUT_PATH / "isomer_complexes_metrics_table.json", "w") as fp: + json.dump( + { + "data": table.data, + "columns": table.columns, + "tooltip_header": tooltip_header, + "thresholds": DEFAULT_THRESHOLDS, + "weights": DEFAULT_WEIGHTS, + "model_levels_of_theory": model_levels, + "metric_levels_of_theory": metric_levels, + "model_configs": model_configs, + "model_name_map": model_name_map, + }, + fp, + ) + + +@pytest.fixture +def isomer_complex_outputs() -> dict[str, float | None]: + """ + Build outputs for lanthanide isomer complexes benchmark. + + Returns + ------- + dict[str, float | None] + Mean absolute errors by model. + """ + csv_path = _resolve_csv_path() + if csv_path is None: + pytest.skip( + "No lanthanide isomer CSV found. " + "Set ML_PEG_LANTHANIDE_CSV or run calc to stage outputs." 
+ ) + + df = pd.read_csv(csv_path) + if df.empty: + pytest.skip("Lanthanide isomer CSV is empty.") + + reference_df = _build_reference_df() + df = df.merge(reference_df, on=["system", "isomer"], how="inner") + if df.empty: + pytest.skip("No overlap between CSV entries and r2SCAN-3c reference data.") + + struct_map: dict[tuple, str] = {} + struct_root_env = os.environ.get(STRUCT_ENV_VAR) + if struct_root_env: + struct_root = Path(struct_root_env).expanduser() + if struct_root.exists(): + struct_map = _copy_structures(struct_root, reference_df) + + models = sorted(df["model"].unique().tolist()) + + mae_by_model: dict[str, float | None] = {} + fig = go.Figure() + + for model in models: + sub = df[df["model"] == model] + if sub.empty: + mae_by_model[model] = None + continue + + mae_by_model[model] = mae( + sub["ref"].tolist(), + sub["rel_energy_kcal"].tolist(), + ) + + customdata = [] + for _, row in sub.iterrows(): + struct_path = struct_map.get((row["system"], row["isomer"]), "") + customdata.append([struct_path, row["system"], row["isomer"]]) + + fig.add_trace( + go.Scatter( + x=sub["ref"], + y=sub["rel_energy_kcal"], + mode="markers", + name=model, + customdata=customdata, + hovertemplate=( + "%{customdata[1]} %{customdata[2]}
" + "r2SCAN-3c: %{x:.2f} kcal/mol
" + "Model: %{y:.2f} kcal/mol" + "" + ), + ) + ) + + min_val = min(df["ref"].min(), df["rel_energy_kcal"].min()) + max_val = max(df["ref"].max(), df["rel_energy_kcal"].max()) + pad = 0.5 + min_val -= pad + max_val += pad + + fig.add_trace( + go.Scatter( + x=[min_val, max_val], + y=[min_val, max_val], + mode="lines", + showlegend=False, + line={"color": "#7f7f7f", "dash": "dash"}, + hoverinfo="skip", + ) + ) + + fig.update_layout( + title="Lanthanide isomer relative energies", + xaxis_title="r2SCAN-3c Delta E (kcal/mol)", + yaxis_title="Model Delta E (kcal/mol)", + plot_bgcolor="#ffffff", + ) + + OUT_PATH.mkdir(parents=True, exist_ok=True) + fig.write_json(OUT_PATH / "figure_isomer_complexes.json") + _build_table(mae_by_model, models) + + return mae_by_model + + +def test_isomer_complexes(isomer_complex_outputs: dict[str, float | None]) -> None: + """ + Run lanthanide isomer complexes benchmark analysis. + + Parameters + ---------- + isomer_complex_outputs + Mean absolute errors for all models. 
+ """ + return diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml b/ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml new file mode 100644 index 000000000..043a99279 --- /dev/null +++ b/ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml @@ -0,0 +1,7 @@ +metrics: + MAE: + good: 0.0 + bad: 10.0 + unit: kcal/mol + tooltip: Mean absolute error for relative isomer energies + level_of_theory: r2SCAN-3c diff --git a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py new file mode 100644 index 000000000..36182a4d0 --- /dev/null +++ b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py @@ -0,0 +1,98 @@ +"""Run lanthanide isomer complex benchmark app.""" + +from __future__ import annotations + +from dash import Dash, Input, Output, callback +from dash.html import Div, Iframe + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_column +from ml_peg.app.utils.load import read_plot +from ml_peg.app.utils.weas import generate_weas_html + +BENCHMARK_NAME = "Lanthanide Isomer Complexes" +DOCS_URL = ( + "https://ddmms.github.io/ml-peg/user_guide/benchmarks/lanthanides.html" + "#isomer-complexes" +) +DATA_PATH = APP_ROOT / "data" / "lanthanides" / "isomer_complexes" + + +class IsomerComplexesApp(BaseApp): + """Lanthanide isomer complex benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + scatter = read_plot( + DATA_PATH / "figure_isomer_complexes.json", + id=f"{BENCHMARK_NAME}-figure", + ) + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={"MAE": scatter}, + ) + + @callback( + Output(f"{BENCHMARK_NAME}-struct-placeholder", "children"), + Input(f"{BENCHMARK_NAME}-figure", "clickData"), + ) + def show_structure(click_data) -> Div: + if not 
click_data: + return Div("Click on a model point to view the structure.") + + point = click_data.get("points", [{}])[0] + custom = point.get("customdata") or [] + if not custom or not custom[0]: + return Div("No structure available for this point.") + + struct_path = custom[0] + return Div( + Iframe( + srcDoc=generate_weas_html(struct_path, "struct", 0), + style={ + "height": "550px", + "width": "100%", + "border": "1px solid #ddd", + "borderRadius": "5px", + }, + ) + ) + + +def get_app() -> IsomerComplexesApp: + """ + Get lanthanide isomer complex benchmark app layout and callback registration. + + Returns + ------- + IsomerComplexesApp + Benchmark layout and callback registration. + """ + return IsomerComplexesApp( + name=BENCHMARK_NAME, + description=( + "Relative energies of lanthanide isomer complexes compared to r2SCAN-3c." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "isomer_complexes_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), + ], + ) + + +if __name__ == "__main__": + # Create Dash app + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + + # Construct layout and register callbacks + app_instance = get_app() + full_app.layout = app_instance.layout + app_instance.register_callbacks() + + # Run app + full_app.run(port=8061, debug=True) diff --git a/ml_peg/app/lanthanides/lanthanides.yml b/ml_peg/app/lanthanides/lanthanides.yml new file mode 100644 index 000000000..1ce5a83a7 --- /dev/null +++ b/ml_peg/app/lanthanides/lanthanides.yml @@ -0,0 +1,2 @@ +title: Lanthanides +description: Relative energies for lanthanide isomer complexes diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py new file mode 100644 index 000000000..3bc8af183 --- /dev/null +++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py @@ -0,0 +1,43 @@ +"""Stage 
lanthanide isomer complex energies for analysis.""" + +from __future__ import annotations + +import os +from pathlib import Path +import shutil + +import pytest + +from ml_peg.calcs import CALCS_ROOT + +OUT_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs" +CSV_ENV_VAR = "ML_PEG_LANTHANIDE_CSV" + + +def _resolve_source_csv() -> Path | None: + env_path = os.environ.get(CSV_ENV_VAR) + if env_path: + return Path(env_path).expanduser() + default_path = OUT_PATH / "isomer_energies.csv" + if default_path.exists(): + return default_path + return None + + +def test_stage_isomer_complexes_csv() -> None: + """ + Stage the precomputed isomer energies CSV for analysis. + + Set `ML_PEG_LANTHANIDE_CSV` to point to the source CSV. + """ + source_csv = _resolve_source_csv() + if source_csv is None or not source_csv.exists(): + pytest.skip( + "No lanthanide isomer CSV found. " + "Set ML_PEG_LANTHANIDE_CSV to the isomer_energies.csv path." + ) + + OUT_PATH.mkdir(parents=True, exist_ok=True) + dest_csv = OUT_PATH / "isomer_energies.csv" + if source_csv.resolve() != dest_csv.resolve(): + shutil.copyfile(source_csv, dest_csv) From 33e18c767fd0fee97a2f05833215faf7de6d1ccf Mon Sep 17 00:00:00 2001 From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com> Date: Thu, 29 Jan 2026 14:16:30 +0000 Subject: [PATCH 02/14] Update lanthanides.rst --- docs/source/user_guide/benchmarks/lanthanides.rst | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/docs/source/user_guide/benchmarks/lanthanides.rst b/docs/source/user_guide/benchmarks/lanthanides.rst index 7c9317266..e3c5f751f 100644 --- a/docs/source/user_guide/benchmarks/lanthanides.rst +++ b/docs/source/user_guide/benchmarks/lanthanides.rst @@ -21,16 +21,13 @@ Accuracy of relative isomer energy predictions. 
For each complex, the relative isomer energies are computed with respect to the lowest-energy isomer in the r2SCAN-3c reference set and compared to the r2SCAN-3c -relative energies reported in the reference dataset. The r2SCAN-3c geometries are -used, with wB97X-V/def2-mTZVPP single-point calculations reported for validation -in the source study. +relative energies reported in the reference dataset. Computational cost ------------------ -Low: tests are likely to take less than a minute to run on CPU once model outputs -are available. +Low: tests are likely to take less than a minute to run on CPU. Data availability From be14311a6a5974c07501e3aaa8243f528b9e2c59 Mon Sep 17 00:00:00 2001 From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com> Date: Thu, 29 Jan 2026 14:44:13 +0000 Subject: [PATCH 03/14] fix linter --- .../analyse_isomer_complexes.py | 47 ++++++++++++++++++- .../isomer_complexes/app_isomer_complexes.py | 13 +++++ .../isomer_complexes/calc_isomer_complexes.py | 8 ++++ 3 files changed, 66 insertions(+), 2 deletions(-) diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py index 50f93e01d..bf0ffe120 100644 --- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py +++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py @@ -41,6 +41,14 @@ def _resolve_csv_path() -> Path | None: + """ + Resolve the source CSV path for isomer energies. + + Returns + ------- + Path | None + CSV path if found, otherwise ``None``. + """ env_path = os.environ.get(CSV_ENV_VAR) if env_path: return Path(env_path).expanduser() @@ -49,6 +57,14 @@ def _resolve_csv_path() -> Path | None: def _build_reference_df() -> pd.DataFrame: + """ + Build a reference dataframe from the r2SCAN-3c table. + + Returns + ------- + pandas.DataFrame + Dataframe with columns: system, isomer, ref. 
+ """ records = [] for system, iso_map in R2SCAN_REF.items(): for iso, ref in iso_map.items(): @@ -57,6 +73,21 @@ def _build_reference_df() -> pd.DataFrame: def _copy_structures(struct_root: Path, reference_df: pd.DataFrame) -> dict[tuple, str]: + """ + Copy reference structures into the app assets directory. + + Parameters + ---------- + struct_root + Root directory containing isomer structures. + reference_df + Dataframe of systems/isomers to copy. + + Returns + ------- + dict[tuple, str] + Mapping of (system, isomer) to asset path. + """ struct_map: dict[tuple, str] = {} for _, row in reference_df.iterrows(): system = row["system"] @@ -78,9 +109,21 @@ def _build_table( mae_by_model: dict[str, float | None], model_order: list[str], ) -> None: + """ + Build the metrics table JSON for the app. + + Parameters + ---------- + mae_by_model + MAE values keyed by model name. + model_order + Ordered list of model names to include. + """ metrics_data = [] for model in model_order: - metrics_data.append({"MLIP": model, "MAE": mae_by_model.get(model), "id": model}) + metrics_data.append( + {"MLIP": model, "MAE": mae_by_model.get(model), "id": model} + ) metrics_data = calc_table_scores( metrics_data, @@ -101,7 +144,7 @@ def _build_table( tooltip_header = DEFAULT_TOOLTIPS | summary_tooltips model_configs = {model: {} for model in model_order} - model_levels = {model: None for model in model_order} + model_levels = dict.fromkeys(model_order) metric_levels = { metric_name: DEFAULT_THRESHOLDS.get(metric_name, {}).get("level_of_theory") for metric_name in DEFAULT_THRESHOLDS diff --git a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py index 36182a4d0..eb5bcaea3 100644 --- a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py +++ b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py @@ -40,6 +40,19 @@ def register_callbacks(self) -> None: Input(f"{BENCHMARK_NAME}-figure", 
"clickData"), ) def show_structure(click_data) -> Div: + """ + Render a structure viewer for the clicked point. + + Parameters + ---------- + click_data + Plotly click payload from the parity scatter. + + Returns + ------- + Div + Viewer iframe or placeholder message. + """ if not click_data: return Div("Click on a model point to view the structure.") diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py index 3bc8af183..4f917f51e 100644 --- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py +++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py @@ -15,6 +15,14 @@ def _resolve_source_csv() -> Path | None: + """ + Resolve the source CSV path to stage for analysis. + + Returns + ------- + Path | None + CSV path if found, otherwise ``None``. + """ env_path = os.environ.get(CSV_ENV_VAR) if env_path: return Path(env_path).expanduser() From b98082a9889db50627ea739d272fc3cf90dc6796 Mon Sep 17 00:00:00 2001 From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com> Date: Thu, 29 Jan 2026 15:52:22 +0000 Subject: [PATCH 04/14] fix the calculator lanthanide --- .../analyse_isomer_complexes.py | 30 ++-- .../isomer_complexes/calc_isomer_complexes.py | 163 +++++++++++++++--- 2 files changed, 162 insertions(+), 31 deletions(-) diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py index bf0ffe120..d9fcc4d44 100644 --- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py +++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py @@ -40,20 +40,31 @@ } -def _resolve_csv_path() -> Path | None: +def _load_isomer_dataframe() -> pd.DataFrame | None: """ - Resolve the source CSV path for isomer energies. + Load isomer energies from a CSV file or per-model outputs. 
Returns ------- - Path | None - CSV path if found, otherwise ``None``. + pandas.DataFrame | None + Loaded dataframe, or ``None`` if no data are found. """ env_path = os.environ.get(CSV_ENV_VAR) if env_path: - return Path(env_path).expanduser() - csv_path = CALC_PATH / "isomer_energies.csv" - return csv_path if csv_path.exists() else None + path = Path(env_path).expanduser() + if path.exists(): + return pd.read_csv(path) + + combined_path = CALC_PATH / "isomer_energies.csv" + if combined_path.exists(): + return pd.read_csv(combined_path) + + csv_paths = sorted(CALC_PATH.glob("*/isomer_energies.csv")) + if not csv_paths: + return None + + frames = [pd.read_csv(path) for path in csv_paths] + return pd.concat(frames, ignore_index=True) if frames else None def _build_reference_df() -> pd.DataFrame: @@ -187,14 +198,13 @@ def isomer_complex_outputs() -> dict[str, float | None]: dict[str, float | None] Mean absolute errors by model. """ - csv_path = _resolve_csv_path() - if csv_path is None: + df = _load_isomer_dataframe() + if df is None: pytest.skip( "No lanthanide isomer CSV found. " "Set ML_PEG_LANTHANIDE_CSV or run calc to stage outputs." 
) - df = pd.read_csv(csv_path) if df.empty: pytest.skip("Lanthanide isomer CSV is empty.") diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py index 4f917f51e..b3413b612 100644 --- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py +++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py @@ -1,51 +1,172 @@ -"""Stage lanthanide isomer complex energies for analysis.""" +"""Run lanthanide isomer complex energy calculations.""" from __future__ import annotations import os from pathlib import Path -import shutil +from typing import Any +from ase.io import read import pytest from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) OUT_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs" -CSV_ENV_VAR = "ML_PEG_LANTHANIDE_CSV" +STRUCT_ENV_VAR = "ML_PEG_LANTHANIDE_STRUCTURES" +KCAL_PER_EV = 23.060547 -def _resolve_source_csv() -> Path | None: +def _resolve_structure_root() -> Path | None: """ - Resolve the source CSV path to stage for analysis. + Resolve the root directory containing isomer structures. Returns ------- Path | None - CSV path if found, otherwise ``None``. + Structure root path if found, otherwise ``None``. """ - env_path = os.environ.get(CSV_ENV_VAR) + env_path = os.environ.get(STRUCT_ENV_VAR) if env_path: return Path(env_path).expanduser() - default_path = OUT_PATH / "isomer_energies.csv" - if default_path.exists(): - return default_path return None -def test_stage_isomer_complexes_csv() -> None: +def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]: """ - Stage the precomputed isomer energies CSV for analysis. + Load isomer entries from the structure root. + + Parameters + ---------- + struct_root + Root directory containing system/iso*/orca.xyz and optional .CHRG/.UHF. 
- Set `ML_PEG_LANTHANIDE_CSV` to point to the source CSV. + Returns + ------- + list[dict[str, Any]] + Entry dictionaries with system, isomer, xyz path, charge, multiplicity. """ - source_csv = _resolve_source_csv() - if source_csv is None or not source_csv.exists(): + entries: list[dict[str, Any]] = [] + for system_dir in sorted(struct_root.glob("*")): + if not system_dir.is_dir(): + continue + for iso_dir in sorted(system_dir.glob("iso*")): + xyz_path = iso_dir / "orca.xyz" + if not xyz_path.exists(): + continue + charge_path = iso_dir / ".CHRG" + uhf_path = iso_dir / ".UHF" + charge = ( + float(charge_path.read_text().strip()) if charge_path.exists() else 0.0 + ) + multiplicity = ( + int(float(uhf_path.read_text().strip())) if uhf_path.exists() else 1 + ) + entries.append( + { + "system": system_dir.name, + "isomer": iso_dir.name, + "xyz": xyz_path, + "charge": charge, + "multiplicity": multiplicity, + } + ) + return entries + + +def _write_model_csv( + model_name: str, rows: list[dict[str, Any]], out_dir: Path +) -> None: + """ + Write a per-model CSV of isomer energies. + + Parameters + ---------- + model_name + Model identifier. + rows + Rows containing per-isomer energies and metadata. + out_dir + Output directory for the CSV file. + """ + import csv + + out_dir.mkdir(parents=True, exist_ok=True) + csv_path = out_dir / "isomer_energies.csv" + fieldnames = [ + "model", + "system", + "isomer", + "energy_ev", + "energy_kcal", + "rel_energy_kcal", + "charge", + "multiplicity", + ] + with csv_path.open("w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + for row in rows: + writer.writerow({k: row.get(k, "") for k in fieldnames}) + + +@pytest.mark.parametrize("mlip", MODELS.items()) +def test_isomer_complexes(mlip: tuple[str, Any]) -> None: + """ + Run single-point energy calculations for lanthanide isomer complexes. + + Parameters + ---------- + mlip + Model name and MLIP calculator wrapper. 
+ """ + struct_root = _resolve_structure_root() + if struct_root is None or not struct_root.exists(): pytest.skip( - "No lanthanide isomer CSV found. " - "Set ML_PEG_LANTHANIDE_CSV to the isomer_energies.csv path." + "No lanthanide structure root found. " + "Set ML_PEG_LANTHANIDE_STRUCTURES to the isomer_structures path." + ) + + entries = _load_isomer_entries(struct_root) + if not entries: + pytest.skip(f"No isomer structures found under {struct_root}.") + + model_name, model = mlip + calc = model.get_calculator() + + results: list[dict[str, Any]] = [] + for entry in entries: + atoms = read(entry["xyz"]) + atoms.info["charge"] = entry["charge"] + atoms.info["spin_multiplicity"] = entry["multiplicity"] + atoms.info["spin"] = (entry["multiplicity"] - 1) / 2 + atoms.calc = calc + energy_ev = float(atoms.get_potential_energy()) + energy_kcal = energy_ev * KCAL_PER_EV + results.append( + { + "model": model_name, + "system": entry["system"], + "isomer": entry["isomer"], + "energy_ev": energy_ev, + "energy_kcal": energy_kcal, + "charge": entry["charge"], + "multiplicity": entry["multiplicity"], + } ) - OUT_PATH.mkdir(parents=True, exist_ok=True) - dest_csv = OUT_PATH / "isomer_energies.csv" - if source_csv.resolve() != dest_csv.resolve(): - shutil.copyfile(source_csv, dest_csv) + results.sort(key=lambda row: (row["model"], row["system"], row["isomer"])) + grouped: dict[tuple[str, str], list[dict[str, Any]]] = {} + for row in results: + key = (row["model"], row["system"]) + grouped.setdefault(key, []).append(row) + + for rows in grouped.values(): + min_energy = min(row["energy_kcal"] for row in rows) + for row in rows: + row["rel_energy_kcal"] = row["energy_kcal"] - min_energy + + _write_model_csv(model_name, results, OUT_PATH / model_name) From 8e656ee87676c5955f6bb88a1ec54290d9985974 Mon Sep 17 00:00:00 2001 From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com> Date: Thu, 29 Jan 2026 18:00:12 +0000 Subject: [PATCH 05/14] use better decorators --- 
.../analyse_isomer_complexes.py | 300 ++++++------------ .../isomer_complexes/app_isomer_complexes.py | 62 ++-- 2 files changed, 117 insertions(+), 245 deletions(-) diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py index d9fcc4d44..693feaff4 100644 --- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py +++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py @@ -2,26 +2,21 @@ from __future__ import annotations -import json -import os from pathlib import Path -import shutil -from dash import dash_table import pandas as pd -import plotly.graph_objects as go import pytest -from ml_peg.analysis.utils.utils import calc_table_scores, load_metrics_config, mae +from ml_peg.analysis.utils.decorators import build_table, plot_parity +from ml_peg.analysis.utils.utils import load_metrics_config, mae from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models -CSV_ENV_VAR = "ML_PEG_LANTHANIDE_CSV" -STRUCT_ENV_VAR = "ML_PEG_LANTHANIDE_STRUCTURES" - +MODELS = get_model_names(current_models) CALC_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs" OUT_PATH = APP_ROOT / "data" / "lanthanides" / "isomer_complexes" -STRUCT_OUT_PATH = OUT_PATH / "structures" METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( @@ -40,31 +35,22 @@ } -def _load_isomer_dataframe() -> pd.DataFrame | None: +def _load_isomer_dataframe() -> pd.DataFrame: """ - Load isomer energies from a CSV file or per-model outputs. + Load isomer energies from per-model outputs. Returns ------- - pandas.DataFrame | None - Loaded dataframe, or ``None`` if no data are found. + pandas.DataFrame + Loaded dataframe, or an empty dataframe if no data are found. 
""" - env_path = os.environ.get(CSV_ENV_VAR) - if env_path: - path = Path(env_path).expanduser() - if path.exists(): - return pd.read_csv(path) - combined_path = CALC_PATH / "isomer_energies.csv" if combined_path.exists(): return pd.read_csv(combined_path) csv_paths = sorted(CALC_PATH.glob("*/isomer_energies.csv")) - if not csv_paths: - return None - frames = [pd.read_csv(path) for path in csv_paths] - return pd.concat(frames, ignore_index=True) if frames else None + return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame() def _build_reference_df() -> pd.DataFrame: @@ -83,218 +69,124 @@ def _build_reference_df() -> pd.DataFrame: return pd.DataFrame.from_records(records) -def _copy_structures(struct_root: Path, reference_df: pd.DataFrame) -> dict[tuple, str]: - """ - Copy reference structures into the app assets directory. +REFERENCE_DF = _build_reference_df().sort_values(["system", "isomer"]).reset_index( + drop=True +) +REFERENCE_INDEX = pd.MultiIndex.from_frame(REFERENCE_DF[["system", "isomer"]]) +REFERENCE_HOVERDATA = { + "System": REFERENCE_DF["system"].tolist(), + "Isomer": REFERENCE_DF["isomer"].tolist(), +} - Parameters - ---------- - struct_root - Root directory containing isomer structures. - reference_df - Dataframe of systems/isomers to copy. + +@pytest.fixture +@plot_parity( + filename=OUT_PATH / "figure_isomer_complexes.json", + title="Lanthanide isomer relative energies", + x_label="Model Delta E (kcal/mol)", + y_label="r2SCAN-3c Delta E (kcal/mol)", + hoverdata=REFERENCE_HOVERDATA, +) +def isomer_relative_energies() -> dict[str, list]: + """ + Build parity data for lanthanide isomer complexes benchmark. Returns ------- - dict[tuple, str] - Mapping of (system, isomer) to asset path. + dict[str, list] + Reference and per-model relative energies. 
""" - struct_map: dict[tuple, str] = {} - for _, row in reference_df.iterrows(): - system = row["system"] - iso = row["isomer"] - src = struct_root / system / iso / "orca.xyz" - if not src.exists(): - continue - dest_dir = STRUCT_OUT_PATH / system - dest_dir.mkdir(parents=True, exist_ok=True) - dest = dest_dir / f"{iso}.xyz" - shutil.copyfile(src, dest) - struct_map[(system, iso)] = ( - f"assets/lanthanides/isomer_complexes/structures/{system}/{iso}.xyz" + df = _load_isomer_dataframe() + df = df.merge(REFERENCE_DF, on=["system", "isomer"], how="inner") if not df.empty else df + + prediction_table = pd.DataFrame(index=REFERENCE_INDEX) + if not df.empty: + prediction_table = ( + df.pivot_table( + index=["system", "isomer"], + columns="model", + values="rel_energy_kcal", + aggfunc="first", + ) + .reindex(REFERENCE_INDEX) ) - return struct_map + results: dict[str, list] = {"ref": REFERENCE_DF["ref"].tolist()} + for model in MODELS: + if model in prediction_table.columns: + series = prediction_table[model] + results[model] = series.where(series.notna(), None).tolist() + else: + results[model] = [None] * len(results["ref"]) -def _build_table( - mae_by_model: dict[str, float | None], - model_order: list[str], -) -> None: - """ - Build the metrics table JSON for the app. - - Parameters - ---------- - mae_by_model - MAE values keyed by model name. - model_order - Ordered list of model names to include. 
- """ - metrics_data = [] - for model in model_order: - metrics_data.append( - {"MLIP": model, "MAE": mae_by_model.get(model), "id": model} - ) - - metrics_data = calc_table_scores( - metrics_data, - thresholds=DEFAULT_THRESHOLDS, - weights=DEFAULT_WEIGHTS, - ) - - metrics_columns = ( - {"name": "MLIP", "id": "MLIP"}, - {"name": "MAE", "id": "MAE"}, - {"name": "Score", "id": "Score"}, - ) - - summary_tooltips = { - "MLIP": "Model identifier, hover for configuration details.", - "Score": "Weighted score across metrics, Higher is better (normalised 0 to 1).", - } - tooltip_header = DEFAULT_TOOLTIPS | summary_tooltips - - model_configs = {model: {} for model in model_order} - model_levels = dict.fromkeys(model_order) - metric_levels = { - metric_name: DEFAULT_THRESHOLDS.get(metric_name, {}).get("level_of_theory") - for metric_name in DEFAULT_THRESHOLDS - } - - model_name_map = {model: model for model in model_order} - - table = dash_table.DataTable( - metrics_data, - list(metrics_columns), - id="metrics", - tooltip_header=tooltip_header, - ) - - OUT_PATH.mkdir(parents=True, exist_ok=True) - with open(OUT_PATH / "isomer_complexes_metrics_table.json", "w") as fp: - json.dump( - { - "data": table.data, - "columns": table.columns, - "tooltip_header": tooltip_header, - "thresholds": DEFAULT_THRESHOLDS, - "weights": DEFAULT_WEIGHTS, - "model_levels_of_theory": model_levels, - "metric_levels_of_theory": metric_levels, - "model_configs": model_configs, - "model_name_map": model_name_map, - }, - fp, - ) + return results @pytest.fixture -def isomer_complex_outputs() -> dict[str, float | None]: +def isomer_complex_outputs( + isomer_relative_energies: dict[str, list], +) -> dict[str, float | None]: """ Build outputs for lanthanide isomer complexes benchmark. + Parameters + ---------- + isomer_relative_energies + Reference and per-model relative energies. + Returns ------- dict[str, float | None] Mean absolute errors by model. 
""" - df = _load_isomer_dataframe() - if df is None: - pytest.skip( - "No lanthanide isomer CSV found. " - "Set ML_PEG_LANTHANIDE_CSV or run calc to stage outputs." - ) - - if df.empty: - pytest.skip("Lanthanide isomer CSV is empty.") - - reference_df = _build_reference_df() - df = df.merge(reference_df, on=["system", "isomer"], how="inner") - if df.empty: - pytest.skip("No overlap between CSV entries and r2SCAN-3c reference data.") - - struct_map: dict[tuple, str] = {} - struct_root_env = os.environ.get(STRUCT_ENV_VAR) - if struct_root_env: - struct_root = Path(struct_root_env).expanduser() - if struct_root.exists(): - struct_map = _copy_structures(struct_root, reference_df) - - models = sorted(df["model"].unique().tolist()) - + ref_vals = isomer_relative_energies["ref"] mae_by_model: dict[str, float | None] = {} - fig = go.Figure() - - for model in models: - sub = df[df["model"] == model] - if sub.empty: + for model in MODELS: + preds = isomer_relative_energies[model] + pairs = [ + (ref, pred) + for ref, pred in zip(ref_vals, preds, strict=True) + if pred is not None + ] + if not pairs: mae_by_model[model] = None continue + ref, pred = zip(*pairs, strict=True) + mae_by_model[model] = mae(list(ref), list(pred)) + return mae_by_model - mae_by_model[model] = mae( - sub["ref"].tolist(), - sub["rel_energy_kcal"].tolist(), - ) - - customdata = [] - for _, row in sub.iterrows(): - struct_path = struct_map.get((row["system"], row["isomer"]), "") - customdata.append([struct_path, row["system"], row["isomer"]]) - - fig.add_trace( - go.Scatter( - x=sub["ref"], - y=sub["rel_energy_kcal"], - mode="markers", - name=model, - customdata=customdata, - hovertemplate=( - "%{customdata[1]} %{customdata[2]}
" - "r2SCAN-3c: %{x:.2f} kcal/mol
" - "Model: %{y:.2f} kcal/mol" - "" - ), - ) - ) - - min_val = min(df["ref"].min(), df["rel_energy_kcal"].min()) - max_val = max(df["ref"].max(), df["rel_energy_kcal"].max()) - pad = 0.5 - min_val -= pad - max_val += pad - - fig.add_trace( - go.Scatter( - x=[min_val, max_val], - y=[min_val, max_val], - mode="lines", - showlegend=False, - line={"color": "#7f7f7f", "dash": "dash"}, - hoverinfo="skip", - ) - ) - fig.update_layout( - title="Lanthanide isomer relative energies", - xaxis_title="r2SCAN-3c Delta E (kcal/mol)", - yaxis_title="Model Delta E (kcal/mol)", - plot_bgcolor="#ffffff", - ) +@pytest.fixture +@build_table( + filename=OUT_PATH / "isomer_complexes_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + weights=DEFAULT_WEIGHTS, +) +def metrics(isomer_complex_outputs: dict[str, float | None]) -> dict[str, dict]: + """ + Collect metrics for lanthanide isomer complexes. - OUT_PATH.mkdir(parents=True, exist_ok=True) - fig.write_json(OUT_PATH / "figure_isomer_complexes.json") - _build_table(mae_by_model, models) + Parameters + ---------- + isomer_complex_outputs + Mean absolute errors for all models. - return mae_by_model + Returns + ------- + dict[str, dict] + Metrics keyed by name for all models. + """ + return {"MAE": isomer_complex_outputs} -def test_isomer_complexes(isomer_complex_outputs: dict[str, float | None]) -> None: +def test_isomer_complexes(metrics: dict[str, dict]) -> None: """ Run lanthanide isomer complexes benchmark analysis. Parameters ---------- - isomer_complex_outputs - Mean absolute errors for all models. + metrics + All lanthanide isomer complex metrics. 
""" return diff --git a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py index eb5bcaea3..35c925825 100644 --- a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py +++ b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py @@ -2,14 +2,13 @@ from __future__ import annotations -from dash import Dash, Input, Output, callback -from dash.html import Div, Iframe +from dash import Dash +from dash.html import Div from ml_peg.app import APP_ROOT from ml_peg.app.base_app import BaseApp -from ml_peg.app.utils.build_callbacks import plot_from_table_column +from ml_peg.app.utils.build_callbacks import plot_from_table_column, struct_from_scatter from ml_peg.app.utils.load import read_plot -from ml_peg.app.utils.weas import generate_weas_html BENCHMARK_NAME = "Lanthanide Isomer Complexes" DOCS_URL = ( @@ -35,44 +34,25 @@ def register_callbacks(self) -> None: column_to_plot={"MAE": scatter}, ) - @callback( - Output(f"{BENCHMARK_NAME}-struct-placeholder", "children"), - Input(f"{BENCHMARK_NAME}-figure", "clickData"), - ) - def show_structure(click_data) -> Div: - """ - Render a structure viewer for the clicked point. - - Parameters - ---------- - click_data - Plotly click payload from the parity scatter. - - Returns - ------- - Div - Viewer iframe or placeholder message. 
- """ - if not click_data: - return Div("Click on a model point to view the structure.") - - point = click_data.get("points", [{}])[0] - custom = point.get("customdata") or [] - if not custom or not custom[0]: - return Div("No structure available for this point.") - - struct_path = custom[0] - return Div( - Iframe( - srcDoc=generate_weas_html(struct_path, "struct", 0), - style={ - "height": "550px", - "width": "100%", - "border": "1px solid #ddd", - "borderRadius": "5px", - }, + struct_root = DATA_PATH / "structures" + if struct_root.exists(): + structs = [] + for system_dir in sorted(struct_root.glob("*")): + if not system_dir.is_dir(): + continue + for struct_file in sorted(system_dir.glob("*.xyz")): + structs.append( + f"assets/lanthanides/isomer_complexes/structures/" + f"{system_dir.name}/{struct_file.name}" + ) + + if structs: + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=structs, + mode="struct", ) - ) def get_app() -> IsomerComplexesApp: From 38bfb18bffe729208cdf7b9a75cd94d42ffb6489 Mon Sep 17 00:00:00 2001 From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com> Date: Thu, 29 Jan 2026 18:06:41 +0000 Subject: [PATCH 06/14] fix linting --- .../analyse_isomer_complexes.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py index 693feaff4..00dd0b206 100644 --- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py +++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py @@ -69,8 +69,8 @@ def _build_reference_df() -> pd.DataFrame: return pd.DataFrame.from_records(records) -REFERENCE_DF = _build_reference_df().sort_values(["system", "isomer"]).reset_index( - drop=True +REFERENCE_DF = ( + _build_reference_df().sort_values(["system", 
"isomer"]).reset_index(drop=True) ) REFERENCE_INDEX = pd.MultiIndex.from_frame(REFERENCE_DF[["system", "isomer"]]) REFERENCE_HOVERDATA = { @@ -97,19 +97,20 @@ def isomer_relative_energies() -> dict[str, list]: Reference and per-model relative energies. """ df = _load_isomer_dataframe() - df = df.merge(REFERENCE_DF, on=["system", "isomer"], how="inner") if not df.empty else df + df = ( + df.merge(REFERENCE_DF, on=["system", "isomer"], how="inner") + if not df.empty + else df + ) prediction_table = pd.DataFrame(index=REFERENCE_INDEX) if not df.empty: - prediction_table = ( - df.pivot_table( - index=["system", "isomer"], - columns="model", - values="rel_energy_kcal", - aggfunc="first", - ) - .reindex(REFERENCE_INDEX) - ) + prediction_table = df.pivot_table( + index=["system", "isomer"], + columns="model", + values="rel_energy_kcal", + aggfunc="first", + ).reindex(REFERENCE_INDEX) results: dict[str, list] = {"ref": REFERENCE_DF["ref"].tolist()} for model in MODELS: From cb6d4a31d94b536e05602fd7f7dd32c3e3eefa5a Mon Sep 17 00:00:00 2001 From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com> Date: Thu, 29 Jan 2026 18:08:41 +0000 Subject: [PATCH 07/14] fix the multiplicity --- .../calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py index b3413b612..32632f5cd 100644 --- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py +++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py @@ -142,7 +142,7 @@ def test_isomer_complexes(mlip: tuple[str, Any]) -> None: atoms = read(entry["xyz"]) atoms.info["charge"] = entry["charge"] atoms.info["spin_multiplicity"] = entry["multiplicity"] - atoms.info["spin"] = (entry["multiplicity"] - 1) / 2 + atoms.info["spin"] = entry["multiplicity"] atoms.calc = calc energy_ev = 
float(atoms.get_potential_energy()) energy_kcal = energy_ev * KCAL_PER_EV From 230e56d6942b79e73ead270505a4032651c3e1a6 Mon Sep 17 00:00:00 2001 From: joehart2001 Date: Fri, 30 Jan 2026 14:57:03 +0000 Subject: [PATCH 08/14] add s3 download and save files as xyz --- .../isomer_complexes/calc_isomer_complexes.py | 117 ++++++------------ 1 file changed, 35 insertions(+), 82 deletions(-) diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py index 32632f5cd..e4e7d4a62 100644 --- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py +++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py @@ -2,37 +2,34 @@ from __future__ import annotations -import os from pathlib import Path from typing import Any -from ase.io import read +from ase.io import read, write import pytest +from tqdm import tqdm from ml_peg.calcs import CALCS_ROOT +from ml_peg.calcs.utils.utils import download_s3_data from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models MODELS = load_models(current_models) OUT_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs" -STRUCT_ENV_VAR = "ML_PEG_LANTHANIDE_STRUCTURES" KCAL_PER_EV = 23.060547 -def _resolve_structure_root() -> Path | None: - """ - Resolve the root directory containing isomer structures. - - Returns - ------- - Path | None - Structure root path if found, otherwise ``None``. 
- """ - env_path = os.environ.get(STRUCT_ENV_VAR) - if env_path: - return Path(env_path).expanduser() - return None +# r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only) +R2SCAN_REF: dict[str, dict[str, float]] = { + "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08}, + "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17}, + "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20}, + "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00}, + "La_f1a50d": {"iso1": 0.00, "iso2": 3.11}, + "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74}, + "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61}, +} def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]: @@ -77,42 +74,6 @@ def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]: return entries -def _write_model_csv( - model_name: str, rows: list[dict[str, Any]], out_dir: Path -) -> None: - """ - Write a per-model CSV of isomer energies. - - Parameters - ---------- - model_name - Model identifier. - rows - Rows containing per-isomer energies and metadata. - out_dir - Output directory for the CSV file. - """ - import csv - - out_dir.mkdir(parents=True, exist_ok=True) - csv_path = out_dir / "isomer_energies.csv" - fieldnames = [ - "model", - "system", - "isomer", - "energy_ev", - "energy_kcal", - "rel_energy_kcal", - "charge", - "multiplicity", - ] - with csv_path.open("w", newline="") as f: - writer = csv.DictWriter(f, fieldnames=fieldnames) - writer.writeheader() - for row in rows: - writer.writerow({k: row.get(k, "") for k in fieldnames}) - - @pytest.mark.parametrize("mlip", MODELS.items()) def test_isomer_complexes(mlip: tuple[str, Any]) -> None: """ @@ -123,22 +84,24 @@ def test_isomer_complexes(mlip: tuple[str, Any]) -> None: mlip Model name and MLIP calculator wrapper. """ - struct_root = _resolve_structure_root() - if struct_root is None or not struct_root.exists(): - pytest.skip( - "No lanthanide structure root found. " - "Set ML_PEG_LANTHANIDE_STRUCTURES to the isomer_structures path." 
+ # download lanthanide isomer complexes dataset + isomer_complexes_dir = ( + download_s3_data( + key="inputs/lanthanides/isomer_complexes/isomer_complexes.zip", + filename="isomer_complexes.zip", ) + / "isomer_complexes" + ) - entries = _load_isomer_entries(struct_root) + entries = _load_isomer_entries(isomer_complexes_dir) if not entries: - pytest.skip(f"No isomer structures found under {struct_root}.") + pytest.skip(f"No isomer structures found under {isomer_complexes_dir}.") model_name, model = mlip calc = model.get_calculator() - results: list[dict[str, Any]] = [] - for entry in entries: + # results: list[dict[str, Any]] = [] + for entry in tqdm(entries, desc=f"Calculating energies for {model_name}"): atoms = read(entry["xyz"]) atoms.info["charge"] = entry["charge"] atoms.info["spin_multiplicity"] = entry["multiplicity"] @@ -146,27 +109,17 @@ def test_isomer_complexes(mlip: tuple[str, Any]) -> None: atoms.calc = calc energy_ev = float(atoms.get_potential_energy()) energy_kcal = energy_ev * KCAL_PER_EV - results.append( - { - "model": model_name, - "system": entry["system"], - "isomer": entry["isomer"], - "energy_ev": energy_ev, - "energy_kcal": energy_kcal, - "charge": entry["charge"], - "multiplicity": entry["multiplicity"], - } - ) - results.sort(key=lambda row: (row["model"], row["system"], row["isomer"])) - grouped: dict[tuple[str, str], list[dict[str, Any]]] = {} - for row in results: - key = (row["model"], row["system"]) - grouped.setdefault(key, []).append(row) + atoms.info["model"] = model_name + atoms.info["energy_ev"] = energy_ev + atoms.info["energy_kcal"] = energy_kcal + atoms.info["system"] = entry["system"] + atoms.info["isomer"] = entry["isomer"] - for rows in grouped.values(): - min_energy = min(row["energy_kcal"] for row in rows) - for row in rows: - row["rel_energy_kcal"] = row["energy_kcal"] - min_energy + atoms.info["ref_energy_kcal"] = R2SCAN_REF.get(entry["system"], {}).get( + entry["isomer"] + ) - _write_model_csv(model_name, results, 
OUT_PATH / model_name) + write_dir = OUT_PATH / model_name + write_dir.mkdir(parents=True, exist_ok=True) + write(write_dir / f"{entry['system']}_{entry['isomer']}.xyz", atoms) From 9946eaae8246dd7a4525655092cf8ca4d8b31991 Mon Sep 17 00:00:00 2001 From: joehart2001 Date: Fri, 30 Jan 2026 15:12:14 +0000 Subject: [PATCH 09/14] only calculate energies we have references for --- .../calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py index e4e7d4a62..1658ce1d8 100644 --- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py +++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py @@ -50,6 +50,8 @@ def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]: for system_dir in sorted(struct_root.glob("*")): if not system_dir.is_dir(): continue + if system_dir.name not in R2SCAN_REF: + continue for iso_dir in sorted(system_dir.glob("iso*")): xyz_path = iso_dir / "orca.xyz" if not xyz_path.exists(): From 4a8a0bdcff7749444491650bfb602289a7e10809 Mon Sep 17 00:00:00 2001 From: joehart2001 Date: Fri, 30 Jan 2026 15:16:24 +0000 Subject: [PATCH 10/14] analysis makeover and add structure visualisation to app --- .../analyse_isomer_complexes.py | 172 ++++++++++-------- .../isomer_complexes/app_isomer_complexes.py | 36 ++-- 2 files changed, 118 insertions(+), 90 deletions(-) diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py index 00dd0b206..73742bdd0 100644 --- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py +++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py @@ -4,7 +4,7 @@ from pathlib import Path -import pandas as pd +from ase.io import read, write import pytest from
ml_peg.analysis.utils.decorators import build_table, plot_parity @@ -24,6 +24,7 @@ ) # r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only) +# These are relative energies (relative to lowest energy isomer for each system) R2SCAN_REF: dict[str, dict[str, float]] = { "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08}, "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17}, @@ -35,48 +36,62 @@ } -def _load_isomer_dataframe() -> pd.DataFrame: +def get_system_names() -> list[str]: """ - Load isomer energies from per-model outputs. + Get sorted list of system names. Returns ------- - pandas.DataFrame - Loaded dataframe, or an empty dataframe if no data are found. + list[str] + Sorted list of system names from R2SCAN_REF. """ - combined_path = CALC_PATH / "isomer_energies.csv" - if combined_path.exists(): - return pd.read_csv(combined_path) + return sorted(R2SCAN_REF.keys()) - csv_paths = sorted(CALC_PATH.glob("*/isomer_energies.csv")) - frames = [pd.read_csv(path) for path in csv_paths] - return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame() +def get_reference_keys() -> list[tuple[str, str]]: + """ + Get sorted list of (system, isomer) tuples for consistent ordering. -def _build_reference_df() -> pd.DataFrame: + Returns + ------- + list[tuple[str, str]] + List of (system, isomer) tuples sorted by system then isomer. """ - Build a reference dataframe from the r2SCAN-3c table. + system_names = get_system_names() + return [ + (system, isomer) + for system in system_names + for isomer in sorted(R2SCAN_REF[system].keys()) + ] + + +def get_reference_values() -> list[float]: + """ + Get reference relative energies in sorted order. Returns ------- - pandas.DataFrame - Dataframe with columns: system, isomer, ref. + list[float] + Reference relative energies matching the order of get_reference_keys(). 
""" - records = [] - for system, iso_map in R2SCAN_REF.items(): - for iso, ref in iso_map.items(): - records.append({"system": system, "isomer": iso, "ref": ref}) - return pd.DataFrame.from_records(records) + reference_keys = get_reference_keys() + return [R2SCAN_REF[system][isomer] for system, isomer in reference_keys] -REFERENCE_DF = ( - _build_reference_df().sort_values(["system", "isomer"]).reset_index(drop=True) -) -REFERENCE_INDEX = pd.MultiIndex.from_frame(REFERENCE_DF[["system", "isomer"]]) -REFERENCE_HOVERDATA = { - "System": REFERENCE_DF["system"].tolist(), - "Isomer": REFERENCE_DF["isomer"].tolist(), -} +def build_hoverdata() -> dict[str, list[str]]: + """ + Build hoverdata dictionary for parity plot. + + Returns + ------- + dict[str, list[str]] + Dictionary with "System" and "Isomer" keys for hover information. + """ + reference_keys = get_reference_keys() + return { + "System": [system for system, _ in reference_keys], + "Isomer": [isomer for _, isomer in reference_keys], + } @pytest.fixture @@ -85,7 +100,7 @@ def _build_reference_df() -> pd.DataFrame: title="Lanthanide isomer relative energies", x_label="Model Delta E (kcal/mol)", y_label="r2SCAN-3c Delta E (kcal/mol)", - hoverdata=REFERENCE_HOVERDATA, + hoverdata=build_hoverdata(), ) def isomer_relative_energies() -> dict[str, list]: """ @@ -96,65 +111,80 @@ def isomer_relative_energies() -> dict[str, list]: dict[str, list] Reference and per-model relative energies. 
""" - df = _load_isomer_dataframe() - df = ( - df.merge(REFERENCE_DF, on=["system", "isomer"], how="inner") - if not df.empty - else df - ) - - prediction_table = pd.DataFrame(index=REFERENCE_INDEX) - if not df.empty: - prediction_table = df.pivot_table( - index=["system", "isomer"], - columns="model", - values="rel_energy_kcal", - aggfunc="first", - ).reindex(REFERENCE_INDEX) - - results: dict[str, list] = {"ref": REFERENCE_DF["ref"].tolist()} - for model in MODELS: - if model in prediction_table.columns: - series = prediction_table[model] - results[model] = series.where(series.notna(), None).tolist() - else: - results[model] = [None] * len(results["ref"]) + results = {"ref": get_reference_values()} | {mlip: [] for mlip in MODELS} + + for model_name in MODELS: + model_dir = CALC_PATH / model_name + if not model_dir.exists(): + # Model directory doesn't exist, fill with None + results[model_name] = [None] * len(get_reference_keys()) + continue + + structs_dir = OUT_PATH / model_name + structs_dir.mkdir(parents=True, exist_ok=True) + + # Process each system separately to compute relative energies + preds: list[float | None] = [] + for system_name in get_system_names(): + # Collect all isomers for this system + isomer_data: dict[str, tuple[float, object]] = {} + for isomer in sorted(R2SCAN_REF[system_name].keys()): + xyz_path = model_dir / f"{system_name}_{isomer}.xyz" + if xyz_path.exists(): + atoms = read(xyz_path) + energy_kcal = atoms.info.get("energy_kcal") + if energy_kcal is not None: + isomer_data[isomer] = (energy_kcal, atoms) + + # Compute relative energies + min_energy = min(energy for energy, _ in isomer_data.values()) + + # Add predictions in sorted isomer order + for isomer in sorted(R2SCAN_REF[system_name].keys()): + if isomer in isomer_data: + energy_kcal, atoms = isomer_data[isomer] + rel_energy = energy_kcal - min_energy + preds.append(rel_energy) + + # Copy structure to app directory + write(structs_dir / f"{system_name}_{isomer}.xyz", atoms) + 
else: + preds.append(None) + + results[model_name] = preds return results @pytest.fixture -def isomer_complex_outputs( - isomer_relative_energies: dict[str, list], -) -> dict[str, float | None]: +def isomer_complex_errors(isomer_relative_energies) -> dict[str, float | None]: """ - Build outputs for lanthanide isomer complexes benchmark. + Get mean absolute error for relative energies. Parameters ---------- isomer_relative_energies - Reference and per-model relative energies. + Dictionary of reference and predicted relative energies. Returns ------- - dict[str, float | None] - Mean absolute errors by model. + dict[str, float] + Dictionary of predicted relative energy errors for all models. """ - ref_vals = isomer_relative_energies["ref"] - mae_by_model: dict[str, float | None] = {} - for model in MODELS: - preds = isomer_relative_energies[model] + results: dict[str, float | None] = {} + for model_name in MODELS: + preds = isomer_relative_energies.get(model_name, []) pairs = [ (ref, pred) - for ref, pred in zip(ref_vals, preds, strict=True) + for ref, pred in zip(isomer_relative_energies["ref"], preds, strict=True) if pred is not None ] if not pairs: - mae_by_model[model] = None + results[model_name] = None continue - ref, pred = zip(*pairs, strict=True) - mae_by_model[model] = mae(list(ref), list(pred)) - return mae_by_model + ref_vals, pred_vals = zip(*pairs, strict=True) + results[model_name] = mae(list(ref_vals), list(pred_vals)) + return results @pytest.fixture @@ -164,13 +194,13 @@ def isomer_complex_outputs( thresholds=DEFAULT_THRESHOLDS, weights=DEFAULT_WEIGHTS, ) -def metrics(isomer_complex_outputs: dict[str, float | None]) -> dict[str, dict]: +def metrics(isomer_complex_errors: dict[str, float | None]) -> dict[str, dict]: """ Collect metrics for lanthanide isomer complexes. Parameters ---------- - isomer_complex_outputs + isomer_complex_errors Mean absolute errors for all models. 
Returns @@ -178,7 +208,7 @@ def metrics(isomer_complex_outputs: dict[str, float | None]) -> dict[str, dict]: dict[str, dict] Metrics keyed by name for all models. """ - return {"MAE": isomer_complex_outputs} + return {"MAE": isomer_complex_errors} def test_isomer_complexes(metrics: dict[str, dict]) -> None: diff --git a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py index 35c925825..b77619ade 100644 --- a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py +++ b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py @@ -9,7 +9,10 @@ from ml_peg.app.base_app import BaseApp from ml_peg.app.utils.build_callbacks import plot_from_table_column, struct_from_scatter from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models +MODELS = get_model_names(current_models) BENCHMARK_NAME = "Lanthanide Isomer Complexes" DOCS_URL = ( "https://ddmms.github.io/ml-peg/user_guide/benchmarks/lanthanides.html" @@ -34,25 +37,20 @@ def register_callbacks(self) -> None: column_to_plot={"MAE": scatter}, ) - struct_root = DATA_PATH / "structures" - if struct_root.exists(): - structs = [] - for system_dir in sorted(struct_root.glob("*")): - if not system_dir.is_dir(): - continue - for struct_file in sorted(system_dir.glob("*.xyz")): - structs.append( - f"assets/lanthanides/isomer_complexes/structures/" - f"{system_dir.name}/{struct_file.name}" - ) - - if structs: - struct_from_scatter( - scatter_id=f"{BENCHMARK_NAME}-figure", - struct_id=f"{BENCHMARK_NAME}-struct-placeholder", - structs=structs, - mode="struct", - ) + # Use first model's structures for visualization + if MODELS: + structs_dir = DATA_PATH / MODELS[0] + structs = [ + f"assets/lanthanides/isomer_complexes/{MODELS[0]}/{struct_file.stem}.xyz" + for struct_file in sorted(structs_dir.glob("*.xyz")) + ] + + struct_from_scatter( + 
scatter_id=f"{BENCHMARK_NAME}-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=structs, + mode="struct", + ) def get_app() -> IsomerComplexesApp: From b9ed690924d0c1048e9e053f7372452ddff373eb Mon Sep 17 00:00:00 2001 From: Joseph Hart <92541539+joehart2001@users.noreply.github.com> Date: Mon, 2 Feb 2026 19:20:12 +0000 Subject: [PATCH 11/14] Update ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py Co-authored-by: Elliott Kasoar <45317199+ElliottKasoar@users.noreply.github.com> --- .../lanthanides/isomer_complexes/calc_isomer_complexes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py index 1658ce1d8..5a9863e6e 100644 --- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py +++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py @@ -5,6 +5,7 @@ from pathlib import Path from typing import Any +from ase import units from ase.io import read, write import pytest from tqdm import tqdm @@ -17,7 +18,7 @@ MODELS = load_models(current_models) OUT_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs" -KCAL_PER_EV = 23.060547 +KCAL_PER_EV = units.mol / units.kcal # r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only) From 5deffbcd3cafa80106da5a5abac4ce58e0b096dc Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 3 Feb 2026 15:07:49 +0000 Subject: [PATCH 12/14] Set integer charges --- .../lanthanides/isomer_complexes/calc_isomer_complexes.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py index 5a9863e6e..a99e93a93 100644 --- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py +++ 
b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py @@ -59,12 +59,8 @@ def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]: continue charge_path = iso_dir / ".CHRG" uhf_path = iso_dir / ".UHF" - charge = ( - float(charge_path.read_text().strip()) if charge_path.exists() else 0.0 - ) - multiplicity = ( - int(float(uhf_path.read_text().strip())) if uhf_path.exists() else 1 - ) + charge = int(charge_path.read_text().strip()) if charge_path.exists() else 0 + multiplicity = int(uhf_path.read_text().strip()) if uhf_path.exists() else 1 entries.append( { "system": system_dir.name, From 3d1186b839f0b425ffa219e4efbe8393d883d6fa Mon Sep 17 00:00:00 2001 From: Joseph Hart <92541539+joehart2001@users.noreply.github.com> Date: Tue, 3 Feb 2026 16:44:17 +0000 Subject: [PATCH 13/14] Update reference data with two extra lanthanides --- .../lanthanides/isomer_complexes/analyse_isomer_complexes.py | 2 ++ .../calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py index 73742bdd0..3c93b02a4 100644 --- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py +++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py @@ -28,9 +28,11 @@ R2SCAN_REF: dict[str, dict[str, float]] = { "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08}, "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17}, + "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.00, "iso4": 1.23}, "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20}, "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00}, "La_f1a50d": {"iso1": 0.00, "iso2": 3.11}, + "Ac_f1a50d": {"iso1": 0.00, "iso2": 3.52}, "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74}, "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61}, } diff --git 
a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py index a99e93a93..32e2a810b 100644 --- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py +++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py @@ -25,9 +25,11 @@ R2SCAN_REF: dict[str, dict[str, float]] = { "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08}, "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17}, + "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.00, "iso4": 1.23}, "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20}, "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00}, "La_f1a50d": {"iso1": 0.00, "iso2": 3.11}, + "Ac_f1a50d": {"iso1": 0.00, "iso2": 3.52}, "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74}, "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61}, } From dff39d3b6ab289180b7e791df969e18d30a548da Mon Sep 17 00:00:00 2001 From: joehart2001 Date: Wed, 4 Feb 2026 14:20:39 +0000 Subject: [PATCH 14/14] add new ref values from orca files --- .../analyse_isomer_complexes.py | 18 +++++++++--------- .../isomer_complexes/calc_isomer_complexes.py | 18 +++++++++--------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py index 3c93b02a4..72294fa22 100644 --- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py +++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py @@ -26,15 +26,15 @@ # r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only) # These are relative energies (relative to lowest energy isomer for each system) R2SCAN_REF: dict[str, dict[str, float]] = { - "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08}, - "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17}, - "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.00, 
"iso4": 1.23}, - "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20}, - "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00}, - "La_f1a50d": {"iso1": 0.00, "iso2": 3.11}, - "Ac_f1a50d": {"iso1": 0.00, "iso2": 3.52}, - "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74}, - "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61}, + "Ac_f1a50d": {"iso1": 0.02, "iso2": 0.0, "iso3": 3.52}, + "Ce_1d271a": {"iso1": 0.0, "iso2": 2.2, "iso3": 1.67}, + "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.0, "iso4": 2.17}, + "Eu_ff6372": {"iso1": 0.0, "iso2": 6.74}, + "La_f1a50d": {"iso1": 0.23, "iso2": 0.0, "iso3": 3.11}, + "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.0, "iso4": 2.08}, + "Nd_c5f44a": {"iso1": 0.0, "iso2": 1.61, "iso3": 0.82}, + "Sm_ed79e8": {"iso1": 2.99, "iso2": 8.97, "iso3": 0.0}, + "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.0, "iso4": 1.23}, } diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py index 32e2a810b..691350752 100644 --- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py +++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py @@ -23,15 +23,15 @@ # r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only) R2SCAN_REF: dict[str, dict[str, float]] = { - "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08}, - "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17}, - "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.00, "iso4": 1.23}, - "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20}, - "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00}, - "La_f1a50d": {"iso1": 0.00, "iso2": 3.11}, - "Ac_f1a50d": {"iso1": 0.00, "iso2": 3.52}, - "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74}, - "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61}, + "Ac_f1a50d": {"iso1": 0.02, "iso2": 0.0, "iso3": 3.52}, + "Ce_1d271a": {"iso1": 0.0, "iso2": 2.2, "iso3": 1.67}, + "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.0, "iso4": 2.17}, + "Eu_ff6372": {"iso1": 
0.0, "iso2": 6.74}, + "La_f1a50d": {"iso1": 0.23, "iso2": 0.0, "iso3": 3.11}, + "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.0, "iso4": 2.08}, + "Nd_c5f44a": {"iso1": 0.0, "iso2": 1.61, "iso3": 0.82}, + "Sm_ed79e8": {"iso1": 2.99, "iso2": 8.97, "iso3": 0.0}, + "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.0, "iso4": 1.23}, }