From f460a6e9c5e8d862affdb458ce3c51d89d9da069 Mon Sep 17 00:00:00 2001
From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com>
Date: Thu, 29 Jan 2026 14:06:45 +0000
Subject: [PATCH 01/14] add lanthanide benchmark
---
docs/source/user_guide/benchmarks/index.rst | 1 +
.../user_guide/benchmarks/lanthanides.rst | 48 ++++
.../analyse_isomer_complexes.py | 247 ++++++++++++++++++
.../lanthanides/isomer_complexes/metrics.yml | 7 +
.../isomer_complexes/app_isomer_complexes.py | 98 +++++++
ml_peg/app/lanthanides/lanthanides.yml | 2 +
.../isomer_complexes/calc_isomer_complexes.py | 43 +++
7 files changed, 446 insertions(+)
create mode 100644 docs/source/user_guide/benchmarks/lanthanides.rst
create mode 100644 ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
create mode 100644 ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml
create mode 100644 ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
create mode 100644 ml_peg/app/lanthanides/lanthanides.yml
create mode 100644 ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
diff --git a/docs/source/user_guide/benchmarks/index.rst b/docs/source/user_guide/benchmarks/index.rst
index 9f339f4d2..33e0d7ba0 100644
--- a/docs/source/user_guide/benchmarks/index.rst
+++ b/docs/source/user_guide/benchmarks/index.rst
@@ -12,3 +12,4 @@ Benchmarks
molecular_crystal
molecular
bulk_crystal
+ lanthanides
diff --git a/docs/source/user_guide/benchmarks/lanthanides.rst b/docs/source/user_guide/benchmarks/lanthanides.rst
new file mode 100644
index 000000000..7c9317266
--- /dev/null
+++ b/docs/source/user_guide/benchmarks/lanthanides.rst
@@ -0,0 +1,48 @@
+===========
+Lanthanides
+===========
+
+Isomer complexes
+================
+
+Summary
+-------
+
+Performance in predicting relative isomer energies for lanthanide complexes
+compared to r2SCAN-3c DFT reference data.
+
+
+Metrics
+-------
+
+1. Relative isomer energy MAE
+
+Accuracy of relative isomer energy predictions.
+
+For each complex, the relative isomer energies are computed with respect to the
+lowest-energy isomer in the r2SCAN-3c reference set and compared to the r2SCAN-3c
+relative energies reported in the reference dataset. The r2SCAN-3c geometries are
+used, with wB97X-V/def2-mTZVPP single-point calculations reported for validation
+in the source study.
+
+
+Computational cost
+------------------
+
+Low: tests are likely to take less than a minute to run on CPU once model outputs
+are available.
+
+
+Data availability
+-----------------
+
+Input structures:
+
+* T. Rose, M. Bursch, J.-M. Mewes, and S. Grimme, Fast and Robust Modeling of
+ Lanthanide and Actinide Complexes, Biomolecules, and Molecular Crystals with
+ the Extended GFN-FF Model, Inorganic Chemistry 63 (2024) 19364-19374.
+
+Reference data:
+
+* Relative isomer energies from r2SCAN-3c (see Supporting Information of the
+ above reference).
diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
new file mode 100644
index 000000000..50f93e01d
--- /dev/null
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -0,0 +1,247 @@
+"""Analyse lanthanide isomer complex benchmark."""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+import shutil
+
+from dash import dash_table
+import pandas as pd
+import plotly.graph_objects as go
+import pytest
+
+from ml_peg.analysis.utils.utils import calc_table_scores, load_metrics_config, mae
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+
+CSV_ENV_VAR = "ML_PEG_LANTHANIDE_CSV"
+STRUCT_ENV_VAR = "ML_PEG_LANTHANIDE_STRUCTURES"
+
+CALC_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs"
+OUT_PATH = APP_ROOT / "data" / "lanthanides" / "isomer_complexes"
+STRUCT_OUT_PATH = OUT_PATH / "structures"
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+ METRICS_CONFIG_PATH
+)
+
+# r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only)
+R2SCAN_REF: dict[str, dict[str, float]] = {
+ "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08},
+ "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17},
+ "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20},
+ "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00},
+ "La_f1a50d": {"iso1": 0.00, "iso2": 3.11},
+ "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74},
+ "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61},
+}
+
+
+def _resolve_csv_path() -> Path | None:
+ env_path = os.environ.get(CSV_ENV_VAR)
+ if env_path:
+ return Path(env_path).expanduser()
+ csv_path = CALC_PATH / "isomer_energies.csv"
+ return csv_path if csv_path.exists() else None
+
+
+def _build_reference_df() -> pd.DataFrame:
+ records = []
+ for system, iso_map in R2SCAN_REF.items():
+ for iso, ref in iso_map.items():
+ records.append({"system": system, "isomer": iso, "ref": ref})
+ return pd.DataFrame.from_records(records)
+
+
+def _copy_structures(struct_root: Path, reference_df: pd.DataFrame) -> dict[tuple, str]:
+ struct_map: dict[tuple, str] = {}
+ for _, row in reference_df.iterrows():
+ system = row["system"]
+ iso = row["isomer"]
+ src = struct_root / system / iso / "orca.xyz"
+ if not src.exists():
+ continue
+ dest_dir = STRUCT_OUT_PATH / system
+ dest_dir.mkdir(parents=True, exist_ok=True)
+ dest = dest_dir / f"{iso}.xyz"
+ shutil.copyfile(src, dest)
+ struct_map[(system, iso)] = (
+ f"assets/lanthanides/isomer_complexes/structures/{system}/{iso}.xyz"
+ )
+ return struct_map
+
+
+def _build_table(
+ mae_by_model: dict[str, float | None],
+ model_order: list[str],
+) -> None:
+ metrics_data = []
+ for model in model_order:
+ metrics_data.append({"MLIP": model, "MAE": mae_by_model.get(model), "id": model})
+
+ metrics_data = calc_table_scores(
+ metrics_data,
+ thresholds=DEFAULT_THRESHOLDS,
+ weights=DEFAULT_WEIGHTS,
+ )
+
+ metrics_columns = (
+ {"name": "MLIP", "id": "MLIP"},
+ {"name": "MAE", "id": "MAE"},
+ {"name": "Score", "id": "Score"},
+ )
+
+ summary_tooltips = {
+ "MLIP": "Model identifier, hover for configuration details.",
+ "Score": "Weighted score across metrics, Higher is better (normalised 0 to 1).",
+ }
+ tooltip_header = DEFAULT_TOOLTIPS | summary_tooltips
+
+ model_configs = {model: {} for model in model_order}
+ model_levels = {model: None for model in model_order}
+ metric_levels = {
+ metric_name: DEFAULT_THRESHOLDS.get(metric_name, {}).get("level_of_theory")
+ for metric_name in DEFAULT_THRESHOLDS
+ }
+
+ model_name_map = {model: model for model in model_order}
+
+ table = dash_table.DataTable(
+ metrics_data,
+ list(metrics_columns),
+ id="metrics",
+ tooltip_header=tooltip_header,
+ )
+
+ OUT_PATH.mkdir(parents=True, exist_ok=True)
+ with open(OUT_PATH / "isomer_complexes_metrics_table.json", "w") as fp:
+ json.dump(
+ {
+ "data": table.data,
+ "columns": table.columns,
+ "tooltip_header": tooltip_header,
+ "thresholds": DEFAULT_THRESHOLDS,
+ "weights": DEFAULT_WEIGHTS,
+ "model_levels_of_theory": model_levels,
+ "metric_levels_of_theory": metric_levels,
+ "model_configs": model_configs,
+ "model_name_map": model_name_map,
+ },
+ fp,
+ )
+
+
+@pytest.fixture
+def isomer_complex_outputs() -> dict[str, float | None]:
+ """
+ Build outputs for lanthanide isomer complexes benchmark.
+
+ Returns
+ -------
+ dict[str, float | None]
+ Mean absolute errors by model.
+ """
+ csv_path = _resolve_csv_path()
+ if csv_path is None:
+ pytest.skip(
+ "No lanthanide isomer CSV found. "
+ "Set ML_PEG_LANTHANIDE_CSV or run calc to stage outputs."
+ )
+
+ df = pd.read_csv(csv_path)
+ if df.empty:
+ pytest.skip("Lanthanide isomer CSV is empty.")
+
+ reference_df = _build_reference_df()
+ df = df.merge(reference_df, on=["system", "isomer"], how="inner")
+ if df.empty:
+ pytest.skip("No overlap between CSV entries and r2SCAN-3c reference data.")
+
+ struct_map: dict[tuple, str] = {}
+ struct_root_env = os.environ.get(STRUCT_ENV_VAR)
+ if struct_root_env:
+ struct_root = Path(struct_root_env).expanduser()
+ if struct_root.exists():
+ struct_map = _copy_structures(struct_root, reference_df)
+
+ models = sorted(df["model"].unique().tolist())
+
+ mae_by_model: dict[str, float | None] = {}
+ fig = go.Figure()
+
+ for model in models:
+ sub = df[df["model"] == model]
+ if sub.empty:
+ mae_by_model[model] = None
+ continue
+
+ mae_by_model[model] = mae(
+ sub["ref"].tolist(),
+ sub["rel_energy_kcal"].tolist(),
+ )
+
+ customdata = []
+ for _, row in sub.iterrows():
+ struct_path = struct_map.get((row["system"], row["isomer"]), "")
+ customdata.append([struct_path, row["system"], row["isomer"]])
+
+ fig.add_trace(
+ go.Scatter(
+ x=sub["ref"],
+ y=sub["rel_energy_kcal"],
+ mode="markers",
+ name=model,
+ customdata=customdata,
+ hovertemplate=(
+ "%{customdata[1]} %{customdata[2]}<br>"
+ "r2SCAN-3c: %{x:.2f} kcal/mol<br>"
+ "Model: %{y:.2f} kcal/mol"
+ ""
+ ),
+ )
+ )
+
+ min_val = min(df["ref"].min(), df["rel_energy_kcal"].min())
+ max_val = max(df["ref"].max(), df["rel_energy_kcal"].max())
+ pad = 0.5
+ min_val -= pad
+ max_val += pad
+
+ fig.add_trace(
+ go.Scatter(
+ x=[min_val, max_val],
+ y=[min_val, max_val],
+ mode="lines",
+ showlegend=False,
+ line={"color": "#7f7f7f", "dash": "dash"},
+ hoverinfo="skip",
+ )
+ )
+
+ fig.update_layout(
+ title="Lanthanide isomer relative energies",
+ xaxis_title="r2SCAN-3c Delta E (kcal/mol)",
+ yaxis_title="Model Delta E (kcal/mol)",
+ plot_bgcolor="#ffffff",
+ )
+
+ OUT_PATH.mkdir(parents=True, exist_ok=True)
+ fig.write_json(OUT_PATH / "figure_isomer_complexes.json")
+ _build_table(mae_by_model, models)
+
+ return mae_by_model
+
+
+def test_isomer_complexes(isomer_complex_outputs: dict[str, float | None]) -> None:
+ """
+ Run lanthanide isomer complexes benchmark analysis.
+
+ Parameters
+ ----------
+ isomer_complex_outputs
+ Mean absolute errors for all models.
+ """
+ return
diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml b/ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml
new file mode 100644
index 000000000..043a99279
--- /dev/null
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml
@@ -0,0 +1,7 @@
+metrics:
+ MAE:
+ good: 0.0
+ bad: 10.0
+ unit: kcal/mol
+ tooltip: Mean absolute error for relative isomer energies
+ level_of_theory: r2SCAN-3c
diff --git a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
new file mode 100644
index 000000000..36182a4d0
--- /dev/null
+++ b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
@@ -0,0 +1,98 @@
+"""Run lanthanide isomer complex benchmark app."""
+
+from __future__ import annotations
+
+from dash import Dash, Input, Output, callback
+from dash.html import Div, Iframe
+
+from ml_peg.app import APP_ROOT
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import plot_from_table_column
+from ml_peg.app.utils.load import read_plot
+from ml_peg.app.utils.weas import generate_weas_html
+
+BENCHMARK_NAME = "Lanthanide Isomer Complexes"
+DOCS_URL = (
+ "https://ddmms.github.io/ml-peg/user_guide/benchmarks/lanthanides.html"
+ "#isomer-complexes"
+)
+DATA_PATH = APP_ROOT / "data" / "lanthanides" / "isomer_complexes"
+
+
+class IsomerComplexesApp(BaseApp):
+ """Lanthanide isomer complex benchmark app layout and callbacks."""
+
+ def register_callbacks(self) -> None:
+ """Register callbacks to app."""
+ scatter = read_plot(
+ DATA_PATH / "figure_isomer_complexes.json",
+ id=f"{BENCHMARK_NAME}-figure",
+ )
+
+ plot_from_table_column(
+ table_id=self.table_id,
+ plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+ column_to_plot={"MAE": scatter},
+ )
+
+ @callback(
+ Output(f"{BENCHMARK_NAME}-struct-placeholder", "children"),
+ Input(f"{BENCHMARK_NAME}-figure", "clickData"),
+ )
+ def show_structure(click_data) -> Div:
+ if not click_data:
+ return Div("Click on a model point to view the structure.")
+
+ point = click_data.get("points", [{}])[0]
+ custom = point.get("customdata") or []
+ if not custom or not custom[0]:
+ return Div("No structure available for this point.")
+
+ struct_path = custom[0]
+ return Div(
+ Iframe(
+ srcDoc=generate_weas_html(struct_path, "struct", 0),
+ style={
+ "height": "550px",
+ "width": "100%",
+ "border": "1px solid #ddd",
+ "borderRadius": "5px",
+ },
+ )
+ )
+
+
+def get_app() -> IsomerComplexesApp:
+ """
+ Get lanthanide isomer complex benchmark app layout and callback registration.
+
+ Returns
+ -------
+ IsomerComplexesApp
+ Benchmark layout and callback registration.
+ """
+ return IsomerComplexesApp(
+ name=BENCHMARK_NAME,
+ description=(
+ "Relative energies of lanthanide isomer complexes compared to r2SCAN-3c."
+ ),
+ docs_url=DOCS_URL,
+ table_path=DATA_PATH / "isomer_complexes_metrics_table.json",
+ extra_components=[
+ Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+ Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
+ ],
+ )
+
+
+if __name__ == "__main__":
+ # Create Dash app
+ full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
+
+ # Construct layout and register callbacks
+ app_instance = get_app()
+ full_app.layout = app_instance.layout
+ app_instance.register_callbacks()
+
+ # Run app
+ full_app.run(port=8061, debug=True)
diff --git a/ml_peg/app/lanthanides/lanthanides.yml b/ml_peg/app/lanthanides/lanthanides.yml
new file mode 100644
index 000000000..1ce5a83a7
--- /dev/null
+++ b/ml_peg/app/lanthanides/lanthanides.yml
@@ -0,0 +1,2 @@
+title: Lanthanides
+description: Relative energies for lanthanide isomer complexes
diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
new file mode 100644
index 000000000..3bc8af183
--- /dev/null
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -0,0 +1,43 @@
+"""Stage lanthanide isomer complex energies for analysis."""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+import shutil
+
+import pytest
+
+from ml_peg.calcs import CALCS_ROOT
+
+OUT_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs"
+CSV_ENV_VAR = "ML_PEG_LANTHANIDE_CSV"
+
+
+def _resolve_source_csv() -> Path | None:
+ env_path = os.environ.get(CSV_ENV_VAR)
+ if env_path:
+ return Path(env_path).expanduser()
+ default_path = OUT_PATH / "isomer_energies.csv"
+ if default_path.exists():
+ return default_path
+ return None
+
+
+def test_stage_isomer_complexes_csv() -> None:
+ """
+ Stage the precomputed isomer energies CSV for analysis.
+
+ Set `ML_PEG_LANTHANIDE_CSV` to point to the source CSV.
+ """
+ source_csv = _resolve_source_csv()
+ if source_csv is None or not source_csv.exists():
+ pytest.skip(
+ "No lanthanide isomer CSV found. "
+ "Set ML_PEG_LANTHANIDE_CSV to the isomer_energies.csv path."
+ )
+
+ OUT_PATH.mkdir(parents=True, exist_ok=True)
+ dest_csv = OUT_PATH / "isomer_energies.csv"
+ if source_csv.resolve() != dest_csv.resolve():
+ shutil.copyfile(source_csv, dest_csv)
From 33e18c767fd0fee97a2f05833215faf7de6d1ccf Mon Sep 17 00:00:00 2001
From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com>
Date: Thu, 29 Jan 2026 14:16:30 +0000
Subject: [PATCH 02/14] Update lanthanides.rst
---
docs/source/user_guide/benchmarks/lanthanides.rst | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/docs/source/user_guide/benchmarks/lanthanides.rst b/docs/source/user_guide/benchmarks/lanthanides.rst
index 7c9317266..e3c5f751f 100644
--- a/docs/source/user_guide/benchmarks/lanthanides.rst
+++ b/docs/source/user_guide/benchmarks/lanthanides.rst
@@ -21,16 +21,13 @@ Accuracy of relative isomer energy predictions.
For each complex, the relative isomer energies are computed with respect to the
lowest-energy isomer in the r2SCAN-3c reference set and compared to the r2SCAN-3c
-relative energies reported in the reference dataset. The r2SCAN-3c geometries are
-used, with wB97X-V/def2-mTZVPP single-point calculations reported for validation
-in the source study.
+relative energies reported in the reference dataset.
Computational cost
------------------
-Low: tests are likely to take less than a minute to run on CPU once model outputs
-are available.
+Low: tests are likely to take less than a minute to run on CPU.
Data availability
From be14311a6a5974c07501e3aaa8243f528b9e2c59 Mon Sep 17 00:00:00 2001
From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com>
Date: Thu, 29 Jan 2026 14:44:13 +0000
Subject: [PATCH 03/14] fix linter
---
.../analyse_isomer_complexes.py | 47 ++++++++++++++++++-
.../isomer_complexes/app_isomer_complexes.py | 13 +++++
.../isomer_complexes/calc_isomer_complexes.py | 8 ++++
3 files changed, 66 insertions(+), 2 deletions(-)
diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
index 50f93e01d..bf0ffe120 100644
--- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -41,6 +41,14 @@
def _resolve_csv_path() -> Path | None:
+ """
+ Resolve the source CSV path for isomer energies.
+
+ Returns
+ -------
+ Path | None
+ CSV path if found, otherwise ``None``.
+ """
env_path = os.environ.get(CSV_ENV_VAR)
if env_path:
return Path(env_path).expanduser()
@@ -49,6 +57,14 @@ def _resolve_csv_path() -> Path | None:
def _build_reference_df() -> pd.DataFrame:
+ """
+ Build a reference dataframe from the r2SCAN-3c table.
+
+ Returns
+ -------
+ pandas.DataFrame
+ Dataframe with columns: system, isomer, ref.
+ """
records = []
for system, iso_map in R2SCAN_REF.items():
for iso, ref in iso_map.items():
@@ -57,6 +73,21 @@ def _build_reference_df() -> pd.DataFrame:
def _copy_structures(struct_root: Path, reference_df: pd.DataFrame) -> dict[tuple, str]:
+ """
+ Copy reference structures into the app assets directory.
+
+ Parameters
+ ----------
+ struct_root
+ Root directory containing isomer structures.
+ reference_df
+ Dataframe of systems/isomers to copy.
+
+ Returns
+ -------
+ dict[tuple, str]
+ Mapping of (system, isomer) to asset path.
+ """
struct_map: dict[tuple, str] = {}
for _, row in reference_df.iterrows():
system = row["system"]
@@ -78,9 +109,21 @@ def _build_table(
mae_by_model: dict[str, float | None],
model_order: list[str],
) -> None:
+ """
+ Build the metrics table JSON for the app.
+
+ Parameters
+ ----------
+ mae_by_model
+ MAE values keyed by model name.
+ model_order
+ Ordered list of model names to include.
+ """
metrics_data = []
for model in model_order:
- metrics_data.append({"MLIP": model, "MAE": mae_by_model.get(model), "id": model})
+ metrics_data.append(
+ {"MLIP": model, "MAE": mae_by_model.get(model), "id": model}
+ )
metrics_data = calc_table_scores(
metrics_data,
@@ -101,7 +144,7 @@ def _build_table(
tooltip_header = DEFAULT_TOOLTIPS | summary_tooltips
model_configs = {model: {} for model in model_order}
- model_levels = {model: None for model in model_order}
+ model_levels = dict.fromkeys(model_order)
metric_levels = {
metric_name: DEFAULT_THRESHOLDS.get(metric_name, {}).get("level_of_theory")
for metric_name in DEFAULT_THRESHOLDS
diff --git a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
index 36182a4d0..eb5bcaea3 100644
--- a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
+++ b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
@@ -40,6 +40,19 @@ def register_callbacks(self) -> None:
Input(f"{BENCHMARK_NAME}-figure", "clickData"),
)
def show_structure(click_data) -> Div:
+ """
+ Render a structure viewer for the clicked point.
+
+ Parameters
+ ----------
+ click_data
+ Plotly click payload from the parity scatter.
+
+ Returns
+ -------
+ Div
+ Viewer iframe or placeholder message.
+ """
if not click_data:
return Div("Click on a model point to view the structure.")
diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index 3bc8af183..4f917f51e 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -15,6 +15,14 @@
def _resolve_source_csv() -> Path | None:
+ """
+ Resolve the source CSV path to stage for analysis.
+
+ Returns
+ -------
+ Path | None
+ CSV path if found, otherwise ``None``.
+ """
env_path = os.environ.get(CSV_ENV_VAR)
if env_path:
return Path(env_path).expanduser()
From b98082a9889db50627ea739d272fc3cf90dc6796 Mon Sep 17 00:00:00 2001
From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com>
Date: Thu, 29 Jan 2026 15:52:22 +0000
Subject: [PATCH 04/14] fix the lanthanide calculator
---
.../analyse_isomer_complexes.py | 30 ++--
.../isomer_complexes/calc_isomer_complexes.py | 163 +++++++++++++++---
2 files changed, 162 insertions(+), 31 deletions(-)
diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
index bf0ffe120..d9fcc4d44 100644
--- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -40,20 +40,31 @@
}
-def _resolve_csv_path() -> Path | None:
+def _load_isomer_dataframe() -> pd.DataFrame | None:
"""
- Resolve the source CSV path for isomer energies.
+ Load isomer energies from a CSV file or per-model outputs.
Returns
-------
- Path | None
- CSV path if found, otherwise ``None``.
+ pandas.DataFrame | None
+ Loaded dataframe, or ``None`` if no data are found.
"""
env_path = os.environ.get(CSV_ENV_VAR)
if env_path:
- return Path(env_path).expanduser()
- csv_path = CALC_PATH / "isomer_energies.csv"
- return csv_path if csv_path.exists() else None
+ path = Path(env_path).expanduser()
+ if path.exists():
+ return pd.read_csv(path)
+
+ combined_path = CALC_PATH / "isomer_energies.csv"
+ if combined_path.exists():
+ return pd.read_csv(combined_path)
+
+ csv_paths = sorted(CALC_PATH.glob("*/isomer_energies.csv"))
+ if not csv_paths:
+ return None
+
+ frames = [pd.read_csv(path) for path in csv_paths]
+ return pd.concat(frames, ignore_index=True) if frames else None
def _build_reference_df() -> pd.DataFrame:
@@ -187,14 +198,13 @@ def isomer_complex_outputs() -> dict[str, float | None]:
dict[str, float | None]
Mean absolute errors by model.
"""
- csv_path = _resolve_csv_path()
- if csv_path is None:
+ df = _load_isomer_dataframe()
+ if df is None:
pytest.skip(
"No lanthanide isomer CSV found. "
"Set ML_PEG_LANTHANIDE_CSV or run calc to stage outputs."
)
- df = pd.read_csv(csv_path)
if df.empty:
pytest.skip("Lanthanide isomer CSV is empty.")
diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index 4f917f51e..b3413b612 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -1,51 +1,172 @@
-"""Stage lanthanide isomer complex energies for analysis."""
+"""Run lanthanide isomer complex energy calculations."""
from __future__ import annotations
import os
from pathlib import Path
-import shutil
+from typing import Any
+from ase.io import read
import pytest
from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
OUT_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs"
-CSV_ENV_VAR = "ML_PEG_LANTHANIDE_CSV"
+STRUCT_ENV_VAR = "ML_PEG_LANTHANIDE_STRUCTURES"
+KCAL_PER_EV = 23.060547
-def _resolve_source_csv() -> Path | None:
+def _resolve_structure_root() -> Path | None:
"""
- Resolve the source CSV path to stage for analysis.
+ Resolve the root directory containing isomer structures.
Returns
-------
Path | None
- CSV path if found, otherwise ``None``.
+ Structure root path if found, otherwise ``None``.
"""
- env_path = os.environ.get(CSV_ENV_VAR)
+ env_path = os.environ.get(STRUCT_ENV_VAR)
if env_path:
return Path(env_path).expanduser()
- default_path = OUT_PATH / "isomer_energies.csv"
- if default_path.exists():
- return default_path
return None
-def test_stage_isomer_complexes_csv() -> None:
+def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]:
"""
- Stage the precomputed isomer energies CSV for analysis.
+ Load isomer entries from the structure root.
+
+ Parameters
+ ----------
+ struct_root
+ Root directory containing system/iso*/orca.xyz and optional .CHRG/.UHF.
- Set `ML_PEG_LANTHANIDE_CSV` to point to the source CSV.
+ Returns
+ -------
+ list[dict[str, Any]]
+ Entry dictionaries with system, isomer, xyz path, charge, multiplicity.
"""
- source_csv = _resolve_source_csv()
- if source_csv is None or not source_csv.exists():
+ entries: list[dict[str, Any]] = []
+ for system_dir in sorted(struct_root.glob("*")):
+ if not system_dir.is_dir():
+ continue
+ for iso_dir in sorted(system_dir.glob("iso*")):
+ xyz_path = iso_dir / "orca.xyz"
+ if not xyz_path.exists():
+ continue
+ charge_path = iso_dir / ".CHRG"
+ uhf_path = iso_dir / ".UHF"
+ charge = (
+ float(charge_path.read_text().strip()) if charge_path.exists() else 0.0
+ )
+ multiplicity = (
+ int(float(uhf_path.read_text().strip())) if uhf_path.exists() else 1
+ )
+ entries.append(
+ {
+ "system": system_dir.name,
+ "isomer": iso_dir.name,
+ "xyz": xyz_path,
+ "charge": charge,
+ "multiplicity": multiplicity,
+ }
+ )
+ return entries
+
+
+def _write_model_csv(
+ model_name: str, rows: list[dict[str, Any]], out_dir: Path
+) -> None:
+ """
+ Write a per-model CSV of isomer energies.
+
+ Parameters
+ ----------
+ model_name
+ Model identifier.
+ rows
+ Rows containing per-isomer energies and metadata.
+ out_dir
+ Output directory for the CSV file.
+ """
+ import csv
+
+ out_dir.mkdir(parents=True, exist_ok=True)
+ csv_path = out_dir / "isomer_energies.csv"
+ fieldnames = [
+ "model",
+ "system",
+ "isomer",
+ "energy_ev",
+ "energy_kcal",
+ "rel_energy_kcal",
+ "charge",
+ "multiplicity",
+ ]
+ with csv_path.open("w", newline="") as f:
+ writer = csv.DictWriter(f, fieldnames=fieldnames)
+ writer.writeheader()
+ for row in rows:
+ writer.writerow({k: row.get(k, "") for k in fieldnames})
+
+
+@pytest.mark.parametrize("mlip", MODELS.items())
+def test_isomer_complexes(mlip: tuple[str, Any]) -> None:
+ """
+ Run single-point energy calculations for lanthanide isomer complexes.
+
+ Parameters
+ ----------
+ mlip
+ Model name and MLIP calculator wrapper.
+ """
+ struct_root = _resolve_structure_root()
+ if struct_root is None or not struct_root.exists():
pytest.skip(
- "No lanthanide isomer CSV found. "
- "Set ML_PEG_LANTHANIDE_CSV to the isomer_energies.csv path."
+ "No lanthanide structure root found. "
+ "Set ML_PEG_LANTHANIDE_STRUCTURES to the isomer_structures path."
+ )
+
+ entries = _load_isomer_entries(struct_root)
+ if not entries:
+ pytest.skip(f"No isomer structures found under {struct_root}.")
+
+ model_name, model = mlip
+ calc = model.get_calculator()
+
+ results: list[dict[str, Any]] = []
+ for entry in entries:
+ atoms = read(entry["xyz"])
+ atoms.info["charge"] = entry["charge"]
+ atoms.info["spin_multiplicity"] = entry["multiplicity"]
+ atoms.info["spin"] = (entry["multiplicity"] - 1) / 2
+ atoms.calc = calc
+ energy_ev = float(atoms.get_potential_energy())
+ energy_kcal = energy_ev * KCAL_PER_EV
+ results.append(
+ {
+ "model": model_name,
+ "system": entry["system"],
+ "isomer": entry["isomer"],
+ "energy_ev": energy_ev,
+ "energy_kcal": energy_kcal,
+ "charge": entry["charge"],
+ "multiplicity": entry["multiplicity"],
+ }
)
- OUT_PATH.mkdir(parents=True, exist_ok=True)
- dest_csv = OUT_PATH / "isomer_energies.csv"
- if source_csv.resolve() != dest_csv.resolve():
- shutil.copyfile(source_csv, dest_csv)
+ results.sort(key=lambda row: (row["model"], row["system"], row["isomer"]))
+ grouped: dict[tuple[str, str], list[dict[str, Any]]] = {}
+ for row in results:
+ key = (row["model"], row["system"])
+ grouped.setdefault(key, []).append(row)
+
+ for rows in grouped.values():
+ min_energy = min(row["energy_kcal"] for row in rows)
+ for row in rows:
+ row["rel_energy_kcal"] = row["energy_kcal"] - min_energy
+
+ _write_model_csv(model_name, results, OUT_PATH / model_name)
From 8e656ee87676c5955f6bb88a1ec54290d9985974 Mon Sep 17 00:00:00 2001
From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com>
Date: Thu, 29 Jan 2026 18:00:12 +0000
Subject: [PATCH 05/14] use better decorators
---
.../analyse_isomer_complexes.py | 300 ++++++------------
.../isomer_complexes/app_isomer_complexes.py | 62 ++--
2 files changed, 117 insertions(+), 245 deletions(-)
diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
index d9fcc4d44..693feaff4 100644
--- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -2,26 +2,21 @@
from __future__ import annotations
-import json
-import os
from pathlib import Path
-import shutil
-from dash import dash_table
import pandas as pd
-import plotly.graph_objects as go
import pytest
-from ml_peg.analysis.utils.utils import calc_table_scores, load_metrics_config, mae
+from ml_peg.analysis.utils.decorators import build_table, plot_parity
+from ml_peg.analysis.utils.utils import load_metrics_config, mae
from ml_peg.app import APP_ROOT
from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
-CSV_ENV_VAR = "ML_PEG_LANTHANIDE_CSV"
-STRUCT_ENV_VAR = "ML_PEG_LANTHANIDE_STRUCTURES"
-
+MODELS = get_model_names(current_models)
CALC_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs"
OUT_PATH = APP_ROOT / "data" / "lanthanides" / "isomer_complexes"
-STRUCT_OUT_PATH = OUT_PATH / "structures"
METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
@@ -40,31 +35,22 @@
}
-def _load_isomer_dataframe() -> pd.DataFrame | None:
+def _load_isomer_dataframe() -> pd.DataFrame:
"""
- Load isomer energies from a CSV file or per-model outputs.
+ Load isomer energies from per-model outputs.
Returns
-------
- pandas.DataFrame | None
- Loaded dataframe, or ``None`` if no data are found.
+ pandas.DataFrame
+ Loaded dataframe, or an empty dataframe if no data are found.
"""
- env_path = os.environ.get(CSV_ENV_VAR)
- if env_path:
- path = Path(env_path).expanduser()
- if path.exists():
- return pd.read_csv(path)
-
combined_path = CALC_PATH / "isomer_energies.csv"
if combined_path.exists():
return pd.read_csv(combined_path)
csv_paths = sorted(CALC_PATH.glob("*/isomer_energies.csv"))
- if not csv_paths:
- return None
-
frames = [pd.read_csv(path) for path in csv_paths]
- return pd.concat(frames, ignore_index=True) if frames else None
+ return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
def _build_reference_df() -> pd.DataFrame:
@@ -83,218 +69,124 @@ def _build_reference_df() -> pd.DataFrame:
return pd.DataFrame.from_records(records)
-def _copy_structures(struct_root: Path, reference_df: pd.DataFrame) -> dict[tuple, str]:
- """
- Copy reference structures into the app assets directory.
+REFERENCE_DF = _build_reference_df().sort_values(["system", "isomer"]).reset_index(
+ drop=True
+)
+REFERENCE_INDEX = pd.MultiIndex.from_frame(REFERENCE_DF[["system", "isomer"]])
+REFERENCE_HOVERDATA = {
+ "System": REFERENCE_DF["system"].tolist(),
+ "Isomer": REFERENCE_DF["isomer"].tolist(),
+}
- Parameters
- ----------
- struct_root
- Root directory containing isomer structures.
- reference_df
- Dataframe of systems/isomers to copy.
+
+@pytest.fixture
+@plot_parity(
+ filename=OUT_PATH / "figure_isomer_complexes.json",
+ title="Lanthanide isomer relative energies",
+ x_label="Model Delta E (kcal/mol)",
+ y_label="r2SCAN-3c Delta E (kcal/mol)",
+ hoverdata=REFERENCE_HOVERDATA,
+)
+def isomer_relative_energies() -> dict[str, list]:
+ """
+ Build parity data for lanthanide isomer complexes benchmark.
Returns
-------
- dict[tuple, str]
- Mapping of (system, isomer) to asset path.
+ dict[str, list]
+ Reference and per-model relative energies.
"""
- struct_map: dict[tuple, str] = {}
- for _, row in reference_df.iterrows():
- system = row["system"]
- iso = row["isomer"]
- src = struct_root / system / iso / "orca.xyz"
- if not src.exists():
- continue
- dest_dir = STRUCT_OUT_PATH / system
- dest_dir.mkdir(parents=True, exist_ok=True)
- dest = dest_dir / f"{iso}.xyz"
- shutil.copyfile(src, dest)
- struct_map[(system, iso)] = (
- f"assets/lanthanides/isomer_complexes/structures/{system}/{iso}.xyz"
+ df = _load_isomer_dataframe()
+ df = df.merge(REFERENCE_DF, on=["system", "isomer"], how="inner") if not df.empty else df
+
+ prediction_table = pd.DataFrame(index=REFERENCE_INDEX)
+ if not df.empty:
+ prediction_table = (
+ df.pivot_table(
+ index=["system", "isomer"],
+ columns="model",
+ values="rel_energy_kcal",
+ aggfunc="first",
+ )
+ .reindex(REFERENCE_INDEX)
)
- return struct_map
+ results: dict[str, list] = {"ref": REFERENCE_DF["ref"].tolist()}
+ for model in MODELS:
+ if model in prediction_table.columns:
+ series = prediction_table[model]
+ results[model] = series.where(series.notna(), None).tolist()
+ else:
+ results[model] = [None] * len(results["ref"])
-def _build_table(
- mae_by_model: dict[str, float | None],
- model_order: list[str],
-) -> None:
- """
- Build the metrics table JSON for the app.
-
- Parameters
- ----------
- mae_by_model
- MAE values keyed by model name.
- model_order
- Ordered list of model names to include.
- """
- metrics_data = []
- for model in model_order:
- metrics_data.append(
- {"MLIP": model, "MAE": mae_by_model.get(model), "id": model}
- )
-
- metrics_data = calc_table_scores(
- metrics_data,
- thresholds=DEFAULT_THRESHOLDS,
- weights=DEFAULT_WEIGHTS,
- )
-
- metrics_columns = (
- {"name": "MLIP", "id": "MLIP"},
- {"name": "MAE", "id": "MAE"},
- {"name": "Score", "id": "Score"},
- )
-
- summary_tooltips = {
- "MLIP": "Model identifier, hover for configuration details.",
- "Score": "Weighted score across metrics, Higher is better (normalised 0 to 1).",
- }
- tooltip_header = DEFAULT_TOOLTIPS | summary_tooltips
-
- model_configs = {model: {} for model in model_order}
- model_levels = dict.fromkeys(model_order)
- metric_levels = {
- metric_name: DEFAULT_THRESHOLDS.get(metric_name, {}).get("level_of_theory")
- for metric_name in DEFAULT_THRESHOLDS
- }
-
- model_name_map = {model: model for model in model_order}
-
- table = dash_table.DataTable(
- metrics_data,
- list(metrics_columns),
- id="metrics",
- tooltip_header=tooltip_header,
- )
-
- OUT_PATH.mkdir(parents=True, exist_ok=True)
- with open(OUT_PATH / "isomer_complexes_metrics_table.json", "w") as fp:
- json.dump(
- {
- "data": table.data,
- "columns": table.columns,
- "tooltip_header": tooltip_header,
- "thresholds": DEFAULT_THRESHOLDS,
- "weights": DEFAULT_WEIGHTS,
- "model_levels_of_theory": model_levels,
- "metric_levels_of_theory": metric_levels,
- "model_configs": model_configs,
- "model_name_map": model_name_map,
- },
- fp,
- )
+ return results
@pytest.fixture
-def isomer_complex_outputs() -> dict[str, float | None]:
+def isomer_complex_outputs(
+ isomer_relative_energies: dict[str, list],
+) -> dict[str, float | None]:
"""
Build outputs for lanthanide isomer complexes benchmark.
+ Parameters
+ ----------
+ isomer_relative_energies
+ Reference and per-model relative energies.
+
Returns
-------
dict[str, float | None]
Mean absolute errors by model.
"""
- df = _load_isomer_dataframe()
- if df is None:
- pytest.skip(
- "No lanthanide isomer CSV found. "
- "Set ML_PEG_LANTHANIDE_CSV or run calc to stage outputs."
- )
-
- if df.empty:
- pytest.skip("Lanthanide isomer CSV is empty.")
-
- reference_df = _build_reference_df()
- df = df.merge(reference_df, on=["system", "isomer"], how="inner")
- if df.empty:
- pytest.skip("No overlap between CSV entries and r2SCAN-3c reference data.")
-
- struct_map: dict[tuple, str] = {}
- struct_root_env = os.environ.get(STRUCT_ENV_VAR)
- if struct_root_env:
- struct_root = Path(struct_root_env).expanduser()
- if struct_root.exists():
- struct_map = _copy_structures(struct_root, reference_df)
-
- models = sorted(df["model"].unique().tolist())
-
+ ref_vals = isomer_relative_energies["ref"]
mae_by_model: dict[str, float | None] = {}
- fig = go.Figure()
-
- for model in models:
- sub = df[df["model"] == model]
- if sub.empty:
+ for model in MODELS:
+ preds = isomer_relative_energies[model]
+ pairs = [
+ (ref, pred)
+ for ref, pred in zip(ref_vals, preds, strict=True)
+ if pred is not None
+ ]
+ if not pairs:
mae_by_model[model] = None
continue
+ ref, pred = zip(*pairs, strict=True)
+ mae_by_model[model] = mae(list(ref), list(pred))
+ return mae_by_model
- mae_by_model[model] = mae(
- sub["ref"].tolist(),
- sub["rel_energy_kcal"].tolist(),
- )
-
- customdata = []
- for _, row in sub.iterrows():
- struct_path = struct_map.get((row["system"], row["isomer"]), "")
- customdata.append([struct_path, row["system"], row["isomer"]])
-
- fig.add_trace(
- go.Scatter(
- x=sub["ref"],
- y=sub["rel_energy_kcal"],
- mode="markers",
- name=model,
- customdata=customdata,
- hovertemplate=(
- "%{customdata[1]} %{customdata[2]}
"
- "r2SCAN-3c: %{x:.2f} kcal/mol
"
- "Model: %{y:.2f} kcal/mol"
- ""
- ),
- )
- )
-
- min_val = min(df["ref"].min(), df["rel_energy_kcal"].min())
- max_val = max(df["ref"].max(), df["rel_energy_kcal"].max())
- pad = 0.5
- min_val -= pad
- max_val += pad
-
- fig.add_trace(
- go.Scatter(
- x=[min_val, max_val],
- y=[min_val, max_val],
- mode="lines",
- showlegend=False,
- line={"color": "#7f7f7f", "dash": "dash"},
- hoverinfo="skip",
- )
- )
- fig.update_layout(
- title="Lanthanide isomer relative energies",
- xaxis_title="r2SCAN-3c Delta E (kcal/mol)",
- yaxis_title="Model Delta E (kcal/mol)",
- plot_bgcolor="#ffffff",
- )
+@pytest.fixture
+@build_table(
+ filename=OUT_PATH / "isomer_complexes_metrics_table.json",
+ metric_tooltips=DEFAULT_TOOLTIPS,
+ thresholds=DEFAULT_THRESHOLDS,
+ weights=DEFAULT_WEIGHTS,
+)
+def metrics(isomer_complex_outputs: dict[str, float | None]) -> dict[str, dict]:
+ """
+ Collect metrics for lanthanide isomer complexes.
- OUT_PATH.mkdir(parents=True, exist_ok=True)
- fig.write_json(OUT_PATH / "figure_isomer_complexes.json")
- _build_table(mae_by_model, models)
+ Parameters
+ ----------
+ isomer_complex_outputs
+ Mean absolute errors for all models.
- return mae_by_model
+ Returns
+ -------
+ dict[str, dict]
+ Metrics keyed by name for all models.
+ """
+ return {"MAE": isomer_complex_outputs}
-def test_isomer_complexes(isomer_complex_outputs: dict[str, float | None]) -> None:
+def test_isomer_complexes(metrics: dict[str, dict]) -> None:
"""
Run lanthanide isomer complexes benchmark analysis.
Parameters
----------
- isomer_complex_outputs
- Mean absolute errors for all models.
+ metrics
+ All lanthanide isomer complex metrics.
"""
return
diff --git a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
index eb5bcaea3..35c925825 100644
--- a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
+++ b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
@@ -2,14 +2,13 @@
from __future__ import annotations
-from dash import Dash, Input, Output, callback
-from dash.html import Div, Iframe
+from dash import Dash
+from dash.html import Div
from ml_peg.app import APP_ROOT
from ml_peg.app.base_app import BaseApp
-from ml_peg.app.utils.build_callbacks import plot_from_table_column
+from ml_peg.app.utils.build_callbacks import plot_from_table_column, struct_from_scatter
from ml_peg.app.utils.load import read_plot
-from ml_peg.app.utils.weas import generate_weas_html
BENCHMARK_NAME = "Lanthanide Isomer Complexes"
DOCS_URL = (
@@ -35,44 +34,25 @@ def register_callbacks(self) -> None:
column_to_plot={"MAE": scatter},
)
- @callback(
- Output(f"{BENCHMARK_NAME}-struct-placeholder", "children"),
- Input(f"{BENCHMARK_NAME}-figure", "clickData"),
- )
- def show_structure(click_data) -> Div:
- """
- Render a structure viewer for the clicked point.
-
- Parameters
- ----------
- click_data
- Plotly click payload from the parity scatter.
-
- Returns
- -------
- Div
- Viewer iframe or placeholder message.
- """
- if not click_data:
- return Div("Click on a model point to view the structure.")
-
- point = click_data.get("points", [{}])[0]
- custom = point.get("customdata") or []
- if not custom or not custom[0]:
- return Div("No structure available for this point.")
-
- struct_path = custom[0]
- return Div(
- Iframe(
- srcDoc=generate_weas_html(struct_path, "struct", 0),
- style={
- "height": "550px",
- "width": "100%",
- "border": "1px solid #ddd",
- "borderRadius": "5px",
- },
+ struct_root = DATA_PATH / "structures"
+ if struct_root.exists():
+ structs = []
+ for system_dir in sorted(struct_root.glob("*")):
+ if not system_dir.is_dir():
+ continue
+ for struct_file in sorted(system_dir.glob("*.xyz")):
+ structs.append(
+ f"assets/lanthanides/isomer_complexes/structures/"
+ f"{system_dir.name}/{struct_file.name}"
+ )
+
+ if structs:
+ struct_from_scatter(
+ scatter_id=f"{BENCHMARK_NAME}-figure",
+ struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+ structs=structs,
+ mode="struct",
)
- )
def get_app() -> IsomerComplexesApp:
From 38bfb18bffe729208cdf7b9a75cd94d42ffb6489 Mon Sep 17 00:00:00 2001
From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com>
Date: Thu, 29 Jan 2026 18:06:41 +0000
Subject: [PATCH 06/14] fix linting
---
.../analyse_isomer_complexes.py | 25 ++++++++++---------
1 file changed, 13 insertions(+), 12 deletions(-)
diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
index 693feaff4..00dd0b206 100644
--- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -69,8 +69,8 @@ def _build_reference_df() -> pd.DataFrame:
return pd.DataFrame.from_records(records)
-REFERENCE_DF = _build_reference_df().sort_values(["system", "isomer"]).reset_index(
- drop=True
+REFERENCE_DF = (
+ _build_reference_df().sort_values(["system", "isomer"]).reset_index(drop=True)
)
REFERENCE_INDEX = pd.MultiIndex.from_frame(REFERENCE_DF[["system", "isomer"]])
REFERENCE_HOVERDATA = {
@@ -97,19 +97,20 @@ def isomer_relative_energies() -> dict[str, list]:
Reference and per-model relative energies.
"""
df = _load_isomer_dataframe()
- df = df.merge(REFERENCE_DF, on=["system", "isomer"], how="inner") if not df.empty else df
+ df = (
+ df.merge(REFERENCE_DF, on=["system", "isomer"], how="inner")
+ if not df.empty
+ else df
+ )
prediction_table = pd.DataFrame(index=REFERENCE_INDEX)
if not df.empty:
- prediction_table = (
- df.pivot_table(
- index=["system", "isomer"],
- columns="model",
- values="rel_energy_kcal",
- aggfunc="first",
- )
- .reindex(REFERENCE_INDEX)
- )
+ prediction_table = df.pivot_table(
+ index=["system", "isomer"],
+ columns="model",
+ values="rel_energy_kcal",
+ aggfunc="first",
+ ).reindex(REFERENCE_INDEX)
results: dict[str, list] = {"ref": REFERENCE_DF["ref"].tolist()}
for model in MODELS:
From cb6d4a31d94b536e05602fd7f7dd32c3e3eefa5a Mon Sep 17 00:00:00 2001
From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com>
Date: Thu, 29 Jan 2026 18:08:41 +0000
Subject: [PATCH 07/14] fix the multiplicity
---
.../calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index b3413b612..32632f5cd 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -142,7 +142,7 @@ def test_isomer_complexes(mlip: tuple[str, Any]) -> None:
atoms = read(entry["xyz"])
atoms.info["charge"] = entry["charge"]
atoms.info["spin_multiplicity"] = entry["multiplicity"]
- atoms.info["spin"] = (entry["multiplicity"] - 1) / 2
+ atoms.info["spin"] = entry["multiplicity"]
atoms.calc = calc
energy_ev = float(atoms.get_potential_energy())
energy_kcal = energy_ev * KCAL_PER_EV
From 230e56d6942b79e73ead270505a4032651c3e1a6 Mon Sep 17 00:00:00 2001
From: joehart2001
Date: Fri, 30 Jan 2026 14:57:03 +0000
Subject: [PATCH 08/14] add s3 download and save files as xyz
---
.../isomer_complexes/calc_isomer_complexes.py | 117 ++++++------------
1 file changed, 35 insertions(+), 82 deletions(-)
diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index 32632f5cd..e4e7d4a62 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -2,37 +2,34 @@
from __future__ import annotations
-import os
from pathlib import Path
from typing import Any
-from ase.io import read
+from ase.io import read, write
import pytest
+from tqdm import tqdm
from ml_peg.calcs import CALCS_ROOT
+from ml_peg.calcs.utils.utils import download_s3_data
from ml_peg.models.get_models import load_models
from ml_peg.models.models import current_models
MODELS = load_models(current_models)
OUT_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs"
-STRUCT_ENV_VAR = "ML_PEG_LANTHANIDE_STRUCTURES"
KCAL_PER_EV = 23.060547
-def _resolve_structure_root() -> Path | None:
- """
- Resolve the root directory containing isomer structures.
-
- Returns
- -------
- Path | None
- Structure root path if found, otherwise ``None``.
- """
- env_path = os.environ.get(STRUCT_ENV_VAR)
- if env_path:
- return Path(env_path).expanduser()
- return None
+# r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only)
+R2SCAN_REF: dict[str, dict[str, float]] = {
+ "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08},
+ "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17},
+ "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20},
+ "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00},
+ "La_f1a50d": {"iso1": 0.00, "iso2": 3.11},
+ "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74},
+ "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61},
+}
def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]:
@@ -77,42 +74,6 @@ def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]:
return entries
-def _write_model_csv(
- model_name: str, rows: list[dict[str, Any]], out_dir: Path
-) -> None:
- """
- Write a per-model CSV of isomer energies.
-
- Parameters
- ----------
- model_name
- Model identifier.
- rows
- Rows containing per-isomer energies and metadata.
- out_dir
- Output directory for the CSV file.
- """
- import csv
-
- out_dir.mkdir(parents=True, exist_ok=True)
- csv_path = out_dir / "isomer_energies.csv"
- fieldnames = [
- "model",
- "system",
- "isomer",
- "energy_ev",
- "energy_kcal",
- "rel_energy_kcal",
- "charge",
- "multiplicity",
- ]
- with csv_path.open("w", newline="") as f:
- writer = csv.DictWriter(f, fieldnames=fieldnames)
- writer.writeheader()
- for row in rows:
- writer.writerow({k: row.get(k, "") for k in fieldnames})
-
-
@pytest.mark.parametrize("mlip", MODELS.items())
def test_isomer_complexes(mlip: tuple[str, Any]) -> None:
"""
@@ -123,22 +84,24 @@ def test_isomer_complexes(mlip: tuple[str, Any]) -> None:
mlip
Model name and MLIP calculator wrapper.
"""
- struct_root = _resolve_structure_root()
- if struct_root is None or not struct_root.exists():
- pytest.skip(
- "No lanthanide structure root found. "
- "Set ML_PEG_LANTHANIDE_STRUCTURES to the isomer_structures path."
+ # download lanthanide isomer complexes dataset
+ isomer_complexes_dir = (
+ download_s3_data(
+ key="inputs/lanthanides/isomer_complexes/isomer_complexes.zip",
+ filename="isomer_complexes.zip",
)
+ / "isomer_complexes"
+ )
- entries = _load_isomer_entries(struct_root)
+ entries = _load_isomer_entries(isomer_complexes_dir)
if not entries:
- pytest.skip(f"No isomer structures found under {struct_root}.")
+ pytest.skip(f"No isomer structures found under {isomer_complexes_dir}.")
model_name, model = mlip
calc = model.get_calculator()
- results: list[dict[str, Any]] = []
- for entry in entries:
+ # results: list[dict[str, Any]] = []
+ for entry in tqdm(entries, desc=f"Calculating energies for {model_name}"):
atoms = read(entry["xyz"])
atoms.info["charge"] = entry["charge"]
atoms.info["spin_multiplicity"] = entry["multiplicity"]
@@ -146,27 +109,17 @@ def test_isomer_complexes(mlip: tuple[str, Any]) -> None:
atoms.calc = calc
energy_ev = float(atoms.get_potential_energy())
energy_kcal = energy_ev * KCAL_PER_EV
- results.append(
- {
- "model": model_name,
- "system": entry["system"],
- "isomer": entry["isomer"],
- "energy_ev": energy_ev,
- "energy_kcal": energy_kcal,
- "charge": entry["charge"],
- "multiplicity": entry["multiplicity"],
- }
- )
- results.sort(key=lambda row: (row["model"], row["system"], row["isomer"]))
- grouped: dict[tuple[str, str], list[dict[str, Any]]] = {}
- for row in results:
- key = (row["model"], row["system"])
- grouped.setdefault(key, []).append(row)
+ atoms.info["model"] = model_name
+ atoms.info["energy_ev"] = energy_ev
+ atoms.info["energy_kcal"] = energy_kcal
+ atoms.info["system"] = entry["system"]
+ atoms.info["isomer"] = entry["isomer"]
- for rows in grouped.values():
- min_energy = min(row["energy_kcal"] for row in rows)
- for row in rows:
- row["rel_energy_kcal"] = row["energy_kcal"] - min_energy
+ atoms.info["ref_energy_kcal"] = R2SCAN_REF.get(entry["system"], {}).get(
+ entry["isomer"]
+ )
- _write_model_csv(model_name, results, OUT_PATH / model_name)
+ write_dir = OUT_PATH / model_name
+ write_dir.mkdir(parents=True, exist_ok=True)
+ write(write_dir / f"{entry['system']}_{entry['isomer']}.xyz", atoms)
From 9946eaae8246dd7a4525655092cf8ca4d8b31991 Mon Sep 17 00:00:00 2001
From: joehart2001
Date: Fri, 30 Jan 2026 15:12:14 +0000
Subject: [PATCH 09/14] only calculate energies we have references for
---
.../calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index e4e7d4a62..1658ce1d8 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -50,6 +50,8 @@ def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]:
for system_dir in sorted(struct_root.glob("*")):
if not system_dir.is_dir():
continue
+ if system_dir.name not in R2SCAN_REF:
+ continue
for iso_dir in sorted(system_dir.glob("iso*")):
xyz_path = iso_dir / "orca.xyz"
if not xyz_path.exists():
From 4a8a0bdcff7749444491650bfb602289a7e10809 Mon Sep 17 00:00:00 2001
From: joehart2001
Date: Fri, 30 Jan 2026 15:16:24 +0000
Subject: [PATCH 10/14] analysis makeover and add structure visualisation to
app
---
.../analyse_isomer_complexes.py | 172 ++++++++++--------
.../isomer_complexes/app_isomer_complexes.py | 36 ++--
2 files changed, 118 insertions(+), 90 deletions(-)
diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
index 00dd0b206..73742bdd0 100644
--- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -4,7 +4,7 @@
from pathlib import Path
-import pandas as pd
+from ase.io import read, write
import pytest
from ml_peg.analysis.utils.decorators import build_table, plot_parity
@@ -24,6 +24,7 @@
)
# r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only)
+# These are relative energies (relative to lowest energy isomer for each system)
R2SCAN_REF: dict[str, dict[str, float]] = {
"Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08},
"Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17},
@@ -35,48 +36,62 @@
}
-def _load_isomer_dataframe() -> pd.DataFrame:
+def get_system_names() -> list[str]:
"""
- Load isomer energies from per-model outputs.
+ Get sorted list of system names.
Returns
-------
- pandas.DataFrame
- Loaded dataframe, or an empty dataframe if no data are found.
+ list[str]
+ Sorted list of system names from R2SCAN_REF.
"""
- combined_path = CALC_PATH / "isomer_energies.csv"
- if combined_path.exists():
- return pd.read_csv(combined_path)
+ return sorted(R2SCAN_REF.keys())
- csv_paths = sorted(CALC_PATH.glob("*/isomer_energies.csv"))
- frames = [pd.read_csv(path) for path in csv_paths]
- return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
+def get_reference_keys() -> list[tuple[str, str]]:
+ """
+ Get sorted list of (system, isomer) tuples for consistent ordering.
-def _build_reference_df() -> pd.DataFrame:
+ Returns
+ -------
+ list[tuple[str, str]]
+ List of (system, isomer) tuples sorted by system then isomer.
"""
- Build a reference dataframe from the r2SCAN-3c table.
+ system_names = get_system_names()
+ return [
+ (system, isomer)
+ for system in system_names
+ for isomer in sorted(R2SCAN_REF[system].keys())
+ ]
+
+
+def get_reference_values() -> list[float]:
+ """
+ Get reference relative energies in sorted order.
Returns
-------
- pandas.DataFrame
- Dataframe with columns: system, isomer, ref.
+ list[float]
+ Reference relative energies matching the order of get_reference_keys().
"""
- records = []
- for system, iso_map in R2SCAN_REF.items():
- for iso, ref in iso_map.items():
- records.append({"system": system, "isomer": iso, "ref": ref})
- return pd.DataFrame.from_records(records)
+ reference_keys = get_reference_keys()
+ return [R2SCAN_REF[system][isomer] for system, isomer in reference_keys]
-REFERENCE_DF = (
- _build_reference_df().sort_values(["system", "isomer"]).reset_index(drop=True)
-)
-REFERENCE_INDEX = pd.MultiIndex.from_frame(REFERENCE_DF[["system", "isomer"]])
-REFERENCE_HOVERDATA = {
- "System": REFERENCE_DF["system"].tolist(),
- "Isomer": REFERENCE_DF["isomer"].tolist(),
-}
+def build_hoverdata() -> dict[str, list[str]]:
+ """
+ Build hoverdata dictionary for parity plot.
+
+ Returns
+ -------
+ dict[str, list[str]]
+ Dictionary with "System" and "Isomer" keys for hover information.
+ """
+ reference_keys = get_reference_keys()
+ return {
+ "System": [system for system, _ in reference_keys],
+ "Isomer": [isomer for _, isomer in reference_keys],
+ }
@pytest.fixture
@@ -85,7 +100,7 @@ def _build_reference_df() -> pd.DataFrame:
title="Lanthanide isomer relative energies",
x_label="Model Delta E (kcal/mol)",
y_label="r2SCAN-3c Delta E (kcal/mol)",
- hoverdata=REFERENCE_HOVERDATA,
+ hoverdata=build_hoverdata(),
)
def isomer_relative_energies() -> dict[str, list]:
"""
@@ -96,65 +111,80 @@ def isomer_relative_energies() -> dict[str, list]:
dict[str, list]
Reference and per-model relative energies.
"""
- df = _load_isomer_dataframe()
- df = (
- df.merge(REFERENCE_DF, on=["system", "isomer"], how="inner")
- if not df.empty
- else df
- )
-
- prediction_table = pd.DataFrame(index=REFERENCE_INDEX)
- if not df.empty:
- prediction_table = df.pivot_table(
- index=["system", "isomer"],
- columns="model",
- values="rel_energy_kcal",
- aggfunc="first",
- ).reindex(REFERENCE_INDEX)
-
- results: dict[str, list] = {"ref": REFERENCE_DF["ref"].tolist()}
- for model in MODELS:
- if model in prediction_table.columns:
- series = prediction_table[model]
- results[model] = series.where(series.notna(), None).tolist()
- else:
- results[model] = [None] * len(results["ref"])
+ results = {"ref": get_reference_values()} | {mlip: [] for mlip in MODELS}
+
+ for model_name in MODELS:
+ model_dir = CALC_PATH / model_name
+ if not model_dir.exists():
+ # Model directory doesn't exist, fill with None
+ results[model_name] = [None] * len(get_reference_keys())
+ continue
+
+ structs_dir = OUT_PATH / model_name
+ structs_dir.mkdir(parents=True, exist_ok=True)
+
+ # Process each system separately to compute relative energies
+ preds: list[float | None] = []
+ for system_name in get_system_names():
+ # Collect all isomers for this system
+ isomer_data: dict[str, tuple[float, object]] = {}
+ for isomer in sorted(R2SCAN_REF[system_name].keys()):
+ xyz_path = model_dir / f"{system_name}_{isomer}.xyz"
+ if xyz_path.exists():
+ atoms = read(xyz_path)
+ energy_kcal = atoms.info.get("energy_kcal")
+ if energy_kcal is not None:
+ isomer_data[isomer] = (energy_kcal, atoms)
+
+ # Compute relative energies (default guards systems with no outputs)
+ min_energy = min((e for e, _ in isomer_data.values()), default=0.0)
+
+ # Add predictions in sorted isomer order
+ for isomer in sorted(R2SCAN_REF[system_name].keys()):
+ if isomer in isomer_data:
+ energy_kcal, atoms = isomer_data[isomer]
+ rel_energy = energy_kcal - min_energy
+ preds.append(rel_energy)
+
+ # Copy structure to app directory
+ write(structs_dir / f"{system_name}_{isomer}.xyz", atoms)
+ else:
+ preds.append(None)
+
+ results[model_name] = preds
return results
@pytest.fixture
-def isomer_complex_outputs(
- isomer_relative_energies: dict[str, list],
-) -> dict[str, float | None]:
+def isomer_complex_errors(isomer_relative_energies) -> dict[str, float | None]:
"""
- Build outputs for lanthanide isomer complexes benchmark.
+ Get mean absolute error for relative energies.
Parameters
----------
isomer_relative_energies
- Reference and per-model relative energies.
+ Dictionary of reference and predicted relative energies.
Returns
-------
- dict[str, float | None]
- Mean absolute errors by model.
+ dict[str, float | None]
+ Dictionary of predicted relative energy errors for all models.
"""
- ref_vals = isomer_relative_energies["ref"]
- mae_by_model: dict[str, float | None] = {}
- for model in MODELS:
- preds = isomer_relative_energies[model]
+ results: dict[str, float | None] = {}
+ for model_name in MODELS:
+ preds = isomer_relative_energies.get(model_name, [])
pairs = [
(ref, pred)
- for ref, pred in zip(ref_vals, preds, strict=True)
+ for ref, pred in zip(isomer_relative_energies["ref"], preds, strict=True)
if pred is not None
]
if not pairs:
- mae_by_model[model] = None
+ results[model_name] = None
continue
- ref, pred = zip(*pairs, strict=True)
- mae_by_model[model] = mae(list(ref), list(pred))
- return mae_by_model
+ ref_vals, pred_vals = zip(*pairs, strict=True)
+ results[model_name] = mae(list(ref_vals), list(pred_vals))
+ return results
@pytest.fixture
@@ -164,13 +194,13 @@ def isomer_complex_outputs(
thresholds=DEFAULT_THRESHOLDS,
weights=DEFAULT_WEIGHTS,
)
-def metrics(isomer_complex_outputs: dict[str, float | None]) -> dict[str, dict]:
+def metrics(isomer_complex_errors: dict[str, float | None]) -> dict[str, dict]:
"""
Collect metrics for lanthanide isomer complexes.
Parameters
----------
- isomer_complex_outputs
+ isomer_complex_errors
Mean absolute errors for all models.
Returns
@@ -178,7 +208,7 @@ def metrics(isomer_complex_outputs: dict[str, float | None]) -> dict[str, dict]:
dict[str, dict]
Metrics keyed by name for all models.
"""
- return {"MAE": isomer_complex_outputs}
+ return {"MAE": isomer_complex_errors}
def test_isomer_complexes(metrics: dict[str, dict]) -> None:
diff --git a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
index 35c925825..b77619ade 100644
--- a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
+++ b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
@@ -9,7 +9,10 @@
from ml_peg.app.base_app import BaseApp
from ml_peg.app.utils.build_callbacks import plot_from_table_column, struct_from_scatter
from ml_peg.app.utils.load import read_plot
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+MODELS = get_model_names(current_models)
BENCHMARK_NAME = "Lanthanide Isomer Complexes"
DOCS_URL = (
"https://ddmms.github.io/ml-peg/user_guide/benchmarks/lanthanides.html"
@@ -34,25 +37,20 @@ def register_callbacks(self) -> None:
column_to_plot={"MAE": scatter},
)
- struct_root = DATA_PATH / "structures"
- if struct_root.exists():
- structs = []
- for system_dir in sorted(struct_root.glob("*")):
- if not system_dir.is_dir():
- continue
- for struct_file in sorted(system_dir.glob("*.xyz")):
- structs.append(
- f"assets/lanthanides/isomer_complexes/structures/"
- f"{system_dir.name}/{struct_file.name}"
- )
-
- if structs:
- struct_from_scatter(
- scatter_id=f"{BENCHMARK_NAME}-figure",
- struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
- structs=structs,
- mode="struct",
- )
+ # Use first model's structures for visualization
+ if MODELS:
+ structs_dir = DATA_PATH / MODELS[0]
+ structs = [
+ f"assets/lanthanides/isomer_complexes/{MODELS[0]}/{struct_file.stem}.xyz"
+ for struct_file in sorted(structs_dir.glob("*.xyz"))
+ ]
+
+ struct_from_scatter(
+ scatter_id=f"{BENCHMARK_NAME}-figure",
+ struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+ structs=structs,
+ mode="struct",
+ )
def get_app() -> IsomerComplexesApp:
From b9ed690924d0c1048e9e053f7372452ddff373eb Mon Sep 17 00:00:00 2001
From: Joseph Hart <92541539+joehart2001@users.noreply.github.com>
Date: Mon, 2 Feb 2026 19:20:12 +0000
Subject: [PATCH 11/14] Update
ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
Co-authored-by: Elliott Kasoar <45317199+ElliottKasoar@users.noreply.github.com>
---
.../lanthanides/isomer_complexes/calc_isomer_complexes.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index 1658ce1d8..5a9863e6e 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -5,6 +5,7 @@
from pathlib import Path
from typing import Any
+from ase import units
from ase.io import read, write
import pytest
from tqdm import tqdm
@@ -17,7 +18,7 @@
MODELS = load_models(current_models)
OUT_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs"
-KCAL_PER_EV = 23.060547
+KCAL_PER_EV = units.mol / units.kcal
# r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only)
From 5deffbcd3cafa80106da5a5abac4ce58e0b096dc Mon Sep 17 00:00:00 2001
From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com>
Date: Tue, 3 Feb 2026 15:07:49 +0000
Subject: [PATCH 12/14] Set integer charges
---
.../lanthanides/isomer_complexes/calc_isomer_complexes.py | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index 5a9863e6e..a99e93a93 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -59,12 +59,8 @@ def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]:
continue
charge_path = iso_dir / ".CHRG"
uhf_path = iso_dir / ".UHF"
- charge = (
- float(charge_path.read_text().strip()) if charge_path.exists() else 0.0
- )
- multiplicity = (
- int(float(uhf_path.read_text().strip())) if uhf_path.exists() else 1
- )
+ charge = int(charge_path.read_text().strip()) if charge_path.exists() else 0
+ multiplicity = int(uhf_path.read_text().strip()) if uhf_path.exists() else 1
entries.append(
{
"system": system_dir.name,
From 3d1186b839f0b425ffa219e4efbe8393d883d6fa Mon Sep 17 00:00:00 2001
From: Joseph Hart <92541539+joehart2001@users.noreply.github.com>
Date: Tue, 3 Feb 2026 16:44:17 +0000
Subject: [PATCH 13/14] Update reference data with two extra actinide complexes (Th, Ac)
---
.../lanthanides/isomer_complexes/analyse_isomer_complexes.py | 2 ++
.../calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py | 2 ++
2 files changed, 4 insertions(+)
diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
index 73742bdd0..3c93b02a4 100644
--- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -28,9 +28,11 @@
R2SCAN_REF: dict[str, dict[str, float]] = {
"Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08},
"Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17},
+ "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.00, "iso4": 1.23},
"Ce_1d271a": {"iso1": 0.00, "iso2": 2.20},
"Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00},
"La_f1a50d": {"iso1": 0.00, "iso2": 3.11},
+ "Ac_f1a50d": {"iso1": 0.00, "iso2": 3.52},
"Eu_ff6372": {"iso1": 0.00, "iso2": 6.74},
"Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61},
}
diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index a99e93a93..32e2a810b 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -25,9 +25,11 @@
R2SCAN_REF: dict[str, dict[str, float]] = {
"Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08},
"Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17},
+ "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.00, "iso4": 1.23},
"Ce_1d271a": {"iso1": 0.00, "iso2": 2.20},
"Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00},
"La_f1a50d": {"iso1": 0.00, "iso2": 3.11},
+ "Ac_f1a50d": {"iso1": 0.00, "iso2": 3.52},
"Eu_ff6372": {"iso1": 0.00, "iso2": 6.74},
"Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61},
}
From dff39d3b6ab289180b7e791df969e18d30a548da Mon Sep 17 00:00:00 2001
From: joehart2001
Date: Wed, 4 Feb 2026 14:20:39 +0000
Subject: [PATCH 14/14] Add new reference values from ORCA files
---
.../analyse_isomer_complexes.py | 18 +++++++++---------
.../isomer_complexes/calc_isomer_complexes.py | 18 +++++++++---------
2 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
index 3c93b02a4..72294fa22 100644
--- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -26,15 +26,15 @@
# r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only)
# These are relative energies (relative to lowest energy isomer for each system)
R2SCAN_REF: dict[str, dict[str, float]] = {
- "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08},
- "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17},
- "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.00, "iso4": 1.23},
- "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20},
- "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00},
- "La_f1a50d": {"iso1": 0.00, "iso2": 3.11},
- "Ac_f1a50d": {"iso1": 0.00, "iso2": 3.52},
- "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74},
- "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61},
+ "Ac_f1a50d": {"iso1": 0.02, "iso2": 0.0, "iso3": 3.52},
+ "Ce_1d271a": {"iso1": 0.0, "iso2": 2.2, "iso3": 1.67},
+ "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.0, "iso4": 2.17},
+ "Eu_ff6372": {"iso1": 0.0, "iso2": 6.74},
+ "La_f1a50d": {"iso1": 0.23, "iso2": 0.0, "iso3": 3.11},
+ "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.0, "iso4": 2.08},
+ "Nd_c5f44a": {"iso1": 0.0, "iso2": 1.61, "iso3": 0.82},
+ "Sm_ed79e8": {"iso1": 2.99, "iso2": 8.97, "iso3": 0.0},
+ "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.0, "iso4": 1.23},
}
diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index 32e2a810b..691350752 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -23,15 +23,15 @@
# r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only)
R2SCAN_REF: dict[str, dict[str, float]] = {
- "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08},
- "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17},
- "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.00, "iso4": 1.23},
- "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20},
- "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00},
- "La_f1a50d": {"iso1": 0.00, "iso2": 3.11},
- "Ac_f1a50d": {"iso1": 0.00, "iso2": 3.52},
- "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74},
- "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61},
+ "Ac_f1a50d": {"iso1": 0.02, "iso2": 0.0, "iso3": 3.52},
+ "Ce_1d271a": {"iso1": 0.0, "iso2": 2.2, "iso3": 1.67},
+ "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.0, "iso4": 2.17},
+ "Eu_ff6372": {"iso1": 0.0, "iso2": 6.74},
+ "La_f1a50d": {"iso1": 0.23, "iso2": 0.0, "iso3": 3.11},
+ "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.0, "iso4": 2.08},
+ "Nd_c5f44a": {"iso1": 0.0, "iso2": 1.61, "iso3": 0.82},
+ "Sm_ed79e8": {"iso1": 2.99, "iso2": 8.97, "iso3": 0.0},
+ "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.0, "iso4": 1.23},
}