From f460a6e9c5e8d862affdb458ce3c51d89d9da069 Mon Sep 17 00:00:00 2001
From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com>
Date: Thu, 29 Jan 2026 14:06:45 +0000
Subject: [PATCH 01/14] add lanthanide benchmark

---
 docs/source/user_guide/benchmarks/index.rst   |   1 +
 .../user_guide/benchmarks/lanthanides.rst     |  48 ++++
 .../analyse_isomer_complexes.py               | 247 ++++++++++++++++++
 .../lanthanides/isomer_complexes/metrics.yml  |   7 +
 .../isomer_complexes/app_isomer_complexes.py  |  98 +++++++
 ml_peg/app/lanthanides/lanthanides.yml        |   2 +
 .../isomer_complexes/calc_isomer_complexes.py |  43 +++
 7 files changed, 446 insertions(+)
 create mode 100644 docs/source/user_guide/benchmarks/lanthanides.rst
 create mode 100644 ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
 create mode 100644 ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml
 create mode 100644 ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
 create mode 100644 ml_peg/app/lanthanides/lanthanides.yml
 create mode 100644 ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py

diff --git a/docs/source/user_guide/benchmarks/index.rst b/docs/source/user_guide/benchmarks/index.rst
index 9f339f4d2..33e0d7ba0 100644
--- a/docs/source/user_guide/benchmarks/index.rst
+++ b/docs/source/user_guide/benchmarks/index.rst
@@ -12,3 +12,4 @@ Benchmarks
     molecular_crystal
     molecular
     bulk_crystal
+    lanthanides
diff --git a/docs/source/user_guide/benchmarks/lanthanides.rst b/docs/source/user_guide/benchmarks/lanthanides.rst
new file mode 100644
index 000000000..7c9317266
--- /dev/null
+++ b/docs/source/user_guide/benchmarks/lanthanides.rst
@@ -0,0 +1,48 @@
+===========
+Lanthanides
+===========
+
+Isomer complexes
+================
+
+Summary
+-------
+
+Performance in predicting relative isomer energies for lanthanide complexes
+compared to r2SCAN-3c DFT reference data.
+
+
+Metrics
+-------
+
+1. Relative isomer energy MAE
+
+Accuracy of relative isomer energy predictions.
+
+For each complex, the relative isomer energies are computed with respect to the
+lowest-energy isomer in the r2SCAN-3c reference set and compared to the r2SCAN-3c
+relative energies reported in the reference dataset. The r2SCAN-3c geometries are
+used, with wB97X-V/def2-mTZVPP single-point calculations reported for validation
+in the source study.
+
+
+Computational cost
+------------------
+
+Low: tests are likely to take less than a minute to run on CPU once model outputs
+are available.
+
+
+Data availability
+-----------------
+
+Input structures:
+
+* T. Rose, M. Bursch, J.-M. Mewes, and S. Grimme, Fast and Robust Modeling of
+  Lanthanide and Actinide Complexes, Biomolecules, and Molecular Crystals with
+  the Extended GFN-FF Model, Inorganic Chemistry 63 (2024) 19364-19374.
+
+Reference data:
+
+* Relative isomer energies from r2SCAN-3c (see Supporting Information of the
+  above reference).
diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
new file mode 100644
index 000000000..50f93e01d
--- /dev/null
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -0,0 +1,247 @@
+"""Analyse lanthanide isomer complex benchmark."""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+import shutil
+
+from dash import dash_table
+import pandas as pd
+import plotly.graph_objects as go
+import pytest
+
+from ml_peg.analysis.utils.utils import calc_table_scores, load_metrics_config, mae
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+
+CSV_ENV_VAR = "ML_PEG_LANTHANIDE_CSV"
+STRUCT_ENV_VAR = "ML_PEG_LANTHANIDE_STRUCTURES"
+
+CALC_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs"
+OUT_PATH = APP_ROOT / "data" / "lanthanides" / "isomer_complexes"
+STRUCT_OUT_PATH = OUT_PATH / "structures"
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+# r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only)
+R2SCAN_REF: dict[str, dict[str, float]] = {
+    "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08},
+    "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17},
+    "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20},
+    "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00},
+    "La_f1a50d": {"iso1": 0.00, "iso2": 3.11},
+    "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74},
+    "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61},
+}
+
+
+def _resolve_csv_path() -> Path | None:
+    env_path = os.environ.get(CSV_ENV_VAR)
+    if env_path:
+        return Path(env_path).expanduser()
+    csv_path = CALC_PATH / "isomer_energies.csv"
+    return csv_path if csv_path.exists() else None
+
+
+def _build_reference_df() -> pd.DataFrame:
+    records = []
+    for system, iso_map in R2SCAN_REF.items():
+        for iso, ref in iso_map.items():
+            records.append({"system": system, "isomer": iso, "ref": ref})
+    return pd.DataFrame.from_records(records)
+
+
+def _copy_structures(struct_root: Path, reference_df: pd.DataFrame) -> dict[tuple, str]:
+    struct_map: dict[tuple, str] = {}
+    for _, row in reference_df.iterrows():
+        system = row["system"]
+        iso = row["isomer"]
+        src = struct_root / system / iso / "orca.xyz"
+        if not src.exists():
+            continue
+        dest_dir = STRUCT_OUT_PATH / system
+        dest_dir.mkdir(parents=True, exist_ok=True)
+        dest = dest_dir / f"{iso}.xyz"
+        shutil.copyfile(src, dest)
+        struct_map[(system, iso)] = (
+            f"assets/lanthanides/isomer_complexes/structures/{system}/{iso}.xyz"
+        )
+    return struct_map
+
+
+def _build_table(
+    mae_by_model: dict[str, float | None],
+    model_order: list[str],
+) -> None:
+    metrics_data = []
+    for model in model_order:
+        metrics_data.append({"MLIP": model, "MAE": mae_by_model.get(model), "id": model})
+
+    metrics_data = calc_table_scores(
+        metrics_data,
+        thresholds=DEFAULT_THRESHOLDS,
+        weights=DEFAULT_WEIGHTS,
+    )
+
+    metrics_columns = (
+        {"name": "MLIP", "id": "MLIP"},
+        {"name": "MAE", "id": "MAE"},
+        {"name": "Score", "id": "Score"},
+    )
+
+    summary_tooltips = {
+        "MLIP": "Model identifier, hover for configuration details.",
+        "Score": "Weighted score across metrics, Higher is better (normalised 0 to 1).",
+    }
+    tooltip_header = DEFAULT_TOOLTIPS | summary_tooltips
+
+    model_configs = {model: {} for model in model_order}
+    model_levels = {model: None for model in model_order}
+    metric_levels = {
+        metric_name: DEFAULT_THRESHOLDS.get(metric_name, {}).get("level_of_theory")
+        for metric_name in DEFAULT_THRESHOLDS
+    }
+
+    model_name_map = {model: model for model in model_order}
+
+    table = dash_table.DataTable(
+        metrics_data,
+        list(metrics_columns),
+        id="metrics",
+        tooltip_header=tooltip_header,
+    )
+
+    OUT_PATH.mkdir(parents=True, exist_ok=True)
+    with open(OUT_PATH / "isomer_complexes_metrics_table.json", "w") as fp:
+        json.dump(
+            {
+                "data": table.data,
+                "columns": table.columns,
+                "tooltip_header": tooltip_header,
+                "thresholds": DEFAULT_THRESHOLDS,
+                "weights": DEFAULT_WEIGHTS,
+                "model_levels_of_theory": model_levels,
+                "metric_levels_of_theory": metric_levels,
+                "model_configs": model_configs,
+                "model_name_map": model_name_map,
+            },
+            fp,
+        )
+
+
+@pytest.fixture
+def isomer_complex_outputs() -> dict[str, float | None]:
+    """
+    Build outputs for lanthanide isomer complexes benchmark.
+
+    Returns
+    -------
+    dict[str, float | None]
+        Mean absolute errors by model.
+    """
+    csv_path = _resolve_csv_path()
+    if csv_path is None:
+        pytest.skip(
+            "No lanthanide isomer CSV found. "
+            "Set ML_PEG_LANTHANIDE_CSV or run calc to stage outputs."
+        )
+
+    df = pd.read_csv(csv_path)
+    if df.empty:
+        pytest.skip("Lanthanide isomer CSV is empty.")
+
+    reference_df = _build_reference_df()
+    df = df.merge(reference_df, on=["system", "isomer"], how="inner")
+    if df.empty:
+        pytest.skip("No overlap between CSV entries and r2SCAN-3c reference data.")
+
+    struct_map: dict[tuple, str] = {}
+    struct_root_env = os.environ.get(STRUCT_ENV_VAR)
+    if struct_root_env:
+        struct_root = Path(struct_root_env).expanduser()
+        if struct_root.exists():
+            struct_map = _copy_structures(struct_root, reference_df)
+
+    models = sorted(df["model"].unique().tolist())
+
+    mae_by_model: dict[str, float | None] = {}
+    fig = go.Figure()
+
+    for model in models:
+        sub = df[df["model"] == model]
+        if sub.empty:
+            mae_by_model[model] = None
+            continue
+
+        mae_by_model[model] = mae(
+            sub["ref"].tolist(),
+            sub["rel_energy_kcal"].tolist(),
+        )
+
+        customdata = []
+        for _, row in sub.iterrows():
+            struct_path = struct_map.get((row["system"], row["isomer"]), "")
+            customdata.append([struct_path, row["system"], row["isomer"]])
+
+        fig.add_trace(
+            go.Scatter(
+                x=sub["ref"],
+                y=sub["rel_energy_kcal"],
+                mode="markers",
+                name=model,
+                customdata=customdata,
+                hovertemplate=(
+                    "<b>%{customdata[1]}</b> %{customdata[2]}<br>"
+                    "r2SCAN-3c: %{x:.2f} kcal/mol<br>"
+                    "Model: %{y:.2f} kcal/mol"
+                    "<extra></extra>"
+                ),
+            )
+        )
+
+    min_val = min(df["ref"].min(), df["rel_energy_kcal"].min())
+    max_val = max(df["ref"].max(), df["rel_energy_kcal"].max())
+    pad = 0.5
+    min_val -= pad
+    max_val += pad
+
+    fig.add_trace(
+        go.Scatter(
+            x=[min_val, max_val],
+            y=[min_val, max_val],
+            mode="lines",
+            showlegend=False,
+            line={"color": "#7f7f7f", "dash": "dash"},
+            hoverinfo="skip",
+        )
+    )
+
+    fig.update_layout(
+        title="Lanthanide isomer relative energies",
+        xaxis_title="r2SCAN-3c Delta E (kcal/mol)",
+        yaxis_title="Model Delta E (kcal/mol)",
+        plot_bgcolor="#ffffff",
+    )
+
+    OUT_PATH.mkdir(parents=True, exist_ok=True)
+    fig.write_json(OUT_PATH / "figure_isomer_complexes.json")
+    _build_table(mae_by_model, models)
+
+    return mae_by_model
+
+
+def test_isomer_complexes(isomer_complex_outputs: dict[str, float | None]) -> None:
+    """
+    Run lanthanide isomer complexes benchmark analysis.
+
+    Parameters
+    ----------
+    isomer_complex_outputs
+        Mean absolute errors for all models.
+    """
+    return
diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml b/ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml
new file mode 100644
index 000000000..043a99279
--- /dev/null
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml
@@ -0,0 +1,7 @@
+metrics:
+  MAE:
+    good: 0.0
+    bad: 10.0
+    unit: kcal/mol
+    tooltip: Mean absolute error for relative isomer energies
+    level_of_theory: r2SCAN-3c
diff --git a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
new file mode 100644
index 000000000..36182a4d0
--- /dev/null
+++ b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
@@ -0,0 +1,98 @@
+"""Run lanthanide isomer complex benchmark app."""
+
+from __future__ import annotations
+
+from dash import Dash, Input, Output, callback
+from dash.html import Div, Iframe
+
+from ml_peg.app import APP_ROOT
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import plot_from_table_column
+from ml_peg.app.utils.load import read_plot
+from ml_peg.app.utils.weas import generate_weas_html
+
+BENCHMARK_NAME = "Lanthanide Isomer Complexes"
+DOCS_URL = (
+    "https://ddmms.github.io/ml-peg/user_guide/benchmarks/lanthanides.html"
+    "#isomer-complexes"
+)
+DATA_PATH = APP_ROOT / "data" / "lanthanides" / "isomer_complexes"
+
+
+class IsomerComplexesApp(BaseApp):
+    """Lanthanide isomer complex benchmark app layout and callbacks."""
+
+    def register_callbacks(self) -> None:
+        """Register callbacks to app."""
+        scatter = read_plot(
+            DATA_PATH / "figure_isomer_complexes.json",
+            id=f"{BENCHMARK_NAME}-figure",
+        )
+
+        plot_from_table_column(
+            table_id=self.table_id,
+            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+            column_to_plot={"MAE": scatter},
+        )
+
+        @callback(
+            Output(f"{BENCHMARK_NAME}-struct-placeholder", "children"),
+            Input(f"{BENCHMARK_NAME}-figure", "clickData"),
+        )
+        def show_structure(click_data) -> Div:
+            if not click_data:
+                return Div("Click on a model point to view the structure.")
+
+            point = click_data.get("points", [{}])[0]
+            custom = point.get("customdata") or []
+            if not custom or not custom[0]:
+                return Div("No structure available for this point.")
+
+            struct_path = custom[0]
+            return Div(
+                Iframe(
+                    srcDoc=generate_weas_html(struct_path, "struct", 0),
+                    style={
+                        "height": "550px",
+                        "width": "100%",
+                        "border": "1px solid #ddd",
+                        "borderRadius": "5px",
+                    },
+                )
+            )
+
+
+def get_app() -> IsomerComplexesApp:
+    """
+    Get lanthanide isomer complex benchmark app layout and callback registration.
+
+    Returns
+    -------
+    IsomerComplexesApp
+        Benchmark layout and callback registration.
+    """
+    return IsomerComplexesApp(
+        name=BENCHMARK_NAME,
+        description=(
+            "Relative energies of lanthanide isomer complexes compared to r2SCAN-3c."
+        ),
+        docs_url=DOCS_URL,
+        table_path=DATA_PATH / "isomer_complexes_metrics_table.json",
+        extra_components=[
+            Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+            Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
+        ],
+    )
+
+
+if __name__ == "__main__":
+    # Create Dash app
+    full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
+
+    # Construct layout and register callbacks
+    app_instance = get_app()
+    full_app.layout = app_instance.layout
+    app_instance.register_callbacks()
+
+    # Run app
+    full_app.run(port=8061, debug=True)
diff --git a/ml_peg/app/lanthanides/lanthanides.yml b/ml_peg/app/lanthanides/lanthanides.yml
new file mode 100644
index 000000000..1ce5a83a7
--- /dev/null
+++ b/ml_peg/app/lanthanides/lanthanides.yml
@@ -0,0 +1,2 @@
+title: Lanthanides
+description: Relative energies for lanthanide isomer complexes
diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
new file mode 100644
index 000000000..3bc8af183
--- /dev/null
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -0,0 +1,43 @@
+"""Stage lanthanide isomer complex energies for analysis."""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+import shutil
+
+import pytest
+
+from ml_peg.calcs import CALCS_ROOT
+
+OUT_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs"
+CSV_ENV_VAR = "ML_PEG_LANTHANIDE_CSV"
+
+
+def _resolve_source_csv() -> Path | None:
+    env_path = os.environ.get(CSV_ENV_VAR)
+    if env_path:
+        return Path(env_path).expanduser()
+    default_path = OUT_PATH / "isomer_energies.csv"
+    if default_path.exists():
+        return default_path
+    return None
+
+
+def test_stage_isomer_complexes_csv() -> None:
+    """
+    Stage the precomputed isomer energies CSV for analysis.
+
+    Set `ML_PEG_LANTHANIDE_CSV` to point to the source CSV.
+    """
+    source_csv = _resolve_source_csv()
+    if source_csv is None or not source_csv.exists():
+        pytest.skip(
+            "No lanthanide isomer CSV found. "
+            "Set ML_PEG_LANTHANIDE_CSV to the isomer_energies.csv path."
+        )
+
+    OUT_PATH.mkdir(parents=True, exist_ok=True)
+    dest_csv = OUT_PATH / "isomer_energies.csv"
+    if source_csv.resolve() != dest_csv.resolve():
+        shutil.copyfile(source_csv, dest_csv)

From 33e18c767fd0fee97a2f05833215faf7de6d1ccf Mon Sep 17 00:00:00 2001
From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com>
Date: Thu, 29 Jan 2026 14:16:30 +0000
Subject: [PATCH 02/14] Update lanthanides.rst

---
 docs/source/user_guide/benchmarks/lanthanides.rst | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/docs/source/user_guide/benchmarks/lanthanides.rst b/docs/source/user_guide/benchmarks/lanthanides.rst
index 7c9317266..e3c5f751f 100644
--- a/docs/source/user_guide/benchmarks/lanthanides.rst
+++ b/docs/source/user_guide/benchmarks/lanthanides.rst
@@ -21,16 +21,13 @@ Accuracy of relative isomer energy predictions.
 
 For each complex, the relative isomer energies are computed with respect to the
 lowest-energy isomer in the r2SCAN-3c reference set and compared to the r2SCAN-3c
-relative energies reported in the reference dataset. The r2SCAN-3c geometries are
-used, with wB97X-V/def2-mTZVPP single-point calculations reported for validation
-in the source study.
+relative energies reported in the reference dataset.
 
 
 Computational cost
 ------------------
 
-Low: tests are likely to take less than a minute to run on CPU once model outputs
-are available.
+Low: tests are likely to take less than a minute to run on CPU.
 
 
 Data availability

From be14311a6a5974c07501e3aaa8243f528b9e2c59 Mon Sep 17 00:00:00 2001
From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com>
Date: Thu, 29 Jan 2026 14:44:13 +0000
Subject: [PATCH 03/14] fix linter

---
 .../analyse_isomer_complexes.py               | 47 ++++++++++++++++++-
 .../isomer_complexes/app_isomer_complexes.py  | 13 +++++
 .../isomer_complexes/calc_isomer_complexes.py |  8 ++++
 3 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
index 50f93e01d..bf0ffe120 100644
--- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -41,6 +41,14 @@
 
 
 def _resolve_csv_path() -> Path | None:
+    """
+    Resolve the source CSV path for isomer energies.
+
+    Returns
+    -------
+    Path | None
+        CSV path if found, otherwise ``None``.
+    """
     env_path = os.environ.get(CSV_ENV_VAR)
     if env_path:
         return Path(env_path).expanduser()
@@ -49,6 +57,14 @@ def _resolve_csv_path() -> Path | None:
 
 
 def _build_reference_df() -> pd.DataFrame:
+    """
+    Build a reference dataframe from the r2SCAN-3c table.
+
+    Returns
+    -------
+    pandas.DataFrame
+        Dataframe with columns: system, isomer, ref.
+    """
     records = []
     for system, iso_map in R2SCAN_REF.items():
         for iso, ref in iso_map.items():
@@ -57,6 +73,21 @@ def _build_reference_df() -> pd.DataFrame:
 
 
 def _copy_structures(struct_root: Path, reference_df: pd.DataFrame) -> dict[tuple, str]:
+    """
+    Copy reference structures into the app assets directory.
+
+    Parameters
+    ----------
+    struct_root
+        Root directory containing isomer structures.
+    reference_df
+        Dataframe of systems/isomers to copy.
+
+    Returns
+    -------
+    dict[tuple, str]
+        Mapping of (system, isomer) to asset path.
+    """
     struct_map: dict[tuple, str] = {}
     for _, row in reference_df.iterrows():
         system = row["system"]
@@ -78,9 +109,21 @@ def _build_table(
     mae_by_model: dict[str, float | None],
     model_order: list[str],
 ) -> None:
+    """
+    Build the metrics table JSON for the app.
+
+    Parameters
+    ----------
+    mae_by_model
+        MAE values keyed by model name.
+    model_order
+        Ordered list of model names to include.
+    """
     metrics_data = []
     for model in model_order:
-        metrics_data.append({"MLIP": model, "MAE": mae_by_model.get(model), "id": model})
+        metrics_data.append(
+            {"MLIP": model, "MAE": mae_by_model.get(model), "id": model}
+        )
 
     metrics_data = calc_table_scores(
         metrics_data,
@@ -101,7 +144,7 @@ def _build_table(
     tooltip_header = DEFAULT_TOOLTIPS | summary_tooltips
 
     model_configs = {model: {} for model in model_order}
-    model_levels = {model: None for model in model_order}
+    model_levels = dict.fromkeys(model_order)
     metric_levels = {
         metric_name: DEFAULT_THRESHOLDS.get(metric_name, {}).get("level_of_theory")
         for metric_name in DEFAULT_THRESHOLDS
diff --git a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
index 36182a4d0..eb5bcaea3 100644
--- a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
+++ b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
@@ -40,6 +40,19 @@ def register_callbacks(self) -> None:
             Input(f"{BENCHMARK_NAME}-figure", "clickData"),
         )
         def show_structure(click_data) -> Div:
+            """
+            Render a structure viewer for the clicked point.
+
+            Parameters
+            ----------
+            click_data
+                Plotly click payload from the parity scatter.
+
+            Returns
+            -------
+            Div
+                Viewer iframe or placeholder message.
+            """
             if not click_data:
                 return Div("Click on a model point to view the structure.")
 
diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index 3bc8af183..4f917f51e 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -15,6 +15,14 @@
 
 
 def _resolve_source_csv() -> Path | None:
+    """
+    Resolve the source CSV path to stage for analysis.
+
+    Returns
+    -------
+    Path | None
+        CSV path if found, otherwise ``None``.
+    """
     env_path = os.environ.get(CSV_ENV_VAR)
     if env_path:
         return Path(env_path).expanduser()

From b98082a9889db50627ea739d272fc3cf90dc6796 Mon Sep 17 00:00:00 2001
From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com>
Date: Thu, 29 Jan 2026 15:52:22 +0000
Subject: [PATCH 04/14] fix the lanthanide calculator

---
 .../analyse_isomer_complexes.py               |  30 ++--
 .../isomer_complexes/calc_isomer_complexes.py | 163 +++++++++++++++---
 2 files changed, 162 insertions(+), 31 deletions(-)

diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
index bf0ffe120..d9fcc4d44 100644
--- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -40,20 +40,31 @@
 }
 
 
-def _resolve_csv_path() -> Path | None:
+def _load_isomer_dataframe() -> pd.DataFrame | None:
     """
-    Resolve the source CSV path for isomer energies.
+    Load isomer energies from a CSV file or per-model outputs.
 
     Returns
     -------
-    Path | None
-        CSV path if found, otherwise ``None``.
+    pandas.DataFrame | None
+        Loaded dataframe, or ``None`` if no data are found.
     """
     env_path = os.environ.get(CSV_ENV_VAR)
     if env_path:
-        return Path(env_path).expanduser()
-    csv_path = CALC_PATH / "isomer_energies.csv"
-    return csv_path if csv_path.exists() else None
+        path = Path(env_path).expanduser()
+        if path.exists():
+            return pd.read_csv(path)
+
+    combined_path = CALC_PATH / "isomer_energies.csv"
+    if combined_path.exists():
+        return pd.read_csv(combined_path)
+
+    csv_paths = sorted(CALC_PATH.glob("*/isomer_energies.csv"))
+    if not csv_paths:
+        return None
+
+    frames = [pd.read_csv(path) for path in csv_paths]
+    return pd.concat(frames, ignore_index=True) if frames else None
 
 
 def _build_reference_df() -> pd.DataFrame:
@@ -187,14 +198,13 @@ def isomer_complex_outputs() -> dict[str, float | None]:
     dict[str, float | None]
         Mean absolute errors by model.
     """
-    csv_path = _resolve_csv_path()
-    if csv_path is None:
+    df = _load_isomer_dataframe()
+    if df is None:
         pytest.skip(
             "No lanthanide isomer CSV found. "
             "Set ML_PEG_LANTHANIDE_CSV or run calc to stage outputs."
         )
 
-    df = pd.read_csv(csv_path)
     if df.empty:
         pytest.skip("Lanthanide isomer CSV is empty.")
 
diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index 4f917f51e..b3413b612 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -1,51 +1,172 @@
-"""Stage lanthanide isomer complex energies for analysis."""
+"""Run lanthanide isomer complex energy calculations."""
 
 from __future__ import annotations
 
 import os
 from pathlib import Path
-import shutil
+from typing import Any
 
+from ase.io import read
 import pytest
 
 from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
 
 OUT_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs"
-CSV_ENV_VAR = "ML_PEG_LANTHANIDE_CSV"
+STRUCT_ENV_VAR = "ML_PEG_LANTHANIDE_STRUCTURES"
+KCAL_PER_EV = 23.060547
 
 
-def _resolve_source_csv() -> Path | None:
+def _resolve_structure_root() -> Path | None:
     """
-    Resolve the source CSV path to stage for analysis.
+    Resolve the root directory containing isomer structures.
 
     Returns
     -------
     Path | None
-        CSV path if found, otherwise ``None``.
+        Structure root path if found, otherwise ``None``.
     """
-    env_path = os.environ.get(CSV_ENV_VAR)
+    env_path = os.environ.get(STRUCT_ENV_VAR)
     if env_path:
         return Path(env_path).expanduser()
-    default_path = OUT_PATH / "isomer_energies.csv"
-    if default_path.exists():
-        return default_path
     return None
 
 
-def test_stage_isomer_complexes_csv() -> None:
+def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]:
     """
-    Stage the precomputed isomer energies CSV for analysis.
+    Load isomer entries from the structure root.
+
+    Parameters
+    ----------
+    struct_root
+        Root directory containing system/iso*/orca.xyz and optional .CHRG/.UHF.
 
-    Set `ML_PEG_LANTHANIDE_CSV` to point to the source CSV.
+    Returns
+    -------
+    list[dict[str, Any]]
+        Entry dictionaries with system, isomer, xyz path, charge, multiplicity.
     """
-    source_csv = _resolve_source_csv()
-    if source_csv is None or not source_csv.exists():
+    entries: list[dict[str, Any]] = []
+    for system_dir in sorted(struct_root.glob("*")):
+        if not system_dir.is_dir():
+            continue
+        for iso_dir in sorted(system_dir.glob("iso*")):
+            xyz_path = iso_dir / "orca.xyz"
+            if not xyz_path.exists():
+                continue
+            charge_path = iso_dir / ".CHRG"
+            uhf_path = iso_dir / ".UHF"
+            charge = (
+                float(charge_path.read_text().strip()) if charge_path.exists() else 0.0
+            )
+            multiplicity = (
+                int(float(uhf_path.read_text().strip())) if uhf_path.exists() else 1
+            )
+            entries.append(
+                {
+                    "system": system_dir.name,
+                    "isomer": iso_dir.name,
+                    "xyz": xyz_path,
+                    "charge": charge,
+                    "multiplicity": multiplicity,
+                }
+            )
+    return entries
+
+
+def _write_model_csv(
+    model_name: str, rows: list[dict[str, Any]], out_dir: Path
+) -> None:
+    """
+    Write a per-model CSV of isomer energies.
+
+    Parameters
+    ----------
+    model_name
+        Model identifier.
+    rows
+        Rows containing per-isomer energies and metadata.
+    out_dir
+        Output directory for the CSV file.
+    """
+    import csv
+
+    out_dir.mkdir(parents=True, exist_ok=True)
+    csv_path = out_dir / "isomer_energies.csv"
+    fieldnames = [
+        "model",
+        "system",
+        "isomer",
+        "energy_ev",
+        "energy_kcal",
+        "rel_energy_kcal",
+        "charge",
+        "multiplicity",
+    ]
+    with csv_path.open("w", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=fieldnames)
+        writer.writeheader()
+        for row in rows:
+            writer.writerow({k: row.get(k, "") for k in fieldnames})
+
+
+@pytest.mark.parametrize("mlip", MODELS.items())
+def test_isomer_complexes(mlip: tuple[str, Any]) -> None:
+    """
+    Run single-point energy calculations for lanthanide isomer complexes.
+
+    Parameters
+    ----------
+    mlip
+        Model name and MLIP calculator wrapper.
+    """
+    struct_root = _resolve_structure_root()
+    if struct_root is None or not struct_root.exists():
         pytest.skip(
-            "No lanthanide isomer CSV found. "
-            "Set ML_PEG_LANTHANIDE_CSV to the isomer_energies.csv path."
+            "No lanthanide structure root found. "
+            "Set ML_PEG_LANTHANIDE_STRUCTURES to the isomer_structures path."
+        )
+
+    entries = _load_isomer_entries(struct_root)
+    if not entries:
+        pytest.skip(f"No isomer structures found under {struct_root}.")
+
+    model_name, model = mlip
+    calc = model.get_calculator()
+
+    results: list[dict[str, Any]] = []
+    for entry in entries:
+        atoms = read(entry["xyz"])
+        atoms.info["charge"] = entry["charge"]
+        atoms.info["spin_multiplicity"] = entry["multiplicity"]
+        atoms.info["spin"] = (entry["multiplicity"] - 1) / 2
+        atoms.calc = calc
+        energy_ev = float(atoms.get_potential_energy())
+        energy_kcal = energy_ev * KCAL_PER_EV
+        results.append(
+            {
+                "model": model_name,
+                "system": entry["system"],
+                "isomer": entry["isomer"],
+                "energy_ev": energy_ev,
+                "energy_kcal": energy_kcal,
+                "charge": entry["charge"],
+                "multiplicity": entry["multiplicity"],
+            }
         )
 
-    OUT_PATH.mkdir(parents=True, exist_ok=True)
-    dest_csv = OUT_PATH / "isomer_energies.csv"
-    if source_csv.resolve() != dest_csv.resolve():
-        shutil.copyfile(source_csv, dest_csv)
+    results.sort(key=lambda row: (row["model"], row["system"], row["isomer"]))
+    grouped: dict[tuple[str, str], list[dict[str, Any]]] = {}
+    for row in results:
+        key = (row["model"], row["system"])
+        grouped.setdefault(key, []).append(row)
+
+    for rows in grouped.values():
+        min_energy = min(row["energy_kcal"] for row in rows)
+        for row in rows:
+            row["rel_energy_kcal"] = row["energy_kcal"] - min_energy
+
+    _write_model_csv(model_name, results, OUT_PATH / model_name)

From 8e656ee87676c5955f6bb88a1ec54290d9985974 Mon Sep 17 00:00:00 2001
From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com>
Date: Thu, 29 Jan 2026 18:00:12 +0000
Subject: [PATCH 05/14] use better decorators

---
 .../analyse_isomer_complexes.py               | 300 ++++++------------
 .../isomer_complexes/app_isomer_complexes.py  |  62 ++--
 2 files changed, 117 insertions(+), 245 deletions(-)

diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
index d9fcc4d44..693feaff4 100644
--- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -2,26 +2,21 @@
 
 from __future__ import annotations
 
-import json
-import os
 from pathlib import Path
-import shutil
 
-from dash import dash_table
 import pandas as pd
-import plotly.graph_objects as go
 import pytest
 
-from ml_peg.analysis.utils.utils import calc_table_scores, load_metrics_config, mae
+from ml_peg.analysis.utils.decorators import build_table, plot_parity
+from ml_peg.analysis.utils.utils import load_metrics_config, mae
 from ml_peg.app import APP_ROOT
 from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
 
-CSV_ENV_VAR = "ML_PEG_LANTHANIDE_CSV"
-STRUCT_ENV_VAR = "ML_PEG_LANTHANIDE_STRUCTURES"
-
+MODELS = get_model_names(current_models)
 CALC_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs"
 OUT_PATH = APP_ROOT / "data" / "lanthanides" / "isomer_complexes"
-STRUCT_OUT_PATH = OUT_PATH / "structures"
 
 METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
 DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
@@ -40,31 +35,22 @@
 }
 
 
-def _load_isomer_dataframe() -> pd.DataFrame | None:
+def _load_isomer_dataframe() -> pd.DataFrame:
     """
-    Load isomer energies from a CSV file or per-model outputs.
+    Load isomer energies from per-model outputs.
 
     Returns
     -------
-    pandas.DataFrame | None
-        Loaded dataframe, or ``None`` if no data are found.
+    pandas.DataFrame
+        Loaded dataframe, or an empty dataframe if no data are found.
     """
-    env_path = os.environ.get(CSV_ENV_VAR)
-    if env_path:
-        path = Path(env_path).expanduser()
-        if path.exists():
-            return pd.read_csv(path)
-
     combined_path = CALC_PATH / "isomer_energies.csv"
     if combined_path.exists():
         return pd.read_csv(combined_path)
 
     csv_paths = sorted(CALC_PATH.glob("*/isomer_energies.csv"))
-    if not csv_paths:
-        return None
-
     frames = [pd.read_csv(path) for path in csv_paths]
-    return pd.concat(frames, ignore_index=True) if frames else None
+    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
 
 
 def _build_reference_df() -> pd.DataFrame:
@@ -83,218 +69,124 @@ def _build_reference_df() -> pd.DataFrame:
     return pd.DataFrame.from_records(records)
 
 
-def _copy_structures(struct_root: Path, reference_df: pd.DataFrame) -> dict[tuple, str]:
-    """
-    Copy reference structures into the app assets directory.
+REFERENCE_DF = _build_reference_df().sort_values(["system", "isomer"]).reset_index(
+    drop=True
+)
+REFERENCE_INDEX = pd.MultiIndex.from_frame(REFERENCE_DF[["system", "isomer"]])
+REFERENCE_HOVERDATA = {
+    "System": REFERENCE_DF["system"].tolist(),
+    "Isomer": REFERENCE_DF["isomer"].tolist(),
+}
 
-    Parameters
-    ----------
-    struct_root
-        Root directory containing isomer structures.
-    reference_df
-        Dataframe of systems/isomers to copy.
+
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure_isomer_complexes.json",
+    title="Lanthanide isomer relative energies",
+    x_label="Model Delta E (kcal/mol)",
+    y_label="r2SCAN-3c Delta E (kcal/mol)",
+    hoverdata=REFERENCE_HOVERDATA,
+)
+def isomer_relative_energies() -> dict[str, list]:
+    """
+    Build parity data for lanthanide isomer complexes benchmark.
 
     Returns
     -------
-    dict[tuple, str]
-        Mapping of (system, isomer) to asset path.
+    dict[str, list]
+        Reference and per-model relative energies.
     """
-    struct_map: dict[tuple, str] = {}
-    for _, row in reference_df.iterrows():
-        system = row["system"]
-        iso = row["isomer"]
-        src = struct_root / system / iso / "orca.xyz"
-        if not src.exists():
-            continue
-        dest_dir = STRUCT_OUT_PATH / system
-        dest_dir.mkdir(parents=True, exist_ok=True)
-        dest = dest_dir / f"{iso}.xyz"
-        shutil.copyfile(src, dest)
-        struct_map[(system, iso)] = (
-            f"assets/lanthanides/isomer_complexes/structures/{system}/{iso}.xyz"
+    df = _load_isomer_dataframe()
+    df = df.merge(REFERENCE_DF, on=["system", "isomer"], how="inner") if not df.empty else df
+
+    prediction_table = pd.DataFrame(index=REFERENCE_INDEX)
+    if not df.empty:
+        prediction_table = (
+            df.pivot_table(
+                index=["system", "isomer"],
+                columns="model",
+                values="rel_energy_kcal",
+                aggfunc="first",
+            )
+            .reindex(REFERENCE_INDEX)
         )
-    return struct_map
 
+    results: dict[str, list] = {"ref": REFERENCE_DF["ref"].tolist()}
+    for model in MODELS:
+        if model in prediction_table.columns:
+            series = prediction_table[model]
+            results[model] = series.where(series.notna(), None).tolist()
+        else:
+            results[model] = [None] * len(results["ref"])
 
-def _build_table(
-    mae_by_model: dict[str, float | None],
-    model_order: list[str],
-) -> None:
-    """
-    Build the metrics table JSON for the app.
-
-    Parameters
-    ----------
-    mae_by_model
-        MAE values keyed by model name.
-    model_order
-        Ordered list of model names to include.
-    """
-    metrics_data = []
-    for model in model_order:
-        metrics_data.append(
-            {"MLIP": model, "MAE": mae_by_model.get(model), "id": model}
-        )
-
-    metrics_data = calc_table_scores(
-        metrics_data,
-        thresholds=DEFAULT_THRESHOLDS,
-        weights=DEFAULT_WEIGHTS,
-    )
-
-    metrics_columns = (
-        {"name": "MLIP", "id": "MLIP"},
-        {"name": "MAE", "id": "MAE"},
-        {"name": "Score", "id": "Score"},
-    )
-
-    summary_tooltips = {
-        "MLIP": "Model identifier, hover for configuration details.",
-        "Score": "Weighted score across metrics, Higher is better (normalised 0 to 1).",
-    }
-    tooltip_header = DEFAULT_TOOLTIPS | summary_tooltips
-
-    model_configs = {model: {} for model in model_order}
-    model_levels = dict.fromkeys(model_order)
-    metric_levels = {
-        metric_name: DEFAULT_THRESHOLDS.get(metric_name, {}).get("level_of_theory")
-        for metric_name in DEFAULT_THRESHOLDS
-    }
-
-    model_name_map = {model: model for model in model_order}
-
-    table = dash_table.DataTable(
-        metrics_data,
-        list(metrics_columns),
-        id="metrics",
-        tooltip_header=tooltip_header,
-    )
-
-    OUT_PATH.mkdir(parents=True, exist_ok=True)
-    with open(OUT_PATH / "isomer_complexes_metrics_table.json", "w") as fp:
-        json.dump(
-            {
-                "data": table.data,
-                "columns": table.columns,
-                "tooltip_header": tooltip_header,
-                "thresholds": DEFAULT_THRESHOLDS,
-                "weights": DEFAULT_WEIGHTS,
-                "model_levels_of_theory": model_levels,
-                "metric_levels_of_theory": metric_levels,
-                "model_configs": model_configs,
-                "model_name_map": model_name_map,
-            },
-            fp,
-        )
+    return results
 
 
 @pytest.fixture
-def isomer_complex_outputs() -> dict[str, float | None]:
+def isomer_complex_outputs(
+    isomer_relative_energies: dict[str, list],
+) -> dict[str, float | None]:
     """
     Build outputs for lanthanide isomer complexes benchmark.
 
+    Parameters
+    ----------
+    isomer_relative_energies
+        Reference and per-model relative energies.
+
     Returns
     -------
     dict[str, float | None]
         Mean absolute errors by model.
     """
-    df = _load_isomer_dataframe()
-    if df is None:
-        pytest.skip(
-            "No lanthanide isomer CSV found. "
-            "Set ML_PEG_LANTHANIDE_CSV or run calc to stage outputs."
-        )
-
-    if df.empty:
-        pytest.skip("Lanthanide isomer CSV is empty.")
-
-    reference_df = _build_reference_df()
-    df = df.merge(reference_df, on=["system", "isomer"], how="inner")
-    if df.empty:
-        pytest.skip("No overlap between CSV entries and r2SCAN-3c reference data.")
-
-    struct_map: dict[tuple, str] = {}
-    struct_root_env = os.environ.get(STRUCT_ENV_VAR)
-    if struct_root_env:
-        struct_root = Path(struct_root_env).expanduser()
-        if struct_root.exists():
-            struct_map = _copy_structures(struct_root, reference_df)
-
-    models = sorted(df["model"].unique().tolist())
-
+    ref_vals = isomer_relative_energies["ref"]
     mae_by_model: dict[str, float | None] = {}
-    fig = go.Figure()
-
-    for model in models:
-        sub = df[df["model"] == model]
-        if sub.empty:
+    for model in MODELS:
+        preds = isomer_relative_energies[model]
+        pairs = [
+            (ref, pred)
+            for ref, pred in zip(ref_vals, preds, strict=True)
+            if pred is not None
+        ]
+        if not pairs:
             mae_by_model[model] = None
             continue
+        ref, pred = zip(*pairs, strict=True)
+        mae_by_model[model] = mae(list(ref), list(pred))
+    return mae_by_model
 
-        mae_by_model[model] = mae(
-            sub["ref"].tolist(),
-            sub["rel_energy_kcal"].tolist(),
-        )
-
-        customdata = []
-        for _, row in sub.iterrows():
-            struct_path = struct_map.get((row["system"], row["isomer"]), "")
-            customdata.append([struct_path, row["system"], row["isomer"]])
-
-        fig.add_trace(
-            go.Scatter(
-                x=sub["ref"],
-                y=sub["rel_energy_kcal"],
-                mode="markers",
-                name=model,
-                customdata=customdata,
-                hovertemplate=(
-                    "<b>%{customdata[1]}</b> %{customdata[2]}<br>"
-                    "r2SCAN-3c: %{x:.2f} kcal/mol<br>"
-                    "Model: %{y:.2f} kcal/mol"
-                    "<extra></extra>"
-                ),
-            )
-        )
-
-    min_val = min(df["ref"].min(), df["rel_energy_kcal"].min())
-    max_val = max(df["ref"].max(), df["rel_energy_kcal"].max())
-    pad = 0.5
-    min_val -= pad
-    max_val += pad
-
-    fig.add_trace(
-        go.Scatter(
-            x=[min_val, max_val],
-            y=[min_val, max_val],
-            mode="lines",
-            showlegend=False,
-            line={"color": "#7f7f7f", "dash": "dash"},
-            hoverinfo="skip",
-        )
-    )
 
-    fig.update_layout(
-        title="Lanthanide isomer relative energies",
-        xaxis_title="r2SCAN-3c Delta E (kcal/mol)",
-        yaxis_title="Model Delta E (kcal/mol)",
-        plot_bgcolor="#ffffff",
-    )
+@pytest.fixture
+@build_table(
+    filename=OUT_PATH / "isomer_complexes_metrics_table.json",
+    metric_tooltips=DEFAULT_TOOLTIPS,
+    thresholds=DEFAULT_THRESHOLDS,
+    weights=DEFAULT_WEIGHTS,
+)
+def metrics(isomer_complex_outputs: dict[str, float | None]) -> dict[str, dict]:
+    """
+    Collect metrics for lanthanide isomer complexes.
 
-    OUT_PATH.mkdir(parents=True, exist_ok=True)
-    fig.write_json(OUT_PATH / "figure_isomer_complexes.json")
-    _build_table(mae_by_model, models)
+    Parameters
+    ----------
+    isomer_complex_outputs
+        Mean absolute errors for all models.
 
-    return mae_by_model
+    Returns
+    -------
+    dict[str, dict]
+        Metrics keyed by name for all models.
+    """
+    return {"MAE": isomer_complex_outputs}
 
 
-def test_isomer_complexes(isomer_complex_outputs: dict[str, float | None]) -> None:
+def test_isomer_complexes(metrics: dict[str, dict]) -> None:
     """
     Run lanthanide isomer complexes benchmark analysis.
 
     Parameters
     ----------
-    isomer_complex_outputs
-        Mean absolute errors for all models.
+    metrics
+        All lanthanide isomer complex metrics.
     """
     return
diff --git a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
index eb5bcaea3..35c925825 100644
--- a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
+++ b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
@@ -2,14 +2,13 @@
 
 from __future__ import annotations
 
-from dash import Dash, Input, Output, callback
-from dash.html import Div, Iframe
+from dash import Dash
+from dash.html import Div
 
 from ml_peg.app import APP_ROOT
 from ml_peg.app.base_app import BaseApp
-from ml_peg.app.utils.build_callbacks import plot_from_table_column
+from ml_peg.app.utils.build_callbacks import plot_from_table_column, struct_from_scatter
 from ml_peg.app.utils.load import read_plot
-from ml_peg.app.utils.weas import generate_weas_html
 
 BENCHMARK_NAME = "Lanthanide Isomer Complexes"
 DOCS_URL = (
@@ -35,44 +34,25 @@ def register_callbacks(self) -> None:
             column_to_plot={"MAE": scatter},
         )
 
-        @callback(
-            Output(f"{BENCHMARK_NAME}-struct-placeholder", "children"),
-            Input(f"{BENCHMARK_NAME}-figure", "clickData"),
-        )
-        def show_structure(click_data) -> Div:
-            """
-            Render a structure viewer for the clicked point.
-
-            Parameters
-            ----------
-            click_data
-                Plotly click payload from the parity scatter.
-
-            Returns
-            -------
-            Div
-                Viewer iframe or placeholder message.
-            """
-            if not click_data:
-                return Div("Click on a model point to view the structure.")
-
-            point = click_data.get("points", [{}])[0]
-            custom = point.get("customdata") or []
-            if not custom or not custom[0]:
-                return Div("No structure available for this point.")
-
-            struct_path = custom[0]
-            return Div(
-                Iframe(
-                    srcDoc=generate_weas_html(struct_path, "struct", 0),
-                    style={
-                        "height": "550px",
-                        "width": "100%",
-                        "border": "1px solid #ddd",
-                        "borderRadius": "5px",
-                    },
+        struct_root = DATA_PATH / "structures"
+        if struct_root.exists():
+            structs = []
+            for system_dir in sorted(struct_root.glob("*")):
+                if not system_dir.is_dir():
+                    continue
+                for struct_file in sorted(system_dir.glob("*.xyz")):
+                    structs.append(
+                        f"assets/lanthanides/isomer_complexes/structures/"
+                        f"{system_dir.name}/{struct_file.name}"
+                    )
+
+            if structs:
+                struct_from_scatter(
+                    scatter_id=f"{BENCHMARK_NAME}-figure",
+                    struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+                    structs=structs,
+                    mode="struct",
                 )
-            )
 
 
 def get_app() -> IsomerComplexesApp:

From 38bfb18bffe729208cdf7b9a75cd94d42ffb6489 Mon Sep 17 00:00:00 2001
From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com>
Date: Thu, 29 Jan 2026 18:06:41 +0000
Subject: [PATCH 06/14] fix linting

---
 .../analyse_isomer_complexes.py               | 25 ++++++++++---------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
index 693feaff4..00dd0b206 100644
--- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -69,8 +69,8 @@ def _build_reference_df() -> pd.DataFrame:
     return pd.DataFrame.from_records(records)
 
 
-REFERENCE_DF = _build_reference_df().sort_values(["system", "isomer"]).reset_index(
-    drop=True
+REFERENCE_DF = (
+    _build_reference_df().sort_values(["system", "isomer"]).reset_index(drop=True)
 )
 REFERENCE_INDEX = pd.MultiIndex.from_frame(REFERENCE_DF[["system", "isomer"]])
 REFERENCE_HOVERDATA = {
@@ -97,19 +97,20 @@ def isomer_relative_energies() -> dict[str, list]:
         Reference and per-model relative energies.
     """
     df = _load_isomer_dataframe()
-    df = df.merge(REFERENCE_DF, on=["system", "isomer"], how="inner") if not df.empty else df
+    df = (
+        df.merge(REFERENCE_DF, on=["system", "isomer"], how="inner")
+        if not df.empty
+        else df
+    )
 
     prediction_table = pd.DataFrame(index=REFERENCE_INDEX)
     if not df.empty:
-        prediction_table = (
-            df.pivot_table(
-                index=["system", "isomer"],
-                columns="model",
-                values="rel_energy_kcal",
-                aggfunc="first",
-            )
-            .reindex(REFERENCE_INDEX)
-        )
+        prediction_table = df.pivot_table(
+            index=["system", "isomer"],
+            columns="model",
+            values="rel_energy_kcal",
+            aggfunc="first",
+        ).reindex(REFERENCE_INDEX)
 
     results: dict[str, list] = {"ref": REFERENCE_DF["ref"].tolist()}
     for model in MODELS:

From cb6d4a31d94b536e05602fd7f7dd32c3e3eefa5a Mon Sep 17 00:00:00 2001
From: Ilyes Batatia <48651863+ilyes319@users.noreply.github.com>
Date: Thu, 29 Jan 2026 18:08:41 +0000
Subject: [PATCH 07/14] fix the multiplicity

---
 .../calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index b3413b612..32632f5cd 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -142,7 +142,7 @@ def test_isomer_complexes(mlip: tuple[str, Any]) -> None:
         atoms = read(entry["xyz"])
         atoms.info["charge"] = entry["charge"]
         atoms.info["spin_multiplicity"] = entry["multiplicity"]
-        atoms.info["spin"] = (entry["multiplicity"] - 1) / 2
+        atoms.info["spin"] = entry["multiplicity"]
         atoms.calc = calc
         energy_ev = float(atoms.get_potential_energy())
         energy_kcal = energy_ev * KCAL_PER_EV

From 230e56d6942b79e73ead270505a4032651c3e1a6 Mon Sep 17 00:00:00 2001
From: joehart2001 <jh2536@cam.ac.uk>
Date: Fri, 30 Jan 2026 14:57:03 +0000
Subject: [PATCH 08/14] add s3 download and save files as xyz

---
 .../isomer_complexes/calc_isomer_complexes.py | 117 ++++++------------
 1 file changed, 35 insertions(+), 82 deletions(-)

diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index 32632f5cd..e4e7d4a62 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -2,37 +2,34 @@
 
 from __future__ import annotations
 
-import os
 from pathlib import Path
 from typing import Any
 
-from ase.io import read
+from ase.io import read, write
 import pytest
+from tqdm import tqdm
 
 from ml_peg.calcs import CALCS_ROOT
+from ml_peg.calcs.utils.utils import download_s3_data
 from ml_peg.models.get_models import load_models
 from ml_peg.models.models import current_models
 
 MODELS = load_models(current_models)
 
 OUT_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs"
-STRUCT_ENV_VAR = "ML_PEG_LANTHANIDE_STRUCTURES"
 KCAL_PER_EV = 23.060547
 
 
-def _resolve_structure_root() -> Path | None:
-    """
-    Resolve the root directory containing isomer structures.
-
-    Returns
-    -------
-    Path | None
-        Structure root path if found, otherwise ``None``.
-    """
-    env_path = os.environ.get(STRUCT_ENV_VAR)
-    if env_path:
-        return Path(env_path).expanduser()
-    return None
+# r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only)
+R2SCAN_REF: dict[str, dict[str, float]] = {
+    "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08},
+    "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17},
+    "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20},
+    "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00},
+    "La_f1a50d": {"iso1": 0.00, "iso2": 3.11},
+    "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74},
+    "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61},
+}
 
 
 def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]:
@@ -77,42 +74,6 @@ def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]:
     return entries
 
 
-def _write_model_csv(
-    model_name: str, rows: list[dict[str, Any]], out_dir: Path
-) -> None:
-    """
-    Write a per-model CSV of isomer energies.
-
-    Parameters
-    ----------
-    model_name
-        Model identifier.
-    rows
-        Rows containing per-isomer energies and metadata.
-    out_dir
-        Output directory for the CSV file.
-    """
-    import csv
-
-    out_dir.mkdir(parents=True, exist_ok=True)
-    csv_path = out_dir / "isomer_energies.csv"
-    fieldnames = [
-        "model",
-        "system",
-        "isomer",
-        "energy_ev",
-        "energy_kcal",
-        "rel_energy_kcal",
-        "charge",
-        "multiplicity",
-    ]
-    with csv_path.open("w", newline="") as f:
-        writer = csv.DictWriter(f, fieldnames=fieldnames)
-        writer.writeheader()
-        for row in rows:
-            writer.writerow({k: row.get(k, "") for k in fieldnames})
-
-
 @pytest.mark.parametrize("mlip", MODELS.items())
 def test_isomer_complexes(mlip: tuple[str, Any]) -> None:
     """
@@ -123,22 +84,24 @@ def test_isomer_complexes(mlip: tuple[str, Any]) -> None:
     mlip
         Model name and MLIP calculator wrapper.
     """
-    struct_root = _resolve_structure_root()
-    if struct_root is None or not struct_root.exists():
-        pytest.skip(
-            "No lanthanide structure root found. "
-            "Set ML_PEG_LANTHANIDE_STRUCTURES to the isomer_structures path."
+    # download lanthanide isomer complexes dataset
+    isomer_complexes_dir = (
+        download_s3_data(
+            key="inputs/lanthanides/isomer_complexes/isomer_complexes.zip",
+            filename="isomer_complexes.zip",
         )
+        / "isomer_complexes"
+    )
 
-    entries = _load_isomer_entries(struct_root)
+    entries = _load_isomer_entries(isomer_complexes_dir)
     if not entries:
-        pytest.skip(f"No isomer structures found under {struct_root}.")
+        pytest.skip(f"No isomer structures found under {isomer_complexes_dir}.")
 
     model_name, model = mlip
     calc = model.get_calculator()
 
-    results: list[dict[str, Any]] = []
-    for entry in entries:
+    # results: list[dict[str, Any]] = []
+    for entry in tqdm(entries, desc=f"Calculating energies for {model_name}"):
         atoms = read(entry["xyz"])
         atoms.info["charge"] = entry["charge"]
         atoms.info["spin_multiplicity"] = entry["multiplicity"]
@@ -146,27 +109,17 @@ def test_isomer_complexes(mlip: tuple[str, Any]) -> None:
         atoms.calc = calc
         energy_ev = float(atoms.get_potential_energy())
         energy_kcal = energy_ev * KCAL_PER_EV
-        results.append(
-            {
-                "model": model_name,
-                "system": entry["system"],
-                "isomer": entry["isomer"],
-                "energy_ev": energy_ev,
-                "energy_kcal": energy_kcal,
-                "charge": entry["charge"],
-                "multiplicity": entry["multiplicity"],
-            }
-        )
 
-    results.sort(key=lambda row: (row["model"], row["system"], row["isomer"]))
-    grouped: dict[tuple[str, str], list[dict[str, Any]]] = {}
-    for row in results:
-        key = (row["model"], row["system"])
-        grouped.setdefault(key, []).append(row)
+        atoms.info["model"] = model_name
+        atoms.info["energy_ev"] = energy_ev
+        atoms.info["energy_kcal"] = energy_kcal
+        atoms.info["system"] = entry["system"]
+        atoms.info["isomer"] = entry["isomer"]
 
-    for rows in grouped.values():
-        min_energy = min(row["energy_kcal"] for row in rows)
-        for row in rows:
-            row["rel_energy_kcal"] = row["energy_kcal"] - min_energy
+        atoms.info["ref_energy_kcal"] = R2SCAN_REF.get(entry["system"], {}).get(
+            entry["isomer"]
+        )
 
-    _write_model_csv(model_name, results, OUT_PATH / model_name)
+        write_dir = OUT_PATH / model_name
+        write_dir.mkdir(parents=True, exist_ok=True)
+        write(write_dir / f"{entry['system']}_{entry['isomer']}.xyz", atoms)

From 9946eaae8246dd7a4525655092cf8ca4d8b31991 Mon Sep 17 00:00:00 2001
From: joehart2001 <jh2536@cam.ac.uk>
Date: Fri, 30 Jan 2026 15:12:14 +0000
Subject: [PATCH 09/14] only calculate energies we have references for

---
 .../calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index e4e7d4a62..1658ce1d8 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -50,6 +50,8 @@ def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]:
     for system_dir in sorted(struct_root.glob("*")):
         if not system_dir.is_dir():
             continue
+        if system_dir.name not in R2SCAN_REF:
+            continue
         for iso_dir in sorted(system_dir.glob("iso*")):
             xyz_path = iso_dir / "orca.xyz"
             if not xyz_path.exists():

From 4a8a0bdcff7749444491650bfb602289a7e10809 Mon Sep 17 00:00:00 2001
From: joehart2001 <jh2536@cam.ac.uk>
Date: Fri, 30 Jan 2026 15:16:24 +0000
Subject: [PATCH 10/14] analysis makeover and add structure visualisation to
 app

---
 .../analyse_isomer_complexes.py               | 172 ++++++++++--------
 .../isomer_complexes/app_isomer_complexes.py  |  36 ++--
 2 files changed, 118 insertions(+), 90 deletions(-)

diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
index 00dd0b206..73742bdd0 100644
--- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -4,7 +4,7 @@
 
 from pathlib import Path
 
-import pandas as pd
+from ase.io import read, write
 import pytest
 
 from ml_peg.analysis.utils.decorators import build_table, plot_parity
@@ -24,6 +24,7 @@
 )
 
 # r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only)
+# These are relative energies (relative to lowest energy isomer for each system)
 R2SCAN_REF: dict[str, dict[str, float]] = {
     "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08},
     "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17},
@@ -35,48 +36,62 @@
 }
 
 
-def _load_isomer_dataframe() -> pd.DataFrame:
+def get_system_names() -> list[str]:
     """
-    Load isomer energies from per-model outputs.
+    Get sorted list of system names.
 
     Returns
     -------
-    pandas.DataFrame
-        Loaded dataframe, or an empty dataframe if no data are found.
+    list[str]
+        Sorted list of system names from R2SCAN_REF.
     """
-    combined_path = CALC_PATH / "isomer_energies.csv"
-    if combined_path.exists():
-        return pd.read_csv(combined_path)
+    return sorted(R2SCAN_REF.keys())
 
-    csv_paths = sorted(CALC_PATH.glob("*/isomer_energies.csv"))
-    frames = [pd.read_csv(path) for path in csv_paths]
-    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
 
+def get_reference_keys() -> list[tuple[str, str]]:
+    """
+    Get sorted list of (system, isomer) tuples for consistent ordering.
 
-def _build_reference_df() -> pd.DataFrame:
+    Returns
+    -------
+    list[tuple[str, str]]
+        List of (system, isomer) tuples sorted by system then isomer.
     """
-    Build a reference dataframe from the r2SCAN-3c table.
+    system_names = get_system_names()
+    return [
+        (system, isomer)
+        for system in system_names
+        for isomer in sorted(R2SCAN_REF[system].keys())
+    ]
+
+
+def get_reference_values() -> list[float]:
+    """
+    Get reference relative energies in sorted order.
 
     Returns
     -------
-    pandas.DataFrame
-        Dataframe with columns: system, isomer, ref.
+    list[float]
+        Reference relative energies matching the order of get_reference_keys().
     """
-    records = []
-    for system, iso_map in R2SCAN_REF.items():
-        for iso, ref in iso_map.items():
-            records.append({"system": system, "isomer": iso, "ref": ref})
-    return pd.DataFrame.from_records(records)
+    reference_keys = get_reference_keys()
+    return [R2SCAN_REF[system][isomer] for system, isomer in reference_keys]
 
 
-REFERENCE_DF = (
-    _build_reference_df().sort_values(["system", "isomer"]).reset_index(drop=True)
-)
-REFERENCE_INDEX = pd.MultiIndex.from_frame(REFERENCE_DF[["system", "isomer"]])
-REFERENCE_HOVERDATA = {
-    "System": REFERENCE_DF["system"].tolist(),
-    "Isomer": REFERENCE_DF["isomer"].tolist(),
-}
+def build_hoverdata() -> dict[str, list[str]]:
+    """
+    Build hoverdata dictionary for parity plot.
+
+    Returns
+    -------
+    dict[str, list[str]]
+        Dictionary with "System" and "Isomer" keys for hover information.
+    """
+    reference_keys = get_reference_keys()
+    return {
+        "System": [system for system, _ in reference_keys],
+        "Isomer": [isomer for _, isomer in reference_keys],
+    }
 
 
 @pytest.fixture
@@ -85,7 +100,7 @@ def _build_reference_df() -> pd.DataFrame:
     title="Lanthanide isomer relative energies",
     x_label="Model Delta E (kcal/mol)",
     y_label="r2SCAN-3c Delta E (kcal/mol)",
-    hoverdata=REFERENCE_HOVERDATA,
+    hoverdata=build_hoverdata(),
 )
 def isomer_relative_energies() -> dict[str, list]:
     """
@@ -96,65 +111,80 @@ def isomer_relative_energies() -> dict[str, list]:
     dict[str, list]
         Reference and per-model relative energies.
     """
-    df = _load_isomer_dataframe()
-    df = (
-        df.merge(REFERENCE_DF, on=["system", "isomer"], how="inner")
-        if not df.empty
-        else df
-    )
-
-    prediction_table = pd.DataFrame(index=REFERENCE_INDEX)
-    if not df.empty:
-        prediction_table = df.pivot_table(
-            index=["system", "isomer"],
-            columns="model",
-            values="rel_energy_kcal",
-            aggfunc="first",
-        ).reindex(REFERENCE_INDEX)
-
-    results: dict[str, list] = {"ref": REFERENCE_DF["ref"].tolist()}
-    for model in MODELS:
-        if model in prediction_table.columns:
-            series = prediction_table[model]
-            results[model] = series.where(series.notna(), None).tolist()
-        else:
-            results[model] = [None] * len(results["ref"])
+    results = {"ref": get_reference_values()} | {mlip: [] for mlip in MODELS}
+
+    for model_name in MODELS:
+        model_dir = CALC_PATH / model_name
+        if not model_dir.exists():
+            # Model directory doesn't exist, fill with None
+            results[model_name] = [None] * len(get_reference_keys())
+            continue
+
+        structs_dir = OUT_PATH / model_name
+        structs_dir.mkdir(parents=True, exist_ok=True)
+
+        # Process each system separately to compute relative energies
+        preds: list[float | None] = []
+        for system_name in get_system_names():
+            # Collect all isomers for this system
+            isomer_data: dict[str, tuple[float, object]] = {}
+            for isomer in sorted(R2SCAN_REF[system_name].keys()):
+                xyz_path = model_dir / f"{system_name}_{isomer}.xyz"
+                if xyz_path.exists():
+                    atoms = read(xyz_path)
+                    energy_kcal = atoms.info.get("energy_kcal")
+                    if energy_kcal is not None:
+                        isomer_data[isomer] = (energy_kcal, atoms)
+
+            # Lowest energy is the zero point; default avoids min() of empty data
+            min_energy = min((e for e, _ in isomer_data.values()), default=0.0)
+
+            # Add predictions in sorted isomer order
+            for isomer in sorted(R2SCAN_REF[system_name].keys()):
+                if isomer in isomer_data:
+                    energy_kcal, atoms = isomer_data[isomer]
+                    rel_energy = energy_kcal - min_energy
+                    preds.append(rel_energy)
+
+                    # Copy structure to app directory
+                    write(structs_dir / f"{system_name}_{isomer}.xyz", atoms)
+                else:
+                    preds.append(None)
+
+        results[model_name] = preds
 
     return results
 
 
 @pytest.fixture
-def isomer_complex_outputs(
-    isomer_relative_energies: dict[str, list],
-) -> dict[str, float | None]:
+def isomer_complex_errors(isomer_relative_energies) -> dict[str, float | None]:
     """
-    Build outputs for lanthanide isomer complexes benchmark.
+    Get mean absolute error for relative energies.
 
     Parameters
     ----------
     isomer_relative_energies
-        Reference and per-model relative energies.
+        Dictionary of reference and predicted relative energies.
 
     Returns
     -------
-    dict[str, float | None]
-        Mean absolute errors by model.
+    dict[str, float | None]
+        Mean absolute error per model, or None if a model has no predictions.
     """
-    ref_vals = isomer_relative_energies["ref"]
-    mae_by_model: dict[str, float | None] = {}
-    for model in MODELS:
-        preds = isomer_relative_energies[model]
+    results: dict[str, float | None] = {}
+    for model_name in MODELS:
+        preds = isomer_relative_energies.get(model_name, [])
         pairs = [
             (ref, pred)
-            for ref, pred in zip(ref_vals, preds, strict=True)
+            for ref, pred in zip(isomer_relative_energies["ref"], preds, strict=True)
             if pred is not None
         ]
         if not pairs:
-            mae_by_model[model] = None
+            results[model_name] = None
             continue
-        ref, pred = zip(*pairs, strict=True)
-        mae_by_model[model] = mae(list(ref), list(pred))
-    return mae_by_model
+        ref_vals, pred_vals = zip(*pairs, strict=True)
+        results[model_name] = mae(list(ref_vals), list(pred_vals))
+    return results
 
 
 @pytest.fixture
@@ -164,13 +194,13 @@ def isomer_complex_outputs(
     thresholds=DEFAULT_THRESHOLDS,
     weights=DEFAULT_WEIGHTS,
 )
-def metrics(isomer_complex_outputs: dict[str, float | None]) -> dict[str, dict]:
+def metrics(isomer_complex_errors: dict[str, float | None]) -> dict[str, dict]:
     """
     Collect metrics for lanthanide isomer complexes.
 
     Parameters
     ----------
-    isomer_complex_outputs
+    isomer_complex_errors
         Mean absolute errors for all models.
 
     Returns
@@ -178,7 +208,7 @@ def metrics(isomer_complex_outputs: dict[str, float | None]) -> dict[str, dict]:
     dict[str, dict]
         Metrics keyed by name for all models.
     """
-    return {"MAE": isomer_complex_outputs}
+    return {"MAE": isomer_complex_errors}
 
 
 def test_isomer_complexes(metrics: dict[str, dict]) -> None:
diff --git a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
index 35c925825..b77619ade 100644
--- a/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
+++ b/ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
@@ -9,7 +9,10 @@
 from ml_peg.app.base_app import BaseApp
 from ml_peg.app.utils.build_callbacks import plot_from_table_column, struct_from_scatter
 from ml_peg.app.utils.load import read_plot
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
 
+MODELS = get_model_names(current_models)
 BENCHMARK_NAME = "Lanthanide Isomer Complexes"
 DOCS_URL = (
     "https://ddmms.github.io/ml-peg/user_guide/benchmarks/lanthanides.html"
@@ -34,25 +37,20 @@ def register_callbacks(self) -> None:
             column_to_plot={"MAE": scatter},
         )
 
-        struct_root = DATA_PATH / "structures"
-        if struct_root.exists():
-            structs = []
-            for system_dir in sorted(struct_root.glob("*")):
-                if not system_dir.is_dir():
-                    continue
-                for struct_file in sorted(system_dir.glob("*.xyz")):
-                    structs.append(
-                        f"assets/lanthanides/isomer_complexes/structures/"
-                        f"{system_dir.name}/{struct_file.name}"
-                    )
-
-            if structs:
-                struct_from_scatter(
-                    scatter_id=f"{BENCHMARK_NAME}-figure",
-                    struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
-                    structs=structs,
-                    mode="struct",
-                )
+        # Use first model's structures for visualization
+        if MODELS:
+            structs_dir = DATA_PATH / MODELS[0]
+            structs = [
+                f"assets/lanthanides/isomer_complexes/{MODELS[0]}/{struct_file.stem}.xyz"
+                for struct_file in sorted(structs_dir.glob("*.xyz"))
+            ]
+
+            struct_from_scatter(
+                scatter_id=f"{BENCHMARK_NAME}-figure",
+                struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+                structs=structs,
+                mode="struct",
+            )
 
 
 def get_app() -> IsomerComplexesApp:

From b9ed690924d0c1048e9e053f7372452ddff373eb Mon Sep 17 00:00:00 2001
From: Joseph Hart <92541539+joehart2001@users.noreply.github.com>
Date: Mon, 2 Feb 2026 19:20:12 +0000
Subject: [PATCH 11/14] Update
 ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py

Co-authored-by: Elliott Kasoar <45317199+ElliottKasoar@users.noreply.github.com>
---
 .../lanthanides/isomer_complexes/calc_isomer_complexes.py      | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index 1658ce1d8..5a9863e6e 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -5,6 +5,7 @@
 from pathlib import Path
 from typing import Any
 
+from ase import units
 from ase.io import read, write
 import pytest
 from tqdm import tqdm
@@ -17,7 +18,7 @@
 MODELS = load_models(current_models)
 
 OUT_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs"
-KCAL_PER_EV = 23.060547
+KCAL_PER_EV = units.mol / units.kcal
 
 
 # r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only)

From 5deffbcd3cafa80106da5a5abac4ce58e0b096dc Mon Sep 17 00:00:00 2001
From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com>
Date: Tue, 3 Feb 2026 15:07:49 +0000
Subject: [PATCH 12/14] Set integer charges

---
 .../lanthanides/isomer_complexes/calc_isomer_complexes.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index 5a9863e6e..a99e93a93 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -59,12 +59,8 @@ def _load_isomer_entries(struct_root: Path) -> list[dict[str, Any]]:
                 continue
             charge_path = iso_dir / ".CHRG"
             uhf_path = iso_dir / ".UHF"
-            charge = (
-                float(charge_path.read_text().strip()) if charge_path.exists() else 0.0
-            )
-            multiplicity = (
-                int(float(uhf_path.read_text().strip())) if uhf_path.exists() else 1
-            )
+            charge = int(charge_path.read_text().strip()) if charge_path.exists() else 0
+            multiplicity = int(uhf_path.read_text().strip()) if uhf_path.exists() else 1
             entries.append(
                 {
                     "system": system_dir.name,

From 3d1186b839f0b425ffa219e4efbe8393d883d6fa Mon Sep 17 00:00:00 2001
From: Joseph Hart <92541539+joehart2001@users.noreply.github.com>
Date: Tue, 3 Feb 2026 16:44:17 +0000
Subject: [PATCH 13/14] Update reference data with two extra systems (Th, Ac)

---
 .../lanthanides/isomer_complexes/analyse_isomer_complexes.py    | 2 ++
 .../calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
index 73742bdd0..3c93b02a4 100644
--- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -28,9 +28,11 @@
 R2SCAN_REF: dict[str, dict[str, float]] = {
     "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08},
     "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17},
+    "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.00, "iso4": 1.23},
     "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20},
     "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00},
     "La_f1a50d": {"iso1": 0.00, "iso2": 3.11},
+    "Ac_f1a50d": {"iso1": 0.00, "iso2": 3.52},
     "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74},
     "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61},
 }
diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index a99e93a93..32e2a810b 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -25,9 +25,11 @@
 R2SCAN_REF: dict[str, dict[str, float]] = {
     "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08},
     "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17},
+    "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.00, "iso4": 1.23},
     "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20},
     "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00},
     "La_f1a50d": {"iso1": 0.00, "iso2": 3.11},
+    "Ac_f1a50d": {"iso1": 0.00, "iso2": 3.52},
     "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74},
     "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61},
 }

From dff39d3b6ab289180b7e791df969e18d30a548da Mon Sep 17 00:00:00 2001
From: joehart2001 <jh2536@cam.ac.uk>
Date: Wed, 4 Feb 2026 14:20:39 +0000
Subject: [PATCH 14/14] add new ref values from orca files

---
 .../analyse_isomer_complexes.py                | 18 +++++++++---------
 .../isomer_complexes/calc_isomer_complexes.py  | 18 +++++++++---------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
index 3c93b02a4..72294fa22 100644
--- a/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
+++ b/ml_peg/analysis/lanthanides/isomer_complexes/analyse_isomer_complexes.py
@@ -26,15 +26,15 @@
 # r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only)
 # These are relative energies (relative to lowest energy isomer for each system)
 R2SCAN_REF: dict[str, dict[str, float]] = {
-    "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08},
-    "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17},
-    "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.00, "iso4": 1.23},
-    "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20},
-    "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00},
-    "La_f1a50d": {"iso1": 0.00, "iso2": 3.11},
-    "Ac_f1a50d": {"iso1": 0.00, "iso2": 3.52},
-    "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74},
-    "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61},
+    "Ac_f1a50d": {"iso1": 0.02, "iso2": 0.0, "iso3": 3.52},
+    "Ce_1d271a": {"iso1": 0.0, "iso2": 2.2, "iso3": 1.67},
+    "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.0, "iso4": 2.17},
+    "Eu_ff6372": {"iso1": 0.0, "iso2": 6.74},
+    "La_f1a50d": {"iso1": 0.23, "iso2": 0.0, "iso3": 3.11},
+    "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.0, "iso4": 2.08},
+    "Nd_c5f44a": {"iso1": 0.0, "iso2": 1.61, "iso3": 0.82},
+    "Sm_ed79e8": {"iso1": 2.99, "iso2": 8.97, "iso3": 0.0},
+    "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.0, "iso4": 1.23},
 }
 
 
diff --git a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
index 32e2a810b..691350752 100644
--- a/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
+++ b/ml_peg/calcs/lanthanides/isomer_complexes/calc_isomer_complexes.py
@@ -23,15 +23,15 @@
 
 # r2SCAN-3c references (kcal/mol) from Table S4 (lanthanides only)
 R2SCAN_REF: dict[str, dict[str, float]] = {
-    "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.00, "iso4": 2.08},
-    "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.00, "iso4": 2.17},
-    "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.00, "iso4": 1.23},
-    "Ce_1d271a": {"iso1": 0.00, "iso2": 2.20},
-    "Sm_ed79e8": {"iso1": 2.99, "iso2": 0.00},
-    "La_f1a50d": {"iso1": 0.00, "iso2": 3.11},
-    "Ac_f1a50d": {"iso1": 0.00, "iso2": 3.52},
-    "Eu_ff6372": {"iso1": 0.00, "iso2": 6.74},
-    "Nd_c5f44a": {"iso1": 0.00, "iso2": 1.61},
+    "Ac_f1a50d": {"iso1": 0.02, "iso2": 0.0, "iso3": 3.52},
+    "Ce_1d271a": {"iso1": 0.0, "iso2": 2.2, "iso3": 1.67},
+    "Ce_ff6372": {"iso1": 2.47, "iso2": 7.13, "iso3": 0.0, "iso4": 2.17},
+    "Eu_ff6372": {"iso1": 0.0, "iso2": 6.74},
+    "La_f1a50d": {"iso1": 0.23, "iso2": 0.0, "iso3": 3.11},
+    "Lu_ff6372": {"iso1": 2.15, "iso2": 12.96, "iso3": 0.0, "iso4": 2.08},
+    "Nd_c5f44a": {"iso1": 0.0, "iso2": 1.61, "iso3": 0.82},
+    "Sm_ed79e8": {"iso1": 2.99, "iso2": 8.97, "iso3": 0.0},
+    "Th_ff6372": {"iso1": 2.13, "iso2": 8.03, "iso3": 0.0, "iso4": 1.23},
 }