From 521298f7d61b5499917ba7ca6ab53ee7a3d84415 Mon Sep 17 00:00:00 2001
From: Charlotte Avery <charlotte.avery@centrefornetzero.org>
Date: Tue, 16 Dec 2025 12:10:26 +0000
Subject: [PATCH 1/5] Split up split & preprocess steps

Signed-off-by: Charlotte Avery <charlotte.avery@centrefornetzero.org>
---
 app/app.py                                    | 15 +++++
 .../datasets/low_carbon_london/get_data.py    | 66 +++++++++++--------
 2 files changed, 54 insertions(+), 27 deletions(-)

diff --git a/app/app.py b/app/app.py
index e225e9e..6d5ff72 100644
--- a/app/app.py
+++ b/app/app.py
@@ -32,6 +32,19 @@ def download_lcl_data(
 
 @app.command()
 def preprocess_data(
+    split: Annotated[
+        bool,
+        typer.Option(
+            "--split", help="Splits LCL households into training/holdout set"
+        ),
+    ] = False,
+    preprocess: Annotated[
+        bool,
+        typer.Option(
+            "--preprocess",
+            help="Preprocesses LCL data into daily load profiles",
+        ),
+    ] = False,
     data_dir: Annotated[
         str, typer.Option("--loc", help="Location of data directory.")
     ] = "./data",
@@ -145,6 +158,8 @@ def preprocess_data(
     """
 
     get_data.split_preprocess_data(
+        split,
+        preprocess,
         data_dir,
         csv_data_path,
         sample_fraction,
diff --git a/src/opensynth/datasets/low_carbon_london/get_data.py b/src/opensynth/datasets/low_carbon_london/get_data.py
index 3bc0f5b..192899b 100644
--- a/src/opensynth/datasets/low_carbon_london/get_data.py
+++ b/src/opensynth/datasets/low_carbon_london/get_data.py
@@ -34,6 +34,8 @@ def download_lcl_data(data_dir: str = "./data"):
 
 
 def split_preprocess_data(
+    split: bool,
+    preprocess: bool,
     data_dir: str,
     csv_data_path: str,
     sample_fraction: float,
@@ -93,38 +95,46 @@ def split_preprocess_data(
         f"Reading data from {CSV_FILE_NAME}. Storing data in {data_dir}."
     )
 
-    # Split dataset into training/ holdout sets
-    split_households.split_data(
-        data_dir,
-        CSV_FILE_NAME,
-        sample_fraction=sample_fraction,
-        id_col=id_col,
-        kwh_col=kwh_col,
-        datetime_col=datetime_col,
-        utc=utc,
-        datetime_format=datetime_format,
-        historical_start=historical_start,
-        historical_end=historical_end,
-        future_start=future_start,
-        future_end=future_end,
-    )
-    # Preprocess the data into daily load profiles
-    preprocess_lcl.preprocess_data(
-        data_dir,
-        datetime_col=datetime_col,
-        kwh_col=kwh_col,
-        id_col=id_col,
-        utc=utc,
-        datetime_format=datetime_format,
-        time_resolution=time_resolution,
-        feature_cols=feature_cols,
-        drop_nulls=drop_nulls,
-    )
+    if split:
+        # Split dataset into training/ holdout sets
+        split_households.split_data(
+            data_dir,
+            CSV_FILE_NAME,
+            sample_fraction=sample_fraction,
+            id_col=id_col,
+            kwh_col=kwh_col,
+            datetime_col=datetime_col,
+            utc=utc,
+            datetime_format=datetime_format,
+            historical_start=historical_start,
+            historical_end=historical_end,
+            future_start=future_start,
+            future_end=future_end,
+        )
+    if preprocess:
+        # Preprocess the data into daily load profiles
+        preprocess_lcl.preprocess_data(
+            data_dir,
+            datetime_col=datetime_col,
+            kwh_col=kwh_col,
+            id_col=id_col,
+            utc=utc,
+            datetime_format=datetime_format,
+            time_resolution=time_resolution,
+            feature_cols=feature_cols,
+            drop_nulls=drop_nulls,
+        )
 
 
 if __name__ == "__main__":
+    # Whether to split and/or preprocess the data
+    split = True
+    preprocess = True
+
+    # Data directory
     data_dir = "./data"
 
+    # Fraction of households to include in training set
     sample_fraction = 0.75
 
     # Dataset location
@@ -147,6 +157,8 @@ def split_preprocess_data(
     drop_nulls = True
 
     split_preprocess_data(
+        split,
+        preprocess,
         data_dir,
         csv_data_path,
         sample_fraction,

From b8313c4246af46b1d36bbdc0dd97fc6417c87750 Mon Sep 17 00:00:00 2001
From: Charlotte Avery <charlotte.avery@centrefornetzero.org>
Date: Wed, 17 Dec 2025 09:44:22 +0000
Subject: [PATCH 2/5] Reduce line length to 79 characters

---
 .../datasets/low_carbon_london/load.py        |  5 +-
 src/opensynth/utils/polars.py                 | 54 ++++++++++---------
 .../fidelity/test_autocorrelation.py          |  5 +-
 3 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/src/opensynth/datasets/low_carbon_london/load.py b/src/opensynth/datasets/low_carbon_london/load.py
index c948209..2d91e0e 100644
--- a/src/opensynth/datasets/low_carbon_london/load.py
+++ b/src/opensynth/datasets/low_carbon_london/load.py
@@ -15,7 +15,8 @@ def load_lcl_data_by_year(
 ) -> pd.DataFrame | pl.DataFrame:
     """Load LCL data for a specific year.
 
-    Returns a DataFrame in wide format. The first column contains the timestamp.
+    Returns a DataFrame in wide format. The first column contains the
+    timestamp.
 
     Args:
         fname (str or Path): Location of the `train.csv` data file.
@@ -25,7 +26,7 @@ def load_lcl_data_by_year(
         pl.DataFrame with KWH/hh measurements.
     """
     fname = (
-        Path(__file__).parents[0] / "../../../../data/raw/historical/train.csv"
+        Path(__file__).parents[0] / "./data/raw/historical/train.csv"
         if fname is None
         else Path(fname)
     )
diff --git a/src/opensynth/utils/polars.py b/src/opensynth/utils/polars.py
index fffa9b5..2296aca 100644
--- a/src/opensynth/utils/polars.py
+++ b/src/opensynth/utils/polars.py
@@ -17,9 +17,9 @@ def infer_date_column(df: pl.DataFrame) -> str:
 
     Returns the column name of a column in Date format, or a String column that
     matches a Date string. If the DataFrame contains only one matching column,
-    this function will return that column name. If multiple columns match, it will
-    return the column name that matches a canonical Date name, such as "DATUM".
-    In all other cases the function will raise a ValueError().
+    this function will return that column name. If multiple columns match, it
+    will return the column name that matches a canonical Date name, such as
+    "DATUM". In all other cases the function will raise a ValueError().
 
     Args:
         df (pl.DataFrame): DataFrame.
@@ -28,8 +28,8 @@ def infer_date_column(df: pl.DataFrame) -> str:
         str: column name of a column in Date or Date-like format.
 
     Raises:
-        ValueError: if no columns are in a Date-like format or multiple columns are
-        in Date-like format and match a canonical name.
+        ValueError: if no columns are in a Date-like format or multiple columns
+            are in Date-like format and match a canonical name.
 
     """
     date_columns = df.select(pl.col(pl.Date)).columns
@@ -49,7 +49,8 @@ def infer_date_column(df: pl.DataFrame) -> str:
             return list(canonical_columns)[0]
         case _:
             raise ValueError(
-                "Multiple Date-like columns found with a matching canonical name!"
+                "Multiple Date-like columns found with a matching "
+                "canonical name!"
             )
 
 
@@ -68,20 +69,21 @@ def semiwide_to_long(
     default in "%HH%mm" format.
 
     Args:
-        df (polars.DataFrame): DataFrame in semi-wide wide format, containing DateTime-
-            compatible column names.
-        on (list, optional): Columns to use as timepoints. By default, all columns that
-            match the pattern '[0-9][0-9][0-9][0-9]' will be used.
-        date_col (str, optional): Column that contains the Date values. By default,
-            a column that is in Date format, or that is a Date-compatible string, will
-            be used, if there is only one column in that format. If there are multiple
-            Date-compatible, columns, but only one matches a canonical name such as
-            DATUM, that column will be used. Otherwise, this method will fail, and the
-            date_col needs to be explicitly specified.
+        df (polars.DataFrame): DataFrame in semi-wide wide format, containing
+            DateTime-compatible column names.
+        on (list, optional): Columns to use as timepoints. By default, all
+            columns that match the pattern '[0-9][0-9][0-9][0-9]' will be used.
+        date_col (str, optional): Column that contains the Date values. By
+            default, a column that is in Date format, or that is a
+            Date-compatible string, will be used, if there is only one column
+            in that format. If there are multiple Date-compatible columns, but
+            only one matches a canonical name such as DATUM, that column will
+            be used. Otherwise, this method will fail, and the date_col needs
+            to be explicitly specified.
         datetime_name (str, optional): Name for the DateTime column in the long
             DataFrame, "DATUM_TIJD" by default.
-        value_name (str, optional): Name to give to the value column. Defaults to
-            "value".
+        value_name (str, optional): Name to give to the value column. Defaults
+            to "value".
 
     Returns:
         polars.DataFrame in long format.
@@ -137,12 +139,13 @@ def semiwide_to_wide(
             DateTime-compatible column names.
         on (list, optional): Columns to use as timepoints. By default, all
             columns that match the pattern '[0-9][0-9][0-9][0-9]' will be used.
-        date_col (str, optional): Column that contains the Date values. By default,
-            a column that is in Date format, or that is a Date-compatible string,
-            will be used, if there is only one column in that format. If there are
-            multiple Date-compatible, columns, but only one matches a canonical name
-            such as DATUM, that column will be used. Otherwise, this method will fail,
-            and the date_col needs to be explicitly specified.
+        date_col (str, optional): Column that contains the Date values. By
+            default, a column that is in Date format, or that is a
+            Date-compatible string, will be used, if there is only one column
+            in that format. If there are multiple Date-compatible columns, but
+            only one matches a canonical name such as DATUM, that column will
+            be used. Otherwise, this method will fail, and the date_col needs
+            to be explicitly specified.
         datetime_name (str, optional): Name for the DateTime column in the long
             DataFrame, "datetime" by default.
 
@@ -178,7 +181,8 @@ def randomize_index_column(
     Args:
         df (DataFrame): Input DataFrame.
         index_col_name (str): Name of index column.
-        sample_col_name (str): Name of new column containing the randomized index.
+        sample_col_name (str): Name of new column containing the randomized
+            index.
 
     Returns:
         DataFrame with index column values randomized.
diff --git a/tests/evaluation/fidelity/test_autocorrelation.py b/tests/evaluation/fidelity/test_autocorrelation.py
index 27b3ca0..d7ab203 100644
--- a/tests/evaluation/fidelity/test_autocorrelation.py
+++ b/tests/evaluation/fidelity/test_autocorrelation.py
@@ -47,7 +47,10 @@ def test_dataframe_half_hour_pandas(test_dataframe_half_hour):
 
 @pytest.fixture(scope="module")
 def test_dataframe_quarterly():
-    """ "DataFrame with 15-minute timesteps and high correlation with a week time-lag."""
+    """
+    DataFrame with 15-minute timesteps and high correlation with a week
+    time-lag.
+    """
     n_minutes = 15
     n_values = 60 // n_minutes * 24 * 7  # 1 week
     n_timesteps = 35041

From 127b8da14139cde07028c6260f6f3cd69349d41e Mon Sep 17 00:00:00 2001
From: Charlotte Avery <charlotte.avery@centrefornetzero.org>
Date: Wed, 17 Dec 2025 10:54:04 +0000
Subject: [PATCH 3/5] Add licenses

---
 src/opensynth/datasets/low_carbon_london/load.py              | 3 +++
 src/opensynth/evaluation/fidelity/__init__.py                 | 2 ++
 src/opensynth/evaluation/fidelity/autocorrelation.py          | 3 +++
 src/opensynth/evaluation/fidelity/seasonal_statistics.py      | 3 +++
 src/opensynth/models/faraday/stitched_model/stitch.py         | 3 +++
 src/opensynth/models/faraday/stitched_model/stitched_model.py | 3 +++
 src/opensynth/models/faraday/stitched_model/utils.py          | 3 +++
 src/opensynth/utils/__init__.py                               | 2 ++
 src/opensynth/utils/polars.py                                 | 3 +++
 9 files changed, 25 insertions(+)

diff --git a/src/opensynth/datasets/low_carbon_london/load.py b/src/opensynth/datasets/low_carbon_london/load.py
index 2d91e0e..6464ffe 100644
--- a/src/opensynth/datasets/low_carbon_london/load.py
+++ b/src/opensynth/datasets/low_carbon_london/load.py
@@ -1,3 +1,6 @@
+# Copyright Contributors to the Opensynth-energy Project.
+# SPDX-License-Identifier: Apache-2.0
+
 import logging
 from pathlib import Path
 from typing import Literal
diff --git a/src/opensynth/evaluation/fidelity/__init__.py b/src/opensynth/evaluation/fidelity/__init__.py
index e69de29..a19fd9c 100644
--- a/src/opensynth/evaluation/fidelity/__init__.py
+++ b/src/opensynth/evaluation/fidelity/__init__.py
@@ -0,0 +1,2 @@
+# Copyright Contributors to the Opensynth-energy Project.
+# SPDX-License-Identifier: Apache-2.0
diff --git a/src/opensynth/evaluation/fidelity/autocorrelation.py b/src/opensynth/evaluation/fidelity/autocorrelation.py
index c15da90..6260e96 100644
--- a/src/opensynth/evaluation/fidelity/autocorrelation.py
+++ b/src/opensynth/evaluation/fidelity/autocorrelation.py
@@ -1,3 +1,6 @@
+# Copyright Contributors to the Opensynth-energy Project.
+# SPDX-License-Identifier: Apache-2.0
+
 """Auto-correlation metrics for fidelity evaluation.
 
 Note regarding suitability of this metric: Faraday generates independent daily
diff --git a/src/opensynth/evaluation/fidelity/seasonal_statistics.py b/src/opensynth/evaluation/fidelity/seasonal_statistics.py
index 3ec0aff..b2c21c0 100644
--- a/src/opensynth/evaluation/fidelity/seasonal_statistics.py
+++ b/src/opensynth/evaluation/fidelity/seasonal_statistics.py
@@ -1,3 +1,6 @@
+# Copyright Contributors to the Opensynth-energy Project.
+# SPDX-License-Identifier: Apache-2.0
+
 """Seasonal statistics metrics for fidelity evaluation.
 
 Note regarding suitability of this metric: Faraday generates independent daily
diff --git a/src/opensynth/models/faraday/stitched_model/stitch.py b/src/opensynth/models/faraday/stitched_model/stitch.py
index c2201c9..b0263f3 100644
--- a/src/opensynth/models/faraday/stitched_model/stitch.py
+++ b/src/opensynth/models/faraday/stitched_model/stitch.py
@@ -1,3 +1,6 @@
+# Copyright Contributors to the Opensynth-energy Project.
+# SPDX-License-Identifier: Apache-2.0
+
 import polars as pl
 
 
diff --git a/src/opensynth/models/faraday/stitched_model/stitched_model.py b/src/opensynth/models/faraday/stitched_model/stitched_model.py
index f0e67f6..b50f10e 100644
--- a/src/opensynth/models/faraday/stitched_model/stitched_model.py
+++ b/src/opensynth/models/faraday/stitched_model/stitched_model.py
@@ -1,3 +1,6 @@
+# Copyright Contributors to the Opensynth-energy Project.
+# SPDX-License-Identifier: Apache-2.0
+
 import logging
 from collections.abc import Generator
 from datetime import date
diff --git a/src/opensynth/models/faraday/stitched_model/utils.py b/src/opensynth/models/faraday/stitched_model/utils.py
index 9566a8e..7e69188 100644
--- a/src/opensynth/models/faraday/stitched_model/utils.py
+++ b/src/opensynth/models/faraday/stitched_model/utils.py
@@ -1,3 +1,6 @@
+# Copyright Contributors to the Opensynth-energy Project.
+# SPDX-License-Identifier: Apache-2.0
+
 from calendar import monthrange
 from datetime import date
 
diff --git a/src/opensynth/utils/__init__.py b/src/opensynth/utils/__init__.py
index e69de29..a19fd9c 100644
--- a/src/opensynth/utils/__init__.py
+++ b/src/opensynth/utils/__init__.py
@@ -0,0 +1,2 @@
+# Copyright Contributors to the Opensynth-energy Project.
+# SPDX-License-Identifier: Apache-2.0
diff --git a/src/opensynth/utils/polars.py b/src/opensynth/utils/polars.py
index 2296aca..3f67331 100644
--- a/src/opensynth/utils/polars.py
+++ b/src/opensynth/utils/polars.py
@@ -1,3 +1,6 @@
+# Copyright Contributors to the Opensynth-energy Project.
+# SPDX-License-Identifier: Apache-2.0
+
 import logging
 import random
 from typing import Optional

From 404f7e2081e9c5a39728fbc334f0e3e48181a53d Mon Sep 17 00:00:00 2001
From: Charlotte Avery <charlotte.avery@centrefornetzero.org>
Date: Wed, 17 Dec 2025 11:12:48 +0000
Subject: [PATCH 4/5] Revert "Add licenses"

This reverts commit 127b8da14139cde07028c6260f6f3cd69349d41e.
---
 src/opensynth/datasets/low_carbon_london/load.py              | 3 ---
 src/opensynth/evaluation/fidelity/__init__.py                 | 2 --
 src/opensynth/evaluation/fidelity/autocorrelation.py          | 3 ---
 src/opensynth/evaluation/fidelity/seasonal_statistics.py      | 3 ---
 src/opensynth/models/faraday/stitched_model/stitch.py         | 3 ---
 src/opensynth/models/faraday/stitched_model/stitched_model.py | 3 ---
 src/opensynth/models/faraday/stitched_model/utils.py          | 3 ---
 src/opensynth/utils/__init__.py                               | 2 --
 src/opensynth/utils/polars.py                                 | 3 ---
 9 files changed, 25 deletions(-)

diff --git a/src/opensynth/datasets/low_carbon_london/load.py b/src/opensynth/datasets/low_carbon_london/load.py
index 6464ffe..2d91e0e 100644
--- a/src/opensynth/datasets/low_carbon_london/load.py
+++ b/src/opensynth/datasets/low_carbon_london/load.py
@@ -1,6 +1,3 @@
-# Copyright Contributors to the Opensynth-energy Project.
-# SPDX-License-Identifier: Apache-2.0
-
 import logging
 from pathlib import Path
 from typing import Literal
diff --git a/src/opensynth/evaluation/fidelity/__init__.py b/src/opensynth/evaluation/fidelity/__init__.py
index a19fd9c..e69de29 100644
--- a/src/opensynth/evaluation/fidelity/__init__.py
+++ b/src/opensynth/evaluation/fidelity/__init__.py
@@ -1,2 +0,0 @@
-# Copyright Contributors to the Opensynth-energy Project.
-# SPDX-License-Identifier: Apache-2.0
diff --git a/src/opensynth/evaluation/fidelity/autocorrelation.py b/src/opensynth/evaluation/fidelity/autocorrelation.py
index 6260e96..c15da90 100644
--- a/src/opensynth/evaluation/fidelity/autocorrelation.py
+++ b/src/opensynth/evaluation/fidelity/autocorrelation.py
@@ -1,6 +1,3 @@
-# Copyright Contributors to the Opensynth-energy Project.
-# SPDX-License-Identifier: Apache-2.0
-
 """Auto-correlation metrics for fidelity evaluation.
 
 Note regarding suitability of this metric: Faraday generates independent daily
diff --git a/src/opensynth/evaluation/fidelity/seasonal_statistics.py b/src/opensynth/evaluation/fidelity/seasonal_statistics.py
index b2c21c0..3ec0aff 100644
--- a/src/opensynth/evaluation/fidelity/seasonal_statistics.py
+++ b/src/opensynth/evaluation/fidelity/seasonal_statistics.py
@@ -1,6 +1,3 @@
-# Copyright Contributors to the Opensynth-energy Project.
-# SPDX-License-Identifier: Apache-2.0
-
 """Seasonal statistics metrics for fidelity evaluation.
 
 Note regarding suitability of this metric: Faraday generates independent daily
diff --git a/src/opensynth/models/faraday/stitched_model/stitch.py b/src/opensynth/models/faraday/stitched_model/stitch.py
index b0263f3..c2201c9 100644
--- a/src/opensynth/models/faraday/stitched_model/stitch.py
+++ b/src/opensynth/models/faraday/stitched_model/stitch.py
@@ -1,6 +1,3 @@
-# Copyright Contributors to the Opensynth-energy Project.
-# SPDX-License-Identifier: Apache-2.0
-
 import polars as pl
 
 
diff --git a/src/opensynth/models/faraday/stitched_model/stitched_model.py b/src/opensynth/models/faraday/stitched_model/stitched_model.py
index b50f10e..f0e67f6 100644
--- a/src/opensynth/models/faraday/stitched_model/stitched_model.py
+++ b/src/opensynth/models/faraday/stitched_model/stitched_model.py
@@ -1,6 +1,3 @@
-# Copyright Contributors to the Opensynth-energy Project.
-# SPDX-License-Identifier: Apache-2.0
-
 import logging
 from collections.abc import Generator
 from datetime import date
diff --git a/src/opensynth/models/faraday/stitched_model/utils.py b/src/opensynth/models/faraday/stitched_model/utils.py
index 7e69188..9566a8e 100644
--- a/src/opensynth/models/faraday/stitched_model/utils.py
+++ b/src/opensynth/models/faraday/stitched_model/utils.py
@@ -1,6 +1,3 @@
-# Copyright Contributors to the Opensynth-energy Project.
-# SPDX-License-Identifier: Apache-2.0
-
 from calendar import monthrange
 from datetime import date
 
diff --git a/src/opensynth/utils/__init__.py b/src/opensynth/utils/__init__.py
index a19fd9c..e69de29 100644
--- a/src/opensynth/utils/__init__.py
+++ b/src/opensynth/utils/__init__.py
@@ -1,2 +0,0 @@
-# Copyright Contributors to the Opensynth-energy Project.
-# SPDX-License-Identifier: Apache-2.0
diff --git a/src/opensynth/utils/polars.py b/src/opensynth/utils/polars.py
index 3f67331..2296aca 100644
--- a/src/opensynth/utils/polars.py
+++ b/src/opensynth/utils/polars.py
@@ -1,6 +1,3 @@
-# Copyright Contributors to the Opensynth-energy Project.
-# SPDX-License-Identifier: Apache-2.0
-
 import logging
 import random
 from typing import Optional

From cf0bf37ff3e94b8bc0c8028b4ce5469122c00c06 Mon Sep 17 00:00:00 2001
From: Charlotte Avery <charlotte.avery@centrefornetzero.org>
Date: Wed, 17 Dec 2025 09:44:22 +0000
Subject: [PATCH 5/5] Reduce line length to 79 characters

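Re-applies the line-wrapping changes from PATCH 2/5 (docstrings, comments
and one error message). This commit does not revert commit
127b8da14139cde07028c6260f6f3cd69349d41e; that revert is PATCH 4/5.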
---
 .../datasets/low_carbon_london/load.py        |  5 +-
 src/opensynth/utils/polars.py                 | 54 ++++++++++---------
 .../fidelity/test_autocorrelation.py          |  5 +-
 3 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/src/opensynth/datasets/low_carbon_london/load.py b/src/opensynth/datasets/low_carbon_london/load.py
index c948209..2d91e0e 100644
--- a/src/opensynth/datasets/low_carbon_london/load.py
+++ b/src/opensynth/datasets/low_carbon_london/load.py
@@ -15,7 +15,8 @@ def load_lcl_data_by_year(
 ) -> pd.DataFrame | pl.DataFrame:
     """Load LCL data for a specific year.
 
-    Returns a DataFrame in wide format. The first column contains the timestamp.
+    Returns a DataFrame in wide format. The first column contains the
+    timestamp.
 
     Args:
         fname (str or Path): Location of the `train.csv` data file.
@@ -25,7 +26,7 @@ def load_lcl_data_by_year(
         pl.DataFrame with KWH/hh measurements.
     """
     fname = (
-        Path(__file__).parents[0] / "../../../../data/raw/historical/train.csv"
+        Path(__file__).parents[0] / "./data/raw/historical/train.csv"
         if fname is None
         else Path(fname)
     )
diff --git a/src/opensynth/utils/polars.py b/src/opensynth/utils/polars.py
index fffa9b5..2296aca 100644
--- a/src/opensynth/utils/polars.py
+++ b/src/opensynth/utils/polars.py
@@ -17,9 +17,9 @@ def infer_date_column(df: pl.DataFrame) -> str:
 
     Returns the column name of a column in Date format, or a String column that
     matches a Date string. If the DataFrame contains only one matching column,
-    this function will return that column name. If multiple columns match, it will
-    return the column name that matches a canonical Date name, such as "DATUM".
-    In all other cases the function will raise a ValueError().
+    this function will return that column name. If multiple columns match, it
+    will return the column name that matches a canonical Date name, such as
+    "DATUM". In all other cases the function will raise a ValueError().
 
     Args:
         df (pl.DataFrame): DataFrame.
@@ -28,8 +28,8 @@ def infer_date_column(df: pl.DataFrame) -> str:
         str: column name of a column in Date or Date-like format.
 
     Raises:
-        ValueError: if no columns are in a Date-like format or multiple columns are
-        in Date-like format and match a canonical name.
+        ValueError: if no columns are in a Date-like format or multiple columns
+            are in Date-like format and match a canonical name.
 
     """
     date_columns = df.select(pl.col(pl.Date)).columns
@@ -49,7 +49,8 @@ def infer_date_column(df: pl.DataFrame) -> str:
             return list(canonical_columns)[0]
         case _:
             raise ValueError(
-                "Multiple Date-like columns found with a matching canonical name!"
+                "Multiple Date-like columns found with a matching "
+                "canonical name!"
             )
 
 
@@ -68,20 +69,21 @@ def semiwide_to_long(
     default in "%HH%mm" format.
 
     Args:
-        df (polars.DataFrame): DataFrame in semi-wide wide format, containing DateTime-
-            compatible column names.
-        on (list, optional): Columns to use as timepoints. By default, all columns that
-            match the pattern '[0-9][0-9][0-9][0-9]' will be used.
-        date_col (str, optional): Column that contains the Date values. By default,
-            a column that is in Date format, or that is a Date-compatible string, will
-            be used, if there is only one column in that format. If there are multiple
-            Date-compatible, columns, but only one matches a canonical name such as
-            DATUM, that column will be used. Otherwise, this method will fail, and the
-            date_col needs to be explicitly specified.
+        df (polars.DataFrame): DataFrame in semi-wide wide format, containing
+            DateTime-compatible column names.
+        on (list, optional): Columns to use as timepoints. By default, all
+            columns that match the pattern '[0-9][0-9][0-9][0-9]' will be used.
+        date_col (str, optional): Column that contains the Date values. By
+            default, a column that is in Date format, or that is a
+            Date-compatible string, will be used, if there is only one column
+            in that format. If there are multiple Date-compatible columns, but
+            only one matches a canonical name such as DATUM, that column will
+            be used. Otherwise, this method will fail, and the date_col needs
+            to be explicitly specified.
         datetime_name (str, optional): Name for the DateTime column in the long
             DataFrame, "DATUM_TIJD" by default.
-        value_name (str, optional): Name to give to the value column. Defaults to
-            "value".
+        value_name (str, optional): Name to give to the value column. Defaults
+            to "value".
 
     Returns:
         polars.DataFrame in long format.
@@ -137,12 +139,13 @@ def semiwide_to_wide(
             DateTime-compatible column names.
         on (list, optional): Columns to use as timepoints. By default, all
             columns that match the pattern '[0-9][0-9][0-9][0-9]' will be used.
-        date_col (str, optional): Column that contains the Date values. By default,
-            a column that is in Date format, or that is a Date-compatible string,
-            will be used, if there is only one column in that format. If there are
-            multiple Date-compatible, columns, but only one matches a canonical name
-            such as DATUM, that column will be used. Otherwise, this method will fail,
-            and the date_col needs to be explicitly specified.
+        date_col (str, optional): Column that contains the Date values. By
+            default, a column that is in Date format, or that is a
+            Date-compatible string, will be used, if there is only one column
+            in that format. If there are multiple Date-compatible columns, but
+            only one matches a canonical name such as DATUM, that column will
+            be used. Otherwise, this method will fail, and the date_col needs
+            to be explicitly specified.
         datetime_name (str, optional): Name for the DateTime column in the long
             DataFrame, "datetime" by default.
 
@@ -178,7 +181,8 @@ def randomize_index_column(
     Args:
         df (DataFrame): Input DataFrame.
         index_col_name (str): Name of index column.
-        sample_col_name (str): Name of new column containing the randomized index.
+        sample_col_name (str): Name of new column containing the randomized
+            index.
 
     Returns:
         DataFrame with index column values randomized.
diff --git a/tests/evaluation/fidelity/test_autocorrelation.py b/tests/evaluation/fidelity/test_autocorrelation.py
index 27b3ca0..d7ab203 100644
--- a/tests/evaluation/fidelity/test_autocorrelation.py
+++ b/tests/evaluation/fidelity/test_autocorrelation.py
@@ -47,7 +47,10 @@ def test_dataframe_half_hour_pandas(test_dataframe_half_hour):
 
 @pytest.fixture(scope="module")
 def test_dataframe_quarterly():
-    """ "DataFrame with 15-minute timesteps and high correlation with a week time-lag."""
+    """
+    DataFrame with 15-minute timesteps and high correlation with a week
+    time-lag.
+    """
     n_minutes = 15
     n_values = 60 // n_minutes * 24 * 7  # 1 week
     n_timesteps = 35041