From 521298f7d61b5499917ba7ca6ab53ee7a3d84415 Mon Sep 17 00:00:00 2001 From: Charlotte Avery Date: Tue, 16 Dec 2025 12:10:26 +0000 Subject: [PATCH 1/5] Split up split & preprocess steps Signed-off-by: Charlotte Avery --- app/app.py | 15 +++++ .../datasets/low_carbon_london/get_data.py | 66 +++++++++++-------- 2 files changed, 54 insertions(+), 27 deletions(-) diff --git a/app/app.py b/app/app.py index e225e9e..6d5ff72 100644 --- a/app/app.py +++ b/app/app.py @@ -32,6 +32,19 @@ def download_lcl_data( @app.command() def preprocess_data( + split: Annotated[ + bool, + typer.Option( + "--split", help="Splits LCL households into training/holdout set" + ), + ] = False, + preprocess: Annotated[ + bool, + typer.Option( + "--preprocess", + help="Preprocesses LCL data into daily load profiles", + ), + ] = False, data_dir: Annotated[ str, typer.Option("--loc", help="Location of data directory.") ] = "./data", @@ -145,6 +158,8 @@ def preprocess_data( """ get_data.split_preprocess_data( + split, + preprocess, data_dir, csv_data_path, sample_fraction, diff --git a/src/opensynth/datasets/low_carbon_london/get_data.py b/src/opensynth/datasets/low_carbon_london/get_data.py index 3bc0f5b..192899b 100644 --- a/src/opensynth/datasets/low_carbon_london/get_data.py +++ b/src/opensynth/datasets/low_carbon_london/get_data.py @@ -34,6 +34,8 @@ def download_lcl_data(data_dir: str = "./data"): def split_preprocess_data( + split: bool, + preprocess: bool, data_dir: str, csv_data_path: str, sample_fraction: float, @@ -93,38 +95,46 @@ def split_preprocess_data( f"Reading data from {CSV_FILE_NAME}. Storing data in {data_dir}." 
) - # Split dataset into training/ holdout sets - split_households.split_data( - data_dir, - CSV_FILE_NAME, - sample_fraction=sample_fraction, - id_col=id_col, - kwh_col=kwh_col, - datetime_col=datetime_col, - utc=utc, - datetime_format=datetime_format, - historical_start=historical_start, - historical_end=historical_end, - future_start=future_start, - future_end=future_end, - ) - # Preprocess the data into daily load profiles - preprocess_lcl.preprocess_data( - data_dir, - datetime_col=datetime_col, - kwh_col=kwh_col, - id_col=id_col, - utc=utc, - datetime_format=datetime_format, - time_resolution=time_resolution, - feature_cols=feature_cols, - drop_nulls=drop_nulls, - ) + if split: + # Split dataset into training/ holdout sets + split_households.split_data( + data_dir, + CSV_FILE_NAME, + sample_fraction=sample_fraction, + id_col=id_col, + kwh_col=kwh_col, + datetime_col=datetime_col, + utc=utc, + datetime_format=datetime_format, + historical_start=historical_start, + historical_end=historical_end, + future_start=future_start, + future_end=future_end, + ) + if preprocess: + # Preprocess the data into daily load profiles + preprocess_lcl.preprocess_data( + data_dir, + datetime_col=datetime_col, + kwh_col=kwh_col, + id_col=id_col, + utc=utc, + datetime_format=datetime_format, + time_resolution=time_resolution, + feature_cols=feature_cols, + drop_nulls=drop_nulls, + ) if __name__ == "__main__": + # Whether to split and/or preprocess the data + split = True + preprocess = True + + # Data directory data_dir = "./data" + # Fraction of households to include in training set sample_fraction = 0.75 # Dataset location @@ -147,6 +157,8 @@ def split_preprocess_data( drop_nulls = True split_preprocess_data( + split, + preprocess, data_dir, csv_data_path, sample_fraction, From b8313c4246af46b1d36bbdc0dd97fc6417c87750 Mon Sep 17 00:00:00 2001 From: Charlotte Avery Date: Wed, 17 Dec 2025 09:44:22 +0000 Subject: [PATCH 2/5] Reduce line length to 79 characters --- 
.../datasets/low_carbon_london/load.py | 5 +- src/opensynth/utils/polars.py | 54 ++++++++++--------- .../fidelity/test_autocorrelation.py | 5 +- 3 files changed, 36 insertions(+), 28 deletions(-) diff --git a/src/opensynth/datasets/low_carbon_london/load.py b/src/opensynth/datasets/low_carbon_london/load.py index c948209..2d91e0e 100644 --- a/src/opensynth/datasets/low_carbon_london/load.py +++ b/src/opensynth/datasets/low_carbon_london/load.py @@ -15,7 +15,8 @@ def load_lcl_data_by_year( ) -> pd.DataFrame | pl.DataFrame: """Load LCL data for a specific year. - Returns a DataFrame in wide format. The first column contains the timestamp. + Returns a DataFrame in wide format. The first column contains the + timestamp. Args: fname (str or Path): Location of the `train.csv` data file. @@ -25,7 +26,7 @@ def load_lcl_data_by_year( pl.DataFrame with KWH/hh measurements. """ fname = ( - Path(__file__).parents[0] / "../../../../data/raw/historical/train.csv" + Path(__file__).parents[0] / "./data/raw/historical/train.csv" if fname is None else Path(fname) ) diff --git a/src/opensynth/utils/polars.py b/src/opensynth/utils/polars.py index fffa9b5..2296aca 100644 --- a/src/opensynth/utils/polars.py +++ b/src/opensynth/utils/polars.py @@ -17,9 +17,9 @@ def infer_date_column(df: pl.DataFrame) -> str: Returns the column name of a column in Date format, or a String column that matches a Date string. If the DataFrame contains only one matching column, - this function will return that column name. If multiple columns match, it will - return the column name that matches a canonical Date name, such as "DATUM". - In all other cases the function will raise a ValueError(). + this function will return that column name. If multiple columns match, it + will return the column name that matches a canonical Date name, such as + "DATUM". In all other cases the function will raise a ValueError(). Args: df (pl.DataFrame): DataFrame. 
@@ -28,8 +28,8 @@ def infer_date_column(df: pl.DataFrame) -> str: str: column name of a column in Date or Date-like format. Raises: - ValueError: if no columns are in a Date-like format or multiple columns are - in Date-like format and match a canonical name. + ValueError: if no columns are in a Date-like format or multiple columns + are in Date-like format and match a canonical name. """ date_columns = df.select(pl.col(pl.Date)).columns @@ -49,7 +49,8 @@ def infer_date_column(df: pl.DataFrame) -> str: return list(canonical_columns)[0] case _: raise ValueError( - "Multiple Date-like columns found with a matching canonical name!" + "Multiple Date-like columns found with a matching canonical \ + name!" ) @@ -68,20 +69,21 @@ def semiwide_to_long( default in "%HH%mm" format. Args: - df (polars.DataFrame): DataFrame in semi-wide wide format, containing DateTime- - compatible column names. - on (list, optional): Columns to use as timepoints. By default, all columns that - match the pattern '[0-9][0-9][0-9][0-9]' will be used. - date_col (str, optional): Column that contains the Date values. By default, - a column that is in Date format, or that is a Date-compatible string, will - be used, if there is only one column in that format. If there are multiple - Date-compatible, columns, but only one matches a canonical name such as - DATUM, that column will be used. Otherwise, this method will fail, and the - date_col needs to be explicitly specified. + df (polars.DataFrame): DataFrame in semi-wide wide format, containing + DateTime-compatible column names. + on (list, optional): Columns to use as timepoints. By default, all + columns that match the pattern '[0-9][0-9][0-9][0-9]' will be used. + date_col (str, optional): Column that contains the Date values. By + default, a column that is in Date format, or that is a + Date-compatible string, will be used, if there is only one column + in that format. 
If there are multiple Date-compatible columns, but + only one matches a canonical name such as DATUM, that column will + be used. Otherwise, this method will fail, and the date_col needs + to be explicitly specified. datetime_name (str, optional): Name for the DateTime column in the long DataFrame, "DATUM_TIJD" by default. - value_name (str, optional): Name to give to the value column. Defaults to - "value". + value_name (str, optional): Name to give to the value column. Defaults + to "value". Returns: polars.DataFrame in long format. @@ -137,12 +139,13 @@ def semiwide_to_wide( DateTime-compatible column names. on (list, optional): Columns to use as timepoints. By default, all columns that match the pattern '[0-9][0-9][0-9][0-9]' will be used. - date_col (str, optional): Column that contains the Date values. By default, - a column that is in Date format, or that is a Date-compatible string, - will be used, if there is only one column in that format. If there are - multiple Date-compatible, columns, but only one matches a canonical name - such as DATUM, that column will be used. Otherwise, this method will fail, - and the date_col needs to be explicitly specified. + date_col (str, optional): Column that contains the Date values. By + default, a column that is in Date format, or that is a + Date-compatible string, will be used, if there is only one column + in that format. If there are multiple Date-compatible columns, but + only one matches a canonical name such as DATUM, that column will + be used. Otherwise, this method will fail, and the date_col needs + to be explicitly specified. datetime_name (str, optional): Name for the DateTime column in the long DataFrame, "datetime" by default. @@ -178,7 +181,8 @@ def randomize_index_column( Args: df (DataFrame): Input DataFrame. index_col_name (str): Name of index column. - sample_col_name (str): Name of new column containing the randomized index. 
+ sample_col_name (str): Name of new column containing the randomized + index. Returns: DataFrame with index column values randomized. diff --git a/tests/evaluation/fidelity/test_autocorrelation.py b/tests/evaluation/fidelity/test_autocorrelation.py index 27b3ca0..d7ab203 100644 --- a/tests/evaluation/fidelity/test_autocorrelation.py +++ b/tests/evaluation/fidelity/test_autocorrelation.py @@ -47,7 +47,10 @@ def test_dataframe_half_hour_pandas(test_dataframe_half_hour): @pytest.fixture(scope="module") def test_dataframe_quarterly(): - """ "DataFrame with 15-minute timesteps and high correlation with a week time-lag.""" + """ + DataFrame with 15-minute timesteps and high correlation with a week + time-lag. + """ n_minutes = 15 n_values = 60 // n_minutes * 24 * 7 # 1 week n_timesteps = 35041 From 127b8da14139cde07028c6260f6f3cd69349d41e Mon Sep 17 00:00:00 2001 From: Charlotte Avery Date: Wed, 17 Dec 2025 10:54:04 +0000 Subject: [PATCH 3/5] Add licenses --- src/opensynth/datasets/low_carbon_london/load.py | 3 +++ src/opensynth/evaluation/fidelity/__init__.py | 2 ++ src/opensynth/evaluation/fidelity/autocorrelation.py | 3 +++ src/opensynth/evaluation/fidelity/seasonal_statistics.py | 3 +++ src/opensynth/models/faraday/stitched_model/stitch.py | 3 +++ src/opensynth/models/faraday/stitched_model/stitched_model.py | 3 +++ src/opensynth/models/faraday/stitched_model/utils.py | 3 +++ src/opensynth/utils/__init__.py | 2 ++ src/opensynth/utils/polars.py | 3 +++ 9 files changed, 25 insertions(+) diff --git a/src/opensynth/datasets/low_carbon_london/load.py b/src/opensynth/datasets/low_carbon_london/load.py index 2d91e0e..6464ffe 100644 --- a/src/opensynth/datasets/low_carbon_london/load.py +++ b/src/opensynth/datasets/low_carbon_london/load.py @@ -1,3 +1,6 @@ +# Copyright Contributors to the Opensynth-energy Project. 
+# SPDX-License-Identifier: Apache-2.0 + import logging from pathlib import Path from typing import Literal diff --git a/src/opensynth/evaluation/fidelity/__init__.py b/src/opensynth/evaluation/fidelity/__init__.py index e69de29..a19fd9c 100644 --- a/src/opensynth/evaluation/fidelity/__init__.py +++ b/src/opensynth/evaluation/fidelity/__init__.py @@ -0,0 +1,2 @@ +# Copyright Contributors to the Opensynth-energy Project. +# SPDX-License-Identifier: Apache-2.0 diff --git a/src/opensynth/evaluation/fidelity/autocorrelation.py b/src/opensynth/evaluation/fidelity/autocorrelation.py index c15da90..6260e96 100644 --- a/src/opensynth/evaluation/fidelity/autocorrelation.py +++ b/src/opensynth/evaluation/fidelity/autocorrelation.py @@ -1,3 +1,6 @@ +# Copyright Contributors to the Opensynth-energy Project. +# SPDX-License-Identifier: Apache-2.0 + """Auto-correlation metrics for fidelity evaluation. Note regarding suitability of this metric: Faraday generates independent daily diff --git a/src/opensynth/evaluation/fidelity/seasonal_statistics.py b/src/opensynth/evaluation/fidelity/seasonal_statistics.py index 3ec0aff..b2c21c0 100644 --- a/src/opensynth/evaluation/fidelity/seasonal_statistics.py +++ b/src/opensynth/evaluation/fidelity/seasonal_statistics.py @@ -1,3 +1,6 @@ +# Copyright Contributors to the Opensynth-energy Project. +# SPDX-License-Identifier: Apache-2.0 + """Seasonal statistics metrics for fidelity evaluation. Note regarding suitability of this metric: Faraday generates independent daily diff --git a/src/opensynth/models/faraday/stitched_model/stitch.py b/src/opensynth/models/faraday/stitched_model/stitch.py index c2201c9..b0263f3 100644 --- a/src/opensynth/models/faraday/stitched_model/stitch.py +++ b/src/opensynth/models/faraday/stitched_model/stitch.py @@ -1,3 +1,6 @@ +# Copyright Contributors to the Opensynth-energy Project. 
+# SPDX-License-Identifier: Apache-2.0 + import polars as pl diff --git a/src/opensynth/models/faraday/stitched_model/stitched_model.py b/src/opensynth/models/faraday/stitched_model/stitched_model.py index f0e67f6..b50f10e 100644 --- a/src/opensynth/models/faraday/stitched_model/stitched_model.py +++ b/src/opensynth/models/faraday/stitched_model/stitched_model.py @@ -1,3 +1,6 @@ +# Copyright Contributors to the Opensynth-energy Project. +# SPDX-License-Identifier: Apache-2.0 + import logging from collections.abc import Generator from datetime import date diff --git a/src/opensynth/models/faraday/stitched_model/utils.py b/src/opensynth/models/faraday/stitched_model/utils.py index 9566a8e..7e69188 100644 --- a/src/opensynth/models/faraday/stitched_model/utils.py +++ b/src/opensynth/models/faraday/stitched_model/utils.py @@ -1,3 +1,6 @@ +# Copyright Contributors to the Opensynth-energy Project. +# SPDX-License-Identifier: Apache-2.0 + from calendar import monthrange from datetime import date diff --git a/src/opensynth/utils/__init__.py b/src/opensynth/utils/__init__.py index e69de29..a19fd9c 100644 --- a/src/opensynth/utils/__init__.py +++ b/src/opensynth/utils/__init__.py @@ -0,0 +1,2 @@ +# Copyright Contributors to the Opensynth-energy Project. +# SPDX-License-Identifier: Apache-2.0 diff --git a/src/opensynth/utils/polars.py b/src/opensynth/utils/polars.py index 2296aca..3f67331 100644 --- a/src/opensynth/utils/polars.py +++ b/src/opensynth/utils/polars.py @@ -1,3 +1,6 @@ +# Copyright Contributors to the Opensynth-energy Project. +# SPDX-License-Identifier: Apache-2.0 + import logging import random from typing import Optional From 404f7e2081e9c5a39728fbc334f0e3e48181a53d Mon Sep 17 00:00:00 2001 From: Charlotte Avery Date: Wed, 17 Dec 2025 11:12:48 +0000 Subject: [PATCH 4/5] Revert "Add licenses" This reverts commit 127b8da14139cde07028c6260f6f3cd69349d41e. 
--- src/opensynth/datasets/low_carbon_london/load.py | 3 --- src/opensynth/evaluation/fidelity/__init__.py | 2 -- src/opensynth/evaluation/fidelity/autocorrelation.py | 3 --- src/opensynth/evaluation/fidelity/seasonal_statistics.py | 3 --- src/opensynth/models/faraday/stitched_model/stitch.py | 3 --- src/opensynth/models/faraday/stitched_model/stitched_model.py | 3 --- src/opensynth/models/faraday/stitched_model/utils.py | 3 --- src/opensynth/utils/__init__.py | 2 -- src/opensynth/utils/polars.py | 3 --- 9 files changed, 25 deletions(-) diff --git a/src/opensynth/datasets/low_carbon_london/load.py b/src/opensynth/datasets/low_carbon_london/load.py index 6464ffe..2d91e0e 100644 --- a/src/opensynth/datasets/low_carbon_london/load.py +++ b/src/opensynth/datasets/low_carbon_london/load.py @@ -1,6 +1,3 @@ -# Copyright Contributors to the Opensynth-energy Project. -# SPDX-License-Identifier: Apache-2.0 - import logging from pathlib import Path from typing import Literal diff --git a/src/opensynth/evaluation/fidelity/__init__.py b/src/opensynth/evaluation/fidelity/__init__.py index a19fd9c..e69de29 100644 --- a/src/opensynth/evaluation/fidelity/__init__.py +++ b/src/opensynth/evaluation/fidelity/__init__.py @@ -1,2 +0,0 @@ -# Copyright Contributors to the Opensynth-energy Project. -# SPDX-License-Identifier: Apache-2.0 diff --git a/src/opensynth/evaluation/fidelity/autocorrelation.py b/src/opensynth/evaluation/fidelity/autocorrelation.py index 6260e96..c15da90 100644 --- a/src/opensynth/evaluation/fidelity/autocorrelation.py +++ b/src/opensynth/evaluation/fidelity/autocorrelation.py @@ -1,6 +1,3 @@ -# Copyright Contributors to the Opensynth-energy Project. -# SPDX-License-Identifier: Apache-2.0 - """Auto-correlation metrics for fidelity evaluation. 
Note regarding suitability of this metric: Faraday generates independent daily diff --git a/src/opensynth/evaluation/fidelity/seasonal_statistics.py b/src/opensynth/evaluation/fidelity/seasonal_statistics.py index b2c21c0..3ec0aff 100644 --- a/src/opensynth/evaluation/fidelity/seasonal_statistics.py +++ b/src/opensynth/evaluation/fidelity/seasonal_statistics.py @@ -1,6 +1,3 @@ -# Copyright Contributors to the Opensynth-energy Project. -# SPDX-License-Identifier: Apache-2.0 - """Seasonal statistics metrics for fidelity evaluation. Note regarding suitability of this metric: Faraday generates independent daily diff --git a/src/opensynth/models/faraday/stitched_model/stitch.py b/src/opensynth/models/faraday/stitched_model/stitch.py index b0263f3..c2201c9 100644 --- a/src/opensynth/models/faraday/stitched_model/stitch.py +++ b/src/opensynth/models/faraday/stitched_model/stitch.py @@ -1,6 +1,3 @@ -# Copyright Contributors to the Opensynth-energy Project. -# SPDX-License-Identifier: Apache-2.0 - import polars as pl diff --git a/src/opensynth/models/faraday/stitched_model/stitched_model.py b/src/opensynth/models/faraday/stitched_model/stitched_model.py index b50f10e..f0e67f6 100644 --- a/src/opensynth/models/faraday/stitched_model/stitched_model.py +++ b/src/opensynth/models/faraday/stitched_model/stitched_model.py @@ -1,6 +1,3 @@ -# Copyright Contributors to the Opensynth-energy Project. -# SPDX-License-Identifier: Apache-2.0 - import logging from collections.abc import Generator from datetime import date diff --git a/src/opensynth/models/faraday/stitched_model/utils.py b/src/opensynth/models/faraday/stitched_model/utils.py index 7e69188..9566a8e 100644 --- a/src/opensynth/models/faraday/stitched_model/utils.py +++ b/src/opensynth/models/faraday/stitched_model/utils.py @@ -1,6 +1,3 @@ -# Copyright Contributors to the Opensynth-energy Project. 
-# SPDX-License-Identifier: Apache-2.0 - from calendar import monthrange from datetime import date diff --git a/src/opensynth/utils/__init__.py b/src/opensynth/utils/__init__.py index a19fd9c..e69de29 100644 --- a/src/opensynth/utils/__init__.py +++ b/src/opensynth/utils/__init__.py @@ -1,2 +0,0 @@ -# Copyright Contributors to the Opensynth-energy Project. -# SPDX-License-Identifier: Apache-2.0 diff --git a/src/opensynth/utils/polars.py b/src/opensynth/utils/polars.py index 3f67331..2296aca 100644 --- a/src/opensynth/utils/polars.py +++ b/src/opensynth/utils/polars.py @@ -1,6 +1,3 @@ -# Copyright Contributors to the Opensynth-energy Project. -# SPDX-License-Identifier: Apache-2.0 - import logging import random from typing import Optional From cf0bf37ff3e94b8bc0c8028b4ce5469122c00c06 Mon Sep 17 00:00:00 2001 From: Charlotte Avery Date: Wed, 17 Dec 2025 09:44:22 +0000 Subject: [PATCH 5/5] Reduce line length to 79 characters Wraps docstrings and a long error-message literal at 79 characters; also shortens the default train.csv path in load.py. --- .../datasets/low_carbon_london/load.py | 5 +- src/opensynth/utils/polars.py | 54 ++++++++++--------- .../fidelity/test_autocorrelation.py | 5 +- 3 files changed, 36 insertions(+), 28 deletions(-) diff --git a/src/opensynth/datasets/low_carbon_london/load.py b/src/opensynth/datasets/low_carbon_london/load.py index c948209..2d91e0e 100644 --- a/src/opensynth/datasets/low_carbon_london/load.py +++ b/src/opensynth/datasets/low_carbon_london/load.py @@ -15,7 +15,8 @@ def load_lcl_data_by_year( ) -> pd.DataFrame | pl.DataFrame: """Load LCL data for a specific year. - Returns a DataFrame in wide format. The first column contains the timestamp. + Returns a DataFrame in wide format. The first column contains the + timestamp. Args: fname (str or Path): Location of the `train.csv` data file. @@ -25,7 +26,7 @@ def load_lcl_data_by_year( pl.DataFrame with KWH/hh measurements.
""" fname = ( - Path(__file__).parents[0] / "../../../../data/raw/historical/train.csv" + Path(__file__).parents[0] / "./data/raw/historical/train.csv" if fname is None else Path(fname) ) diff --git a/src/opensynth/utils/polars.py b/src/opensynth/utils/polars.py index fffa9b5..2296aca 100644 --- a/src/opensynth/utils/polars.py +++ b/src/opensynth/utils/polars.py @@ -17,9 +17,9 @@ def infer_date_column(df: pl.DataFrame) -> str: Returns the column name of a column in Date format, or a String column that matches a Date string. If the DataFrame contains only one matching column, - this function will return that column name. If multiple columns match, it will - return the column name that matches a canonical Date name, such as "DATUM". - In all other cases the function will raise a ValueError(). + this function will return that column name. If multiple columns match, it + will return the column name that matches a canonical Date name, such as + "DATUM". In all other cases the function will raise a ValueError(). Args: df (pl.DataFrame): DataFrame. @@ -28,8 +28,8 @@ def infer_date_column(df: pl.DataFrame) -> str: str: column name of a column in Date or Date-like format. Raises: - ValueError: if no columns are in a Date-like format or multiple columns are - in Date-like format and match a canonical name. + ValueError: if no columns are in a Date-like format or multiple columns + are in Date-like format and match a canonical name. """ date_columns = df.select(pl.col(pl.Date)).columns @@ -49,7 +49,8 @@ def infer_date_column(df: pl.DataFrame) -> str: return list(canonical_columns)[0] case _: raise ValueError( - "Multiple Date-like columns found with a matching canonical name!" + "Multiple Date-like columns found with a matching canonical \ + name!" ) @@ -68,20 +69,21 @@ def semiwide_to_long( default in "%HH%mm" format. Args: - df (polars.DataFrame): DataFrame in semi-wide wide format, containing DateTime- - compatible column names. 
- on (list, optional): Columns to use as timepoints. By default, all columns that - match the pattern '[0-9][0-9][0-9][0-9]' will be used. - date_col (str, optional): Column that contains the Date values. By default, - a column that is in Date format, or that is a Date-compatible string, will - be used, if there is only one column in that format. If there are multiple - Date-compatible, columns, but only one matches a canonical name such as - DATUM, that column will be used. Otherwise, this method will fail, and the - date_col needs to be explicitly specified. + df (polars.DataFrame): DataFrame in semi-wide wide format, containing + DateTime-compatible column names. + on (list, optional): Columns to use as timepoints. By default, all + columns that match the pattern '[0-9][0-9][0-9][0-9]' will be used. + date_col (str, optional): Column that contains the Date values. By + default, a column that is in Date format, or that is a + Date-compatible string, will be used, if there is only one column + in that format. If there are multiple Date-compatible columns, but + only one matches a canonical name such as DATUM, that column will + be used. Otherwise, this method will fail, and the date_col needs + to be explicitly specified. datetime_name (str, optional): Name for the DateTime column in the long DataFrame, "DATUM_TIJD" by default. - value_name (str, optional): Name to give to the value column. Defaults to - "value". + value_name (str, optional): Name to give to the value column. Defaults + to "value". Returns: polars.DataFrame in long format. @@ -137,12 +139,13 @@ def semiwide_to_wide( DateTime-compatible column names. on (list, optional): Columns to use as timepoints. By default, all columns that match the pattern '[0-9][0-9][0-9][0-9]' will be used. - date_col (str, optional): Column that contains the Date values. By default, - a column that is in Date format, or that is a Date-compatible string, - will be used, if there is only one column in that format. 
If there are - multiple Date-compatible, columns, but only one matches a canonical name - such as DATUM, that column will be used. Otherwise, this method will fail, - and the date_col needs to be explicitly specified. + date_col (str, optional): Column that contains the Date values. By + default, a column that is in Date format, or that is a + Date-compatible string, will be used, if there is only one column + in that format. If there are multiple Date-compatible columns, but + only one matches a canonical name such as DATUM, that column will + be used. Otherwise, this method will fail, and the date_col needs + to be explicitly specified. datetime_name (str, optional): Name for the DateTime column in the long DataFrame, "datetime" by default. @@ -178,7 +181,8 @@ def randomize_index_column( Args: df (DataFrame): Input DataFrame. index_col_name (str): Name of index column. - sample_col_name (str): Name of new column containing the randomized index. + sample_col_name (str): Name of new column containing the randomized + index. Returns: DataFrame with index column values randomized. diff --git a/tests/evaluation/fidelity/test_autocorrelation.py b/tests/evaluation/fidelity/test_autocorrelation.py index 27b3ca0..d7ab203 100644 --- a/tests/evaluation/fidelity/test_autocorrelation.py +++ b/tests/evaluation/fidelity/test_autocorrelation.py @@ -47,7 +47,10 @@ def test_dataframe_half_hour_pandas(test_dataframe_half_hour): @pytest.fixture(scope="module") def test_dataframe_quarterly(): - """ "DataFrame with 15-minute timesteps and high correlation with a week time-lag.""" + """ + DataFrame with 15-minute timesteps and high correlation with a week + time-lag. + """ n_minutes = 15 n_values = 60 // n_minutes * 24 * 7 # 1 week n_timesteps = 35041