From 6d5d7e20e5a11e12553260c579a1b20f49f951de Mon Sep 17 00:00:00 2001 From: izofat Date: Fri, 6 Sep 2024 15:37:11 +0300 Subject: [PATCH 1/4] Optimize method performance; note that output dataframes are different --- wind_up/detrend.py | 56 ++++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/wind_up/detrend.py b/wind_up/detrend.py index 605e7d1..5d8e493 100644 --- a/wind_up/detrend.py +++ b/wind_up/detrend.py @@ -31,40 +31,52 @@ def calc_wsratio_v_wd( # IEC says only use 4-16 m/s test_ws_ll = 4 test_ws_ul = 16 - ref_ws_ll = test_ws_ll * detrend_df[ref_ws_col].mean() / detrend_df[test_ws_col].mean() - ref_ws_ul = test_ws_ul * detrend_df[ref_ws_col].mean() / detrend_df[test_ws_col].mean() - detrend_df = detrend_df[(detrend_df[test_ws_col] >= test_ws_ll) & (detrend_df[test_ws_col] < test_ws_ul)] - detrend_df = detrend_df[(detrend_df[ref_ws_col] >= ref_ws_ll) & (detrend_df[ref_ws_col] < ref_ws_ul)] + test_ws_mean = detrend_df[test_ws_col].mean() + ref_ws_mean = detrend_df[ref_ws_col].mean() + + ref_ws_ll = test_ws_ll * ref_ws_mean / test_ws_mean + ref_ws_ul = test_ws_ul * ref_ws_mean / test_ws_mean + + detrend_df = detrend_df[ + (detrend_df[test_ws_col] >= test_ws_ll) & + (detrend_df[test_ws_col] < test_ws_ul) & + (detrend_df[ref_ws_col] >= ref_ws_ll) & + (detrend_df[ref_ws_col] < ref_ws_ul) + ] + + rows_per_hour = 3600 / timebase_s + min_count = min_hours * rows_per_hour + iec_ws_threshold = 8 + + # Vectorized circular difference calculation + directions = np.arange(0, 360) + circ_diffs = circ_diff(detrend_df[ref_wd_col].values[:, None], directions) + + within_dir_bins = np.abs(circ_diffs) < dir_bin_width / 2 + hours = within_dir_bins.sum(axis=0) / rows_per_hour - directions = [] - hours = [] test_rf_ws_roms = [] - for d in list(range(0, 360, 1)): - detrend_df["circ_diff_to_d"] = circ_diff(detrend_df[ref_wd_col], d) - detrend_df["within_dir_bin"] = detrend_df["circ_diff_to_d"].abs() < dir_bin_width / 2 - subsector_df = detrend_df[detrend_df["within_dir_bin"]].copy() - if len(subsector_df) > 0: - directions.append(d) - rows_per_hour = 3600 / timebase_s - hours.append(len(subsector_df) / rows_per_hour) - # 61400-12-1 requires >=24h data, >=6h above 8m/s, >= below 8m/s - min_count = min_hours * rows_per_hour - accept_sector = len(subsector_df) >= min_count - iec_ws_threshold = 8 - accept_sector = accept_sector and ((subsector_df[test_ws_col] < iec_ws_threshold).sum() >= (min_count / 4)) - accept_sector = accept_sector and ((subsector_df[test_ws_col] >= iec_ws_threshold).sum() >= (min_count / 4)) - if accept_sector: + for idx, direction_mask in enumerate(within_dir_bins.T): + subsector_df = detrend_df[direction_mask] + + if len(subsector_df) >= min_count: + below_thresh = (subsector_df[test_ws_col] < iec_ws_threshold).sum() + above_thresh = (subsector_df[test_ws_col] >= iec_ws_threshold).sum() + + if below_thresh >= (min_count / 4) and above_thresh >= (min_count / 4): rom = subsector_df[test_ws_col].mean() / subsector_df[ref_ws_col].mean() test_rf_ws_roms.append(rom) else: test_rf_ws_roms.append(np.nan) + else: + test_rf_ws_roms.append(np.nan) return pd.DataFrame( { "direction": directions, "hours": hours, "ws_rom": test_rf_ws_roms, - }, + } ) From b7b74b639923284bce7bb60add9904c77de53322 Mon Sep 17 00:00:00 2001 From: izofat Date: Sun, 8 Sep 2024 22:28:46 +0300 Subject: [PATCH 2/4] Improve performance by 11%, reducing test duration by 10% --- wind_up/detrend.py | 47 +++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/wind_up/detrend.py b/wind_up/detrend.py index 8ecbd21..bfce5eb 100644 --- a/wind_up/detrend.py +++ b/wind_up/detrend.py @@ -42,10 +42,10 @@ def calc_wsratio_v_wd( ref_ws_ul = test_ws_ul * ref_ws_mean / test_ws_mean detrend_df = detrend_df[ - (detrend_df[test_ws_col] >= test_ws_ll) & - (detrend_df[test_ws_col] < test_ws_ul) & - (detrend_df[ref_ws_col] >= ref_ws_ll) & - (detrend_df[ref_ws_col] < ref_ws_ul) + (detrend_df[test_ws_col] >= test_ws_ll) + & (detrend_df[test_ws_col] < test_ws_ul) + & (detrend_df[ref_ws_col] >= ref_ws_ll) + & (detrend_df[ref_ws_col] < ref_ws_ul) ] rows_per_hour = 3600 / timebase_s @@ -53,32 +53,37 @@ def calc_wsratio_v_wd( iec_ws_threshold = 8 # Vectorized circular difference calculation - directions = np.arange(0, 360) - circ_diffs = circ_diff(detrend_df[ref_wd_col].values[:, None], directions) + directions = np.arange(0, 360, 1) + circ_diffs = np.array([circ_diff(detrend_df[ref_wd_col], d) for d in directions]) within_dir_bins = np.abs(circ_diffs) < dir_bin_width / 2 - hours = within_dir_bins.sum(axis=0) / rows_per_hour + valid_directions = [] + valid_hours = [] test_rf_ws_roms = [] - for idx, direction_mask in enumerate(within_dir_bins.T): - subsector_df = detrend_df[direction_mask] - - if len(subsector_df) >= min_count: - below_thresh = (subsector_df[test_ws_col] < iec_ws_threshold).sum() - above_thresh = (subsector_df[test_ws_col] >= iec_ws_threshold).sum() - - if below_thresh >= (min_count / 4) and above_thresh >= (min_count / 4): - rom = subsector_df[test_ws_col].mean() / subsector_df[ref_ws_col].mean() - test_rf_ws_roms.append(rom) + for i, direction in enumerate(directions): + subsector_df = detrend_df[within_dir_bins[i]].copy() + + if (subsector_df_len := len(subsector_df)) > 0: + valid_directions.append(direction) + valid_hours.append(subsector_df_len / rows_per_hour) + + if subsector_df_len >= min_count: + below_thresh = (subsector_df[test_ws_col] < iec_ws_threshold).sum() + above_thresh = (subsector_df[test_ws_col] >= iec_ws_threshold).sum() + + if below_thresh >= (min_count / 4) and above_thresh >= (min_count / 4): + rom = subsector_df[test_ws_col].mean() / subsector_df[ref_ws_col].mean() + test_rf_ws_roms.append(rom) + else: + test_rf_ws_roms.append(np.nan) else: test_rf_ws_roms.append(np.nan) - else: - test_rf_ws_roms.append(np.nan) return pd.DataFrame( { - "direction": directions, - "hours": hours, + "direction": valid_directions, + "hours": valid_hours, "ws_rom": test_rf_ws_roms, } ) From 329ba8f4fff4186fbc41595cf0b6689d5138520f Mon Sep 17 00:00:00 2001 From: aclerc Date: Fri, 13 Sep 2024 14:38:10 +0100 Subject: [PATCH 3/4] use benchmark in test_calc_wsratio_v_wd_scen --- pyproject.toml | 1 + tests/test_detrend.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2e83e4a..e0bade4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ build-backend = "setuptools.build_meta" [project.optional-dependencies] dev = [ 'pytest', + 'pytest-benchmark', 'coverage', 'poethepoet', 'types-pyyaml', diff --git a/tests/test_detrend.py b/tests/test_detrend.py index b158499..6b069cf 100644 --- a/tests/test_detrend.py +++ b/tests/test_detrend.py @@ -66,7 +66,7 @@ def test_check_applied_detrend(test_lsa_t13_config: WindUpConfig) -> None: assert detrend_post_r2_improvement == pytest.approx(0.03776561982402227) -def test_calc_wsratio_v_wd_scen(test_lsa_t13_config: WindUpConfig) -> None: +def test_calc_wsratio_v_wd_scen(benchmark, test_lsa_t13_config: WindUpConfig) -> None: # this test case borrows logic and results from check_applied_detrend where data which has already been detrended # is used to calculate the wsratio_v_wd_scen again to check it is flat cfg = test_lsa_t13_config @@ -85,7 +85,8 @@ def test_calc_wsratio_v_wd_scen(test_lsa_t13_config: WindUpConfig) -> None: expected_pre_df = pd.read_parquet( Path(__file__).parents[0] / "test_data/LSA_T13_LSA_T12_check_pre_wsratio_v_dir_scen.parquet", ) - actual_pre_df = calc_wsratio_v_wd_scen( + actual_pre_df = benchmark( + calc_wsratio_v_wd_scen, test_name=test_name, ref_name=ref_name, ref_lat=ref_lat, From 9682a1428b63b23ad675adc854028ab4213b1805 Mon Sep 17 00:00:00 2001 From: aclerc Date: Fri, 13 Sep 2024 14:39:51 +0100 Subject: [PATCH 4/4] fix ruff issue --- tests/test_detrend.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_detrend.py b/tests/test_detrend.py index 6b069cf..3ed3010 100644 --- a/tests/test_detrend.py +++ b/tests/test_detrend.py @@ -3,6 +3,7 @@ import pandas as pd import pytest from pandas.testing import assert_frame_equal +from pytest_benchmark.fixture import BenchmarkFixture from wind_up.detrend import apply_wsratio_v_wd_scen, calc_wsratio_v_wd_scen, check_applied_detrend from wind_up.models import WindUpConfig @@ -66,7 +67,7 @@ def test_check_applied_detrend(test_lsa_t13_config: WindUpConfig) -> None: assert detrend_post_r2_improvement == pytest.approx(0.03776561982402227) -def test_calc_wsratio_v_wd_scen(benchmark, test_lsa_t13_config: WindUpConfig) -> None: +def test_calc_wsratio_v_wd_scen(benchmark: BenchmarkFixture, test_lsa_t13_config: WindUpConfig) -> None: # this test case borrows logic and results from check_applied_detrend where data which has already been detrended # is used to calculate the wsratio_v_wd_scen again to check it is flat cfg = test_lsa_t13_config