From 6d5d7e20e5a11e12553260c579a1b20f49f951de Mon Sep 17 00:00:00 2001
From: izofat <gorkemkacar12@gmail.com>
Date: Fri, 6 Sep 2024 15:37:11 +0300
Subject: [PATCH 1/4] Optimize calc_wsratio_v_wd performance; note that the
 output dataframes differ from the previous implementation

---
 wind_up/detrend.py | 56 ++++++++++++++++++++++++++++------------------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/wind_up/detrend.py b/wind_up/detrend.py
index 605e7d1..5d8e493 100644
--- a/wind_up/detrend.py
+++ b/wind_up/detrend.py
@@ -31,40 +31,52 @@ def calc_wsratio_v_wd(
     # IEC says only use 4-16 m/s
     test_ws_ll = 4
     test_ws_ul = 16
-    ref_ws_ll = test_ws_ll * detrend_df[ref_ws_col].mean() / detrend_df[test_ws_col].mean()
-    ref_ws_ul = test_ws_ul * detrend_df[ref_ws_col].mean() / detrend_df[test_ws_col].mean()
-    detrend_df = detrend_df[(detrend_df[test_ws_col] >= test_ws_ll) & (detrend_df[test_ws_col] < test_ws_ul)]
-    detrend_df = detrend_df[(detrend_df[ref_ws_col] >= ref_ws_ll) & (detrend_df[ref_ws_col] < ref_ws_ul)]
+    test_ws_mean = detrend_df[test_ws_col].mean()
+    ref_ws_mean = detrend_df[ref_ws_col].mean()
+
+    ref_ws_ll = test_ws_ll * ref_ws_mean / test_ws_mean
+    ref_ws_ul = test_ws_ul * ref_ws_mean / test_ws_mean
+
+    detrend_df = detrend_df[
+        (detrend_df[test_ws_col] >= test_ws_ll) &
+        (detrend_df[test_ws_col] < test_ws_ul) &
+        (detrend_df[ref_ws_col] >= ref_ws_ll) &
+        (detrend_df[ref_ws_col] < ref_ws_ul)
+    ]
+
+    rows_per_hour = 3600 / timebase_s
+    min_count = min_hours * rows_per_hour
+    iec_ws_threshold = 8
+
+    # Vectorized circular difference calculation
+    directions = np.arange(0, 360)
+    circ_diffs = circ_diff(detrend_df[ref_wd_col].values[:, None], directions)
+
+    within_dir_bins = np.abs(circ_diffs) < dir_bin_width / 2
+    hours = within_dir_bins.sum(axis=0) / rows_per_hour
 
-    directions = []
-    hours = []
     test_rf_ws_roms = []
-    for d in list(range(0, 360, 1)):
-        detrend_df["circ_diff_to_d"] = circ_diff(detrend_df[ref_wd_col], d)
-        detrend_df["within_dir_bin"] = detrend_df["circ_diff_to_d"].abs() < dir_bin_width / 2
-        subsector_df = detrend_df[detrend_df["within_dir_bin"]].copy()
-        if len(subsector_df) > 0:
-            directions.append(d)
-            rows_per_hour = 3600 / timebase_s
-            hours.append(len(subsector_df) / rows_per_hour)
-            # 61400-12-1 requires >=24h data, >=6h above 8m/s, >= below 8m/s
-            min_count = min_hours * rows_per_hour
-            accept_sector = len(subsector_df) >= min_count
-            iec_ws_threshold = 8
-            accept_sector = accept_sector and ((subsector_df[test_ws_col] < iec_ws_threshold).sum() >= (min_count / 4))
-            accept_sector = accept_sector and ((subsector_df[test_ws_col] >= iec_ws_threshold).sum() >= (min_count / 4))
-            if accept_sector:
+    for idx, direction_mask in enumerate(within_dir_bins.T):
+        subsector_df = detrend_df[direction_mask]
+
+        if len(subsector_df) >= min_count:
+            below_thresh = (subsector_df[test_ws_col] < iec_ws_threshold).sum()
+            above_thresh = (subsector_df[test_ws_col] >= iec_ws_threshold).sum()
+
+            if below_thresh >= (min_count / 4) and above_thresh >= (min_count / 4):
                 rom = subsector_df[test_ws_col].mean() / subsector_df[ref_ws_col].mean()
                 test_rf_ws_roms.append(rom)
             else:
                 test_rf_ws_roms.append(np.nan)
+        else:
+            test_rf_ws_roms.append(np.nan)
 
     return pd.DataFrame(
         {
             "direction": directions,
             "hours": hours,
             "ws_rom": test_rf_ws_roms,
-        },
+        }
     )
 
 

From b7b74b639923284bce7bb60add9904c77de53322 Mon Sep 17 00:00:00 2001
From: izofat <gorkemkacar12@gmail.com>
Date: Sun, 8 Sep 2024 22:28:46 +0300
Subject: [PATCH 2/4] Improve performance by 11%, reducing test duration by 10%

---
 wind_up/detrend.py | 47 +++++++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/wind_up/detrend.py b/wind_up/detrend.py
index 8ecbd21..bfce5eb 100644
--- a/wind_up/detrend.py
+++ b/wind_up/detrend.py
@@ -42,10 +42,10 @@ def calc_wsratio_v_wd(
     ref_ws_ul = test_ws_ul * ref_ws_mean / test_ws_mean
 
     detrend_df = detrend_df[
-        (detrend_df[test_ws_col] >= test_ws_ll) &
-        (detrend_df[test_ws_col] < test_ws_ul) &
-        (detrend_df[ref_ws_col] >= ref_ws_ll) &
-        (detrend_df[ref_ws_col] < ref_ws_ul)
+        (detrend_df[test_ws_col] >= test_ws_ll)
+        & (detrend_df[test_ws_col] < test_ws_ul)
+        & (detrend_df[ref_ws_col] >= ref_ws_ll)
+        & (detrend_df[ref_ws_col] < ref_ws_ul)
     ]
 
     rows_per_hour = 3600 / timebase_s
@@ -53,32 +53,37 @@ def calc_wsratio_v_wd(
     iec_ws_threshold = 8
 
     # Vectorized circular difference calculation
-    directions = np.arange(0, 360)
-    circ_diffs = circ_diff(detrend_df[ref_wd_col].values[:, None], directions)
+    directions = np.arange(0, 360, 1)
+    circ_diffs = np.array([circ_diff(detrend_df[ref_wd_col], d) for d in directions])
 
     within_dir_bins = np.abs(circ_diffs) < dir_bin_width / 2
-    hours = within_dir_bins.sum(axis=0) / rows_per_hour
 
+    valid_directions = []
+    valid_hours = []
     test_rf_ws_roms = []
-    for idx, direction_mask in enumerate(within_dir_bins.T):
-        subsector_df = detrend_df[direction_mask]
-
-        if len(subsector_df) >= min_count:
-            below_thresh = (subsector_df[test_ws_col] < iec_ws_threshold).sum()
-            above_thresh = (subsector_df[test_ws_col] >= iec_ws_threshold).sum()
-
-            if below_thresh >= (min_count / 4) and above_thresh >= (min_count / 4):
-                rom = subsector_df[test_ws_col].mean() / subsector_df[ref_ws_col].mean()
-                test_rf_ws_roms.append(rom)
+    for i, direction in enumerate(directions):
+        subsector_df = detrend_df[within_dir_bins[i]].copy()
+
+        if (subsector_df_len := len(subsector_df)) > 0:
+            valid_directions.append(direction)
+            valid_hours.append(subsector_df_len / rows_per_hour)
+
+            if subsector_df_len >= min_count:
+                below_thresh = (subsector_df[test_ws_col] < iec_ws_threshold).sum()
+                above_thresh = (subsector_df[test_ws_col] >= iec_ws_threshold).sum()
+
+                if below_thresh >= (min_count / 4) and above_thresh >= (min_count / 4):
+                    rom = subsector_df[test_ws_col].mean() / subsector_df[ref_ws_col].mean()
+                    test_rf_ws_roms.append(rom)
+                else:
+                    test_rf_ws_roms.append(np.nan)
             else:
                 test_rf_ws_roms.append(np.nan)
-        else:
-            test_rf_ws_roms.append(np.nan)
 
     return pd.DataFrame(
         {
-            "direction": directions,
-            "hours": hours,
+            "direction": valid_directions,
+            "hours": valid_hours,
             "ws_rom": test_rf_ws_roms,
         }
     )

From 329ba8f4fff4186fbc41595cf0b6689d5138520f Mon Sep 17 00:00:00 2001
From: aclerc <Alex.Clerc@res-group.com>
Date: Fri, 13 Sep 2024 14:38:10 +0100
Subject: [PATCH 3/4] use benchmark in test_calc_wsratio_v_wd_scen

---
 pyproject.toml        | 1 +
 tests/test_detrend.py | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2e83e4a..e0bade4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,6 +48,7 @@ build-backend = "setuptools.build_meta"
 [project.optional-dependencies]
 dev = [
     'pytest',
+    'pytest-benchmark',
     'coverage',
     'poethepoet',
     'types-pyyaml',
diff --git a/tests/test_detrend.py b/tests/test_detrend.py
index b158499..6b069cf 100644
--- a/tests/test_detrend.py
+++ b/tests/test_detrend.py
@@ -66,7 +66,7 @@ def test_check_applied_detrend(test_lsa_t13_config: WindUpConfig) -> None:
     assert detrend_post_r2_improvement == pytest.approx(0.03776561982402227)
 
 
-def test_calc_wsratio_v_wd_scen(test_lsa_t13_config: WindUpConfig) -> None:
+def test_calc_wsratio_v_wd_scen(benchmark, test_lsa_t13_config: WindUpConfig) -> None:
     # this test case borrows logic and results from check_applied_detrend where data which has already been detrended
     # is used to calculate the wsratio_v_wd_scen again to check it is flat
     cfg = test_lsa_t13_config
@@ -85,7 +85,8 @@ def test_calc_wsratio_v_wd_scen(test_lsa_t13_config: WindUpConfig) -> None:
     expected_pre_df = pd.read_parquet(
         Path(__file__).parents[0] / "test_data/LSA_T13_LSA_T12_check_pre_wsratio_v_dir_scen.parquet",
     )
-    actual_pre_df = calc_wsratio_v_wd_scen(
+    actual_pre_df = benchmark(
+        calc_wsratio_v_wd_scen,
         test_name=test_name,
         ref_name=ref_name,
         ref_lat=ref_lat,

From 9682a1428b63b23ad675adc854028ab4213b1805 Mon Sep 17 00:00:00 2001
From: aclerc <Alex.Clerc@res-group.com>
Date: Fri, 13 Sep 2024 14:39:51 +0100
Subject: [PATCH 4/4] Fix ruff issue: add type annotation for benchmark fixture

---
 tests/test_detrend.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_detrend.py b/tests/test_detrend.py
index 6b069cf..3ed3010 100644
--- a/tests/test_detrend.py
+++ b/tests/test_detrend.py
@@ -3,6 +3,7 @@
 import pandas as pd
 import pytest
 from pandas.testing import assert_frame_equal
+from pytest_benchmark.fixture import BenchmarkFixture
 
 from wind_up.detrend import apply_wsratio_v_wd_scen, calc_wsratio_v_wd_scen, check_applied_detrend
 from wind_up.models import WindUpConfig
@@ -66,7 +67,7 @@ def test_check_applied_detrend(test_lsa_t13_config: WindUpConfig) -> None:
     assert detrend_post_r2_improvement == pytest.approx(0.03776561982402227)
 
 
-def test_calc_wsratio_v_wd_scen(benchmark, test_lsa_t13_config: WindUpConfig) -> None:
+def test_calc_wsratio_v_wd_scen(benchmark: BenchmarkFixture, test_lsa_t13_config: WindUpConfig) -> None:
     # this test case borrows logic and results from check_applied_detrend where data which has already been detrended
     # is used to calculate the wsratio_v_wd_scen again to check it is flat
     cfg = test_lsa_t13_config