From acd3fe81d79f744e5c86b697e765d453aa716c89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anton=20Jyrki=C3=A4inen?= Date: Mon, 8 Dec 2025 16:01:40 +0200 Subject: [PATCH] tz converts, clean up --- python/common/recluster.py | 13 ++++--------- python/common/utils.py | 2 -- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/python/common/recluster.py b/python/common/recluster.py index 99c4551..5533d8d 100644 --- a/python/common/recluster.py +++ b/python/common/recluster.py @@ -421,12 +421,10 @@ def calculate_cluster_features(df: pd.DataFrame, cluster_id_vars_on_2nd_level: l if "tst_median" in df.columns: df["tst_median"] = pd.to_datetime(df["tst_median"], errors="coerce", utc=True) - df["tst_median_ns"] = df["tst_median"].view("int64") + df["tst_median_ns"] = df["tst_median"].astype("int64") else: df["tst_median_ns"] = pd.Series(index=df.index, dtype="float64") - if "oday" in df.columns: - df["oday"] = pd.to_datetime(df["oday"], errors="coerce") clust_counts = df.drop_duplicates( subset=[ @@ -450,17 +448,14 @@ def calculate_cluster_features(df: pd.DataFrame, cluster_id_vars_on_2nd_level: l if "tst_median_ns" in median_vars.columns: median_vars["tst_median"] = pd.to_datetime(median_vars["tst_median_ns"], utc=True) + median_vars["tst_median"] = median_vars["tst_median"].dt.tz_convert("Europe/Helsinki") median_vars = median_vars.drop(columns=["tst_median_ns"]) res = median_vars.merge(clust_counts, on=cluster_id_vars_on_2nd_level, how="outer") res = res.merge(clust_delay_feats, on=cluster_id_vars_on_2nd_level, how="outer") - if "oday" in df.columns: - res["oday_min"] = df["oday"].min() - res["oday_max"] = df["oday"].max() - else: - res["oday_min"] = pd.NaT - res["oday_max"] = pd.NaT + res["oday_min"] = df["oday"].min() + res["oday_max"] = df["oday"].max() return res diff --git a/python/common/utils.py b/python/common/utils.py index 40ec4cf..f91377b 100644 --- a/python/common/utils.py +++ b/python/common/utils.py @@ -54,8 +54,6 @@ def get_target_oday(offset=1): return start_date def get_season(month, seasons_and_months): - logger.debug(month) - logger.debug(seasons_and_months) if month is None or pd.isna(month): return None