From a5ac200f9b497f3030ec489c3ef2d5131d8fbaaa Mon Sep 17 00:00:00 2001 From: threeTrave <3532077306@qq.com> Date: Fri, 30 May 2025 09:02:38 +0800 Subject: [PATCH 1/2] Show more detail when calculating features --- openfe/utils.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/openfe/utils.py b/openfe/utils.py index 306884e..81ed18f 100644 --- a/openfe/utils.py +++ b/openfe/utils.py @@ -1,9 +1,9 @@ import traceback from .FeatureGenerator import Node, FNode -from concurrent.futures import ProcessPoolExecutor +from concurrent.futures import ProcessPoolExecutor, as_completed import pandas as pd import numpy as np - +from tqdm import tqdm def tree_to_formula(tree): if isinstance(tree, Node): @@ -135,8 +135,13 @@ def transform(X_train, X_test, new_features_list, n_jobs, name=""): n_train = len(X_train) ex = ProcessPoolExecutor(n_jobs) results = [] - for feature in new_features_list: - results.append(ex.submit(_cal, feature, n_train)) + with tqdm( + total=len(new_features_list), desc="Calculating new features..." 
+ ) as progress_bar: + results = [ex.submit(_cal, feature, n_train) for feature in new_features_list] + for feature in as_completed(results): + progress_bar.set_postfix(feature=feature.result()[-1]) + progress_bar.update(1) ex.shutdown(wait=True) _train = [] _test = [] From 34204b3b68d56d7a82ca7ac1ee4d79ca62584020 Mon Sep 17 00:00:00 2001 From: threeTrave <3532077306@qq.com> Date: Fri, 30 May 2025 09:23:25 +0800 Subject: [PATCH 2/2] Fixed LightGBM verbosity issue --- openfe/FeatureSelector.py | 6 +++--- openfe/openfe.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/openfe/FeatureSelector.py b/openfe/FeatureSelector.py index b0e7337..7636a42 100644 --- a/openfe/FeatureSelector.py +++ b/openfe/FeatureSelector.py @@ -234,7 +234,7 @@ def get_metric(self): def get_estimator(self): if self.estimator is None: - params = {'n_jobs': self.n_jobs, 'importance_type': 'gain', 'n_estimators': 200, "verbose": 1 if self.verbose else -1 } + params = {'n_jobs': self.n_jobs, 'importance_type': 'gain', 'n_estimators': 200, "verbose": -1 } if self.task == 'classification': self.estimator = lgb.LGBMClassifier(**params) else: @@ -530,7 +530,7 @@ def stage2_select(self): self.myprint("Finish data processing.") if self.stage2_params is None: params = {"n_estimators": 1000, "importance_type": "gain", "num_leaves": 16, - "seed": 1, "n_jobs": self.n_jobs, "verbose": 1 if self.verbose else -1} + "seed": 1, "n_jobs": self.n_jobs, "verbose": -1} else: params = self.stage2_params if self.metric is not None: @@ -609,7 +609,7 @@ def _evaluate(self, data_temp, candidate_feature, train_y, val_y, train_init, va val_x = pd.DataFrame(data_temp[candidate_feature].loc[val_y.index]) if self.stage1_metric == 'predictive': params = {"n_estimators": 100, "importance_type": "gain", "num_leaves": 16, - "seed": 1, "deterministic": True, "n_jobs": 1, "verbose": 1 if self.verbose else -1 } + "seed": 1, "deterministic": True, "n_jobs": 1, "verbose": -1 } if self.metric is not None: 
params.update({"metric": self.metric}) if self.task == 'classification': diff --git a/openfe/openfe.py b/openfe/openfe.py index 8d7596c..3f670d8 100644 --- a/openfe/openfe.py +++ b/openfe/openfe.py @@ -406,7 +406,7 @@ def get_init_score(self, init_scores, use_train=False): label = self.label.copy() params = {"n_estimators": 10000, "learning_rate": 0.1, "metric": self.metric, - "seed": self.seed, "n_jobs": self.n_jobs, "verbose": 1 if self.verbose else -1 } + "seed": self.seed, "n_jobs": self.n_jobs, "verbose": -1} if self.task == "regression": gbm = lgb.LGBMRegressor(**params) else: @@ -536,7 +536,7 @@ def stage2_select(self): self.myprint("Finish data processing.") if self.stage2_params is None: params = {"n_estimators": 1000, "importance_type": "gain", "num_leaves": 16, - "seed": 1, "n_jobs": self.n_jobs, "verbose": 1 if self.verbose else -1 } + "seed": 1, "n_jobs": self.n_jobs, "verbose": -1 } else: params = self.stage2_params if self.metric is not None: @@ -602,7 +602,7 @@ def _evaluate(self, candidate_feature, train_y, val_y, train_init, val_init, ini val_x = pd.DataFrame(candidate_feature.data.loc[val_y.index]) if self.stage1_metric == 'predictive': params = {"n_estimators": 100, "importance_type": "gain", "num_leaves": 16, - "seed": 1, "deterministic": True, "n_jobs": 1, "verbose": 1 if self.verbose else -1 } + "seed": 1, "deterministic": True, "n_jobs": 1, "verbose": -1} if self.metric is not None: params.update({"metric": self.metric}) if self.task == 'classification':