diff --git a/openfe/FeatureSelector.py b/openfe/FeatureSelector.py index b0e7337..7636a42 100644 --- a/openfe/FeatureSelector.py +++ b/openfe/FeatureSelector.py @@ -234,7 +234,7 @@ def get_metric(self): def get_estimator(self): if self.estimator is None: - params = {'n_jobs': self.n_jobs, 'importance_type': 'gain', 'n_estimators': 200, "verbose": 1 if self.verbose else -1 } + params = {'n_jobs': self.n_jobs, 'importance_type': 'gain', 'n_estimators': 200, "verbose": -1 } if self.task == 'classification': self.estimator = lgb.LGBMClassifier(**params) else: @@ -530,7 +530,7 @@ def stage2_select(self): self.myprint("Finish data processing.") if self.stage2_params is None: params = {"n_estimators": 1000, "importance_type": "gain", "num_leaves": 16, - "seed": 1, "n_jobs": self.n_jobs, "verbose": 1 if self.verbose else -1} + "seed": 1, "n_jobs": self.n_jobs, "verbose": -1} else: params = self.stage2_params if self.metric is not None: @@ -609,7 +609,7 @@ def _evaluate(self, data_temp, candidate_feature, train_y, val_y, train_init, va val_x = pd.DataFrame(data_temp[candidate_feature].loc[val_y.index]) if self.stage1_metric == 'predictive': params = {"n_estimators": 100, "importance_type": "gain", "num_leaves": 16, - "seed": 1, "deterministic": True, "n_jobs": 1, "verbose": 1 if self.verbose else -1 } + "seed": 1, "deterministic": True, "n_jobs": 1, "verbose": -1 } if self.metric is not None: params.update({"metric": self.metric}) if self.task == 'classification': diff --git a/openfe/openfe.py b/openfe/openfe.py index 8d7596c..3f670d8 100644 --- a/openfe/openfe.py +++ b/openfe/openfe.py @@ -406,7 +406,7 @@ def get_init_score(self, init_scores, use_train=False): label = self.label.copy() params = {"n_estimators": 10000, "learning_rate": 0.1, "metric": self.metric, - "seed": self.seed, "n_jobs": self.n_jobs, "verbose": 1 if self.verbose else -1 } + "seed": self.seed, "n_jobs": self.n_jobs, "verbose": -1} if self.task == "regression": gbm = lgb.LGBMRegressor(**params) else: @@ -536,7 +536,7 @@ def stage2_select(self): self.myprint("Finish data processing.") if self.stage2_params is None: params = {"n_estimators": 1000, "importance_type": "gain", "num_leaves": 16, - "seed": 1, "n_jobs": self.n_jobs, "verbose": 1 if self.verbose else -1 } + "seed": 1, "n_jobs": self.n_jobs, "verbose": -1 } else: params = self.stage2_params if self.metric is not None: @@ -602,7 +602,7 @@ def _evaluate(self, candidate_feature, train_y, val_y, train_init, val_init, ini val_x = pd.DataFrame(candidate_feature.data.loc[val_y.index]) if self.stage1_metric == 'predictive': params = {"n_estimators": 100, "importance_type": "gain", "num_leaves": 16, - "seed": 1, "deterministic": True, "n_jobs": 1, "verbose": 1 if self.verbose else -1 } + "seed": 1, "deterministic": True, "n_jobs": 1, "verbose": -1} if self.metric is not None: params.update({"metric": self.metric}) if self.task == 'classification': diff --git a/openfe/utils.py b/openfe/utils.py index 306884e..81ed18f 100644 --- a/openfe/utils.py +++ b/openfe/utils.py @@ -1,9 +1,9 @@ import traceback from .FeatureGenerator import Node, FNode -from concurrent.futures import ProcessPoolExecutor +from concurrent.futures import ProcessPoolExecutor, as_completed import pandas as pd import numpy as np - +from tqdm import tqdm def tree_to_formula(tree): if isinstance(tree, Node): @@ -135,8 +135,13 @@ def transform(X_train, X_test, new_features_list, n_jobs, name=""): n_train = len(X_train) ex = ProcessPoolExecutor(n_jobs) results = [] - for feature in new_features_list: - results.append(ex.submit(_cal, feature, n_train)) + with tqdm( + total=len(new_features_list), desc="Calculating new features..." + ) as progress_bar: + results = [ex.submit(_cal, feature, n_train) for feature in new_features_list] + for feature in as_completed(results): + progress_bar.set_postfix(feature=feature.result()[-1]) + progress_bar.update(1) ex.shutdown(wait=True) _train = [] _test = []