From b68629bb84cb841c26c01cb1ec7d8b34878267b0 Mon Sep 17 00:00:00 2001 From: eschmidt42 <11818904+eschmidt42@users.noreply.github.com> Date: Sun, 17 Aug 2025 09:43:42 +0200 Subject: [PATCH] chore: moved tests from test_utils to test_params, and well fixed a bunch of other things --- .../decisiontree/estimators.py | 36 +-- src/random_tree_models/decisiontree/split.py | 36 +-- .../gradientboostedtrees.py | 10 +- src/random_tree_models/utils.py | 9 + src/random_tree_models/xgboost.py | 2 +- tests/test_extratrees.py | 11 +- tests/test_gradientboostedtrees.py | 37 +-- tests/test_leafweights.py | 11 +- tests/test_params.py | 216 ++++++++++++++++ tests/test_randomforest.py | 11 +- tests/test_utils.py | 240 ++---------------- tests/test_xgboost.py | 10 +- 12 files changed, 320 insertions(+), 309 deletions(-) create mode 100644 tests/test_params.py diff --git a/src/random_tree_models/decisiontree/estimators.py b/src/random_tree_models/decisiontree/estimators.py index 87c8b2c..df267d2 100644 --- a/src/random_tree_models/decisiontree/estimators.py +++ b/src/random_tree_models/decisiontree/estimators.py @@ -5,11 +5,17 @@ from sklearn.utils.multiclass import check_classification_targets, type_of_target from sklearn.utils.validation import check_is_fitted, validate_data # type: ignore -import random_tree_models.params from random_tree_models.decisiontree.node import Node from random_tree_models.decisiontree.predict import predict_with_tree from random_tree_models.decisiontree.train import grow_tree -from random_tree_models.params import MetricNames +from random_tree_models.params import ( + ColumnSelectionMethod, + ColumnSelectionParameters, + MetricNames, + ThresholdSelectionMethod, + ThresholdSelectionParameters, + TreeGrowthParameters, +) class DecisionTreeTemplate(base.BaseEstimator): @@ -19,32 +25,32 @@ class DecisionTreeTemplate(base.BaseEstimator): """ max_depth: int - measure_name: random_tree_models.params.MetricNames + measure_name: MetricNames min_improvement: float lam: float frac_subsamples: float frac_features: float random_state: int - threshold_method: random_tree_models.params.ThresholdSelectionMethod + threshold_method: ThresholdSelectionMethod threshold_quantile: float n_thresholds: int - column_method: random_tree_models.params.ColumnSelectionMethod + column_method: ColumnSelectionMethod n_columns_to_try: int | None ensure_all_finite: bool tree_: Node def __init__( self, - measure_name: random_tree_models.params.MetricNames, + measure_name: MetricNames, max_depth: int = 2, min_improvement: float = 0.0, lam: float = 0.0, frac_subsamples: float = 1.0, frac_features: float = 1.0, - threshold_method: random_tree_models.params.ThresholdSelectionMethod = random_tree_models.params.ThresholdSelectionMethod.bruteforce, + threshold_method: ThresholdSelectionMethod = ThresholdSelectionMethod.bruteforce, threshold_quantile: float = 0.1, n_thresholds: int = 100, - column_method: random_tree_models.params.ColumnSelectionMethod = random_tree_models.params.ColumnSelectionMethod.ascending, + column_method: ColumnSelectionMethod = ColumnSelectionMethod.ascending, n_columns_to_try: int | None = None, random_state: int = 42, ensure_all_finite: bool = True, @@ -64,20 +70,20 @@ def __init__( self.ensure_all_finite = ensure_all_finite def _organize_growth_parameters(self): - self.growth_params_ = random_tree_models.params.TreeGrowthParameters( + self.growth_params_ = TreeGrowthParameters( max_depth=self.max_depth, min_improvement=self.min_improvement, lam=-abs(self.lam), frac_subsamples=float(self.frac_subsamples), frac_features=float(self.frac_features), random_state=int(self.random_state), - threshold_params=random_tree_models.params.ThresholdSelectionParameters( + threshold_params=ThresholdSelectionParameters( method=self.threshold_method, quantile=self.threshold_quantile, n_thresholds=self.n_thresholds, random_state=int(self.random_state), ), - column_params=random_tree_models.params.ColumnSelectionParameters( + column_params=ColumnSelectionParameters( method=self.column_method, n_trials=self.n_columns_to_try, ), @@ -142,10 +148,10 @@ def __init__( lam: float = 0.0, frac_subsamples: float = 1.0, frac_features: float = 1.0, - threshold_method: random_tree_models.params.ThresholdSelectionMethod = random_tree_models.params.ThresholdSelectionMethod.bruteforce, + threshold_method: ThresholdSelectionMethod = ThresholdSelectionMethod.bruteforce, threshold_quantile: float = 0.1, n_thresholds: int = 100, - column_method: random_tree_models.params.ColumnSelectionMethod = random_tree_models.params.ColumnSelectionMethod.ascending, + column_method: ColumnSelectionMethod = ColumnSelectionMethod.ascending, n_columns_to_try: int | None = None, random_state: int = 42, ensure_all_finite: bool = True, @@ -215,10 +221,10 @@ def __init__( lam: float = 0.0, frac_subsamples: float = 1.0, frac_features: float = 1.0, - threshold_method: random_tree_models.params.ThresholdSelectionMethod = random_tree_models.params.ThresholdSelectionMethod.bruteforce, + threshold_method: ThresholdSelectionMethod = ThresholdSelectionMethod.bruteforce, threshold_quantile: float = 0.1, n_thresholds: int = 100, - column_method: random_tree_models.params.ColumnSelectionMethod = random_tree_models.params.ColumnSelectionMethod.ascending, + column_method: ColumnSelectionMethod = ColumnSelectionMethod.ascending, n_columns_to_try: int | None = None, random_state: int = 42, ensure_all_finite: bool = True, diff --git a/src/random_tree_models/decisiontree/split.py b/src/random_tree_models/decisiontree/split.py index e79c0df..25716a9 100644 --- a/src/random_tree_models/decisiontree/split.py +++ b/src/random_tree_models/decisiontree/split.py @@ -2,15 +2,22 @@ import numpy as np -import random_tree_models.params import random_tree_models.scoring as scoring from random_tree_models.decisiontree.node import Node from random_tree_models.decisiontree.split_objects import BestSplit +from random_tree_models.params import ( + ColumnSelectionMethod, + ColumnSelectionParameters, + MetricNames, + ThresholdSelectionMethod, + ThresholdSelectionParameters, + TreeGrowthParameters, +) def select_thresholds( feature_values: np.ndarray, - threshold_params: random_tree_models.params.ThresholdSelectionParameters, + threshold_params: ThresholdSelectionParameters, rng: np.random.RandomState, ) -> np.ndarray: "Selects thresholds to use for splitting" @@ -19,9 +26,9 @@ def select_thresholds( n_thresholds = threshold_params.n_thresholds num_quantile_steps = threshold_params.num_quantile_steps - if method == random_tree_models.params.ThresholdSelectionMethod.bruteforce: + if method == ThresholdSelectionMethod.bruteforce: return feature_values[1:] - elif method == random_tree_models.params.ThresholdSelectionMethod.random: + elif method == ThresholdSelectionMethod.random: if len(feature_values) - 1 <= n_thresholds: return feature_values[1:] else: @@ -30,10 +37,10 @@ def select_thresholds( size=(n_thresholds,), replace=False, ) - elif method == random_tree_models.params.ThresholdSelectionMethod.quantile: + elif method == ThresholdSelectionMethod.quantile: qs = np.linspace(0, 1, num_quantile_steps) return np.quantile(feature_values[1:], qs) - elif method == random_tree_models.params.ThresholdSelectionMethod.uniform: + elif method == ThresholdSelectionMethod.uniform: x = np.linspace( feature_values.min(), feature_values.max(), @@ -46,7 +53,7 @@ def select_thresholds( def get_thresholds_and_target_groups( feature_values: np.ndarray, - threshold_params: random_tree_models.params.ThresholdSelectionParameters, + threshold_params: ThresholdSelectionParameters, rng: np.random.RandomState, ) -> T.Generator[T.Tuple[np.ndarray, np.ndarray, bool | None], None, None]: "Creates a generator for split finding, returning the used threshold, the target groups and a bool indicating if the default direction is left" @@ -77,7 +84,7 @@ def get_thresholds_and_target_groups( def get_column( X: np.ndarray, - column_params: random_tree_models.params.ColumnSelectionParameters, + column_params: ColumnSelectionParameters, rng: np.random.RandomState, ) -> list[int]: # select column order to split on @@ -85,13 +92,13 @@ def get_column( n_columns_to_try = column_params.n_trials columns = list(range(X.shape[1])) - if method == random_tree_models.params.ColumnSelectionMethod.ascending: + if method == ColumnSelectionMethod.ascending: pass - elif method == random_tree_models.params.ColumnSelectionMethod.random: + elif method == ColumnSelectionMethod.random: columns = np.array(columns) rng.shuffle(columns) columns = columns.tolist() - elif method == random_tree_models.params.ColumnSelectionMethod.largest_delta: + elif method == ColumnSelectionMethod.largest_delta: deltas = X.max(axis=0) - X.min(axis=0) weights = deltas / deltas.sum() columns = np.array(columns) @@ -114,8 +121,7 @@ def find_best_split( yhat: np.ndarray | None = None, g: np.ndarray | None = None, h: np.ndarray | None = None, - growth_params: random_tree_models.params.TreeGrowthParameters - | None = None, # TODO: make required + growth_params: TreeGrowthParameters | None = None, # TODO: make required rng: np.random.RandomState = np.random.RandomState(42), ) -> BestSplit: """Find the best split, detecting the "default direction" with missing data.""" @@ -141,7 +147,7 @@ def find_best_split( feature_values, growth_params.threshold_params, rng ): split_score = scoring.calc_split_score( - random_tree_models.params.MetricNames(measure_name), + MetricNames(measure_name), y, target_groups, yhat=yhat, @@ -167,7 +173,7 @@ def find_best_split( def check_if_split_sensible( best: BestSplit, parent_node: Node | None, - growth_params: random_tree_models.params.TreeGrowthParameters, + growth_params: TreeGrowthParameters, ) -> tuple[bool, float | None]: "Verifies if split is sensible, considering score gain and left/right group sizes" parent_is_none = parent_node is None diff --git a/src/random_tree_models/gradientboostedtrees.py b/src/random_tree_models/gradientboostedtrees.py index 7bbe872..9857378 100644 --- a/src/random_tree_models/gradientboostedtrees.py +++ b/src/random_tree_models/gradientboostedtrees.py @@ -15,6 +15,7 @@ import random_tree_models.decisiontree as dtree from random_tree_models.params import MetricNames +from random_tree_models.utils import bool_to_float class GradientBoostedTreesTemplate(base.BaseEstimator): @@ -134,15 +135,6 @@ def predict(self, X: np.ndarray) -> np.ndarray: return y -def bool_to_float(x: bool) -> float: - if x == True: - return 1.0 - elif x == False: - return -1.0 - else: - raise ValueError(f"{x=}, expected bool") - - class GradientBoostedTreesClassifier( base.ClassifierMixin, GradientBoostedTreesTemplate, diff --git a/src/random_tree_models/utils.py b/src/random_tree_models/utils.py index 313a9a3..58bc1d3 100644 --- a/src/random_tree_models/utils.py +++ b/src/random_tree_models/utils.py @@ -19,3 +19,12 @@ def _get_logger(level=logging.INFO): logger = _get_logger() + + +def bool_to_float(x: bool) -> float: + if x == True: + return 1.0 + elif x == False: + return -1.0 + else: + raise ValueError(f"{x=}, expected bool") diff --git a/src/random_tree_models/xgboost.py b/src/random_tree_models/xgboost.py index 6a36310..a227f08 100644 --- a/src/random_tree_models/xgboost.py +++ b/src/random_tree_models/xgboost.py @@ -27,7 +27,7 @@ ) import random_tree_models.decisiontree as dtree -import random_tree_models.gradientboostedtrees as gbt +import random_tree_models.utils as gbt from random_tree_models.params import MetricNames diff --git a/tests/test_extratrees.py b/tests/test_extratrees.py index f1c2953..c35a9c5 100644 --- a/tests/test_extratrees.py +++ b/tests/test_extratrees.py @@ -2,8 +2,11 @@ import pytest from sklearn.utils.estimator_checks import parametrize_with_checks -import random_tree_models.decisiontree as dtree import random_tree_models.extratrees as et +from random_tree_models.decisiontree import ( + DecisionTreeClassifier, + DecisionTreeRegressor, +) from random_tree_models.params import MetricNames from tests.conftest import expected_failed_checks @@ -37,9 +40,7 @@ class TestExtraTreesRegressor: def test_fit(self): model = et.ExtraTreesRegressor() model.fit(self.X, self.y) - assert all( - [isinstance(model, dtree.DecisionTreeRegressor) for model in model.trees_] - ) + assert all([isinstance(model, DecisionTreeRegressor) for model in model.trees_]) def test_predict(self): model = et.ExtraTreesRegressor() @@ -69,7 +70,7 @@ def test_fit(self): model.fit(self.X, self.y) assert not hasattr(self.model, "classes_") assert all( - [isinstance(model, dtree.DecisionTreeClassifier) for model in model.trees_] + [isinstance(model, DecisionTreeClassifier) for model in model.trees_] ) def test_predict(self): diff --git a/tests/test_gradientboostedtrees.py b/tests/test_gradientboostedtrees.py index e2dbede..d939b7b 100644 --- a/tests/test_gradientboostedtrees.py +++ b/tests/test_gradientboostedtrees.py @@ -2,8 +2,10 @@ import pytest from sklearn.utils.estimator_checks import parametrize_with_checks -import random_tree_models.decisiontree as dtree import random_tree_models.gradientboostedtrees as gbt +from random_tree_models.decisiontree import ( + DecisionTreeRegressor, +) from tests.conftest import expected_failed_checks @@ -36,9 +38,7 @@ class TestGradientBoostedTreesRegressor: def test_fit(self): model = gbt.GradientBoostedTreesRegressor() model.fit(self.X, self.y) - assert all( - [isinstance(model, dtree.DecisionTreeRegressor) for model in model.trees_] - ) + assert all([isinstance(model, DecisionTreeRegressor) for model in model.trees_]) def test_predict(self): model = gbt.GradientBoostedTreesRegressor() @@ -67,9 +67,7 @@ def test_fit(self): model = gbt.GradientBoostedTreesClassifier() model.fit(self.X, self.y) assert not hasattr(self.model, "classes_") - assert all( - [isinstance(model, dtree.DecisionTreeRegressor) for model in model.trees_] - ) + assert all([isinstance(model, DecisionTreeRegressor) for model in model.trees_]) def test_predict(self): model = gbt.GradientBoostedTreesClassifier() @@ -88,28 +86,3 @@ def test_gbt_estimators_with_sklearn_checks(estimator, check): Reference: https://scikit-learn.org/stable/modules/generated/sklearn.utils.estimator_checks.parametrize_with_checks.html#sklearn.utils.estimator_checks.parametrize_with_checks """ check(estimator) - - -@pytest.mark.parametrize( - "x,exp,is_bad", - [ - (True, 1, False), - (False, -1, False), - ("a", None, True), - (1, 1, False), - (0, -1, False), - (-1, None, True), - (None, None, True), - ], -) -def test_bool_to_float(x, exp, is_bad: bool): - try: - # line to test - res = gbt.bool_to_float(x) - except ValueError as ex: - if is_bad: - pass # Failed expectedly to convert non-bool values - else: - if is_bad: - pytest.fail(f"Passed unexpectedly for non-bool value {x} returning {res}") - assert res == exp diff --git a/tests/test_leafweights.py b/tests/test_leafweights.py index 2b03d68..4f49643 100644 --- a/tests/test_leafweights.py +++ b/tests/test_leafweights.py @@ -2,8 +2,7 @@ import pytest import random_tree_models.leafweights as leafweights -import random_tree_models.params as utils -from random_tree_models.params import MetricNames +from random_tree_models.params import MetricNames, TreeGrowthParameters def test_leaf_weight_mean(): @@ -19,14 +18,14 @@ def test_leaf_weight_binary_classification_friedman2001(): def test_leaf_weight_xgboost(): g = np.array([1, 2, 3]) * 2 h = np.array([1, 2, 3]) * 4 - params = utils.TreeGrowthParameters(max_depth=2, lam=0.0) + params = TreeGrowthParameters(max_depth=2, lam=0.0) assert leafweights.leaf_weight_xgboost(g=g, h=h, growth_params=params) == -0.5 class Test_calc_leaf_weight: def test_error_for_unknown_scheme(self): y = np.array([1, 2, 3]) - growth_params = utils.TreeGrowthParameters(max_depth=2, lam=0.0) + growth_params = TreeGrowthParameters(max_depth=2, lam=0.0) with pytest.raises(NotImplementedError): leafweights.calc_leaf_weight( y=y, @@ -36,7 +35,7 @@ def test_error_for_unknown_scheme(self): def test_leaf_weight_none_if_y_empty(self): y = np.array([]) - growth_params = utils.TreeGrowthParameters(max_depth=2, lam=0.0) + growth_params = TreeGrowthParameters(max_depth=2, lam=0.0) weight = leafweights.calc_leaf_weight( y=y, growth_params=growth_params, measure_name=MetricNames.gini @@ -45,7 +44,7 @@ def test_leaf_weight_none_if_y_empty(self): def test_leaf_weight_float_if_y_not_empty(self): y = np.array([1, 2, 3]) - growth_params = utils.TreeGrowthParameters(max_depth=2, lam=0.0) + growth_params = TreeGrowthParameters(max_depth=2, lam=0.0) weight = leafweights.calc_leaf_weight( y=y, growth_params=growth_params, measure_name=MetricNames.variance diff --git a/tests/test_params.py b/tests/test_params.py new file mode 100644 index 0000000..547cdc8 --- /dev/null +++ b/tests/test_params.py @@ -0,0 +1,216 @@ +import pytest +from pydantic import ValidationError + +from random_tree_models.params import ( + ColumnSelectionMethod, + ColumnSelectionParameters, + ThresholdSelectionMethod, + ThresholdSelectionParameters, + TreeGrowthParameters, +) + + +def test_ColumnSelectionMethod(): + expected = ["ascending", "largest_delta", "random"] + assert list(ColumnSelectionMethod.__members__.keys()) == expected + + +def test_ThresholdSelectionMethod(): + expected = ["bruteforce", "quantile", "random", "uniform"] + assert list(ThresholdSelectionMethod.__members__.keys()) == expected + + +# method, quantile, random_state, n_thresholds +class TestThresholdSelectionParameters: + def test_expected_okay(self): + params = ThresholdSelectionParameters( + method=ThresholdSelectionMethod.quantile, + quantile=0.1, + random_state=0, + n_thresholds=100, + ) + assert params.method == ThresholdSelectionMethod.quantile + assert params.quantile == 0.1 + assert params.random_state == 0 + assert params.n_thresholds == 100 + assert params.num_quantile_steps == 11 + + def test_method_fail(self): + try: + _ = ThresholdSelectionParameters( + method="wuppy", # type: ignore + quantile=0.1, + random_state=0, + n_thresholds=100, + ) + except ValueError: + pass # f"init with unknown method should fail: {ex}" + else: + raise + + @pytest.mark.parametrize( + "q,fail", + [(-0.1, True), (0.0, True), (0.5, False), (1.0, True), (1.1, True)], + ) + def test_quantile(self, q: float, fail: bool): + try: + _ = ThresholdSelectionParameters( + method=ThresholdSelectionMethod.quantile, + quantile=q, + random_state=0, + n_thresholds=100, + ) + except ValueError as ex: + if fail: + pass # f"init with quantile {q} should fail: {ex}" + else: + pytest.fail(f"init with quantile {q} should fail: {ex}") + else: + if fail: + raise + + @pytest.mark.parametrize( + "random_state,fail", + [ + (-1, True), + (0, False), + (1, False), + ], + ) + def test_random_state(self, random_state: int, fail: bool): + try: + _ = ThresholdSelectionParameters( + method=ThresholdSelectionMethod.quantile, + quantile=0.1, + random_state=random_state, + n_thresholds=100, + ) + except ValueError as ex: + if fail: + pass # f"init with {random_state=} should fail: {ex}" + else: + pytest.fail(f"init with {random_state=} should fail: {ex}") + else: + if fail: + pytest.fail(f"init with {random_state=} should fail") + + @pytest.mark.parametrize( + "n_thresholds,fail", + [ + (-1, True), + (0, True), + ( + 1, + False, + ), + (10, False), + ], + ) + def test_n_thresholds(self, n_thresholds: int, fail: bool): + try: + _ = ThresholdSelectionParameters( + method=ThresholdSelectionMethod.quantile, + quantile=0.1, + random_state=42, + n_thresholds=n_thresholds, + ) + except ValueError as ex: + if fail: + pass # f"init with {n_thresholds=} should fail: {ex}" + else: + pytest.fail(f"init with {n_thresholds=} should fail: {ex}") + else: + if fail: + raise + + +def test_ColumnSelectionParameters(): + params = ColumnSelectionParameters(method=ColumnSelectionMethod.random, n_trials=10) + assert params.method == ColumnSelectionMethod.random + assert params.n_trials == 10 + + +class TestTreeGrowthParameters: + def test_expected_okay(self): + params = TreeGrowthParameters( + max_depth=10, + min_improvement=0.0, + lam=0.0, + frac_subsamples=1.0, + frac_features=1.0, + random_state=0, + threshold_params=ThresholdSelectionParameters( + method=ThresholdSelectionMethod.quantile, + quantile=0.1, + random_state=0, + n_thresholds=100, + ), + column_params=ColumnSelectionParameters( + method=ColumnSelectionMethod.random, n_trials=10 + ), + ) + assert params.max_depth == 10 + assert params.min_improvement == 0.0 + assert params.lam == 0.0 + assert params.frac_subsamples == 1.0 + assert params.frac_features == 1.0 + assert params.random_state == 0 + assert isinstance( + params.threshold_params, + ThresholdSelectionParameters, + ) + assert isinstance(params.column_params, ColumnSelectionParameters) + + @pytest.mark.parametrize( + "frac_subsamples,fail", + [ + (-0.1, True), + (0.0, True), + (0.5, False), + (1.0, False), + (1.1, True), + ], + ) + def test_frac_subsamples(self, frac_subsamples: float, fail: bool): + try: + _ = TreeGrowthParameters( + max_depth=10, + frac_subsamples=frac_subsamples, + ) + except ValueError as ex: + if fail: + pass # f"init with {frac_subsamples=} should fail: {ex}" + else: + pytest.fail(f"init with {frac_subsamples=} should fail: {ex}") + else: + if fail: + pytest.fail(f"init with {frac_subsamples=} should fail") + + @pytest.mark.parametrize( + "frac_features,fail", + [ + (-0.1, True), + (0.0, True), + (0.5, False), + (1.0, False), + (1.1, True), + ], + ) + def test_frac_features(self, frac_features: float, fail: bool): + try: + _ = TreeGrowthParameters( + max_depth=10, + frac_features=frac_features, + ) + except ValueError as ex: + if fail: + pass # f"init with {frac_features=} should fail: {ex}" + else: + pytest.fail(f"init with {frac_features=} should fail: {ex}") + else: + if fail: + pytest.fail(f"init with {frac_features=} should fail") + + def test_fail_if_max_depth_missing(self): + with pytest.raises(ValidationError): + _ = TreeGrowthParameters() # type: ignore diff --git a/tests/test_randomforest.py b/tests/test_randomforest.py index 6d4dac7..ce377d5 100644 --- a/tests/test_randomforest.py +++ b/tests/test_randomforest.py @@ -2,8 +2,11 @@ import pytest from sklearn.utils.estimator_checks import parametrize_with_checks -import random_tree_models.decisiontree as dtree import random_tree_models.randomforest as rf +from random_tree_models.decisiontree import ( + DecisionTreeClassifier, + DecisionTreeRegressor, +) from tests.conftest import expected_failed_checks @@ -36,9 +39,7 @@ class TestRandomForestRegressor: def test_fit(self): model = rf.RandomForestRegressor() model.fit(self.X, self.y) - assert all( - [isinstance(model, dtree.DecisionTreeRegressor) for model in model.trees_] - ) + assert all([isinstance(model, DecisionTreeRegressor) for model in model.trees_]) def test_predict(self): model = rf.RandomForestRegressor() @@ -68,7 +69,7 @@ def test_fit(self): model.fit(self.X, self.y) assert not hasattr(self.model, "classes_") assert all( - [isinstance(model, dtree.DecisionTreeClassifier) for model in model.trees_] + [isinstance(model, DecisionTreeClassifier) for model in model.trees_] ) def test_predict(self): diff --git a/tests/test_utils.py b/tests/test_utils.py index a6a301f..7c852fa 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,222 +1,9 @@ import logging import pytest -from pydantic import ValidationError +import random_tree_models.utils import random_tree_models.utils as utils -from random_tree_models.params import ( - ColumnSelectionMethod, - ColumnSelectionParameters, - ThresholdSelectionMethod, - ThresholdSelectionParameters, - TreeGrowthParameters, -) - - -def test_ColumnSelectionMethod(): - expected = ["ascending", "largest_delta", "random"] - assert list(ColumnSelectionMethod.__members__.keys()) == expected - - -def test_ThresholdSelectionMethod(): - expected = ["bruteforce", "quantile", "random", "uniform"] - assert list(ThresholdSelectionMethod.__members__.keys()) == expected - - -# method, quantile, random_state, n_thresholds -class TestThresholdSelectionParameters: - def test_expected_okay(self): - params = ThresholdSelectionParameters( - method=ThresholdSelectionMethod.quantile, - quantile=0.1, - random_state=0, - n_thresholds=100, - ) - assert params.method == ThresholdSelectionMethod.quantile - assert params.quantile == 0.1 - assert params.random_state == 0 - assert params.n_thresholds == 100 - assert params.num_quantile_steps == 11 - - def test_method_fail(self): - try: - _ = ThresholdSelectionParameters( - method="wuppy", # type: ignore - quantile=0.1, - random_state=0, - n_thresholds=100, - ) - except ValueError: - pass # f"init with unknown method should fail: {ex}" - else: - raise - - @pytest.mark.parametrize( - "q,fail", - [(-0.1, True), (0.0, True), (0.5, False), (1.0, True), (1.1, True)], - ) - def test_quantile(self, q: float, fail: bool): - try: - _ = ThresholdSelectionParameters( - method=ThresholdSelectionMethod.quantile, - quantile=q, - random_state=0, - n_thresholds=100, - ) - except ValueError as ex: - if fail: - pass # f"init with quantile {q} should fail: {ex}" - else: - pytest.fail(f"init with quantile {q} should fail: {ex}") - else: - if fail: - raise - - @pytest.mark.parametrize( - "random_state,fail", - [ - (-1, True), - (0, False), - (1, False), - ], - ) - def test_random_state(self, random_state: int, fail: bool): - try: - _ = ThresholdSelectionParameters( - method=ThresholdSelectionMethod.quantile, - quantile=0.1, - random_state=random_state, - n_thresholds=100, - ) - except ValueError as ex: - if fail: - pass # f"init with {random_state=} should fail: {ex}" - else: - pytest.fail(f"init with {random_state=} should fail: {ex}") - else: - if fail: - pytest.fail(f"init with {random_state=} should fail") - - @pytest.mark.parametrize( - "n_thresholds,fail", - [ - (-1, True), - (0, True), - ( - 1, - False, - ), - (10, False), - ], - ) - def test_n_thresholds(self, n_thresholds: int, fail: bool): - try: - _ = ThresholdSelectionParameters( - method=ThresholdSelectionMethod.quantile, - quantile=0.1, - random_state=42, - n_thresholds=n_thresholds, - ) - except ValueError as ex: - if fail: - pass # f"init with {n_thresholds=} should fail: {ex}" - else: - pytest.fail(f"init with {n_thresholds=} should fail: {ex}") - else: - if fail: - raise - - -def test_ColumnSelectionParameters(): - params = ColumnSelectionParameters(method=ColumnSelectionMethod.random, n_trials=10) - assert params.method == ColumnSelectionMethod.random - assert params.n_trials == 10 - - -class TestTreeGrowthParameters: - def test_expected_okay(self): - params = TreeGrowthParameters( - max_depth=10, - min_improvement=0.0, - lam=0.0, - frac_subsamples=1.0, - frac_features=1.0, - random_state=0, - threshold_params=ThresholdSelectionParameters( - method=ThresholdSelectionMethod.quantile, - quantile=0.1, - random_state=0, - n_thresholds=100, - ), - column_params=ColumnSelectionParameters( - method=ColumnSelectionMethod.random, n_trials=10 - ), - ) - assert params.max_depth == 10 - assert params.min_improvement == 0.0 - assert params.lam == 0.0 - assert params.frac_subsamples == 1.0 - assert params.frac_features == 1.0 - assert params.random_state == 0 - assert isinstance( - params.threshold_params, - ThresholdSelectionParameters, - ) - assert isinstance(params.column_params, ColumnSelectionParameters) - - @pytest.mark.parametrize( - "frac_subsamples,fail", - [ - (-0.1, True), - (0.0, True), - (0.5, False), - (1.0, False), - (1.1, True), - ], - ) - def test_frac_subsamples(self, frac_subsamples: float, fail: bool): - try: - _ = TreeGrowthParameters( - max_depth=10, - frac_subsamples=frac_subsamples, - ) - except ValueError as ex: - if fail: - pass # f"init with {frac_subsamples=} should fail: {ex}" - else: - pytest.fail(f"init with {frac_subsamples=} should fail: {ex}") - else: - if fail: - pytest.fail(f"init with {frac_subsamples=} should fail") - - @pytest.mark.parametrize( - "frac_features,fail", - [ - (-0.1, True), - (0.0, True), - (0.5, False), - (1.0, False), - (1.1, True), - ], - ) - def test_frac_features(self, frac_features: float, fail: bool): - try: - _ = TreeGrowthParameters( - max_depth=10, - frac_features=frac_features, - ) - except ValueError as ex: - if fail: - pass # f"init with {frac_features=} should fail: {ex}" - else: - pytest.fail(f"init with {frac_features=} should fail: {ex}") - else: - if fail: - pytest.fail(f"init with {frac_features=} should fail") - - def test_fail_if_max_depth_missing(self): - with pytest.raises(ValidationError): - _ = TreeGrowthParameters() # type: ignore def test_get_logger(): @@ -224,3 +11,28 @@ def test_get_logger(): assert isinstance(logger, logging.Logger) assert logger.name == "rich" assert logger.level == logging.INFO + + +@pytest.mark.parametrize( + "x,exp,is_bad", + [ + (True, 1, False), + (False, -1, False), + ("a", None, True), + (1, 1, False), + (0, -1, False), + (-1, None, True), + (None, None, True), + ], +) +def test_bool_to_float(x, exp, is_bad: bool): + try: + # line to test + res = random_tree_models.utils.bool_to_float(x) + except ValueError as ex: + if is_bad: + pass # Failed expectedly to convert non-bool values + else: + if is_bad: + pytest.fail(f"Passed unexpectedly for non-bool value {x} returning {res}") + assert res == exp diff --git a/tests/test_xgboost.py b/tests/test_xgboost.py index aada2a5..188a7f2 100644 --- a/tests/test_xgboost.py +++ b/tests/test_xgboost.py @@ -2,8 +2,8 @@ import pytest from sklearn.utils.estimator_checks import parametrize_with_checks -import random_tree_models.decisiontree as dtree import random_tree_models.xgboost as xgboost +from random_tree_models.decisiontree import DecisionTreeRegressor from tests.conftest import expected_failed_checks @@ -36,9 +36,7 @@ class TestXGBoostRegressor: def test_fit(self): model = xgboost.XGBoostRegressor() model.fit(self.X, self.y) - assert all( - [isinstance(model, dtree.DecisionTreeRegressor) for model in model.trees_] - ) + assert all([isinstance(model, DecisionTreeRegressor) for model in model.trees_]) def test_predict(self): model = xgboost.XGBoostRegressor() @@ -67,9 +65,7 @@ def test_fit(self): model = xgboost.XGBoostClassifier() model.fit(self.X, self.y) assert not hasattr(self.model, "classes_") - assert all( - [isinstance(model, dtree.DecisionTreeRegressor) for model in model.trees_] - ) + assert all([isinstance(model, DecisionTreeRegressor) for model in model.trees_]) def test_predict(self): model = xgboost.XGBoostClassifier()