36 changes: 21 additions & 15 deletions src/random_tree_models/decisiontree/estimators.py
```diff
@@ -5,11 +5,17 @@
 from sklearn.utils.multiclass import check_classification_targets, type_of_target
 from sklearn.utils.validation import check_is_fitted, validate_data  # type: ignore
 
-import random_tree_models.params
 from random_tree_models.decisiontree.node import Node
 from random_tree_models.decisiontree.predict import predict_with_tree
 from random_tree_models.decisiontree.train import grow_tree
-from random_tree_models.params import MetricNames
+from random_tree_models.params import (
+    ColumnSelectionMethod,
+    ColumnSelectionParameters,
+    MetricNames,
+    ThresholdSelectionMethod,
+    ThresholdSelectionParameters,
+    TreeGrowthParameters,
+)
 
 
 class DecisionTreeTemplate(base.BaseEstimator):
@@ -19,32 +25,32 @@ class DecisionTreeTemplate(base.BaseEstimator):
     """
 
     max_depth: int
-    measure_name: random_tree_models.params.MetricNames
+    measure_name: MetricNames
     min_improvement: float
     lam: float
     frac_subsamples: float
     frac_features: float
     random_state: int
-    threshold_method: random_tree_models.params.ThresholdSelectionMethod
+    threshold_method: ThresholdSelectionMethod
     threshold_quantile: float
     n_thresholds: int
-    column_method: random_tree_models.params.ColumnSelectionMethod
+    column_method: ColumnSelectionMethod
     n_columns_to_try: int | None
     ensure_all_finite: bool
     tree_: Node
 
     def __init__(
         self,
-        measure_name: random_tree_models.params.MetricNames,
+        measure_name: MetricNames,
         max_depth: int = 2,
         min_improvement: float = 0.0,
         lam: float = 0.0,
         frac_subsamples: float = 1.0,
         frac_features: float = 1.0,
-        threshold_method: random_tree_models.params.ThresholdSelectionMethod = random_tree_models.params.ThresholdSelectionMethod.bruteforce,
+        threshold_method: ThresholdSelectionMethod = ThresholdSelectionMethod.bruteforce,
         threshold_quantile: float = 0.1,
         n_thresholds: int = 100,
-        column_method: random_tree_models.params.ColumnSelectionMethod = random_tree_models.params.ColumnSelectionMethod.ascending,
+        column_method: ColumnSelectionMethod = ColumnSelectionMethod.ascending,
         n_columns_to_try: int | None = None,
         random_state: int = 42,
         ensure_all_finite: bool = True,
@@ -64,20 +70,20 @@ def __init__(
         self.ensure_all_finite = ensure_all_finite
 
     def _organize_growth_parameters(self):
-        self.growth_params_ = random_tree_models.params.TreeGrowthParameters(
+        self.growth_params_ = TreeGrowthParameters(
            max_depth=self.max_depth,
            min_improvement=self.min_improvement,
            lam=-abs(self.lam),
            frac_subsamples=float(self.frac_subsamples),
            frac_features=float(self.frac_features),
            random_state=int(self.random_state),
-            threshold_params=random_tree_models.params.ThresholdSelectionParameters(
+            threshold_params=ThresholdSelectionParameters(
                method=self.threshold_method,
                quantile=self.threshold_quantile,
                n_thresholds=self.n_thresholds,
                random_state=int(self.random_state),
            ),
-            column_params=random_tree_models.params.ColumnSelectionParameters(
+            column_params=ColumnSelectionParameters(
                method=self.column_method,
                n_trials=self.n_columns_to_try,
            ),
@@ -142,10 +148,10 @@ def __init__(
         lam: float = 0.0,
         frac_subsamples: float = 1.0,
         frac_features: float = 1.0,
-        threshold_method: random_tree_models.params.ThresholdSelectionMethod = random_tree_models.params.ThresholdSelectionMethod.bruteforce,
+        threshold_method: ThresholdSelectionMethod = ThresholdSelectionMethod.bruteforce,
         threshold_quantile: float = 0.1,
         n_thresholds: int = 100,
-        column_method: random_tree_models.params.ColumnSelectionMethod = random_tree_models.params.ColumnSelectionMethod.ascending,
+        column_method: ColumnSelectionMethod = ColumnSelectionMethod.ascending,
         n_columns_to_try: int | None = None,
         random_state: int = 42,
         ensure_all_finite: bool = True,
@@ -215,10 +221,10 @@ def __init__(
         lam: float = 0.0,
         frac_subsamples: float = 1.0,
         frac_features: float = 1.0,
-        threshold_method: random_tree_models.params.ThresholdSelectionMethod = random_tree_models.params.ThresholdSelectionMethod.bruteforce,
+        threshold_method: ThresholdSelectionMethod = ThresholdSelectionMethod.bruteforce,
         threshold_quantile: float = 0.1,
         n_thresholds: int = 100,
-        column_method: random_tree_models.params.ColumnSelectionMethod = random_tree_models.params.ColumnSelectionMethod.ascending,
+        column_method: ColumnSelectionMethod = ColumnSelectionMethod.ascending,
         n_columns_to_try: int | None = None,
         random_state: int = 42,
         ensure_all_finite: bool = True,
```
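Note on the estimators change: `_organize_growth_parameters` packs the estimator's flat keyword arguments into the nested parameter objects used for tree growth. Below is a minimal standalone sketch of the equivalent construction, using only the classes and keyword arguments visible in this diff and the default values from the `__init__` signature above; it is illustrative and not part of the change.

```python
# Illustrative sketch (not part of the PR): building the same nested parameter
# objects that DecisionTreeTemplate._organize_growth_parameters assembles,
# with the default values from the __init__ signature shown above.
from random_tree_models.params import (
    ColumnSelectionMethod,
    ColumnSelectionParameters,
    ThresholdSelectionMethod,
    ThresholdSelectionParameters,
    TreeGrowthParameters,
)

growth_params = TreeGrowthParameters(
    max_depth=2,
    min_improvement=0.0,
    lam=-abs(0.0),  # the estimator negates lam before storing it, as in the diff
    frac_subsamples=1.0,
    frac_features=1.0,
    random_state=42,
    threshold_params=ThresholdSelectionParameters(
        method=ThresholdSelectionMethod.bruteforce,
        quantile=0.1,
        n_thresholds=100,
        random_state=42,
    ),
    column_params=ColumnSelectionParameters(
        method=ColumnSelectionMethod.ascending,
        n_trials=None,  # default n_columns_to_try from the signature above
    ),
)
```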
36 changes: 21 additions & 15 deletions src/random_tree_models/decisiontree/split.py
```diff
@@ -2,15 +2,22 @@
 
 import numpy as np
 
-import random_tree_models.params
 import random_tree_models.scoring as scoring
 from random_tree_models.decisiontree.node import Node
 from random_tree_models.decisiontree.split_objects import BestSplit
+from random_tree_models.params import (
+    ColumnSelectionMethod,
+    ColumnSelectionParameters,
+    MetricNames,
+    ThresholdSelectionMethod,
+    ThresholdSelectionParameters,
+    TreeGrowthParameters,
+)
 
 
 def select_thresholds(
     feature_values: np.ndarray,
-    threshold_params: random_tree_models.params.ThresholdSelectionParameters,
+    threshold_params: ThresholdSelectionParameters,
     rng: np.random.RandomState,
 ) -> np.ndarray:
     "Selects thresholds to use for splitting"
@@ -19,9 +26,9 @@ def select_thresholds(
     n_thresholds = threshold_params.n_thresholds
     num_quantile_steps = threshold_params.num_quantile_steps
 
-    if method == random_tree_models.params.ThresholdSelectionMethod.bruteforce:
+    if method == ThresholdSelectionMethod.bruteforce:
         return feature_values[1:]
-    elif method == random_tree_models.params.ThresholdSelectionMethod.random:
+    elif method == ThresholdSelectionMethod.random:
         if len(feature_values) - 1 <= n_thresholds:
             return feature_values[1:]
         else:
@@ -30,10 +37,10 @@
                 size=(n_thresholds,),
                 replace=False,
             )
-    elif method == random_tree_models.params.ThresholdSelectionMethod.quantile:
+    elif method == ThresholdSelectionMethod.quantile:
         qs = np.linspace(0, 1, num_quantile_steps)
         return np.quantile(feature_values[1:], qs)
-    elif method == random_tree_models.params.ThresholdSelectionMethod.uniform:
+    elif method == ThresholdSelectionMethod.uniform:
         x = np.linspace(
             feature_values.min(),
             feature_values.max(),
@@ -46,7 +53,7 @@
 
 def get_thresholds_and_target_groups(
     feature_values: np.ndarray,
-    threshold_params: random_tree_models.params.ThresholdSelectionParameters,
+    threshold_params: ThresholdSelectionParameters,
     rng: np.random.RandomState,
 ) -> T.Generator[T.Tuple[np.ndarray, np.ndarray, bool | None], None, None]:
     "Creates a generator for split finding, returning the used threshold, the target groups and a bool indicating if the default direction is left"
@@ -77,21 +84,21 @@
 
 def get_column(
     X: np.ndarray,
-    column_params: random_tree_models.params.ColumnSelectionParameters,
+    column_params: ColumnSelectionParameters,
     rng: np.random.RandomState,
 ) -> list[int]:
     # select column order to split on
     method = column_params.method
     n_columns_to_try = column_params.n_trials
 
     columns = list(range(X.shape[1]))
-    if method == random_tree_models.params.ColumnSelectionMethod.ascending:
+    if method == ColumnSelectionMethod.ascending:
         pass
-    elif method == random_tree_models.params.ColumnSelectionMethod.random:
+    elif method == ColumnSelectionMethod.random:
         columns = np.array(columns)
         rng.shuffle(columns)
         columns = columns.tolist()
-    elif method == random_tree_models.params.ColumnSelectionMethod.largest_delta:
+    elif method == ColumnSelectionMethod.largest_delta:
         deltas = X.max(axis=0) - X.min(axis=0)
         weights = deltas / deltas.sum()
         columns = np.array(columns)
@@ -114,8 +121,7 @@ def find_best_split(
     yhat: np.ndarray | None = None,
     g: np.ndarray | None = None,
     h: np.ndarray | None = None,
-    growth_params: random_tree_models.params.TreeGrowthParameters
-    | None = None,  # TODO: make required
+    growth_params: TreeGrowthParameters | None = None,  # TODO: make required
     rng: np.random.RandomState = np.random.RandomState(42),
 ) -> BestSplit:
     """Find the best split, detecting the "default direction" with missing data."""
@@ -141,7 +147,7 @@
         feature_values, growth_params.threshold_params, rng
     ):
         split_score = scoring.calc_split_score(
-            random_tree_models.params.MetricNames(measure_name),
+            MetricNames(measure_name),
            y,
            target_groups,
            yhat=yhat,
@@ -167,7 +173,7 @@
 def check_if_split_sensible(
     best: BestSplit,
     parent_node: Node | None,
-    growth_params: random_tree_models.params.TreeGrowthParameters,
+    growth_params: TreeGrowthParameters,
 ) -> tuple[bool, float | None]:
     "Verifies if split is sensible, considering score gain and left/right group sizes"
     parent_is_none = parent_node is None
```
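For reference, the four `ThresholdSelectionMethod` branches of `select_thresholds` reduce to plain numpy calls. The sketch below reproduces them on a toy feature column; it is hand-written rather than taken from the PR, and the final argument of the `np.linspace` call in the truncated `uniform` branch is an assumption.

```python
# Hand-written illustration (not from this PR) of what each
# ThresholdSelectionMethod branch of select_thresholds boils down to.
import numpy as np

feature_values = np.array([0.1, 0.4, 0.4, 0.9, 1.3, 2.0])  # assumed sorted
rng = np.random.RandomState(42)
n_thresholds = 3
num_quantile_steps = 5

bruteforce = feature_values[1:]  # every candidate except the smallest value
random_sub = rng.choice(feature_values[1:], size=(n_thresholds,), replace=False)
quantiles = np.quantile(feature_values[1:], np.linspace(0, 1, num_quantile_steps))
uniform_grid = np.linspace(feature_values.min(), feature_values.max(), n_thresholds)
```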
10 changes: 1 addition & 9 deletions src/random_tree_models/gradientboostedtrees.py
```diff
@@ -15,6 +15,7 @@
 
 import random_tree_models.decisiontree as dtree
 from random_tree_models.params import MetricNames
+from random_tree_models.utils import bool_to_float
 
 
 class GradientBoostedTreesTemplate(base.BaseEstimator):
@@ -134,15 +135,6 @@ def predict(self, X: np.ndarray) -> np.ndarray:
         return y
 
 
-def bool_to_float(x: bool) -> float:
-    if x == True:
-        return 1.0
-    elif x == False:
-        return -1.0
-    else:
-        raise ValueError(f"{x=}, expected bool")
-
-
 class GradientBoostedTreesClassifier(
     base.ClassifierMixin,
     GradientBoostedTreesTemplate,
```
9 changes: 9 additions & 0 deletions src/random_tree_models/utils.py
```diff
@@ -19,3 +19,12 @@ def _get_logger(level=logging.INFO):
 
 
 logger = _get_logger()
+
+
+def bool_to_float(x: bool) -> float:
+    if x == True:
+        return 1.0
+    elif x == False:
+        return -1.0
+    else:
+        raise ValueError(f"{x=}, expected bool")
```
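`bool_to_float` now lives in `random_tree_models.utils` and is imported from there in `gradientboostedtrees.py`. A minimal usage sketch, with the behaviour read off the function body above (because the comparisons use `==`, the integers 0 and 1 are also accepted):

```python
# Minimal usage sketch of the relocated helper.
from random_tree_models.utils import bool_to_float

assert bool_to_float(True) == 1.0
assert bool_to_float(False) == -1.0
try:
    bool_to_float("a")  # anything not equal to True or False raises
except ValueError:
    pass
```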
2 changes: 1 addition & 1 deletion src/random_tree_models/xgboost.py
```diff
@@ -27,7 +27,7 @@
 )
 
 import random_tree_models.decisiontree as dtree
-import random_tree_models.gradientboostedtrees as gbt
+import random_tree_models.utils as gbt
 from random_tree_models.params import MetricNames
 
 
```
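Re-pointing the `gbt` alias at `random_tree_models.utils` keeps `gbt.bool_to_float(...)` references in `xgboost.py` working without further edits, assuming the helper is reached through that alias; the actual call sites are not shown in this diff. A hypothetical example of such a call site:

```python
# Hypothetical call site of the kind the renamed alias preserves.
import random_tree_models.utils as gbt

labels = [True, False, True]
targets = [gbt.bool_to_float(label) for label in labels]  # -> [1.0, -1.0, 1.0]
```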
11 changes: 6 additions & 5 deletions tests/test_extratrees.py
```diff
@@ -2,8 +2,11 @@
 import pytest
 from sklearn.utils.estimator_checks import parametrize_with_checks
 
-import random_tree_models.decisiontree as dtree
 import random_tree_models.extratrees as et
+from random_tree_models.decisiontree import (
+    DecisionTreeClassifier,
+    DecisionTreeRegressor,
+)
 from random_tree_models.params import MetricNames
 from tests.conftest import expected_failed_checks
 
@@ -37,9 +40,7 @@ class TestExtraTreesRegressor:
     def test_fit(self):
         model = et.ExtraTreesRegressor()
         model.fit(self.X, self.y)
-        assert all(
-            [isinstance(model, dtree.DecisionTreeRegressor) for model in model.trees_]
-        )
+        assert all([isinstance(model, DecisionTreeRegressor) for model in model.trees_])
 
     def test_predict(self):
         model = et.ExtraTreesRegressor()
@@ -69,7 +70,7 @@ def test_fit(self):
         model.fit(self.X, self.y)
         assert not hasattr(self.model, "classes_")
         assert all(
-            [isinstance(model, dtree.DecisionTreeClassifier) for model in model.trees_]
+            [isinstance(model, DecisionTreeClassifier) for model in model.trees_]
         )
 
     def test_predict(self):
```
37 changes: 5 additions & 32 deletions tests/test_gradientboostedtrees.py
```diff
@@ -2,8 +2,10 @@
 import pytest
 from sklearn.utils.estimator_checks import parametrize_with_checks
 
-import random_tree_models.decisiontree as dtree
 import random_tree_models.gradientboostedtrees as gbt
+from random_tree_models.decisiontree import (
+    DecisionTreeRegressor,
+)
 from tests.conftest import expected_failed_checks
 
 
@@ -36,9 +38,7 @@ class TestGradientBoostedTreesRegressor:
     def test_fit(self):
         model = gbt.GradientBoostedTreesRegressor()
         model.fit(self.X, self.y)
-        assert all(
-            [isinstance(model, dtree.DecisionTreeRegressor) for model in model.trees_]
-        )
+        assert all([isinstance(model, DecisionTreeRegressor) for model in model.trees_])
 
     def test_predict(self):
         model = gbt.GradientBoostedTreesRegressor()
@@ -67,9 +67,7 @@ def test_fit(self):
         model = gbt.GradientBoostedTreesClassifier()
         model.fit(self.X, self.y)
         assert not hasattr(self.model, "classes_")
-        assert all(
-            [isinstance(model, dtree.DecisionTreeRegressor) for model in model.trees_]
-        )
+        assert all([isinstance(model, DecisionTreeRegressor) for model in model.trees_])
 
     def test_predict(self):
         model = gbt.GradientBoostedTreesClassifier()
@@ -88,28 +86,3 @@ def test_gbt_estimators_with_sklearn_checks(estimator, check):
     Reference: https://scikit-learn.org/stable/modules/generated/sklearn.utils.estimator_checks.parametrize_with_checks.html#sklearn.utils.estimator_checks.parametrize_with_checks
     """
     check(estimator)
-
-
-@pytest.mark.parametrize(
-    "x,exp,is_bad",
-    [
-        (True, 1, False),
-        (False, -1, False),
-        ("a", None, True),
-        (1, 1, False),
-        (0, -1, False),
-        (-1, None, True),
-        (None, None, True),
-    ],
-)
-def test_bool_to_float(x, exp, is_bad: bool):
-    try:
-        # line to test
-        res = gbt.bool_to_float(x)
-    except ValueError as ex:
-        if is_bad:
-            pass  # Failed expectedly to convert non-bool values
-    else:
-        if is_bad:
-            pytest.fail(f"Passed unexpectedly for non-bool value {x} returning {res}")
-        assert res == exp
```