diff --git a/nbs/core/decision-tree.ipynb b/nbs/core/decision-tree.ipynb
index 2c8c7b3..16e7d9b 100644
--- a/nbs/core/decision-tree.ipynb
+++ b/nbs/core/decision-tree.ipynb
@@ -63,7 +63,11 @@
     "import seaborn as sns\n",
     "import sklearn.datasets as sk_datasets\n",
     "\n",
-    "import random_tree_models.decisiontree as dtree\n",
+    "from random_tree_models.decisiontree import (\n",
+    "    DecisionTreeClassifier,\n",
+    "    DecisionTreeRegressor,\n",
+    ")\n",
+    "from random_tree_models.decisiontree.visualize import show_tree\n",
     "from random_tree_models.scoring import MetricNames"
    ]
   },
@@ -111,7 +115,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "model = dtree.DecisionTreeClassifier(measure_name=MetricNames.gini, max_depth=4)"
+    "model = DecisionTreeClassifier(measure_name=MetricNames.gini, max_depth=4)"
    ]
   },
   {
@@ -138,7 +142,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dtree.show_tree(model)"
+    "show_tree(model)"
    ]
   },
   {
@@ -216,7 +220,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "model = dtree.DecisionTreeRegressor(measure_name=MetricNames.variance, max_depth=2)"
+    "model = DecisionTreeRegressor(measure_name=MetricNames.variance, max_depth=2)"
    ]
   },
   {
@@ -234,7 +238,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dtree.show_tree(model)"
+    "show_tree(model)"
    ]
   },
   {
diff --git a/nbs/core/extra-trees.ipynb b/nbs/core/extra-trees.ipynb
index 123bd62..ed44855 100644
--- a/nbs/core/extra-trees.ipynb
+++ b/nbs/core/extra-trees.ipynb
@@ -41,10 +41,10 @@
     "import seaborn as sns\n",
     "import sklearn.datasets as sk_datasets\n",
     "\n",
-    "import random_tree_models.decisiontree as dtree\n",
     "import random_tree_models.extratrees as et\n",
     "from random_tree_models.scoring import MetricNames\n",
-    "from random_tree_models.utils import ThresholdSelectionMethod"
+    "from random_tree_models.params import ThresholdSelectionMethod\n",
+    "from random_tree_models.decisiontree.visualize import show_tree"
    ]
   },
   {
@@ -143,7 +143,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dtree.show_tree(model.trees_[0])"
+    "show_tree(model.trees_[0])"
    ]
   },
   {
@@ -249,7 +249,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dtree.show_tree(model.trees_[0])"
+    "show_tree(model.trees_[0])"
    ]
   },
   {
diff --git a/nbs/core/gradient-boosted-trees.ipynb b/nbs/core/gradient-boosted-trees.ipynb
index f6f9490..81bd07e 100644
--- a/nbs/core/gradient-boosted-trees.ipynb
+++ b/nbs/core/gradient-boosted-trees.ipynb
@@ -71,7 +71,7 @@
     "import seaborn as sns\n",
     "import sklearn.datasets as sk_datasets\n",
     "\n",
-    "import random_tree_models.decisiontree as dtree\n",
+    "from random_tree_models.decisiontree.visualize import show_tree\n",
     "import random_tree_models.gradientboostedtrees as gbtree\n",
     "from random_tree_models.scoring import MetricNames"
    ]
@@ -160,7 +160,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dtree.show_tree(model.trees_[0])"
+    "show_tree(model.trees_[0])"
    ]
   },
   {
@@ -256,7 +256,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dtree.show_tree(model.trees_[0])"
+    "show_tree(model.trees_[0])"
    ]
   },
   {
diff --git a/nbs/core/isolation-forest.ipynb b/nbs/core/isolation-forest.ipynb
index 9c2c12b..5ced2af 100644
--- a/nbs/core/isolation-forest.ipynb
+++ b/nbs/core/isolation-forest.ipynb
@@ -56,9 +56,9 @@
     "import seaborn as sns\n",
     "import sklearn.datasets as sk_datasets\n",
     "\n",
-    "import random_tree_models.decisiontree as dtree\n",
+    "from random_tree_models.decisiontree.visualize import show_tree\n",
     "import random_tree_models.isolationforest as iforest\n",
-    "from random_tree_models.utils import ColumnSelectionMethod, ThresholdSelectionMethod"
+    "from random_tree_models.params import ColumnSelectionMethod, ThresholdSelectionMethod"
    ]
   },
   {
@@ -111,7 +111,6 @@
    "source": [
     "frac_subsamples = 2 / 3\n",
     "frac_features = 1  # math.sqrt(X.shape[1]) / X.shape[1]\n",
-    "frac_subsamples, frac_features, X.shape[1]\n",
     "\n",
     "# threshold_method =  ThresholdSelectionMethod.uniform  # selects a random threshold from the linear space between the min and max values in X\n",
     "threshold_method = (\n",
@@ -167,7 +166,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dtree.show_tree(model.trees_[0])"
+    "show_tree(model.trees_[0])"
    ]
   },
   {
diff --git a/nbs/core/random-forest.ipynb b/nbs/core/random-forest.ipynb
index 39d9773..9f5448b 100644
--- a/nbs/core/random-forest.ipynb
+++ b/nbs/core/random-forest.ipynb
@@ -41,9 +41,9 @@
     "import seaborn as sns\n",
     "import sklearn.datasets as sk_datasets\n",
     "\n",
-    "import random_tree_models.decisiontree as dtree\n",
+    "from random_tree_models.decisiontree.visualize import show_tree\n",
     "import random_tree_models.randomforest as rf\n",
-    "from random_tree_models.scoring import MetricNames"
+    "from random_tree_models.params import MetricNames"
    ]
   },
   {
@@ -135,7 +135,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dtree.show_tree(model.trees_[0])"
+    "show_tree(model.trees_[0])"
    ]
   },
   {
@@ -238,7 +238,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dtree.show_tree(model.trees_[0])"
+    "show_tree(model.trees_[0])"
    ]
   },
   {
diff --git a/nbs/core/robust-random-cut-forest.ipynb b/nbs/core/robust-random-cut-forest.ipynb
index 52ac9ae..034230f 100644
--- a/nbs/core/robust-random-cut-forest.ipynb
+++ b/nbs/core/robust-random-cut-forest.ipynb
@@ -55,9 +55,9 @@
     "import seaborn as sns\n",
     "import sklearn.datasets as sk_datasets\n",
     "\n",
-    "import random_tree_models.decisiontree as dtree\n",
+    "from random_tree_models.decisiontree.visualize import show_tree\n",
     "import random_tree_models.isolationforest as iforest\n",
-    "from random_tree_models.utils import ColumnSelectionMethod, ThresholdSelectionMethod"
+    "from random_tree_models.params import ColumnSelectionMethod, ThresholdSelectionMethod"
    ]
   },
   {
@@ -164,7 +164,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dtree.show_tree(model.trees_[0])"
+    "show_tree(model.trees_[0])"
    ]
   },
   {
diff --git a/nbs/core/xgboost.ipynb b/nbs/core/xgboost.ipynb
index 8c8541f..4813e46 100644
--- a/nbs/core/xgboost.ipynb
+++ b/nbs/core/xgboost.ipynb
@@ -88,7 +88,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "$$ \\text{loss}^{(t)} = \\sum_{i=1}^n l(y_i, \\hat{y}^{(t-1)}_i + f^{(t)}(x_i) ) + \\Omega \\left( f^{(t)} \\right) $$ "
+    "$$ \\text{loss} ^ {(t)} = \\sum_{i=1}^n l(y_i, \\hat{y}^{(t-1)}_i + f^{(t)}(x_i) ) + \\Omega \\left( f^{(t)} \\right) $$"
    ]
   },
   {
@@ -97,7 +97,7 @@
    "metadata": {},
    "source": [
     "For the regularization the authors use\n",
-    "$$ \\Omega (f_t) = \\gamma N^{(t)}_\\text{leafs} + \\frac{1}{2} \\lambda \\sum^{N^{(t)}_\\text{leafs}}_j w_j^2$$\n",
+    "$$ \\Omega (f_t) = \\gamma N^{(t)}_\\text{leafs} + \\frac{1}{2} \\lambda \\sum^{N^{(t)}_\\text{leafs}}_j w_j^2 $$\n",
     "\n",
     "where $\\gamma$ is some constant and $w_j$ is a leaf weight (seems like the $\\gamma_{jm}$ from Friedman et al. but isn't clarified)"
    ]
@@ -284,10 +284,9 @@
     "import sklearn.datasets as sk_datasets\n",
     "from scipy import stats\n",
     "\n",
-    "import random_tree_models.decisiontree as dtree\n",
-    "import random_tree_models.gradientboostedtrees as gbtree\n",
+    "from random_tree_models.decisiontree.visualize import show_tree\n",
     "import random_tree_models.xgboost as xgboost\n",
-    "from random_tree_models.scoring import MetricNames"
+    "from random_tree_models.params import MetricNames"
    ]
   },
   {
@@ -355,7 +354,7 @@
    "outputs": [],
    "source": [
     "model = xgboost.XGBoostClassifier(\n",
-    "    measure_name=\"xgboost\", max_depth=2, n_trees=3, lam=0.0\n",
+    "    measure_name=MetricNames.xgboost, max_depth=2, n_trees=3, lam=0.0\n",
     ")"
    ]
   },
@@ -374,7 +373,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dtree.show_tree(model.trees_[0])"
+    "show_tree(model.trees_[0])"
    ]
   },
   {
@@ -449,7 +448,7 @@
    "outputs": [],
    "source": [
     "model = xgboost.XGBoostRegressor(\n",
-    "    measure_name=\"xgboost\", max_depth=2, n_trees=3, lam=0.0\n",
+    "    measure_name=MetricNames.xgboost, max_depth=2, n_trees=3, lam=0.0\n",
     ")"
    ]
   },
@@ -468,7 +467,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dtree.show_tree(model.trees_[0])"
+    "show_tree(model.trees_[0])"
    ]
   },
   {
@@ -610,7 +609,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dtree.show_tree(model.trees_[0])"
+    "show_tree(model.trees_[0])"
    ]
   },
   {
@@ -719,7 +718,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dtree.show_tree(model.trees_[0])"
+    "show_tree(model.trees_[0])"
    ]
   },
   {
diff --git a/nbs/dev/xgboost-profiling-histogramming-yay-or-nay.ipynb b/nbs/dev/xgboost-profiling-histogramming-yay-or-nay.ipynb
index 899d2ed..2b3ddf7 100644
--- a/nbs/dev/xgboost-profiling-histogramming-yay-or-nay.ipynb
+++ b/nbs/dev/xgboost-profiling-histogramming-yay-or-nay.ipynb
@@ -165,9 +165,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "execution_stats_reg_vanilla = get_class_stats(\n",
-    "    False, None, n_samples_arr, n_features_arr\n",
-    ")"
+    "execution_stats_reg_vanilla = get_class_stats(False, 256, n_samples_arr, n_features_arr)"
    ]
   },
   {
@@ -363,9 +361,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "execution_stats_class_vanilla = get_reg_stats(\n",
-    "    False, None, n_samples_arr, n_features_arr\n",
-    ")"
+    "execution_stats_class_vanilla = get_reg_stats(False, 256, n_samples_arr, n_features_arr)"
    ]
   },
   {
@@ -499,7 +495,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.11"
+   "version": "3.13.3"
   }
  },
  "nbformat": 4,
diff --git a/pyproject.toml b/pyproject.toml
index 494de84..f370462 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,6 +33,8 @@ build-backend = "maturin"
 [dependency-groups]
 test = [
     "pytest>=7.3.1",
+    "dirty-equals>=0.9.0",
+    "inline-snapshot>=0.27.2",
 ]
 nb = [
     "ipywidgets>=8.0.6",
@@ -44,6 +46,6 @@ dev = [
     "snakeviz>=2.2.0",
     "pip-audit>=2.9.0",
     "pytest-cov>=6.2.1",
-     {include-group = "nb"},
-     {include-group = "test"},
+    {include-group = "nb"},
+    {include-group = "test"},
 ]
diff --git a/src/random_tree_models/decisiontree.py b/src/random_tree_models/decisiontree.py
deleted file mode 100644
index 9e9bc13..0000000
--- a/src/random_tree_models/decisiontree.py
+++ /dev/null
@@ -1,829 +0,0 @@
-import typing as T
-import uuid
-
-import numpy as np
-import sklearn.base as base
-from pydantic import (
-    ConfigDict,
-    Field,
-    StrictBool,
-    StrictFloat,
-    StrictInt,
-    StrictStr,
-)
-from pydantic.dataclasses import dataclass
-from rich import print as rprint
-from rich.tree import Tree
-from sklearn.utils.multiclass import check_classification_targets, type_of_target
-from sklearn.utils.validation import (
-    check_is_fitted,
-    validate_data,  # type: ignore
-)
-
-import random_tree_models.leafweights as leafweights
-import random_tree_models.scoring as scoring
-import random_tree_models.utils as utils
-from random_tree_models.scoring import MetricNames
-
-logger = utils.logger
-
-
-@dataclass(validate_on_init=True)
-class SplitScore:
-    name: StrictStr  # name of the score used
-    value: StrictFloat | None = None  # optimization value gini etc
-
-
-@dataclass
-class Node:
-    """Decision node in a decision tree"""
-
-    # Stuff for making a decision
-    array_column: StrictInt | None = None  # index of the column to use
-    threshold: float | None = None  # threshold for decision
-    prediction: float | None = None  # value to use for predictions
-    default_is_left: bool | None = None  # default direction is x is nan
-
-    # decendants
-    right: "Node | None" = None  # right decendany of type Node
-    left: "Node | None" = None  # left decendant of type Node
-
-    # misc info
-    measure: SplitScore | None = None
-
-    n_obs: StrictInt | None = None  # number of observations in node
-    reason: StrictStr | None = None  # place for some comment
-
-    depth: StrictInt | None = None  # depth of the node
-
-    def __post_init__(self):
-        # unique identifier of the node
-        self.node_id = uuid.uuid4()
-
-    @property
-    def is_leaf(self) -> bool:
-        return self.left is None and self.right is None
-
-
-def check_is_baselevel(y: np.ndarray, depth: int, max_depth: int) -> T.Tuple[bool, str]:
-    """Verifies if the tree traversal reached the baselevel / a leaf
-    * group homogeneous / cannot sensibly be splitted further
-    * no data in the group
-    * max depth reached
-    """
-    if max_depth is not None and depth >= max_depth:
-        return (True, "max depth reached")
-    elif len(np.unique(y)) == 1:
-        return (True, "homogenous group")
-    elif len(y) <= 1:
-        return (True, "<= 1 data point in group")
-    else:
-        return (False, "")
-
-
-@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
-class BestSplit:
-    score: StrictFloat
-    column: StrictInt
-    threshold: StrictFloat
-    target_groups: np.ndarray = Field(default_factory=lambda: np.zeros(10))
-    default_is_left: StrictBool | None = None
-
-
-def select_thresholds(
-    feature_values: np.ndarray,
-    threshold_params: utils.ThresholdSelectionParameters,
-    rng: np.random.RandomState,
-) -> np.ndarray:
-    "Selects thresholds to use for splitting"
-
-    method = threshold_params.method
-    n_thresholds = threshold_params.n_thresholds
-    num_quantile_steps = threshold_params.num_quantile_steps
-
-    if method == utils.ThresholdSelectionMethod.bruteforce:
-        return feature_values[1:]
-    elif method == utils.ThresholdSelectionMethod.random:
-        if len(feature_values) - 1 <= n_thresholds:
-            return feature_values[1:]
-        else:
-            return rng.choice(
-                feature_values[1:],
-                size=(n_thresholds,),
-                replace=False,
-            )
-    elif method == utils.ThresholdSelectionMethod.quantile:
-        qs = np.linspace(0, 1, num_quantile_steps)
-        return np.quantile(feature_values[1:], qs)
-    elif method == utils.ThresholdSelectionMethod.uniform:
-        x = np.linspace(
-            feature_values.min(),
-            feature_values.max(),
-            n_thresholds + 2,
-        )
-        return rng.choice(x[1:], size=[1])
-    else:
-        raise NotImplementedError(f"Unknown threshold selection method: {method}")
-
-
-def get_thresholds_and_target_groups(
-    feature_values: np.ndarray,
-    threshold_params: utils.ThresholdSelectionParameters,
-    rng: np.random.RandomState,
-) -> T.Generator[T.Tuple[np.ndarray, np.ndarray, bool | None], None, None]:
-    "Creates a generator for split finding, returning the used threshold, the target groups and a bool indicating if the default direction is left"
-    is_missing = np.isnan(feature_values)
-    is_finite = np.logical_not(is_missing)
-    all_finite = is_finite.all()
-
-    if all_finite:
-        default_direction_is_left = None
-        thresholds = select_thresholds(feature_values, threshold_params, rng)
-
-        for threshold in thresholds:
-            target_groups = feature_values < threshold
-            yield (threshold, target_groups, default_direction_is_left)
-    else:
-        finite_feature_values = feature_values[is_finite]
-        thresholds = select_thresholds(finite_feature_values, threshold_params, rng)
-
-        for threshold in thresholds:
-            # default direction left - feature value <= threshold or missing  (i.e. missing are included left of the threshold)
-            target_groups = np.logical_or(feature_values < threshold, is_missing)
-            yield (threshold, target_groups, True)
-
-            # default direction right - feature value <= threshold and finite (i.e. missing are included right of the threshold)
-            target_groups = np.logical_and(feature_values < threshold, is_finite)
-            yield (threshold, target_groups, False)
-
-
-def get_column(
-    X: np.ndarray,
-    column_params: utils.ColumnSelectionParameters,
-    rng: np.random.RandomState,
-) -> list[int]:
-    # select column order to split on
-    method = column_params.method
-    n_columns_to_try = column_params.n_trials
-
-    columns = list(range(X.shape[1]))
-    if method == utils.ColumnSelectionMethod.ascending:
-        pass
-    elif method == utils.ColumnSelectionMethod.random:
-        columns = np.array(columns)
-        rng.shuffle(columns)
-        columns = columns.tolist()
-    elif method == utils.ColumnSelectionMethod.largest_delta:
-        deltas = X.max(axis=0) - X.min(axis=0)
-        weights = deltas / deltas.sum()
-        columns = np.array(columns)
-        columns = rng.choice(columns, p=weights, size=len(columns), replace=False)
-        columns = columns.tolist()
-    else:
-        raise NotImplementedError(
-            f"Unknown column selection method: {column_params.method}"
-        )
-    if n_columns_to_try is not None:
-        columns = columns[:n_columns_to_try]
-
-    return columns
-
-
-def find_best_split(
-    X: np.ndarray,
-    y: np.ndarray,
-    measure_name: str,
-    yhat: np.ndarray | None = None,
-    g: np.ndarray | None = None,
-    h: np.ndarray | None = None,
-    growth_params: utils.TreeGrowthParameters | None = None,  # TODO: make required
-    rng: np.random.RandomState = np.random.RandomState(42),
-) -> BestSplit:
-    """Find the best split, detecting the "default direction" with missing data."""
-
-    if len(np.unique(y)) == 1:
-        raise ValueError(
-            f"Tried to find a split for homogenous y: {y[:3]} ... {y[-3:]}"
-        )
-
-    best = None  # this will be an BestSplit instance
-
-    if growth_params is None:
-        raise ValueError(f"{growth_params=} but is not allowed to be None")
-
-    for array_column in get_column(X, growth_params.column_params, rng):
-        feature_values = X[:, array_column]
-
-        for (
-            threshold,
-            target_groups,
-            default_is_left,
-        ) in get_thresholds_and_target_groups(
-            feature_values, growth_params.threshold_params, rng
-        ):
-            split_score = scoring.calc_split_score(
-                scoring.MetricNames(measure_name),
-                y,
-                target_groups,
-                yhat=yhat,
-                g=g,
-                h=h,
-                growth_params=growth_params,
-            )
-
-            if best is None or split_score > best.score:
-                best = BestSplit(
-                    score=float(split_score),
-                    column=int(array_column),
-                    threshold=float(threshold),
-                    target_groups=target_groups,
-                    default_is_left=default_is_left,
-                )
-
-    if best is None:
-        raise ValueError(f"Something went wrong {best=} cannot be None.")
-    return best
-
-
-def check_if_split_sensible(
-    best: BestSplit,
-    parent_node: Node | None,
-    growth_params: utils.TreeGrowthParameters,
-) -> tuple[bool, float | None]:
-    "Verifies if split is sensible, considering score gain and left/right group sizes"
-    parent_is_none = parent_node is None
-    if parent_is_none:
-        return False, None
-
-    measure_is_none = parent_node.measure is None
-    if measure_is_none:
-        return False, None
-
-    value_is_none = parent_node.measure.value is None  # type: ignore
-    if value_is_none:
-        return False, None
-
-    # score gain
-    gain = best.score - parent_node.measure.value  # type: ignore
-    is_insufficient_gain = gain < growth_params.min_improvement
-
-    # left/right group assignment
-    all_on_one_side = bool(best.target_groups.all())
-    all_on_other_side = bool(np.logical_not(best.target_groups).all())
-    is_all_onesided = all_on_one_side or all_on_other_side
-
-    is_not_sensible = is_all_onesided or is_insufficient_gain
-
-    return is_not_sensible, gain
-
-
-def calc_leaf_weight_and_split_score(
-    y: np.ndarray,
-    measure_name: scoring.MetricNames,
-    growth_params: utils.TreeGrowthParameters,
-    g: np.ndarray | None = None,
-    h: np.ndarray | None = None,
-) -> tuple[float | None, float]:
-    leaf_weight = leafweights.calc_leaf_weight(y, measure_name, growth_params, g=g, h=h)
-
-    yhat = leaf_weight * np.ones_like(y)
-    score = scoring.calc_split_score(
-        measure_name,
-        y,
-        np.ones_like(y, dtype=bool),
-        yhat=yhat,
-        g=g,
-        h=h,
-        growth_params=growth_params,
-    )
-
-    return leaf_weight, score
-
-
-def select_arrays_for_child_node(
-    go_left: bool,
-    best: BestSplit,
-    X: np.ndarray,
-    y: np.ndarray,
-    g: np.ndarray | None = None,
-    h: np.ndarray | None = None,
-) -> tuple[np.ndarray, np.ndarray, np.ndarray | None, np.ndarray | None]:
-    mask = best.target_groups == go_left
-    _X = X[mask, :]
-    _y = y[mask]
-    _g = g[mask] if g is not None else None
-    _h = h[mask] if h is not None else None
-    return _X, _y, _g, _h
-
-
-def grow_tree(
-    X: np.ndarray,
-    y: np.ndarray,
-    measure_name: MetricNames,
-    growth_params: utils.TreeGrowthParameters,
-    parent_node: Node | None = None,
-    depth: int = 0,
-    g: np.ndarray | None = None,
-    h: np.ndarray | None = None,
-    random_state: int = 42,
-    **kwargs,
-) -> Node:
-    """Implementation of the Classification And Regression Tree (CART) algorithm
-
-    Args:
-        X (np.ndarray): Input feature values to do thresholding on.
-        y (np.ndarray): Target values.
-        measure_name (str): Values indicating which functions in scoring.SplitScoreMetrics and leafweights.LeafWeightSchemes to call.
-        parent_node (Node, optional): Parent node in tree. Defaults to None.
-        depth (int, optional): Current tree depth. Defaults to 0.
-        growth_params (utils.TreeGrowthParameters, optional): Parameters controlling tree growth. Defaults to None.
-        g (np.ndarray, optional): Boosting and loss specific precomputed 1st order derivative dloss/dyhat. Defaults to None.
-        h (np.ndarray, optional): Boosting and loss specific precomputed 2nd order derivative d^2loss/dyhat^2. Defaults to None.
-
-    Raises:
-        ValueError: Fails if parent node passes an empty y array.
-
-    Returns:
-        Node: Tree node with leaf weight, node score and potential child nodes.
-
-    Note:
-    Currently measure_name controls how the split score and the leaf weights are computed.
-
-    But only the decision tree algorithm directly uses y for that and can predict y using the leaf weight values directly.
-
-    For the boosting algorithms g and h are used to compute split score and leaf weights. Their leaf weights
-    sometimes also need post-processing, e.g. for binary classification. Computation of g and h and post-processing is not
-    done here but in the respective class implementations of the algorithms.
-    """
-
-    n_obs = len(y)
-    if n_obs == 0:
-        raise ValueError(
-            f"Something went wrong. {parent_node=} handed down an empty set of data points."
-        )
-
-    is_baselevel, reason = check_is_baselevel(
-        y, depth, max_depth=growth_params.max_depth
-    )
-    if parent_node is None:
-        scoring.reset_incrementing_score()
-
-    # compute leaf weight (for prediction) and node score (for split gain check)
-    leaf_weight, score = calc_leaf_weight_and_split_score(
-        y, measure_name, growth_params, g, h
-    )
-
-    if is_baselevel:  # end of the line buddy
-        return Node(
-            prediction=leaf_weight,
-            measure=SplitScore(measure_name, value=score),
-            n_obs=n_obs,
-            reason=reason,
-            depth=depth,
-        )
-
-    # find best split
-    rng = np.random.RandomState(random_state)
-
-    best = find_best_split(
-        X, y, measure_name, g=g, h=h, growth_params=growth_params, rng=rng
-    )
-
-    # check if improvement due to split is below minimum requirement
-    is_not_sensible_split, gain = check_if_split_sensible(
-        best, parent_node, growth_params
-    )
-
-    if is_not_sensible_split:
-        reason = f"gain due split ({gain=}) lower than {growth_params.min_improvement=} or all data points assigned to one side (is left {best.target_groups.mean()=:.2%})"
-        leaf_node = Node(
-            prediction=leaf_weight,
-            measure=SplitScore(measure_name, value=score),
-            n_obs=n_obs,
-            reason=reason,
-            depth=depth,
-        )
-        return leaf_node
-
-    # create new parent node for subsequent child nodes
-    new_node = Node(
-        array_column=best.column,
-        threshold=best.threshold,
-        prediction=leaf_weight,
-        default_is_left=best.default_is_left,
-        measure=SplitScore(measure_name, best.score),
-        n_obs=n_obs,
-        reason="",
-        depth=depth,
-    )
-    random_state_left, random_state_right = rng.randint(0, 2**32, size=2)
-
-    # descend left
-    _X, _y, _g, _h = select_arrays_for_child_node(True, best, X, y, g, h)
-    new_node.left = grow_tree(
-        _X,
-        _y,
-        measure_name=measure_name,
-        growth_params=growth_params,
-        parent_node=new_node,
-        depth=depth + 1,
-        g=_g,
-        h=_h,
-        random_state=random_state_left,
-    )
-
-    # descend right
-    _X, _y, _g, _h = select_arrays_for_child_node(False, best, X, y, g, h)
-    new_node.right = grow_tree(
-        _X,
-        _y,
-        measure_name=measure_name,
-        growth_params=growth_params,
-        parent_node=new_node,
-        depth=depth + 1,
-        g=_g,
-        h=_h,
-        random_state=random_state_right,
-    )
-
-    return new_node
-
-
-def find_leaf_node(node: Node, x: np.ndarray) -> Node:
-    "Traverses tree to find the leaf corresponding to x"
-
-    if node.is_leaf:
-        return node
-
-    is_missing = np.isnan(x[node.array_column])
-    if is_missing:
-        go_left = node.default_is_left
-        if go_left is None:
-            raise ValueError(
-                f"{x[node.array_column]=} is missing but was not observed as a feature that can be missing during training."
-            )
-    else:
-        go_left = x[node.array_column] < node.threshold
-
-    if go_left:
-        if node.left is not None:
-            node = find_leaf_node(node.left, x)
-        else:
-            raise ValueError(f"Oddly tried to access node.left even though it is None.")
-    else:
-        if node.right is not None:
-            node = find_leaf_node(node.right, x)
-        else:
-            raise ValueError(
-                f"Oddly tried to access node.right even though it is None."
-            )
-
-    return node
-
-
-def predict_with_tree(tree: Node, X: np.ndarray) -> np.ndarray:
-    "Traverse a previously built tree to make one prediction per row in X"
-    if not isinstance(tree, Node):
-        raise ValueError(
-            f"Passed `tree` needs to be an instantiation of Node, got {tree=}"
-        )
-    n_obs = len(X)
-    predictions = []
-
-    for i in range(n_obs):
-        leaf_node = find_leaf_node(tree, X[i, :])
-
-        predictions.append(leaf_node.prediction)
-
-    predictions = np.array(predictions)
-    return predictions
-
-
-class DecisionTreeTemplate(base.BaseEstimator):
-    """Template for DecisionTree classes
-
-    Based on: https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator
-    """
-
-    max_depth: int
-    measure_name: scoring.MetricNames
-    min_improvement: float
-    lam: float
-    frac_subsamples: float
-    frac_features: float
-    random_state: int
-    threshold_method: utils.ThresholdSelectionMethod
-    threshold_quantile: float
-    n_thresholds: int
-    column_method: utils.ColumnSelectionMethod
-    n_columns_to_try: int | None
-    ensure_all_finite: bool
-    tree_: Node
-
-    def __init__(
-        self,
-        measure_name: scoring.MetricNames,
-        max_depth: int = 2,
-        min_improvement: float = 0.0,
-        lam: float = 0.0,
-        frac_subsamples: float = 1.0,
-        frac_features: float = 1.0,
-        threshold_method: utils.ThresholdSelectionMethod = utils.ThresholdSelectionMethod.bruteforce,
-        threshold_quantile: float = 0.1,
-        n_thresholds: int = 100,
-        column_method: utils.ColumnSelectionMethod = utils.ColumnSelectionMethod.ascending,
-        n_columns_to_try: int | None = None,
-        random_state: int = 42,
-        ensure_all_finite: bool = True,
-    ) -> None:
-        self.max_depth = max_depth
-        self.measure_name = measure_name
-        self.min_improvement = min_improvement
-        self.lam = lam
-        self.frac_subsamples = frac_subsamples
-        self.frac_features = frac_features
-        self.random_state = random_state
-        self.threshold_method = threshold_method
-        self.threshold_quantile = threshold_quantile
-        self.n_thresholds = n_thresholds
-        self.column_method = column_method
-        self.n_columns_to_try = n_columns_to_try
-        self.ensure_all_finite = ensure_all_finite
-
-    def _organize_growth_parameters(self):
-        self.growth_params_ = utils.TreeGrowthParameters(
-            max_depth=self.max_depth,
-            min_improvement=self.min_improvement,
-            lam=-abs(self.lam),
-            frac_subsamples=float(self.frac_subsamples),
-            frac_features=float(self.frac_features),
-            random_state=int(self.random_state),
-            threshold_params=utils.ThresholdSelectionParameters(
-                method=self.threshold_method,
-                quantile=self.threshold_quantile,
-                n_thresholds=self.n_thresholds,
-                random_state=int(self.random_state),
-            ),
-            column_params=utils.ColumnSelectionParameters(
-                method=self.column_method,
-                n_trials=self.n_columns_to_try,
-            ),
-        )
-
-    def _select_samples_and_features(
-        self, X: np.ndarray, y: np.ndarray
-    ) -> T.Tuple[np.ndarray, np.ndarray, np.ndarray]:
-        "Sub-samples rows and columns from X and y"
-        if not hasattr(self, "growth_params_"):
-            raise ValueError(f"Try calling `fit` first.")
-
-        ix = np.arange(len(X))
-        rng = np.random.RandomState(self.growth_params_.random_state)
-        if self.growth_params_.frac_subsamples < 1.0:
-            n_samples = int(self.growth_params_.frac_subsamples * len(X))
-            ix_samples = rng.choice(ix, size=n_samples, replace=False)
-        else:
-            ix_samples = ix
-
-        if self.frac_features < 1.0:
-            n_columns = int(X.shape[1] * self.frac_features)
-            ix_features = rng.choice(
-                np.arange(X.shape[1]),
-                size=n_columns,
-                replace=False,
-            )
-        else:
-            ix_features = np.arange(X.shape[1])
-
-        _X = X[ix_samples, :]
-        _X = _X[:, ix_features]
-
-        _y = y[ix_samples]
-        return _X, _y, ix_features
-
-    def _select_features(self, X: np.ndarray, ix_features: np.ndarray) -> np.ndarray:
-        return X[:, ix_features]
-
-    def fit(
-        self,
-        X: np.ndarray,
-        y: np.ndarray,
-    ) -> "DecisionTreeTemplate":
-        raise NotImplementedError()
-
-    def predict(self, X: np.ndarray) -> np.ndarray:
-        raise NotImplementedError()
-
-
-class DecisionTreeRegressor(base.RegressorMixin, DecisionTreeTemplate):
-    """DecisionTreeRegressor
-
-    Based on: https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator
-    """
-
-    def __init__(
-        self,
-        measure_name: MetricNames = MetricNames.variance,
-        max_depth: int = 2,
-        min_improvement: float = 0.0,
-        lam: float = 0.0,
-        frac_subsamples: float = 1.0,
-        frac_features: float = 1.0,
-        threshold_method: utils.ThresholdSelectionMethod = utils.ThresholdSelectionMethod.bruteforce,
-        threshold_quantile: float = 0.1,
-        n_thresholds: int = 100,
-        column_method: utils.ColumnSelectionMethod = utils.ColumnSelectionMethod.ascending,
-        n_columns_to_try: int | None = None,
-        random_state: int = 42,
-        ensure_all_finite: bool = True,
-    ) -> None:
-        super().__init__(
-            measure_name=measure_name,
-            max_depth=max_depth,
-            min_improvement=min_improvement,
-            lam=lam,
-            frac_subsamples=frac_subsamples,
-            frac_features=frac_features,
-            threshold_method=threshold_method,
-            threshold_quantile=threshold_quantile,
-            n_thresholds=n_thresholds,
-            column_method=column_method,
-            n_columns_to_try=n_columns_to_try,
-            random_state=random_state,
-            ensure_all_finite=ensure_all_finite,
-        )
-
-    def fit(
-        self,
-        X: np.ndarray,
-        y: np.ndarray,
-        **kwargs,
-    ) -> "DecisionTreeRegressor":
-        self._organize_growth_parameters()
-
-        X, y = validate_data(self, X, y, ensure_all_finite=False)
-
-        _X, _y, self.ix_features_ = self._select_samples_and_features(X, y)
-
-        self.tree_ = grow_tree(
-            _X,
-            _y,
-            measure_name=self.measure_name,
-            growth_params=self.growth_params_,
-            random_state=self.random_state,
-            **kwargs,
-        )
-
-        return self
-
-    def predict(self, X: np.ndarray) -> np.ndarray:
-        check_is_fitted(self, ("tree_", "growth_params_"))
-
-        X = validate_data(self, X, reset=False, ensure_all_finite=False)
-
-        _X = self._select_features(X, self.ix_features_)
-
-        y = predict_with_tree(self.tree_, _X)
-
-        return y
-
-
-class DecisionTreeClassifier(base.ClassifierMixin, DecisionTreeTemplate):
-    """DecisionTreeClassifier
-
-    Based on: https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator
-    """
-
-    def __init__(
-        self,
-        measure_name: MetricNames = MetricNames.gini,
-        max_depth: int = 2,
-        min_improvement: float = 0.0,
-        lam: float = 0.0,
-        frac_subsamples: float = 1.0,
-        frac_features: float = 1.0,
-        threshold_method: utils.ThresholdSelectionMethod = utils.ThresholdSelectionMethod.bruteforce,
-        threshold_quantile: float = 0.1,
-        n_thresholds: int = 100,
-        column_method: utils.ColumnSelectionMethod = utils.ColumnSelectionMethod.ascending,
-        n_columns_to_try: int | None = None,
-        random_state: int = 42,
-        ensure_all_finite: bool = True,
-    ) -> None:
-        super().__init__(
-            measure_name=measure_name,
-            max_depth=max_depth,
-            min_improvement=min_improvement,
-            lam=lam,
-            frac_subsamples=frac_subsamples,
-            frac_features=frac_features,
-            threshold_method=threshold_method,
-            threshold_quantile=threshold_quantile,
-            n_thresholds=n_thresholds,
-            column_method=column_method,
-            n_columns_to_try=n_columns_to_try,
-            random_state=random_state,
-        )
-        self.ensure_all_finite = ensure_all_finite
-
-    def _more_tags(self) -> T.Dict[str, bool]:
-        """Describes to scikit-learn parametrize_with_checks the scope of this class
-
-        Reference: https://scikit-learn.org/stable/developers/develop.html#estimator-tags
-        """
-        return {"binary_only": True}
-
-    def __sklearn_tags__(self):
-        # https://scikit-learn.org/stable/developers/develop.html
-        tags = super().__sklearn_tags__()  # type: ignore
-        tags.classifier_tags.multi_class = False
-        return tags
-
-    def fit(
-        self,
-        X: np.ndarray,
-        y: np.ndarray,
-    ) -> "DecisionTreeClassifier":
-        X, y = validate_data(self, X, y, ensure_all_finite=False)
-
-        check_classification_targets(y)
-
-        y_type = type_of_target(y, input_name="y", raise_unknown=True)  # type: ignore
-        if y_type != "binary":
-            raise ValueError(
-                "Only binary classification is supported. The type of the target "
-                f"is {y_type}."
-            )
-
-        if len(np.unique(y)) == 1:
-            raise ValueError("Cannot train with only one class present")
-
-        self._organize_growth_parameters()
-
-        self.classes_, y = np.unique(y, return_inverse=True)
-
-        _X, _y, self.ix_features_ = self._select_samples_and_features(X, y)
-
-        self.tree_ = grow_tree(
-            _X,
-            _y,
-            measure_name=self.measure_name,
-            growth_params=self.growth_params_,
-            random_state=self.random_state,
-        )
-
-        return self
-
-    def predict_proba(self, X: np.ndarray) -> np.ndarray:
-        check_is_fitted(self, ("tree_", "classes_", "growth_params_"))
-        X = validate_data(self, X, reset=False, ensure_all_finite=False)
-
-        _X = self._select_features(X, self.ix_features_)
-
-        proba = predict_with_tree(self.tree_, _X)
-        proba = np.array([1 - proba, proba]).T
-        return proba
-
-    def predict(self, X: np.ndarray) -> np.ndarray:
-        proba = self.predict_proba(X)
-        ix = np.argmax(proba, axis=1)
-        y = self.classes_[ix]
-
-        return y
-
-
-def walk_tree(
-    decision_tree: Node,
-    tree: Tree,
-    parent: Node | None = None,
-    is_left: bool | None = None,
-):
-    arrow = (
-        ""
-        if parent is None
-        else f"[magenta](< {parent.threshold:.3f})[/magenta]"
-        if is_left
-        else f"[magenta](>= {parent.threshold:.3f})[/magenta]"
-    )
-
-    if decision_tree.is_leaf:  # base cases
-        branch = tree.add(
-            f"{arrow} 🍁 # obs: [cyan]{decision_tree.n_obs}[/cyan], value: [green]{decision_tree.prediction:.3f}[/green], leaf reason '{decision_tree.reason}'"
-        )
-        return None
-    else:
-        branch = tree.add(
-            f"{arrow} col idx: {decision_tree.array_column}, threshold: [magenta]{decision_tree.threshold:.3f}[/magenta]"
-        )
-
-        if decision_tree.left is not None:  # go left
-            walk_tree(decision_tree.left, branch, decision_tree, True)
-
-        if decision_tree.right is not None:  # go right
-            walk_tree(decision_tree.right, branch, decision_tree, False)
-
-
-def show_tree(decision_tree: DecisionTreeTemplate):
-    tree = Tree(f"Represenation of 🌲 ({decision_tree})")
-    walk_tree(decision_tree.tree_, tree)
-    rprint(tree)
diff --git a/src/random_tree_models/decisiontree/__init__.py b/src/random_tree_models/decisiontree/__init__.py
new file mode 100644
index 0000000..a33a9a4
--- /dev/null
+++ b/src/random_tree_models/decisiontree/__init__.py
@@ -0,0 +1,11 @@
+from .estimators import (
+    DecisionTreeClassifier as DecisionTreeClassifier,
+)
+from .estimators import (
+    DecisionTreeRegressor as DecisionTreeRegressor,
+)
+from .estimators import (
+    DecisionTreeTemplate as DecisionTreeTemplate,
+)
+from .predict import find_leaf_node as find_leaf_node
+from .train import grow_tree as grow_tree
diff --git a/src/random_tree_models/decisiontree/estimators.py b/src/random_tree_models/decisiontree/estimators.py
new file mode 100644
index 0000000..87c8b2c
--- /dev/null
+++ b/src/random_tree_models/decisiontree/estimators.py
@@ -0,0 +1,305 @@
+import typing as T
+
+import numpy as np
+import sklearn.base as base
+from sklearn.utils.multiclass import check_classification_targets, type_of_target
+from sklearn.utils.validation import check_is_fitted, validate_data  # type: ignore
+
+import random_tree_models.params
+from random_tree_models.decisiontree.node import Node
+from random_tree_models.decisiontree.predict import predict_with_tree
+from random_tree_models.decisiontree.train import grow_tree
+from random_tree_models.params import MetricNames
+
+
+class DecisionTreeTemplate(base.BaseEstimator):
+    """Template for DecisionTree classes
+
+    Based on: https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator
+    """
+
+    max_depth: int
+    measure_name: random_tree_models.params.MetricNames
+    min_improvement: float
+    lam: float
+    frac_subsamples: float
+    frac_features: float
+    random_state: int
+    threshold_method: random_tree_models.params.ThresholdSelectionMethod
+    threshold_quantile: float
+    n_thresholds: int
+    column_method: random_tree_models.params.ColumnSelectionMethod
+    n_columns_to_try: int | None
+    ensure_all_finite: bool
+    tree_: Node
+
+    def __init__(
+        self,
+        measure_name: random_tree_models.params.MetricNames,
+        max_depth: int = 2,
+        min_improvement: float = 0.0,
+        lam: float = 0.0,
+        frac_subsamples: float = 1.0,
+        frac_features: float = 1.0,
+        threshold_method: random_tree_models.params.ThresholdSelectionMethod = random_tree_models.params.ThresholdSelectionMethod.bruteforce,
+        threshold_quantile: float = 0.1,
+        n_thresholds: int = 100,
+        column_method: random_tree_models.params.ColumnSelectionMethod = random_tree_models.params.ColumnSelectionMethod.ascending,
+        n_columns_to_try: int | None = None,
+        random_state: int = 42,
+        ensure_all_finite: bool = True,
+    ) -> None:
+        self.max_depth = max_depth
+        self.measure_name = measure_name
+        self.min_improvement = min_improvement
+        self.lam = lam
+        self.frac_subsamples = frac_subsamples
+        self.frac_features = frac_features
+        self.random_state = random_state
+        self.threshold_method = threshold_method
+        self.threshold_quantile = threshold_quantile
+        self.n_thresholds = n_thresholds
+        self.column_method = column_method
+        self.n_columns_to_try = n_columns_to_try
+        self.ensure_all_finite = ensure_all_finite
+
+    def _organize_growth_parameters(self):
+        self.growth_params_ = random_tree_models.params.TreeGrowthParameters(
+            max_depth=self.max_depth,
+            min_improvement=self.min_improvement,
+            lam=-abs(self.lam),
+            frac_subsamples=float(self.frac_subsamples),
+            frac_features=float(self.frac_features),
+            random_state=int(self.random_state),
+            threshold_params=random_tree_models.params.ThresholdSelectionParameters(
+                method=self.threshold_method,
+                quantile=self.threshold_quantile,
+                n_thresholds=self.n_thresholds,
+                random_state=int(self.random_state),
+            ),
+            column_params=random_tree_models.params.ColumnSelectionParameters(
+                method=self.column_method,
+                n_trials=self.n_columns_to_try,
+            ),
+        )
+
+    def _select_samples_and_features(
+        self, X: np.ndarray, y: np.ndarray
+    ) -> T.Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        "Sub-samples rows and columns from X and y"
+        if not hasattr(self, "growth_params_"):
+            raise ValueError(f"Try calling `fit` first.")
+
+        ix = np.arange(len(X))
+        rng = np.random.RandomState(self.growth_params_.random_state)
+        if self.growth_params_.frac_subsamples < 1.0:
+            n_samples = int(self.growth_params_.frac_subsamples * len(X))
+            ix_samples = rng.choice(ix, size=n_samples, replace=False)
+        else:
+            ix_samples = ix
+
+        if self.frac_features < 1.0:
+            n_columns = int(X.shape[1] * self.frac_features)
+            ix_features = rng.choice(
+                np.arange(X.shape[1]),
+                size=n_columns,
+                replace=False,
+            )
+        else:
+            ix_features = np.arange(X.shape[1])
+
+        _X = X[ix_samples, :]
+        _X = _X[:, ix_features]
+
+        _y = y[ix_samples]
+        return _X, _y, ix_features
+
+    def _select_features(self, X: np.ndarray, ix_features: np.ndarray) -> np.ndarray:
+        return X[:, ix_features]
+
+    def fit(
+        self,
+        X: np.ndarray,
+        y: np.ndarray,
+    ) -> "DecisionTreeTemplate":
+        raise NotImplementedError()
+
+    def predict(self, X: np.ndarray) -> np.ndarray:
+        raise NotImplementedError()
+
+
+class DecisionTreeRegressor(base.RegressorMixin, DecisionTreeTemplate):
+    """DecisionTreeRegressor
+
+    Based on: https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator
+    """
+
+    def __init__(
+        self,
+        measure_name: MetricNames = MetricNames.variance,
+        max_depth: int = 2,
+        min_improvement: float = 0.0,
+        lam: float = 0.0,
+        frac_subsamples: float = 1.0,
+        frac_features: float = 1.0,
+        threshold_method: random_tree_models.params.ThresholdSelectionMethod = random_tree_models.params.ThresholdSelectionMethod.bruteforce,
+        threshold_quantile: float = 0.1,
+        n_thresholds: int = 100,
+        column_method: random_tree_models.params.ColumnSelectionMethod = random_tree_models.params.ColumnSelectionMethod.ascending,
+        n_columns_to_try: int | None = None,
+        random_state: int = 42,
+        ensure_all_finite: bool = True,
+    ) -> None:
+        super().__init__(
+            measure_name=measure_name,
+            max_depth=max_depth,
+            min_improvement=min_improvement,
+            lam=lam,
+            frac_subsamples=frac_subsamples,
+            frac_features=frac_features,
+            threshold_method=threshold_method,
+            threshold_quantile=threshold_quantile,
+            n_thresholds=n_thresholds,
+            column_method=column_method,
+            n_columns_to_try=n_columns_to_try,
+            random_state=random_state,
+            ensure_all_finite=ensure_all_finite,
+        )
+
+    def fit(
+        self,
+        X: np.ndarray,
+        y: np.ndarray,
+        **kwargs,
+    ) -> "DecisionTreeRegressor":
+        self._organize_growth_parameters()
+
+        X, y = validate_data(self, X, y, ensure_all_finite=False)
+
+        _X, _y, self.ix_features_ = self._select_samples_and_features(X, y)
+
+        self.tree_ = grow_tree(
+            _X,
+            _y,
+            measure_name=self.measure_name,
+            growth_params=self.growth_params_,
+            random_state=self.random_state,
+            **kwargs,
+        )
+
+        return self
+
+    def predict(self, X: np.ndarray) -> np.ndarray:
+        check_is_fitted(self, ("tree_", "growth_params_"))
+
+        X = validate_data(self, X, reset=False, ensure_all_finite=False)
+
+        _X = self._select_features(X, self.ix_features_)
+
+        y = predict_with_tree(self.tree_, _X)
+
+        return y
+
+
+class DecisionTreeClassifier(base.ClassifierMixin, DecisionTreeTemplate):
+    """DecisionTreeClassifier
+
+    Based on: https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator
+    """
+
+    def __init__(
+        self,
+        measure_name: MetricNames = MetricNames.gini,
+        max_depth: int = 2,
+        min_improvement: float = 0.0,
+        lam: float = 0.0,
+        frac_subsamples: float = 1.0,
+        frac_features: float = 1.0,
+        threshold_method: random_tree_models.params.ThresholdSelectionMethod = random_tree_models.params.ThresholdSelectionMethod.bruteforce,
+        threshold_quantile: float = 0.1,
+        n_thresholds: int = 100,
+        column_method: random_tree_models.params.ColumnSelectionMethod = random_tree_models.params.ColumnSelectionMethod.ascending,
+        n_columns_to_try: int | None = None,
+        random_state: int = 42,
+        ensure_all_finite: bool = True,
+    ) -> None:
+        super().__init__(
+            measure_name=measure_name,
+            max_depth=max_depth,
+            min_improvement=min_improvement,
+            lam=lam,
+            frac_subsamples=frac_subsamples,
+            frac_features=frac_features,
+            threshold_method=threshold_method,
+            threshold_quantile=threshold_quantile,
+            n_thresholds=n_thresholds,
+            column_method=column_method,
+            n_columns_to_try=n_columns_to_try,
+            random_state=random_state,
+        )
+        self.ensure_all_finite = ensure_all_finite
+
+    def _more_tags(self) -> T.Dict[str, bool]:
+        """Describes to scikit-learn parametrize_with_checks the scope of this class
+
+        Reference: https://scikit-learn.org/stable/developers/develop.html#estimator-tags
+        """
+        return {"binary_only": True}
+
+    def __sklearn_tags__(self):
+        # https://scikit-learn.org/stable/developers/develop.html
+        tags = super().__sklearn_tags__()  # type: ignore
+        tags.classifier_tags.multi_class = False
+        return tags
+
+    def fit(
+        self,
+        X: np.ndarray,
+        y: np.ndarray,
+    ) -> "DecisionTreeClassifier":
+        X, y = validate_data(self, X, y, ensure_all_finite=False)
+
+        check_classification_targets(y)
+
+        y_type = type_of_target(y, input_name="y", raise_unknown=True)  # type: ignore
+        if y_type != "binary":
+            raise ValueError(
+                "Only binary classification is supported. The type of the target "
+                f"is {y_type}."
+            )
+
+        if len(np.unique(y)) == 1:
+            raise ValueError("Cannot train with only one class present")
+
+        self._organize_growth_parameters()
+
+        self.classes_, y = np.unique(y, return_inverse=True)
+
+        _X, _y, self.ix_features_ = self._select_samples_and_features(X, y)
+
+        self.tree_ = grow_tree(
+            _X,
+            _y,
+            measure_name=self.measure_name,
+            growth_params=self.growth_params_,
+            random_state=self.random_state,
+        )
+
+        return self
+
+    def predict_proba(self, X: np.ndarray) -> np.ndarray:
+        check_is_fitted(self, ("tree_", "classes_", "growth_params_"))
+        X = validate_data(self, X, reset=False, ensure_all_finite=False)
+
+        _X = self._select_features(X, self.ix_features_)
+
+        proba = predict_with_tree(self.tree_, _X)
+        proba = np.array([1 - proba, proba]).T
+        return proba
+
+    def predict(self, X: np.ndarray) -> np.ndarray:
+        proba = self.predict_proba(X)
+        ix = np.argmax(proba, axis=1)
+        y = self.classes_[ix]
+
+        return y
diff --git a/src/random_tree_models/decisiontree/node.py b/src/random_tree_models/decisiontree/node.py
new file mode 100644
index 0000000..9489d14
--- /dev/null
+++ b/src/random_tree_models/decisiontree/node.py
@@ -0,0 +1,37 @@
+import uuid
+
+from pydantic import StrictInt, StrictStr
+from pydantic.dataclasses import dataclass
+
+from random_tree_models.decisiontree.split_objects import SplitScore
+
+
+@dataclass
+class Node:
+    """Decision node in a decision tree"""
+
+    # Stuff for making a decision
+    array_column: StrictInt | None = None  # index of the column to use
+    threshold: float | None = None  # threshold for decision
+    prediction: float | None = None  # value to use for predictions
+    default_is_left: bool | None = None  # default direction if x is nan
+
+    # descendants
+    right: "Node | None" = None  # right descendant of type Node
+    left: "Node | None" = None  # left descendant of type Node
+
+    # misc info
+    measure: SplitScore | None = None
+
+    n_obs: StrictInt | None = None  # number of observations in node
+    reason: StrictStr | None = None  # place for some comment
+
+    depth: StrictInt | None = None  # depth of the node
+
+    def __post_init__(self):
+        # unique identifier of the node
+        self.node_id = uuid.uuid4()
+
+    @property
+    def is_leaf(self) -> bool:
+        return self.left is None and self.right is None
diff --git a/src/random_tree_models/decisiontree/predict.py b/src/random_tree_models/decisiontree/predict.py
new file mode 100644
index 0000000..59fac01
--- /dev/null
+++ b/src/random_tree_models/decisiontree/predict.py
@@ -0,0 +1,53 @@
+import numpy as np
+
+from random_tree_models.decisiontree.node import Node
+
+
+def find_leaf_node(node: Node, x: np.ndarray) -> Node:
+    "Traverses tree to find the leaf corresponding to x"
+
+    if node.is_leaf:
+        return node
+
+    is_missing = np.isnan(x[node.array_column])
+    if is_missing:
+        go_left = node.default_is_left
+        if go_left is None:
+            raise ValueError(
+                f"{x[node.array_column]=} is missing but was not observed as a feature that can be missing during training."
+            )
+    else:
+        go_left = x[node.array_column] < node.threshold
+
+    if go_left:
+        if node.left is not None:
+            node = find_leaf_node(node.left, x)
+        else:
+            raise ValueError(f"Oddly tried to access node.left even though it is None.")
+    else:
+        if node.right is not None:
+            node = find_leaf_node(node.right, x)
+        else:
+            raise ValueError(
+                f"Oddly tried to access node.right even though it is None."
+            )
+
+    return node
+
+
+def predict_with_tree(tree: Node, X: np.ndarray) -> np.ndarray:
+    "Traverse a previously built tree to make one prediction per row in X"
+    if not isinstance(tree, Node):
+        raise ValueError(
+            f"Passed `tree` needs to be an instantiation of Node, got {tree=}"
+        )
+    n_obs = len(X)
+    predictions = []
+
+    for i in range(n_obs):
+        leaf_node = find_leaf_node(tree, X[i, :])
+
+        predictions.append(leaf_node.prediction)
+
+    predictions = np.array(predictions)
+    return predictions
diff --git a/src/random_tree_models/decisiontree/split.py b/src/random_tree_models/decisiontree/split.py
new file mode 100644
index 0000000..e79c0df
--- /dev/null
+++ b/src/random_tree_models/decisiontree/split.py
@@ -0,0 +1,212 @@
+import typing as T
+
+import numpy as np
+
+import random_tree_models.params
+import random_tree_models.scoring as scoring
+from random_tree_models.decisiontree.node import Node
+from random_tree_models.decisiontree.split_objects import BestSplit
+
+
+def select_thresholds(
+    feature_values: np.ndarray,
+    threshold_params: random_tree_models.params.ThresholdSelectionParameters,
+    rng: np.random.RandomState,
+) -> np.ndarray:
+    "Selects thresholds to use for splitting"
+
+    method = threshold_params.method
+    n_thresholds = threshold_params.n_thresholds
+    num_quantile_steps = threshold_params.num_quantile_steps
+
+    if method == random_tree_models.params.ThresholdSelectionMethod.bruteforce:
+        return feature_values[1:]
+    elif method == random_tree_models.params.ThresholdSelectionMethod.random:
+        if len(feature_values) - 1 <= n_thresholds:
+            return feature_values[1:]
+        else:
+            return rng.choice(
+                feature_values[1:],
+                size=(n_thresholds,),
+                replace=False,
+            )
+    elif method == random_tree_models.params.ThresholdSelectionMethod.quantile:
+        qs = np.linspace(0, 1, num_quantile_steps)
+        return np.quantile(feature_values[1:], qs)
+    elif method == random_tree_models.params.ThresholdSelectionMethod.uniform:
+        x = np.linspace(
+            feature_values.min(),
+            feature_values.max(),
+            n_thresholds + 2,
+        )
+        return rng.choice(x[1:], size=[1])
+    else:
+        raise NotImplementedError(f"Unknown threshold selection method: {method}")
+
+
+def get_thresholds_and_target_groups(
+    feature_values: np.ndarray,
+    threshold_params: random_tree_models.params.ThresholdSelectionParameters,
+    rng: np.random.RandomState,
+) -> T.Generator[T.Tuple[np.ndarray, np.ndarray, bool | None], None, None]:
+    "Creates a generator for split finding, returning the used threshold, the target groups and a bool indicating if the default direction is left"
+    is_missing = np.isnan(feature_values)
+    is_finite = np.logical_not(is_missing)
+    all_finite = is_finite.all()
+
+    if all_finite:
+        default_direction_is_left = None
+        thresholds = select_thresholds(feature_values, threshold_params, rng)
+
+        for threshold in thresholds:
+            target_groups = feature_values < threshold
+            yield (threshold, target_groups, default_direction_is_left)
+    else:
+        finite_feature_values = feature_values[is_finite]
+        thresholds = select_thresholds(finite_feature_values, threshold_params, rng)
+
+        for threshold in thresholds:
+            # default direction left - feature value < threshold or missing  (i.e. missing are included left of the threshold)
+            target_groups = np.logical_or(feature_values < threshold, is_missing)
+            yield (threshold, target_groups, True)
+
+            # default direction right - feature value < threshold and finite (i.e. missing are included right of the threshold)
+            target_groups = np.logical_and(feature_values < threshold, is_finite)
+            yield (threshold, target_groups, False)
+
+
+def get_column(
+    X: np.ndarray,
+    column_params: random_tree_models.params.ColumnSelectionParameters,
+    rng: np.random.RandomState,
+) -> list[int]:
+    # select column order to split on
+    method = column_params.method
+    n_columns_to_try = column_params.n_trials
+
+    columns = list(range(X.shape[1]))
+    if method == random_tree_models.params.ColumnSelectionMethod.ascending:
+        pass
+    elif method == random_tree_models.params.ColumnSelectionMethod.random:
+        columns = np.array(columns)
+        rng.shuffle(columns)
+        columns = columns.tolist()
+    elif method == random_tree_models.params.ColumnSelectionMethod.largest_delta:
+        deltas = X.max(axis=0) - X.min(axis=0)
+        weights = deltas / deltas.sum()
+        columns = np.array(columns)
+        columns = rng.choice(columns, p=weights, size=len(columns), replace=False)
+        columns = columns.tolist()
+    else:
+        raise NotImplementedError(
+            f"Unknown column selection method: {column_params.method}"
+        )
+    if n_columns_to_try is not None:
+        columns = columns[:n_columns_to_try]
+
+    return columns
+
+
+def find_best_split(
+    X: np.ndarray,
+    y: np.ndarray,
+    measure_name: str,
+    yhat: np.ndarray | None = None,
+    g: np.ndarray | None = None,
+    h: np.ndarray | None = None,
+    growth_params: random_tree_models.params.TreeGrowthParameters
+    | None = None,  # TODO: make required
+    rng: np.random.RandomState = np.random.RandomState(42),
+) -> BestSplit:
+    """Find the best split, detecting the "default direction" with missing data."""
+
+    if len(np.unique(y)) == 1:
+        raise ValueError(
+            f"Tried to find a split for homogenous y: {y[:3]} ... {y[-3:]}"
+        )
+
+    best = None  # this will be a BestSplit instance
+
+    if growth_params is None:
+        raise ValueError(f"{growth_params=} but is not allowed to be None")
+
+    for array_column in get_column(X, growth_params.column_params, rng):
+        feature_values = X[:, array_column]
+
+        for (
+            threshold,
+            target_groups,
+            default_is_left,
+        ) in get_thresholds_and_target_groups(
+            feature_values, growth_params.threshold_params, rng
+        ):
+            split_score = scoring.calc_split_score(
+                random_tree_models.params.MetricNames(measure_name),
+                y,
+                target_groups,
+                yhat=yhat,
+                g=g,
+                h=h,
+                growth_params=growth_params,
+            )
+
+            if best is None or split_score > best.score:
+                best = BestSplit(
+                    score=float(split_score),
+                    column=int(array_column),
+                    threshold=float(threshold),
+                    target_groups=target_groups,
+                    default_is_left=default_is_left,
+                )
+
+    if best is None:
+        raise ValueError(f"Something went wrong {best=} cannot be None.")
+    return best
+
+
+def check_if_split_sensible(
+    best: BestSplit,
+    parent_node: Node | None,
+    growth_params: random_tree_models.params.TreeGrowthParameters,
+) -> tuple[bool, float | None]:
+    "Returns (is_not_sensible, gain): flags splits with insufficient score gain or with all observations on one side"
+    parent_is_none = parent_node is None
+    if parent_is_none:
+        return False, None
+
+    measure_is_none = parent_node.measure is None
+    if measure_is_none:
+        return False, None
+
+    value_is_none = parent_node.measure.value is None  # type: ignore
+    if value_is_none:
+        return False, None
+
+    # score gain
+    gain = best.score - parent_node.measure.value  # type: ignore
+    is_insufficient_gain = gain < growth_params.min_improvement
+
+    # left/right group assignment
+    all_on_one_side = bool(best.target_groups.all())
+    all_on_other_side = bool(np.logical_not(best.target_groups).all())
+    is_all_onesided = all_on_one_side or all_on_other_side
+
+    is_not_sensible = is_all_onesided or is_insufficient_gain
+
+    return is_not_sensible, gain
+
+
+def select_arrays_for_child_node(
+    go_left: bool,
+    best: BestSplit,
+    X: np.ndarray,
+    y: np.ndarray,
+    g: np.ndarray | None = None,
+    h: np.ndarray | None = None,
+) -> tuple[np.ndarray, np.ndarray, np.ndarray | None, np.ndarray | None]:
+    mask = best.target_groups == go_left
+    _X = X[mask, :]
+    _y = y[mask]
+    _g = g[mask] if g is not None else None
+    _h = h[mask] if h is not None else None
+    return _X, _y, _g, _h
diff --git a/src/random_tree_models/decisiontree/split_objects.py b/src/random_tree_models/decisiontree/split_objects.py
new file mode 100644
index 0000000..e1e95c7
--- /dev/null
+++ b/src/random_tree_models/decisiontree/split_objects.py
@@ -0,0 +1,18 @@
+import numpy as np
+from pydantic import ConfigDict, Field, StrictBool, StrictFloat, StrictInt, StrictStr
+from pydantic.dataclasses import dataclass
+
+
+@dataclass(validate_on_init=True)
+class SplitScore:
+    name: StrictStr  # name of the score used
+    value: StrictFloat | None = None  # optimization value gini etc
+
+
+@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
+class BestSplit:
+    score: StrictFloat
+    column: StrictInt
+    threshold: StrictFloat
+    target_groups: np.ndarray = Field(default_factory=lambda: np.zeros(10))
+    default_is_left: StrictBool | None = None
diff --git a/src/random_tree_models/decisiontree/train.py b/src/random_tree_models/decisiontree/train.py
new file mode 100644
index 0000000..1f33cc1
--- /dev/null
+++ b/src/random_tree_models/decisiontree/train.py
@@ -0,0 +1,187 @@
+import typing as T
+
+import numpy as np
+
+import random_tree_models.leafweights as leafweights
+import random_tree_models.params
+import random_tree_models.scoring as scoring
+from random_tree_models.decisiontree.node import Node
+from random_tree_models.decisiontree.split import (
+    check_if_split_sensible,
+    find_best_split,
+    select_arrays_for_child_node,
+)
+from random_tree_models.decisiontree.split_objects import SplitScore
+from random_tree_models.params import MetricNames
+
+
+def check_is_baselevel(y: np.ndarray, depth: int, max_depth: int) -> T.Tuple[bool, str]:
+    """Verifies if the tree traversal reached the baselevel / a leaf
+    * group homogeneous / cannot sensibly be split further
+    * no data in the group
+    * max depth reached
+    """
+    if max_depth is not None and depth >= max_depth:
+        return (True, "max depth reached")
+    elif len(np.unique(y)) == 1:
+        return (True, "homogenous group")
+    elif len(y) <= 1:
+        return (True, "<= 1 data point in group")
+    else:
+        return (False, "")
+
+
+def calc_leaf_weight_and_split_score(
+    y: np.ndarray,
+    measure_name: random_tree_models.params.MetricNames,
+    growth_params: random_tree_models.params.TreeGrowthParameters,
+    g: np.ndarray | None = None,
+    h: np.ndarray | None = None,
+) -> tuple[float | None, float]:
+    leaf_weight = leafweights.calc_leaf_weight(y, measure_name, growth_params, g=g, h=h)
+
+    yhat = leaf_weight * np.ones_like(y)
+    score = scoring.calc_split_score(
+        measure_name,
+        y,
+        np.ones_like(y, dtype=bool),
+        yhat=yhat,
+        g=g,
+        h=h,
+        growth_params=growth_params,
+    )
+
+    return leaf_weight, score
+
+
+def grow_tree(
+    X: np.ndarray,
+    y: np.ndarray,
+    measure_name: MetricNames,
+    growth_params: random_tree_models.params.TreeGrowthParameters,
+    parent_node: Node | None = None,
+    depth: int = 0,
+    g: np.ndarray | None = None,
+    h: np.ndarray | None = None,
+    random_state: int = 42,
+    **kwargs,
+) -> Node:
+    """Implementation of the Classification And Regression Tree (CART) algorithm
+
+    Args:
+        X (np.ndarray): Input feature values to do thresholding on.
+        y (np.ndarray): Target values.
+        measure_name (MetricNames): Value indicating which functions in scoring.SplitScoreMetrics and leafweights.LeafWeightSchemes to call.
+        parent_node (Node, optional): Parent node in tree. Defaults to None.
+        depth (int, optional): Current tree depth. Defaults to 0.
+        growth_params (params.TreeGrowthParameters): Parameters controlling tree growth. Required; there is no default.
+        g (np.ndarray, optional): Boosting and loss specific precomputed 1st order derivative dloss/dyhat. Defaults to None.
+        h (np.ndarray, optional): Boosting and loss specific precomputed 2nd order derivative d^2loss/dyhat^2. Defaults to None.
+
+    Raises:
+        ValueError: Fails if parent node passes an empty y array.
+
+    Returns:
+        Node: Tree node with leaf weight, node score and potential child nodes.
+
+    Note:
+    Currently measure_name controls how the split score and the leaf weights are computed.
+
+    But only the decision tree algorithm directly uses y for that and can predict y using the leaf weight values directly.
+
+    For the boosting algorithms g and h are used to compute split score and leaf weights. Their leaf weights
+    sometimes also need post-processing, e.g. for binary classification. Computation of g and h and post-processing is not
+    done here but in the respective class implementations of the algorithms.
+    """
+
+    n_obs = len(y)
+    if n_obs == 0:
+        raise ValueError(
+            f"Something went wrong. {parent_node=} handed down an empty set of data points."
+        )
+
+    is_baselevel, reason = check_is_baselevel(
+        y, depth, max_depth=growth_params.max_depth
+    )
+    if parent_node is None:
+        scoring.reset_incrementing_score()
+
+    # compute leaf weight (for prediction) and node score (for split gain check)
+    leaf_weight, score = calc_leaf_weight_and_split_score(
+        y, measure_name, growth_params, g, h
+    )
+
+    if is_baselevel:  # end of the line buddy
+        return Node(
+            prediction=leaf_weight,
+            measure=SplitScore(measure_name, value=score),
+            n_obs=n_obs,
+            reason=reason,
+            depth=depth,
+        )
+
+    # find best split
+    rng = np.random.RandomState(random_state)
+
+    best = find_best_split(
+        X, y, measure_name, g=g, h=h, growth_params=growth_params, rng=rng
+    )
+
+    # check if improvement due to split is below minimum requirement
+    is_not_sensible_split, gain = check_if_split_sensible(
+        best, parent_node, growth_params
+    )
+
+    if is_not_sensible_split:
+        reason = f"gain due split ({gain=}) lower than {growth_params.min_improvement=} or all data points assigned to one side (is left {best.target_groups.mean()=:.2%})"
+        leaf_node = Node(
+            prediction=leaf_weight,
+            measure=SplitScore(measure_name, value=score),
+            n_obs=n_obs,
+            reason=reason,
+            depth=depth,
+        )
+        return leaf_node
+
+    # create new parent node for subsequent child nodes
+    new_node = Node(
+        array_column=best.column,
+        threshold=best.threshold,
+        prediction=leaf_weight,
+        default_is_left=best.default_is_left,
+        measure=SplitScore(measure_name, best.score),
+        n_obs=n_obs,
+        reason="",
+        depth=depth,
+    )
+    random_state_left, random_state_right = rng.randint(0, 2**32, size=2)
+
+    # descend left
+    _X, _y, _g, _h = select_arrays_for_child_node(True, best, X, y, g, h)
+    new_node.left = grow_tree(
+        _X,
+        _y,
+        measure_name=measure_name,
+        growth_params=growth_params,
+        parent_node=new_node,
+        depth=depth + 1,
+        g=_g,
+        h=_h,
+        random_state=random_state_left,
+    )
+
+    # descend right
+    _X, _y, _g, _h = select_arrays_for_child_node(False, best, X, y, g, h)
+    new_node.right = grow_tree(
+        _X,
+        _y,
+        measure_name=measure_name,
+        growth_params=growth_params,
+        parent_node=new_node,
+        depth=depth + 1,
+        g=_g,
+        h=_h,
+        random_state=random_state_right,
+    )
+
+    return new_node
diff --git a/src/random_tree_models/decisiontree/visualize.py b/src/random_tree_models/decisiontree/visualize.py
new file mode 100644
index 0000000..8d4aa60
--- /dev/null
+++ b/src/random_tree_models/decisiontree/visualize.py
@@ -0,0 +1,42 @@
+from rich import print as rprint
+from rich.tree import Tree
+
+from random_tree_models.decisiontree.estimators import DecisionTreeTemplate
+from random_tree_models.decisiontree.node import Node
+
+
+def walk_tree(
+    decision_tree: Node,
+    tree: Tree,
+    parent: Node | None = None,
+    is_left: bool | None = None,
+):
+    arrow = (
+        ""
+        if parent is None
+        else f"[magenta](< {parent.threshold:.3f})[/magenta]"
+        if is_left
+        else f"[magenta](>= {parent.threshold:.3f})[/magenta]"
+    )
+
+    if decision_tree.is_leaf:  # base cases
+        branch = tree.add(
+            f"{arrow} 🍁 # obs: [cyan]{decision_tree.n_obs}[/cyan], value: [green]{decision_tree.prediction:.3f}[/green], leaf reason '{decision_tree.reason}'"
+        )
+        return None
+    else:
+        branch = tree.add(
+            f"{arrow} col idx: {decision_tree.array_column}, threshold: [magenta]{decision_tree.threshold:.3f}[/magenta]"
+        )
+
+        if decision_tree.left is not None:  # go left
+            walk_tree(decision_tree.left, branch, decision_tree, True)
+
+        if decision_tree.right is not None:  # go right
+            walk_tree(decision_tree.right, branch, decision_tree, False)
+
+
+def show_tree(decision_tree: DecisionTreeTemplate):
+    tree = Tree(f"Represenation of 🌲 ({decision_tree})")
+    walk_tree(decision_tree.tree_, tree)
+    rprint(tree)
diff --git a/src/random_tree_models/extratrees.py b/src/random_tree_models/extratrees.py
index a76b97c..1a67dba 100644
--- a/src/random_tree_models/extratrees.py
+++ b/src/random_tree_models/extratrees.py
@@ -10,8 +10,8 @@
 )
 
 import random_tree_models.decisiontree as dtree
-import random_tree_models.utils as utils
-from random_tree_models.scoring import MetricNames
+import random_tree_models.params as utils
+from random_tree_models.params import MetricNames
 
 
 class ExtraTreesTemplate(base.BaseEstimator):
diff --git a/src/random_tree_models/gradientboostedtrees.py b/src/random_tree_models/gradientboostedtrees.py
index 35f3c98..7bbe872 100644
--- a/src/random_tree_models/gradientboostedtrees.py
+++ b/src/random_tree_models/gradientboostedtrees.py
@@ -14,7 +14,7 @@
 )
 
 import random_tree_models.decisiontree as dtree
-from random_tree_models.scoring import MetricNames
+from random_tree_models.params import MetricNames
 
 
 class GradientBoostedTreesTemplate(base.BaseEstimator):
diff --git a/src/random_tree_models/isolationforest.py b/src/random_tree_models/isolationforest.py
index 532044b..4be543d 100644
--- a/src/random_tree_models/isolationforest.py
+++ b/src/random_tree_models/isolationforest.py
@@ -5,14 +5,22 @@
 from sklearn import base
 from sklearn.utils.validation import check_is_fitted, validate_data  # type: ignore
 
-import random_tree_models.decisiontree as dtree
-from random_tree_models.scoring import MetricNames
-from random_tree_models.utils import ColumnSelectionMethod, ThresholdSelectionMethod
-
-
-def predict_with_isolationtree(tree: dtree.Node, X: np.ndarray) -> np.ndarray:
+from random_tree_models.decisiontree import (
+    DecisionTreeTemplate,
+    find_leaf_node,
+    grow_tree,
+)
+from random_tree_models.decisiontree.node import Node
+from random_tree_models.params import (
+    ColumnSelectionMethod,
+    MetricNames,
+    ThresholdSelectionMethod,
+)
+
+
+def predict_with_isolationtree(tree: Node, X: np.ndarray) -> np.ndarray:
     "Traverse a previously built tree to make one prediction per row in X"
-    if not isinstance(tree, dtree.Node):
+    if not isinstance(tree, Node):
         raise ValueError(
             f"Passed `tree` needs to be an instantiation of Node, got {tree=}"
         )
@@ -20,13 +28,13 @@ def predict_with_isolationtree(tree: dtree.Node, X: np.ndarray) -> np.ndarray:
     predictions = np.zeros(X.shape[0], dtype=int)
 
     for i in range(n_obs):
-        leaf_node = dtree.find_leaf_node(tree, X[i, :])
+        leaf_node = find_leaf_node(tree, X[i, :])
         predictions[i] = leaf_node.depth
 
     return predictions
 
 
-class IsolationTree(base.OutlierMixin, dtree.DecisionTreeTemplate):
+class IsolationTree(base.OutlierMixin, DecisionTreeTemplate):
     """Isolation tree
 
     Liu et al. 2006, Isolation Forest, algorithm 2
@@ -60,7 +68,7 @@ def fit(
 
         _X, _y, self.ix_features_ = self._select_samples_and_features(X, dummy_y)
 
-        self.tree_ = dtree.grow_tree(
+        self.tree_ = grow_tree(
             _X,
             _y,
             measure_name=self.measure_name,
diff --git a/src/random_tree_models/leafweights.py b/src/random_tree_models/leafweights.py
index 165a616..de48e39 100644
--- a/src/random_tree_models/leafweights.py
+++ b/src/random_tree_models/leafweights.py
@@ -1,8 +1,7 @@
 import numpy as np
 
-import random_tree_models.utils as utils
-from random_tree_models import scoring
-from random_tree_models.scoring import MetricNames
+import random_tree_models.params as utils
+from random_tree_models.params import MetricNames
 
 
 def leaf_weight_mean(y: np.ndarray) -> float:
@@ -32,7 +31,7 @@ def leaf_weight_xgboost(
 
 def calc_leaf_weight(
     y: np.ndarray,
-    measure_name: scoring.MetricNames,
+    measure_name: utils.MetricNames,
     growth_params: utils.TreeGrowthParameters,
     g: np.ndarray | None = None,
     h: np.ndarray | None = None,
diff --git a/src/random_tree_models/params.py b/src/random_tree_models/params.py
new file mode 100644
index 0000000..b261bb8
--- /dev/null
+++ b/src/random_tree_models/params.py
@@ -0,0 +1,104 @@
+from enum import StrEnum, auto
+from typing import Any
+
+from pydantic import BaseModel, StrictFloat, StrictInt
+
+
+class ColumnSelectionMethod(StrEnum):
+    ascending = "ascending"
+    largest_delta = "largest_delta"
+    random = "random"
+
+
+class ThresholdSelectionMethod(StrEnum):
+    bruteforce = "bruteforce"
+    quantile = "quantile"
+    random = "random"
+    uniform = "uniform"
+
+
+class ThresholdSelectionParameters(BaseModel):
+    method: ThresholdSelectionMethod
+    quantile: StrictFloat = 0.1
+    random_state: StrictInt = 0
+    n_thresholds: StrictInt = 100
+    num_quantile_steps: StrictInt = -1
+
+    def model_post_init(self, context: Any):
+        # verify method
+        # expected = ThresholdSelectionMethod.__members__.keys()
+        # is_okay = self.method in expected
+        # if not is_okay:
+        #     raise ValueError(
+        #         f"passed value for method ('{self.method}') not one of {expected}"
+        #     )
+
+        # verify quantile
+        is_okay = 0.0 < self.quantile < 1.0
+        if not is_okay:
+            raise ValueError(f"{self.quantile=} not in (0, 1)")
+        is_okay = 1 / self.quantile % 1 == 0
+        if not is_okay:
+            raise ValueError(f"{self.quantile=} not a valid quantile")
+
+        # verify random_state
+        is_okay = self.random_state >= 0
+        if not is_okay:
+            raise ValueError(f"{self.random_state=} not in [0, inf)")
+
+        # verify n_thresholds valid int
+        is_okay = self.n_thresholds > 0
+        if not is_okay:
+            raise ValueError(f"{self.n_thresholds=} not > 0")
+
+        # derive num_quantile_steps from quantile: int(1 / quantile) + 1
+        self.num_quantile_steps = int(1 / self.quantile) + 1
+
+
+class ColumnSelectionParameters(BaseModel):
+    method: ColumnSelectionMethod
+    n_trials: StrictInt | None = None
+
+
+class TreeGrowthParameters(BaseModel):
+    max_depth: StrictInt
+    min_improvement: StrictFloat = 0.0
+    # xgboost lambda - multiplied with sum of squares of leaf weights
+    # see Chen et al. 2016 equation 2
+    lam: StrictFloat = 0.0
+    frac_subsamples: StrictFloat = 1.0
+    frac_features: StrictFloat = 1.0
+    random_state: StrictInt = 0
+    threshold_params: ThresholdSelectionParameters = ThresholdSelectionParameters(
+        method=ThresholdSelectionMethod.bruteforce,
+        quantile=0.1,
+        random_state=0,
+        n_thresholds=100,
+    )
+    column_params: ColumnSelectionParameters = ColumnSelectionParameters(
+        method=ColumnSelectionMethod.ascending, n_trials=None
+    )
+
+    def model_post_init(self, context: Any):
+        # verify frac_subsamples
+        is_okay = 0.0 < self.frac_subsamples <= 1.0
+        if not is_okay:
+            raise ValueError(f"{self.frac_subsamples=} not in (0, 1]")
+
+        # verify frac_features
+        is_okay = 0.0 < self.frac_features <= 1.0
+        if not is_okay:
+            raise ValueError(f"{self.frac_features=} not in (0, 1]")
+
+
+class MetricNames(StrEnum):
+    variance = auto()
+    entropy = auto()
+    entropy_rs = auto()
+    gini = auto()
+    gini_rs = auto()
+    # variance for split score because Friedman et al. 2001 in Algorithm 1
+    # step 4 minimize the squared error between actual and predicted dloss/dyhat
+    friedman_binary_classification = auto()
+    xgboost = auto()
+    incrementing = auto()
diff --git a/src/random_tree_models/randomforest.py b/src/random_tree_models/randomforest.py
index 0bdc356..dbe0164 100644
--- a/src/random_tree_models/randomforest.py
+++ b/src/random_tree_models/randomforest.py
@@ -10,7 +10,7 @@
 )
 
 import random_tree_models.decisiontree as dtree
-from random_tree_models.scoring import MetricNames
+from random_tree_models.params import MetricNames
 
 
 class RandomForestTemplate(base.BaseEstimator):
diff --git a/src/random_tree_models/scoring.py b/src/random_tree_models/scoring.py
index 39c7fc2..864f83f 100644
--- a/src/random_tree_models/scoring.py
+++ b/src/random_tree_models/scoring.py
@@ -1,9 +1,8 @@
-from enum import StrEnum, auto
-
 import numpy as np
 
-import random_tree_models.utils as utils
+import random_tree_models.params as utils
 from random_tree_models import rs_entropy, rs_gini_impurity
+from random_tree_models.params import MetricNames
 
 
 def check_y_and_target_groups(y: np.ndarray, target_groups: np.ndarray | None = None):
@@ -216,19 +215,6 @@ def reset_incrementing_score():
     INC_SCORE = 0
 
 
-class MetricNames(StrEnum):
-    variance = auto()
-    entropy = auto()
-    entropy_rs = auto()
-    gini = auto()
-    gini_rs = auto()
-    # variance for split score because Friedman et al. 2001 in Algorithm 1
-    # step 4 minimize the squared error between actual and predicted dloss/dyhat
-    friedman_binary_classification = auto()
-    xgboost = auto()
-    incrementing = auto()
-
-
 def calc_split_score(
     metric: MetricNames,
     y: np.ndarray,
diff --git a/src/random_tree_models/utils.py b/src/random_tree_models/utils.py
index 0efe452..313a9a3 100644
--- a/src/random_tree_models/utils.py
+++ b/src/random_tree_models/utils.py
@@ -1,98 +1,8 @@
 import logging
-from enum import StrEnum
-from typing import Any
 
-from pydantic import BaseModel, StrictFloat, StrictInt
 from rich.logging import RichHandler
 
 
-class ColumnSelectionMethod(StrEnum):
-    ascending = "ascending"
-    largest_delta = "largest_delta"
-    random = "random"
-
-
-class ThresholdSelectionMethod(StrEnum):
-    bruteforce = "bruteforce"
-    quantile = "quantile"
-    random = "random"
-    uniform = "uniform"
-
-
-class ThresholdSelectionParameters(BaseModel):
-    method: ThresholdSelectionMethod
-    quantile: StrictFloat = 0.1
-    random_state: StrictInt = 0
-    n_thresholds: StrictInt = 100
-    num_quantile_steps: StrictInt = -1
-
-    def model_post_init(self, context: Any):
-        # verify method
-        # expected = ThresholdSelectionMethod.__members__.keys()
-        # is_okay = self.method in expected
-        # if not is_okay:
-        #     raise ValueError(
-        #         f"passed value for method ('{self.method}') not one of {expected}"
-        #     )
-
-        # verify quantile
-        is_okay = 0.0 < self.quantile < 1.0
-        if not is_okay:
-            raise ValueError(f"{self.quantile=} not in (0, 1)")
-        is_okay = 1 / self.quantile % 1 == 0
-        if not is_okay:
-            raise ValueError(f"{self.quantile=} not a valid quantile")
-
-        # verify random_state
-        is_okay = self.random_state >= 0
-        if not is_okay:
-            raise ValueError(f"{self.random_state=} not in [0, inf)")
-
-        # verify n_thresholds valid int
-        is_okay = self.n_thresholds > 0
-        if not is_okay:
-            raise ValueError(f"{self.n_thresholds=} not > 0")
-
-        # set dq
-        self.num_quantile_steps = int(1 / self.quantile) + 1
-
-
-class ColumnSelectionParameters(BaseModel):
-    method: ColumnSelectionMethod
-    n_trials: StrictInt | None = None
-
-
-class TreeGrowthParameters(BaseModel):
-    max_depth: StrictInt
-    min_improvement: StrictFloat = 0.0
-    # xgboost lambda - multiplied with sum of squares of leaf weights
-    # see Chen et al. 2016 equation 2
-    lam: StrictFloat = 0.0
-    frac_subsamples: StrictFloat = 1.0
-    frac_features: StrictFloat = 1.0
-    random_state: StrictInt = 0
-    threshold_params: ThresholdSelectionParameters = ThresholdSelectionParameters(
-        method=ThresholdSelectionMethod.bruteforce,
-        quantile=0.1,
-        random_state=0,
-        n_thresholds=100,
-    )
-    column_params: ColumnSelectionParameters = ColumnSelectionParameters(
-        method=ColumnSelectionMethod.ascending, n_trials=None
-    )
-
-    def model_post_init(self, context: Any):
-        # verify frac_subsamples
-        is_okay = 0.0 < self.frac_subsamples <= 1.0
-        if not is_okay:
-            raise ValueError(f"{self.frac_subsamples=} not in (0, 1]")
-
-        # verify frac_features
-        is_okay = 0.0 < self.frac_features <= 1.0
-        if not is_okay:
-            raise ValueError(f"{self.frac_features=} not in (0, 1]")
-
-
 def _get_logger(level=logging.INFO):
     for handler in logging.root.handlers[:]:
         logging.root.removeHandler(handler)
diff --git a/src/random_tree_models/xgboost.py b/src/random_tree_models/xgboost.py
index a228546..6a36310 100644
--- a/src/random_tree_models/xgboost.py
+++ b/src/random_tree_models/xgboost.py
@@ -28,7 +28,7 @@
 
 import random_tree_models.decisiontree as dtree
 import random_tree_models.gradientboostedtrees as gbt
-from random_tree_models.scoring import MetricNames
+from random_tree_models.params import MetricNames
 
 
 class XGBoostTemplate(base.BaseEstimator):
diff --git a/tests/decisiontree/__init__.py b/tests/decisiontree/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/decisiontree/conftest.py b/tests/decisiontree/conftest.py
new file mode 100644
index 0000000..a86f141
--- /dev/null
+++ b/tests/decisiontree/conftest.py
@@ -0,0 +1,28 @@
+from random_tree_models.decisiontree.node import Node
+
+# first value in each tuple is the value to test and the second is the flag indicating if this should work
+BOOL_OPTIONS_NONE_OKAY = [(False, True), (True, True), ("blub", False)]
+INT_OPTIONS_NONE_OKAY = [(0, True), (None, True), ("blub", False)]
+INT_OPTIONS_NONE_NOT_OKAY = [(0, True), (None, False), ("blub", False)]
+FLOAT_OPTIONS_NONE_OKAY = [
+    (-1.0, True),
+    (None, True),
+    ("blub", False),
+]
+FLOAT_OPTIONS_NONE_NOT_OKAY = [
+    (-1.0, True),
+    (None, False),
+    ("blub", False),
+]
+NODE_OPTIONS_NONE_OKAY = [
+    (Node(), True),
+    (None, True),
+    ("blub", False),
+]
+STR_OPTIONS_NONE_OKAY = [("blub", True), (None, True), (1.0, False)]
+STR_OPTIONS_NONE_NOT_OKAY = [
+    ("blub", True),
+    (None, False),
+    (1, False),
+    (1.0, False),
+]
diff --git a/tests/decisiontree/test_estimators.py b/tests/decisiontree/test_estimators.py
new file mode 100644
index 0000000..20092c7
--- /dev/null
+++ b/tests/decisiontree/test_estimators.py
@@ -0,0 +1,184 @@
+import numpy as np
+import pytest
+from sklearn.utils.estimator_checks import parametrize_with_checks
+
+import random_tree_models.params
+from random_tree_models.decisiontree import (
+    DecisionTreeClassifier,
+    DecisionTreeRegressor,
+)
+from random_tree_models.decisiontree.estimators import DecisionTreeTemplate
+from random_tree_models.decisiontree.node import Node
+from random_tree_models.params import MetricNames
+from tests.conftest import expected_failed_checks
+
+
+class TestDecisionTreeTemplate:
+    model = DecisionTreeTemplate(measure_name=MetricNames.entropy)
+    X = np.random.normal(size=(100, 10))
+    y = np.random.normal(size=(100,))
+
+    def test_tree_(self):
+        assert not hasattr(self.model, "tree_")
+
+    def test_growth_params_(self):
+        assert not hasattr(self.model, "growth_params_")
+
+        self.model._organize_growth_parameters()
+        assert isinstance(
+            self.model.growth_params_, random_tree_models.params.TreeGrowthParameters
+        )
+
+    def test_fit(self):
+        try:
+            self.model.fit(None, None)  # type: ignore
+        except NotImplementedError as ex:
+            pytest.xfail("DecisionTreeTemplate.fit expectedly refused call")
+
+    def test_predict(self):
+        try:
+            self.model.predict(None)  # type: ignore
+        except NotImplementedError as ex:
+            pytest.xfail("DecisionTreeTemplate.predict expectedly refused call")
+
+    def test_select_samples_and_features_no_sampling(self):
+        self.model.frac_features = 1.0
+        self.model.frac_subsamples = 1.0
+        self.model._organize_growth_parameters()
+
+        # line to test
+        X, y, ix_features = self.model._select_samples_and_features(self.X, self.y)
+
+        assert np.allclose(X, self.X)
+        assert np.allclose(y, self.y)
+        assert np.allclose(ix_features, np.arange(0, self.X.shape[1], 1))
+
+    def test_select_samples_and_features_with_column_sampling(self):
+        self.model.frac_features = 0.5
+        self.model.frac_subsamples = 1.0
+        self.model._organize_growth_parameters()
+
+        # line to test
+        X, y, ix_features = self.model._select_samples_and_features(self.X, self.y)
+
+        assert np.isclose(
+            X.shape[1], self.X.shape[1] * self.model.frac_features, atol=1
+        )
+        assert np.isclose(y.shape[0], self.y.shape[0])
+        assert all([ix in np.arange(0, self.X.shape[1], 1) for ix in ix_features])
+
+    def test_select_samples_and_features_with_row_sampling(self):
+        self.model.frac_features = 1.0
+        self.model.frac_subsamples = 0.5
+        self.model._organize_growth_parameters()
+
+        # line to test
+        X, y, ix_features = self.model._select_samples_and_features(self.X, self.y)
+
+        assert np.isclose(X.shape[0], self.X.shape[0] * self.model.frac_subsamples)
+        assert np.isclose(y.shape[0], self.y.shape[0] * self.model.frac_subsamples)
+        assert np.allclose(ix_features, np.arange(0, self.X.shape[1], 1))
+
+    def test_select_samples_and_features_with_column_and_row_sampling(self):
+        self.model.frac_features = 0.5
+        self.model.frac_subsamples = 0.5
+        self.model._organize_growth_parameters()
+
+        # line to test
+        X, y, ix_features = self.model._select_samples_and_features(self.X, self.y)
+
+        assert np.isclose(
+            X.shape[1], self.X.shape[1] * self.model.frac_features, atol=1
+        )
+        assert np.isclose(X.shape[0], self.X.shape[0] * self.model.frac_subsamples)
+        assert np.isclose(y.shape[0], self.y.shape[0] * self.model.frac_subsamples)
+        assert all([ix in np.arange(0, self.X.shape[1], 1) for ix in ix_features])
+
+    def test_select_samples_and_features_sampling_reproducibility(self):
+        self.model.frac_features = 0.5
+        self.model.frac_subsamples = 0.5
+        self.model._organize_growth_parameters()
+
+        # line to test
+        X0, y0, ix_features0 = self.model._select_samples_and_features(self.X, self.y)
+        X1, y1, ix_features1 = self.model._select_samples_and_features(self.X, self.y)
+
+        assert np.allclose(X0, X1)
+        assert np.allclose(y0, y1)
+        assert np.allclose(ix_features0, ix_features1)
+
+    def test_select_features(self):
+        ix_features = np.arange(0, self.X.shape[1], 1)
+        _X = self.model._select_features(self.X, ix_features)
+        assert np.allclose(_X, self.X)
+
+        ix_features = np.array([0, 1, 2])
+        _X = self.model._select_features(self.X, ix_features)
+        assert _X.shape[1] == 3
+
+
+class TestDecisionTreeRegressor:
+    model = DecisionTreeRegressor()
+
+    X = np.array(
+        [
+            [-1, -1],
+            [1, -1],
+            [1, 1],
+            [-1, 1],
+        ]
+    )
+    y = np.array([0.0, 0.0, 1.0, 1.0])
+
+    def test_fit(self):
+        model = DecisionTreeRegressor()
+        model.fit(self.X, self.y)
+        assert isinstance(model.tree_, Node)
+
+    def test_predict(self):
+        model = DecisionTreeRegressor()
+        model.fit(self.X, self.y)
+        predictions = model.predict(self.X)
+        assert np.allclose(predictions, self.y)
+
+
+class TestDecisionTreeClassifier:
+    model = DecisionTreeClassifier()
+
+    X = np.array(
+        [
+            [-1, -1],
+            [1, -1],
+            [1, 1],
+            [-1, 1],
+        ]
+    )
+    y = np.array([False, False, True, True])
+
+    def test_classes_(self):
+        assert not hasattr(self.model, "classes_")
+
+    def test_fit(self):
+        model = DecisionTreeClassifier()
+        model.fit(self.X, self.y)
+        assert not hasattr(self.model, "classes_")
+        assert isinstance(model.tree_, Node)
+
+    def test_predict(self):
+        model = DecisionTreeClassifier()
+        model.fit(self.X, self.y)
+        predictions = model.predict(self.X)
+        assert (predictions == self.y).all()
+
+
+@parametrize_with_checks(
+    [DecisionTreeRegressor(), DecisionTreeClassifier()],
+    expected_failed_checks=expected_failed_checks,  # type: ignore
+)
+def test_dtree_estimators_with_sklearn_checks(estimator, check):
+    """Test of estimators using scikit-learn test suite
+
+    Reference: https://scikit-learn.org/stable/modules/generated/sklearn.utils.estimator_checks.parametrize_with_checks.html#sklearn.utils.estimator_checks.parametrize_with_checks
+    """
+
+    check(estimator)
diff --git a/tests/decisiontree/test_node.py b/tests/decisiontree/test_node.py
new file mode 100644
index 0000000..9203810
--- /dev/null
+++ b/tests/decisiontree/test_node.py
@@ -0,0 +1,84 @@
+import pytest
+from pydantic import ValidationError
+
+from random_tree_models.decisiontree.node import Node
+from random_tree_models.decisiontree.split_objects import SplitScore
+
+from .conftest import (
+    BOOL_OPTIONS_NONE_OKAY,
+    FLOAT_OPTIONS_NONE_OKAY,
+    INT_OPTIONS_NONE_OKAY,
+    NODE_OPTIONS_NONE_OKAY,
+    STR_OPTIONS_NONE_OKAY,
+)
+
+
+@pytest.mark.parametrize(
+    "int_val, float_val, node_val, str_val, bool_val",
+    [
+        (int_val, float_val, node_val, str_val, bool_val)
+        for int_val in INT_OPTIONS_NONE_OKAY
+        for float_val in FLOAT_OPTIONS_NONE_OKAY
+        for node_val in NODE_OPTIONS_NONE_OKAY
+        for str_val in STR_OPTIONS_NONE_OKAY
+        for bool_val in BOOL_OPTIONS_NONE_OKAY
+    ],
+)
+def test_Node(int_val, float_val, node_val, str_val, bool_val):
+    array_column, array_column_okay = int_val
+    threshold, threshold_okay = float_val
+    prediction, prediction_okay = float_val
+    left, left_okay = node_val
+    right, right_okay = node_val
+    n_obs, n_obs_okay = int_val
+    reason, reason_okay = str_val
+    default_is_left, default_is_left_okay = bool_val
+
+    is_okay = all(
+        [
+            array_column_okay,
+            threshold_okay,
+            prediction_okay,
+            left_okay,
+            right_okay,
+            n_obs_okay,
+            reason_okay,
+            default_is_left_okay,
+        ]
+    )
+    measure = SplitScore(name="blub", value=1.0)
+    try:
+        # line to test
+        node = Node(
+            array_column=array_column,
+            threshold=threshold,
+            prediction=prediction,
+            default_is_left=default_is_left,
+            left=left,
+            right=right,
+            measure=measure,
+            n_obs=n_obs,
+            reason=reason,
+        )
+    except ValidationError as ex:
+        if is_okay:
+            raise ex
+        else:
+            pytest.xfail("SplitScore validation failed as expected")
+    else:
+        for att in [
+            "array_column",
+            "threshold",
+            "prediction",
+            "default_is_left",
+            "left",
+            "right",
+            "measure",
+            "n_obs",
+            "reason",
+            "node_id",
+        ]:
+            assert hasattr(node, att), f"{att=} missing in Node"
+        assert node.is_leaf == ((left is None) and (right is None)), (
+            f"left: {left is None} right: {right is None}"
+        )
diff --git a/tests/decisiontree/test_predict.py b/tests/decisiontree/test_predict.py
new file mode 100644
index 0000000..f0806d7
--- /dev/null
+++ b/tests/decisiontree/test_predict.py
@@ -0,0 +1,69 @@
+import numpy as np
+import pytest
+
+from random_tree_models.decisiontree.node import Node
+from random_tree_models.decisiontree.predict import find_leaf_node, predict_with_tree
+
+
+@pytest.mark.parametrize(
+    "x,exp",
+    [
+        (np.array([-1, -1]), 0.0),
+        (np.array([1, -1]), 1.0),
+        (np.array([1, 1]), 2.0),
+        (np.array([-1, 1]), 3.0),
+    ],
+)
+def test_find_leaf_node(x: np.ndarray, exp: float):
+    tree = Node(
+        array_column=0,
+        threshold=0.0,
+        left=Node(
+            array_column=1,
+            threshold=0.0,
+            left=Node(prediction=0.0),
+            right=Node(prediction=3.0),
+        ),
+        right=Node(
+            array_column=1,
+            threshold=0.0,
+            left=Node(prediction=1.0),
+            right=Node(prediction=2.0),
+        ),
+    )
+    # line to test
+    leaf = find_leaf_node(tree, x)
+
+    assert leaf.prediction == exp
+
+
+def test_predict_with_tree():
+    X = np.array(
+        [
+            [-1.0, -1.0],
+            [1.0, -1.0],
+            [1.0, 1.0],
+            [-1.0, 1.0],
+        ]
+    )
+    tree = Node(
+        array_column=0,
+        threshold=0.0,
+        left=Node(
+            array_column=1,
+            threshold=0.0,
+            left=Node(prediction=0.0),
+            right=Node(prediction=3.0),
+        ),
+        right=Node(
+            array_column=1,
+            threshold=0.0,
+            left=Node(prediction=1.0),
+            right=Node(prediction=2.0),
+        ),
+    )
+
+    # line to test
+    predictions = predict_with_tree(tree, X)
+
+    assert np.allclose(predictions, np.arange(0, 4, 1))
diff --git a/tests/decisiontree/test_split.py b/tests/decisiontree/test_split.py
new file mode 100644
index 0000000..266b5de
--- /dev/null
+++ b/tests/decisiontree/test_split.py
@@ -0,0 +1,761 @@
+from typing import Generator
+
+import numpy as np
+import pytest
+from pydantic import ValidationError
+from scipy import stats
+
+from random_tree_models.decisiontree.node import Node
+from random_tree_models.decisiontree.split import (
+    BestSplit,
+    check_if_split_sensible,
+    find_best_split,
+    get_column,
+    get_thresholds_and_target_groups,
+    select_arrays_for_child_node,
+    select_thresholds,
+)
+from random_tree_models.decisiontree.split_objects import SplitScore
+from random_tree_models.params import (
+    ColumnSelectionMethod,
+    ColumnSelectionParameters,
+    ThresholdSelectionMethod,
+    ThresholdSelectionParameters,
+    TreeGrowthParameters,
+)
+
+from .conftest import (
+    BOOL_OPTIONS_NONE_OKAY,
+    FLOAT_OPTIONS_NONE_NOT_OKAY,
+    INT_OPTIONS_NONE_NOT_OKAY,
+)
+
+
+@pytest.mark.parametrize(
+    "score,column,threshold,target_groups,default_is_left",
+    [
+        (score, column, threshold, target_groups, default_is_left)
+        for score in FLOAT_OPTIONS_NONE_NOT_OKAY
+        for column in INT_OPTIONS_NONE_NOT_OKAY
+        for threshold in FLOAT_OPTIONS_NONE_NOT_OKAY
+        for target_groups in [
+            (np.array([1, 2, 3]), True),
+            (np.array([]), True),
+            (None, False),
+        ]
+        for default_is_left in BOOL_OPTIONS_NONE_OKAY
+    ],
+)
+def test_BestSplit(score, column, threshold, target_groups, default_is_left):
+    score, score_okay = score
+    column, column_okay = column
+    threshold, threshold_okay = threshold
+    target_groups, target_groups_okay = target_groups
+    default_is_left, default_is_left_okay = default_is_left
+
+    is_okay = all(
+        [
+            score_okay,
+            column_okay,
+            threshold_okay,
+            target_groups_okay,
+            default_is_left_okay,
+        ]
+    )
+    is_bad = not is_okay
+
+    try:
+        # line to test
+        best = BestSplit(
+            score=score,
+            column=column,
+            threshold=threshold,
+            target_groups=target_groups,
+            default_is_left=default_is_left,
+        )
+    except ValidationError as ex:
+        if is_okay:
+            raise ex
+        else:
+            pytest.xfail("BestSplit validation failed as expected")
+    else:
+        if is_bad:
+            pytest.fail(
+                f"BestSplit validation did pass unexpectedly with {score=}, {column=}, {threshold=}, {target_groups=}, {score_okay=}, {column_okay=}, {threshold_okay=}, {target_groups_okay=}, {is_bad=}"
+            )
+
+        assert hasattr(best, "score")
+        assert hasattr(best, "column")
+        assert hasattr(best, "threshold")
+        assert hasattr(best, "target_groups")
+        assert hasattr(best, "default_is_left")
+
+
+class Test_select_thresholds:
+    """
+    bruteforce: returns all possible thresholds from the 2nd onward
+    random:
+    * returns a random subset of the thresholds if n_thresholds is smaller than the number of available values
+    * is reproducible with random_state
+    quantile: returns num_quantile_steps thresholds which are ordered
+    uniform: returns single value between min and max
+    """
+
+    def test_bruteforce(self):
+        params = ThresholdSelectionParameters(
+            method=ThresholdSelectionMethod.bruteforce
+        )
+        feature_values = np.linspace(-1, 1, 100)
+        rng = np.random.RandomState(42)
+
+        # line to test
+        thresholds = select_thresholds(feature_values, params, rng=rng)
+
+        assert np.allclose(thresholds, feature_values[1:])
+
+    def test_random_when_to_few_values(self):
+        params = ThresholdSelectionParameters(
+            method=ThresholdSelectionMethod.random, n_thresholds=1000
+        )
+        feature_values = np.linspace(-1, 1, 100)
+        rng = np.random.RandomState(42)
+
+        # line to test
+        thresholds = select_thresholds(feature_values, params, rng=rng)
+
+        assert np.allclose(thresholds, feature_values[1:])
+
+    def test_random_when_enough_values(self):
+        n_thresholds = 10
+        params = ThresholdSelectionParameters(
+            method=ThresholdSelectionMethod.random, n_thresholds=n_thresholds
+        )
+        feature_values = np.linspace(-1, 1, 100)
+        rng = np.random.RandomState(42)
+
+        # line to test
+        thresholds0 = select_thresholds(feature_values, params, rng=rng)
+
+        assert thresholds0.shape == (n_thresholds,)
+        assert np.unique(thresholds0).shape == (n_thresholds,)
+
+    def test_random_reproducible(self):
+        n_thresholds = 10
+        params = ThresholdSelectionParameters(
+            method=ThresholdSelectionMethod.random, n_thresholds=n_thresholds
+        )
+        feature_values = np.linspace(-1, 1, 100)
+
+        # line to test
+        rng = np.random.RandomState(42)
+        thresholds0 = select_thresholds(feature_values, params, rng=rng)
+        rng = np.random.RandomState(42)
+        thresholds1 = select_thresholds(feature_values, params, rng=rng)
+
+        assert np.allclose(thresholds0, thresholds1)
+
+    def test_random_produces_changing_thresholds(self):
+        n_thresholds = 10
+        params = ThresholdSelectionParameters(
+            method=ThresholdSelectionMethod.random, n_thresholds=n_thresholds
+        )
+        feature_values = np.linspace(-1, 1, 100)
+        rng = np.random.RandomState(42)
+
+        # line to test
+        thresholds0 = select_thresholds(feature_values, params, rng=rng)
+        thresholds1 = select_thresholds(feature_values, params, rng=rng)
+
+        assert not np.allclose(thresholds0, thresholds1)
+
+    def test_quantile(self):
+        n_thresholds = 10
+        params = ThresholdSelectionParameters(
+            method=ThresholdSelectionMethod.quantile,
+            n_thresholds=n_thresholds,
+            quantile=0.1,
+        )
+        feature_values = np.linspace(-1, 1, 100)
+        rng = np.random.RandomState(42)
+
+        # line to test
+        thresholds = select_thresholds(feature_values, params, rng=rng)
+
+        assert thresholds.shape == (11,)
+        assert (thresholds[1:] > thresholds[:-1]).all()
+
+    def test_uniform(self):
+        n_thresholds = 10
+        params = ThresholdSelectionParameters(
+            method=ThresholdSelectionMethod.uniform, n_thresholds=n_thresholds
+        )
+        rng = np.random.RandomState(42)
+        feature_values = rng.normal(loc=0, scale=1, size=100)
+
+        # line to test
+        thresholds = select_thresholds(feature_values, params, rng=rng)
+
+        assert thresholds.shape == (1,)
+        assert thresholds[0] >= feature_values.min()
+        assert thresholds[0] <= feature_values.max()
+
+
+class Test_get_thresholds_and_target_groups:
+    """
+    * produces a generator
+    * produces twice as many items to iterate in the case of missing values
+    * each item contains the current threshold, the target groups and a boolean that indicates the default direction
+    * the default direction is always None if there are no missing values and otherwise boolean
+    """
+
+    def test_produces_generator(self):
+        feature_values = np.linspace(-1, 1, 10)
+        threshold_params = ThresholdSelectionParameters(
+            method=ThresholdSelectionMethod.bruteforce
+        )
+        rng = np.random.RandomState(42)
+
+        # line to test
+        gen = get_thresholds_and_target_groups(
+            feature_values, threshold_params, rng=rng
+        )
+
+        assert isinstance(gen, Generator)
+
+    def test_finite_only_case(self):
+        feature_values = np.linspace(-1, 1, 10)
+        threshold_params = ThresholdSelectionParameters(
+            method=ThresholdSelectionMethod.bruteforce
+        )
+        rng = np.random.RandomState(42)
+
+        # line to test
+        thresholds_and_target_groups = get_thresholds_and_target_groups(
+            feature_values, threshold_params, rng=rng
+        )
+
+        c = 0
+        for (
+            threshold,
+            target_groups,
+            default_direction_is_left,
+        ) in thresholds_and_target_groups:
+            assert isinstance(target_groups, np.ndarray)
+            assert threshold in feature_values[1:]
+            assert target_groups.dtype == bool
+            assert default_direction_is_left is None
+            c += 1
+
+        assert c == len(feature_values[1:])
+
+    def test_with_missing_case(self):
+        feature_values = np.linspace(-1, 1, 10)
+        feature_values[5] = np.nan
+        threshold_params = ThresholdSelectionParameters(
+            method=ThresholdSelectionMethod.bruteforce
+        )
+        rng = np.random.RandomState(42)
+
+        thresholds_and_target_groups = get_thresholds_and_target_groups(
+            feature_values, threshold_params, rng=rng
+        )
+
+        # line to test
+        c = 0
+        for (
+            threshold,
+            target_groups,
+            default_direction_is_left,
+        ) in thresholds_and_target_groups:
+            assert isinstance(target_groups, np.ndarray)
+            assert threshold in feature_values[1:]
+            assert target_groups.dtype == bool
+            assert default_direction_is_left in [True, False]
+            c += 1
+
+        assert c == 2 * (len(feature_values[1:]) - 1)
+
+
+class Test_get_column:
+    """
+    * method ascending just returns ascending integer list for columns
+    * method random returns random integer list for columns
+    * method largest_delta returns column indices with largest feature max-min differences first
+    * if n_trials is given it is used to shorten the returned list
+    """
+
+    def test_ascending(self):
+        n_columns = 10
+        n_trials = None
+        column_params = ColumnSelectionParameters(
+            method=ColumnSelectionMethod.ascending, n_trials=n_trials
+        )
+        X = np.random.normal(size=(100, n_columns))
+        rng = np.random.RandomState(42)
+
+        # line to test
+        columns = get_column(X, column_params, rng=rng)
+
+        assert columns == list(range(n_columns))
+
+    def test_ascending_first_n_trials_columns(self):
+        n_columns = 10
+        n_trials = 5
+        column_params = ColumnSelectionParameters(
+            method=ColumnSelectionMethod.ascending, n_trials=n_trials
+        )
+        X = np.random.normal(size=(100, n_columns))
+        rng = np.random.RandomState(42)
+
+        # line to test
+        columns = get_column(X, column_params, rng=rng)
+
+        assert columns == list(range(n_trials))
+
+    def test_random(self):
+        n_columns = 10
+        n_trials = None
+        column_params = ColumnSelectionParameters(
+            method=ColumnSelectionMethod.random, n_trials=n_trials
+        )
+        X = np.random.normal(size=(100, n_columns))
+        rng = np.random.RandomState(42)
+
+        # line to test
+        columns = get_column(X, column_params, rng=rng)
+
+        assert not all([i0 < i1 for i0, i1 in zip(columns[:-1], columns[1:])])
+        assert sorted(columns) == list(range(n_columns))
+
+    def test_random_is_reproducible(self):
+        n_columns = 10
+        n_trials = None
+        column_params = ColumnSelectionParameters(
+            method=ColumnSelectionMethod.random, n_trials=n_trials
+        )
+        X = np.random.normal(size=(100, n_columns))
+
+        # line to test
+        rng = np.random.RandomState(42)
+        columns0 = get_column(X, column_params, rng=rng)
+        rng = np.random.RandomState(42)
+        columns1 = get_column(X, column_params, rng=rng)
+
+        assert columns0 == columns1
+
+    def test_largest_delta(self):
+        n_columns = 5
+        n_trials = None
+        column_params = ColumnSelectionParameters(
+            method=ColumnSelectionMethod.largest_delta, n_trials=n_trials
+        )
+        rng = np.random.RandomState(42)
+        X = np.array([[0, 0.001], [0, 0.01], [0, 0.1], [0, 1.0], [0, 10.0]]).T
+
+        n_repetitions = 100
+        all_columns = np.zeros((n_repetitions, n_columns), dtype=int)
+
+        for i in range(n_repetitions):
+            # line to test
+            all_columns[i, :] = get_column(X, column_params, rng=rng)
+
+        assert np.allclose(stats.mode(all_columns, axis=0).mode, [4, 3, 2, 1, 0])
+
+
+class Test_find_best_split:
+    """
+    cases to test for all measure_name values:
+    * simple & 1d is split as expected
+        * classification: y = 1 class, y = 2 classes, y = 3 classes
+        * regression: y = 1 value, y = 2 values, y = 3 values where 2 are more similar
+    * simple & 2d is split as expected
+        * same as 1d but 1st column useless and 2nd contains the needed info
+    """
+
+    X_1D = np.array(
+        [
+            [
+                1,
+            ],
+            [
+                2,
+            ],
+            [
+                3,
+            ],
+            [
+                4,
+            ],
+        ]
+    )
+
+    X_1D_missing = np.array(
+        [
+            [
+                1,
+            ],
+            [
+                np.nan,
+            ],
+            [
+                3,
+            ],
+            [
+                4,
+            ],
+        ]
+    )
+
+    X_2D = np.hstack((np.ones_like(X_1D), X_1D))
+    X_2D_missing = np.hstack((np.ones_like(X_1D_missing), X_1D_missing))
+
+    y_1class = np.ones(X_1D.shape[0], dtype=bool)
+    y_2class = np.array([False, False, True, True])
+    y_3class = np.array([0, 0, 1, 2])
+
+    y_1reg = np.ones(X_1D.shape[0])
+    y_2reg = np.array([-1.0, -1.0, 1.0, 1.0])
+    y_3reg = np.array([-1.0, -0.9, 1.0, 2.0])
+
+    # xgboost - least squares
+    g_1reg = np.array([0.0, 0.0, 0.0, 0.0])
+    g_2reg = np.array([-1.0, -1.0, 1.0, 1.0])
+    g_3reg = np.array([-1.275, -1.175, 0.725, 1.725])
+
+    h_1reg = np.array([-1.0, -1.0, -1.0, -1.0])
+    h_2reg = np.array([-1.0, -1.0, -1.0, -1.0])
+    h_3reg = np.array([-1.0, -1.0, -1.0, -1.0])
+
+    # xgboost - binomial log-likelihood
+    g_2class = np.array([-1.0, -1.0, 1.0, 1.0])
+    h_2class = np.array([-1.0, -1.0, -1.0, -1.0])
+
+    @pytest.mark.parametrize(
+        "y,ix,measure_name,g,h",
+        [
+            (y_1class, None, "gini", None, None),
+            (y_2class, 2, "gini", None, None),
+            (y_3class, 2, "gini", None, None),
+            (y_1class, None, "entropy", None, None),
+            (y_2class, 2, "entropy", None, None),
+            (y_3class, 2, "entropy", None, None),
+            (y_1reg, None, "variance", None, None),
+            (y_2reg, 2, "variance", None, None),
+            (y_3reg, 2, "variance", None, None),
+            (y_1reg, None, "xgboost", g_1reg, h_1reg),
+            (y_2reg, 2, "xgboost", g_2reg, h_2reg),
+            (y_3reg, 2, "xgboost", g_3reg, h_3reg),
+            # (y_1class, None, "xgboost", g_1class, h_1class), # currently not handled
+            (y_2class, 2, "xgboost", g_2class, h_2class),
+            # (y_3class, 2, "xgboost", g_3class, h_3class), # currently not handled
+        ],
+    )
+    def test_1d(
+        self,
+        y: np.ndarray,
+        ix: int,
+        measure_name: str,
+        g: np.ndarray,
+        h: np.ndarray,
+    ):
+        is_homogenous = len(np.unique(y)) == 1
+        grow_params = TreeGrowthParameters(max_depth=2)
+        try:
+            # line to test
+            best = find_best_split(
+                self.X_1D,
+                y,
+                measure_name=measure_name,
+                g=g,
+                h=h,
+                growth_params=grow_params,
+            )
+        except ValueError as ex:
+            if is_homogenous:
+                pytest.xfail("Splitting a homogneous y failed as expected")
+            else:
+                raise ex
+        else:
+            if is_homogenous:
+                pytest.fail("Splitting a homogneous y passed unexpectedly")
+
+            threshold_exp = float(self.X_1D[ix, 0])
+            assert best.threshold == threshold_exp
+
+    @pytest.mark.parametrize(
+        "y,ix,measure_name,g,h",
+        [
+            (y_1class, None, "gini", None, None),
+            (y_2class, 2, "gini", None, None),
+            (y_3class, 2, "gini", None, None),
+            (y_1class, None, "entropy", None, None),
+            (y_2class, 2, "entropy", None, None),
+            (y_3class, 2, "entropy", None, None),
+            (y_1reg, None, "variance", None, None),
+            (y_2reg, 2, "variance", None, None),
+            (y_3reg, 2, "variance", None, None),
+            (y_1reg, None, "xgboost", g_1reg, h_1reg),
+            (y_2reg, 2, "xgboost", g_2reg, h_2reg),
+            (y_3reg, 2, "xgboost", g_3reg, h_3reg),
+            # (y_1class, None, "xgboost", g_1class, h_1class), # currently not handled
+            (y_2class, 2, "xgboost", g_2class, h_2class),
+            # (y_3class, 2, "xgboost", g_3class, h_3class), # currently not handled
+        ],
+    )
+    def test_1d_missing(
+        self,
+        y: np.ndarray,
+        ix: int,
+        measure_name: str,
+        g: np.ndarray,
+        h: np.ndarray,
+    ):
+        is_homogenous = len(np.unique(y)) == 1
+        grow_params = TreeGrowthParameters(max_depth=2)
+        try:
+            # line to test
+            best = find_best_split(
+                self.X_1D_missing,
+                y,
+                measure_name=measure_name,
+                g=g,
+                h=h,
+                growth_params=grow_params,
+            )
+        except ValueError as ex:
+            if is_homogenous:
+                pytest.xfail("Splitting a homogneous y failed as expected")
+            else:
+                raise ex
+        else:
+            if is_homogenous:
+                pytest.fail("Splitting a homogneous y passed unexpectedly")
+
+            threshold_exp = float(self.X_1D_missing[ix, 0])
+            assert best.threshold == threshold_exp
+
+    @pytest.mark.parametrize(
+        "y,ix,measure_name,g,h",
+        [
+            (y_1class, None, "gini", None, None),
+            (y_2class, 2, "gini", None, None),
+            (y_3class, 2, "gini", None, None),
+            (y_1class, None, "entropy", None, None),
+            (y_2class, 2, "entropy", None, None),
+            (y_3class, 2, "entropy", None, None),
+            (y_1reg, None, "variance", None, None),
+            (y_2reg, 2, "variance", None, None),
+            (y_3reg, 2, "variance", None, None),
+            (y_1reg, None, "xgboost", g_1reg, h_1reg),
+            (y_2reg, 2, "xgboost", g_2reg, h_2reg),
+            (y_3reg, 2, "xgboost", g_3reg, h_3reg),
+            # (y_1class, None, "xgboost", g_1class, h_1class), # currently not handled
+            (y_2class, 2, "xgboost", g_2class, h_2class),
+            # (y_3class, 2, "xgboost", g_3class, h_3class), # currently not handled
+        ],
+    )
+    def test_2d(
+        self,
+        y: np.ndarray,
+        ix: int,
+        measure_name: str,
+        g: np.ndarray,
+        h: np.ndarray,
+    ):
+        is_homogenous = len(np.unique(y)) == 1
+        growth_params = TreeGrowthParameters(max_depth=2)
+        try:
+            # line to test
+            best = find_best_split(
+                self.X_2D,
+                y,
+                measure_name,
+                g=g,
+                h=h,
+                growth_params=growth_params,
+            )
+        except ValueError as ex:
+            if is_homogenous:
+                pytest.xfail("Splitting a homogneous y failed as expected")
+            else:
+                raise ex
+        else:
+            if is_homogenous:
+                pytest.fail("Splitting a homogneous y passed unexpectedly")
+
+            assert best.column == 1
+            threshold_exp = float(self.X_2D[ix, 1])
+            assert best.threshold == threshold_exp
+
+    @pytest.mark.parametrize(
+        "y,ix,measure_name,g,h",
+        [
+            (y_1class, None, "gini", None, None),
+            (y_2class, 2, "gini", None, None),
+            (y_3class, 2, "gini", None, None),
+            (y_1class, None, "entropy", None, None),
+            (y_2class, 2, "entropy", None, None),
+            (y_3class, 2, "entropy", None, None),
+            (y_1reg, None, "variance", None, None),
+            (y_2reg, 2, "variance", None, None),
+            (y_3reg, 2, "variance", None, None),
+            (y_1reg, None, "xgboost", g_1reg, h_1reg),
+            (y_2reg, 2, "xgboost", g_2reg, h_2reg),
+            (y_3reg, 2, "xgboost", g_3reg, h_3reg),
+            # (y_1class, None, "xgboost", g_1class, h_1class), # currently not handled
+            (y_2class, 2, "xgboost", g_2class, h_2class),
+            # (y_3class, 2, "xgboost", g_3class, h_3class), # currently not handled
+        ],
+    )
+    def test_2d_missing(
+        self,
+        y: np.ndarray,
+        ix: int,
+        measure_name: str,
+        g: np.ndarray,
+        h: np.ndarray,
+    ):
+        is_homogenous = len(np.unique(y)) == 1
+        growth_params = TreeGrowthParameters(max_depth=2)
+        try:
+            # line to test
+            best = find_best_split(
+                self.X_2D_missing,
+                y,
+                measure_name,
+                g=g,
+                h=h,
+                growth_params=growth_params,
+            )
+        except ValueError as ex:
+            if is_homogenous:
+                pytest.xfail("Splitting a homogneous y failed as expected")
+            else:
+                raise ex
+        else:
+            if is_homogenous:
+                pytest.fail("Splitting a homogneous y passed unexpectedly")
+
+            assert best.column == 1
+            threshold_exp = float(self.X_2D_missing[ix, 1])
+            assert best.threshold == threshold_exp
+
+
+@pytest.mark.parametrize(
+    "best,parent_node,growth_params,is_no_sensible_split_exp",
+    [
+        # parent is None #1
+        (
+            BestSplit(score=-1.0, column=0, threshold=0.0, target_groups=np.array([])),
+            None,
+            TreeGrowthParameters(max_depth=2),
+            False,
+        ),
+        # parent is given but its measure has no value
+        (
+            BestSplit(score=-1.0, column=0, threshold=0.0, target_groups=np.array([])),
+            Node(measure=SplitScore("bla")),
+            TreeGrowthParameters(max_depth=2),
+            False,
+        ),
+        # split is sufficient
+        (
+            BestSplit(
+                score=-1.0,
+                column=0,
+                threshold=0.0,
+                target_groups=np.array([False, True]),
+            ),
+            Node(measure=SplitScore("bla", value=-1.1)),
+            TreeGrowthParameters(max_depth=2, min_improvement=0.01),
+            False,
+        ),
+        # split is insufficient - because min gain not exceeded
+        (
+            BestSplit(
+                score=-1.0,
+                column=0,
+                threshold=0.0,
+                target_groups=np.array([False, True]),
+            ),
+            Node(measure=SplitScore("bla", value=-1.1)),
+            TreeGrowthParameters(max_depth=2, min_improvement=0.2),
+            True,
+        ),
+        # split is insufficient - because all items sorted left
+        (
+            BestSplit(
+                score=-1.0,
+                column=0,
+                threshold=0.0,
+                target_groups=np.array([True, True]),
+            ),
+            Node(measure=SplitScore("bla", value=-1.1)),
+            TreeGrowthParameters(max_depth=2, min_improvement=0.0),
+            True,
+        ),
+        # split is insufficient - because all items sorted right
+        (
+            BestSplit(
+                score=-1.0,
+                column=0,
+                threshold=0.0,
+                target_groups=np.array([False, False]),
+            ),
+            Node(measure=SplitScore("bla", value=-1.1)),
+            TreeGrowthParameters(max_depth=2, min_improvement=0.0),
+            True,
+        ),
+    ],
+)
+def test_check_if_split_sensible(
+    best: BestSplit,
+    parent_node: Node,
+    growth_params: TreeGrowthParameters,
+    is_no_sensible_split_exp: bool,
+):
+    # line to test
+    is_not_sensible_split, gain = check_if_split_sensible(
+        best, parent_node, growth_params
+    )
+
+    assert is_not_sensible_split == is_no_sensible_split_exp
+    if parent_node is None or parent_node.measure.value is None:  # type: ignore
+        assert gain is None
+
+
+@pytest.mark.parametrize("go_left", [True, False])
+def test_select_arrays_for_child_node(go_left: bool):
+    best = BestSplit(
+        score=1.0,
+        column=0,
+        threshold=2.0,
+        target_groups=np.array([True, True, False]),
+    )
+
+    X = np.array([[1], [2], [3]])
+    y = np.array([True, True, False])
+    g = np.array([1, 2, 3])
+    h = np.array([4, 5, 6])
+
+    # line to test
+    _X, _y, _g, _h = select_arrays_for_child_node(
+        go_left=go_left,
+        best=best,
+        X=X,
+        y=y,
+        g=g,
+        h=h,
+    )
+    assert _g is not None
+    assert _h is not None
+    if go_left:
+        assert np.allclose(_X, X[:2])
+        assert np.allclose(_y, y[:2])
+        assert np.allclose(_g, g[:2])
+        assert np.allclose(_h, h[:2])
+    else:
+        assert np.allclose(_X, X[2:])
+        assert np.allclose(_y, y[2:])
+        assert np.allclose(_g, g[2:])
+        assert np.allclose(_h, h[2:])
diff --git a/tests/decisiontree/test_split_objects.py b/tests/decisiontree/test_split_objects.py
new file mode 100644
index 0000000..e6942f9
--- /dev/null
+++ b/tests/decisiontree/test_split_objects.py
@@ -0,0 +1,37 @@
+import pytest
+from pydantic import ValidationError
+
+from random_tree_models.decisiontree.split_objects import SplitScore
+
+from .conftest import FLOAT_OPTIONS_NONE_OKAY, STR_OPTIONS_NONE_NOT_OKAY
+
+
+@pytest.mark.parametrize(
+    "name,value",
+    [
+        (name, value)
+        for name in STR_OPTIONS_NONE_NOT_OKAY
+        for value in FLOAT_OPTIONS_NONE_OKAY
+    ],
+)
+def test_SplitScore(name, value):
+    name, name_okay = name
+    value, value_okay = value
+    is_okay = name_okay and value_okay
+    is_bad = not is_okay
+    try:
+        # line to test
+        measure = SplitScore(name=name, value=value)
+    except ValidationError as ex:
+        if is_okay:
+            raise ValueError(f"whoops {name=} {value=} failed with {ex}")
+        else:
+            pytest.xfail("SplitScore validation failed as expected")
+    else:
+        if is_bad:
+            pytest.fail(
+                f"SplitScore test unexpectedly passed for {name=}, {value=}, {name_okay=}, {value_okay=}, {is_okay=}"
+            )
+
+        assert hasattr(measure, "name")
+        assert hasattr(measure, "value")
diff --git a/tests/decisiontree/test_train.py b/tests/decisiontree/test_train.py
new file mode 100644
index 0000000..ef5fb5b
--- /dev/null
+++ b/tests/decisiontree/test_train.py
@@ -0,0 +1,98 @@
+import numpy as np
+import pytest
+from dirty_equals import IsApprox
+from inline_snapshot import snapshot
+
+from random_tree_models.decisiontree.node import Node
+from random_tree_models.decisiontree.split_objects import SplitScore
+from random_tree_models.decisiontree.train import (
+    calc_leaf_weight_and_split_score,
+    check_is_baselevel,
+    grow_tree,
+)
+from random_tree_models.params import MetricNames, TreeGrowthParameters
+
+
+@pytest.mark.parametrize(
+    "y, depths",
+    [
+        (y, depths)
+        for y in [(np.array([1, 2]), False), (np.array([]), True)]
+        for depths in [(1, 2, False), (2, 2, True), (3, 2, True)]
+    ],
+)
+def test_check_is_baselevel(y, depths):
+    y, is_baselevel_exp_y = y
+    depth, max_depth, is_baselevel_exp_depth = depths
+    is_baselevel_exp = is_baselevel_exp_depth or is_baselevel_exp_y
+
+    # line to test
+    is_baselevel, msg = check_is_baselevel(y, depth=depth, max_depth=max_depth)
+
+    assert is_baselevel == is_baselevel_exp
+    assert isinstance(msg, str)
+
+
+def test_calc_leaf_weight_and_split_score():
+    y = np.array([True, True, False])
+    measure_name = MetricNames.gini
+    growth_params = TreeGrowthParameters(max_depth=2)
+    g = np.array([1, 2, 3])
+    h = np.array([4, 5, 6])
+
+    # line to test
+    leaf_weight, split_score = calc_leaf_weight_and_split_score(
+        y, measure_name, growth_params, g, h
+    )
+
+    assert leaf_weight == IsApprox(0.6666666666666666)
+    assert split_score == IsApprox(-0.4444444444444445)
+
+
+class Test_grow_tree:
+    X = np.array([[1], [2], [3]])
+    y = np.array([True, True, False])
+    target_groups = np.array([True, True, False])
+    measure_name = MetricNames.gini
+    depth_dummy = 0
+
+    def test_baselevel(self):
+        # test returned leaf node
+        growth_params = TreeGrowthParameters(max_depth=2)
+        parent_node = None
+
+        # line to test
+        leaf_node = grow_tree(
+            self.X,
+            self.y,
+            self.measure_name,
+            growth_params=growth_params,
+            parent_node=parent_node,
+            depth=self.depth_dummy,
+        )
+
+        assert leaf_node == snapshot(
+            Node(
+                array_column=0,
+                threshold=3.0,
+                prediction=0.6666666666666666,
+                right=Node(
+                    prediction=0.0,
+                    measure=SplitScore(name="gini", value=0.0),
+                    n_obs=1,
+                    reason="homogenous group",
+                    depth=1,
+                ),
+                left=Node(
+                    prediction=1.0,
+                    measure=SplitScore(name="gini", value=0.0),
+                    n_obs=2,
+                    reason="homogenous group",
+                    depth=1,
+                ),
+                measure=SplitScore(name="gini", value=0.0),
+                n_obs=3,
+                reason="",
+                depth=0,
+            )
+        )
diff --git a/tests/decisiontree/test_visualize.py b/tests/decisiontree/test_visualize.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_decisiontree.py b/tests/test_decisiontree.py
deleted file mode 100644
index 635085c..0000000
--- a/tests/test_decisiontree.py
+++ /dev/null
@@ -1,1320 +0,0 @@
-import types
-from unittest.mock import patch
-
-import numpy as np
-import pytest
-from pydantic import ValidationError
-from scipy import stats
-from sklearn.utils.estimator_checks import parametrize_with_checks
-
-import random_tree_models.decisiontree as dtree
-import random_tree_models.utils as utils
-from random_tree_models import scoring
-from random_tree_models.scoring import MetricNames
-from random_tree_models.utils import ThresholdSelectionMethod
-from tests.conftest import expected_failed_checks
-
-# first value in each tuple is the value to test and the second is the flag indicating if this should work
-BOOL_OPTIONS_NONE_OKAY = [(False, True), (True, True), ("blub", False)]
-INT_OPTIONS_NONE_OKAY = [(0, True), (None, True), ("blub", False)]
-INT_OPTIONS_NONE_NOT_OKAY = [(0, True), (None, False), ("blub", False)]
-FLOAT_OPTIONS_NONE_OKAY = [
-    (-1.0, True),
-    (None, True),
-    ("blub", False),
-]
-FLOAT_OPTIONS_NONE_NOT_OKAY = [
-    (-1.0, True),
-    (None, False),
-    ("blub", False),
-]
-NODE_OPTIONS_NONE_OKAY = [
-    (dtree.Node(), True),
-    (None, True),
-    ("blub", False),
-]
-STR_OPTIONS_NONE_OKAY = [("blub", True), (None, True), (1.0, False)]
-STR_OPTIONS_NONE_NOT_OKAY = [
-    ("blub", True),
-    (None, False),
-    (1, False),
-    (1.0, False),
-]
-
-
-@pytest.mark.parametrize(
-    "name,value",
-    [
-        (name, value)
-        for name in STR_OPTIONS_NONE_NOT_OKAY
-        for value in FLOAT_OPTIONS_NONE_OKAY
-    ],
-)
-def test_SplitScore(name, value):
-    name, name_okay = name
-    value, value_okay = value
-    is_okay = name_okay and value_okay
-    is_bad = not is_okay
-    try:
-        # line to test
-        measure = dtree.SplitScore(name=name, value=value)
-    except ValidationError as ex:
-        if is_okay:
-            raise ValueError(f"whoops {name=} {value=} failed with {ex}")
-        else:
-            pytest.xfail("SplitScore validation failed as expected")
-    else:
-        if is_bad:
-            pytest.fail(
-                f"SplitScore test unexpectedly passed for {name=}, {value=}, {name_okay=}, {value_okay=}, {is_okay=}"
-            )
-
-        assert hasattr(measure, "name")
-        assert hasattr(measure, "value")
-
-
-@pytest.mark.parametrize(
-    "int_val, float_val, node_val, str_val, bool_val",
-    [
-        (int_val, float_val, node_val, str_val, bool_val)
-        for int_val in INT_OPTIONS_NONE_OKAY
-        for float_val in FLOAT_OPTIONS_NONE_OKAY
-        for node_val in NODE_OPTIONS_NONE_OKAY
-        for str_val in STR_OPTIONS_NONE_OKAY
-        for bool_val in BOOL_OPTIONS_NONE_OKAY
-    ],
-)
-def test_Node(int_val, float_val, node_val, str_val, bool_val):
-    array_column, array_column_okay = int_val
-    threshold, threshold_okay = float_val
-    prediction, prediction_okay = float_val
-    left, left_okay = node_val
-    right, right_okay = node_val
-    n_obs, n_obs_okay = int_val
-    reason, reason_okay = str_val
-    default_is_left, default_is_left_okay = bool_val
-
-    is_okay = all(
-        [
-            array_column_okay,
-            threshold_okay,
-            prediction_okay,
-            left_okay,
-            right_okay,
-            n_obs_okay,
-            reason_okay,
-            default_is_left_okay,
-        ]
-    )
-    measure = dtree.SplitScore(name="blub", value=1.0)
-    try:
-        # line to test
-        node = dtree.Node(
-            array_column=array_column,
-            threshold=threshold,
-            prediction=prediction,
-            default_is_left=default_is_left,
-            left=left,
-            right=right,
-            measure=measure,
-            n_obs=n_obs,
-            reason=reason,
-        )
-    except ValidationError as ex:
-        if is_okay:
-            raise ex
-        else:
-            pytest.xfail("SplitScore validation failed as expected")
-    else:
-        for att in [
-            "array_column",
-            "threshold",
-            "prediction",
-            "default_is_left",
-            "left",
-            "right",
-            "measure",
-            "n_obs",
-            "reason",
-            "node_id",
-        ]:
-            assert hasattr(node, att), f"{att=} missing in Node"
-        assert node.is_leaf == ((left is None) and (right is None)), (
-            f"left: {left is None} right: {right is None}"
-        )
-
-
-@pytest.mark.parametrize(
-    "y, depths",
-    [
-        (y, depths)
-        for y in [(np.array([1, 2]), False), (np.array([]), True)]
-        for depths in [(1, 2, False), (2, 2, True), (3, 2, True)]
-    ],
-)
-def test_check_is_baselevel(y, depths):
-    node = dtree.Node()
-
-    y, is_baselevel_exp_y = y
-    depth, max_depth, is_baselevel_exp_depth = depths
-    is_baselevel_exp = is_baselevel_exp_depth or is_baselevel_exp_y
-
-    # line to test
-    is_baselevel, msg = dtree.check_is_baselevel(y, depth=depth, max_depth=max_depth)
-
-    assert is_baselevel == is_baselevel_exp
-    assert isinstance(msg, str)
-
-
-@pytest.mark.parametrize(
-    "score,column,threshold,target_groups,default_is_left",
-    [
-        (score, column, threshold, target_groups, default_is_left)
-        for score in FLOAT_OPTIONS_NONE_NOT_OKAY
-        for column in INT_OPTIONS_NONE_NOT_OKAY
-        for threshold in FLOAT_OPTIONS_NONE_NOT_OKAY
-        for target_groups in [
-            (np.array([1, 2, 3]), True),
-            (np.array([]), True),
-            (None, False),
-        ]
-        for default_is_left in BOOL_OPTIONS_NONE_OKAY
-    ],
-)
-def test_BestSplit(score, column, threshold, target_groups, default_is_left):
-    score, score_okay = score
-    column, column_okay = column
-    threshold, threshold_okay = threshold
-    target_groups, target_groups_okay = target_groups
-    default_is_left, default_is_left_okay = default_is_left
-
-    is_okay = all(
-        [
-            score_okay,
-            column_okay,
-            threshold_okay,
-            target_groups_okay,
-            default_is_left_okay,
-        ]
-    )
-    is_bad = not is_okay
-
-    try:
-        # line to test
-        best = dtree.BestSplit(
-            score=score,
-            column=column,
-            threshold=threshold,
-            target_groups=target_groups,
-            default_is_left=default_is_left,
-        )
-    except ValidationError as ex:
-        if is_okay:
-            raise ex
-        else:
-            pytest.xfail("BestSplit validation failed as expected")
-    else:
-        if is_bad:
-            pytest.fail(
-                f"BestSplit validation did pass unexpectedly with {score=}, {column=}, {threshold=}, {target_groups=}, {score_okay=}, {column_okay=}, {threshold_okay=}, {target_groups_okay=}, {is_bad=}"
-            )
-
-        assert hasattr(best, "score")
-        assert hasattr(best, "column")
-        assert hasattr(best, "threshold")
-        assert hasattr(best, "target_groups")
-        assert hasattr(best, "default_is_left")
-
-
-class Test_select_thresholds:
-    """
-    bruteforce: returns all possible thresholds from the 2nd onward
-    random:
-    * returns a random subset of the thresholds if n_thresholds smaller than avaliable values
-    * is reproducible with random_state
-    quantile: returns num_quantile_steps thresholds which are ordered
-    uniform: returns single value between min and max
-    """
-
-    def test_bruteforce(self):
-        params = utils.ThresholdSelectionParameters(
-            method=ThresholdSelectionMethod.bruteforce
-        )
-        feature_values = np.linspace(-1, 1, 100)
-        rng = np.random.RandomState(42)
-
-        # line to test
-        thresholds = dtree.select_thresholds(feature_values, params, rng=rng)
-
-        assert np.allclose(thresholds, feature_values[1:])
-
-    def test_random_when_to_few_values(self):
-        params = utils.ThresholdSelectionParameters(
-            method=ThresholdSelectionMethod.random, n_thresholds=1000
-        )
-        feature_values = np.linspace(-1, 1, 100)
-        rng = np.random.RandomState(42)
-
-        # line to test
-        thresholds = dtree.select_thresholds(feature_values, params, rng=rng)
-
-        assert np.allclose(thresholds, feature_values[1:])
-
-    def test_random_when_enough_values(self):
-        n_thresholds = 10
-        params = utils.ThresholdSelectionParameters(
-            method=ThresholdSelectionMethod.random, n_thresholds=n_thresholds
-        )
-        feature_values = np.linspace(-1, 1, 100)
-        rng = np.random.RandomState(42)
-
-        # line to test
-        thresholds0 = dtree.select_thresholds(feature_values, params, rng=rng)
-
-        assert thresholds0.shape == (n_thresholds,)
-        assert np.unique(thresholds0).shape == (n_thresholds,)
-
-    def test_random_reproducible(self):
-        n_thresholds = 10
-        params = utils.ThresholdSelectionParameters(
-            method=ThresholdSelectionMethod.random, n_thresholds=n_thresholds
-        )
-        feature_values = np.linspace(-1, 1, 100)
-
-        # line to test
-        rng = np.random.RandomState(42)
-        thresholds0 = dtree.select_thresholds(feature_values, params, rng=rng)
-        rng = np.random.RandomState(42)
-        thresholds1 = dtree.select_thresholds(feature_values, params, rng=rng)
-
-        assert np.allclose(thresholds0, thresholds1)
-
-    def test_random_produces_changing_thresholds(self):
-        n_thresholds = 10
-        params = utils.ThresholdSelectionParameters(
-            method=ThresholdSelectionMethod.random, n_thresholds=n_thresholds
-        )
-        feature_values = np.linspace(-1, 1, 100)
-        rng = np.random.RandomState(42)
-
-        # line to test
-        thresholds0 = dtree.select_thresholds(feature_values, params, rng=rng)
-        thresholds1 = dtree.select_thresholds(feature_values, params, rng=rng)
-
-        assert not np.allclose(thresholds0, thresholds1)
-
-    def test_quantile(self):
-        n_thresholds = 10
-        params = utils.ThresholdSelectionParameters(
-            method=ThresholdSelectionMethod.quantile,
-            n_thresholds=n_thresholds,
-            quantile=0.1,
-        )
-        feature_values = np.linspace(-1, 1, 100)
-        rng = np.random.RandomState(42)
-
-        # line to test
-        thresholds = dtree.select_thresholds(feature_values, params, rng=rng)
-
-        assert thresholds.shape == (11,)
-        assert (thresholds[1:] > thresholds[:-1]).all()
-
-    def test_uniform(self):
-        n_thresholds = 10
-        params = utils.ThresholdSelectionParameters(
-            method=ThresholdSelectionMethod.uniform, n_thresholds=n_thresholds
-        )
-        rng = np.random.RandomState(42)
-        feature_values = rng.normal(loc=0, scale=1, size=100)
-
-        # line to test
-        thresholds = dtree.select_thresholds(feature_values, params, rng=rng)
-
-        assert thresholds.shape == (1,)
-        assert thresholds[0] >= feature_values.min()
-        assert thresholds[0] <= feature_values.max()
-
-
-class Test_get_thresholds_and_target_groups:
-    """
-    * preduces a generator
-    * produces twice as many items to iterate in the case of missing values
-    * each item contains the current threshold, the target groups and a boolean that indicates the default direction
-    * the default direction is always None if there are no missing values and otherwise boolean
-    """
-
-    def test_produces_generator(self):
-        feature_values = np.linspace(-1, 1, 10)
-        threshold_params = utils.ThresholdSelectionParameters(
-            method=ThresholdSelectionMethod.bruteforce
-        )
-        rng = np.random.RandomState(42)
-
-        # line to test
-        gen = dtree.get_thresholds_and_target_groups(
-            feature_values, threshold_params, rng=rng
-        )
-
-        assert isinstance(gen, types.GeneratorType)
-
-    def test_finite_only_case(self):
-        feature_values = np.linspace(-1, 1, 10)
-        threshold_params = utils.ThresholdSelectionParameters(
-            method=ThresholdSelectionMethod.bruteforce
-        )
-        rng = np.random.RandomState(42)
-
-        # line to test
-        thresholds_and_target_groups = dtree.get_thresholds_and_target_groups(
-            feature_values, threshold_params, rng=rng
-        )
-
-        c = 0
-        for (
-            threshold,
-            target_groups,
-            default_direction_is_left,
-        ) in thresholds_and_target_groups:
-            assert isinstance(target_groups, np.ndarray)
-            assert threshold in feature_values[1:]
-            assert target_groups.dtype == bool
-            assert default_direction_is_left is None
-            c += 1
-
-        assert c == len(feature_values[1:])
-
-    def test_with_missing_case(self):
-        feature_values = np.linspace(-1, 1, 10)
-        feature_values[5] = np.nan
-        threshold_params = utils.ThresholdSelectionParameters(
-            method=ThresholdSelectionMethod.bruteforce
-        )
-        rng = np.random.RandomState(42)
-
-        thresholds_and_target_groups = dtree.get_thresholds_and_target_groups(
-            feature_values, threshold_params, rng=rng
-        )
-
-        # line to test
-        c = 0
-        for (
-            threshold,
-            target_groups,
-            default_direction_is_left,
-        ) in thresholds_and_target_groups:
-            assert isinstance(target_groups, np.ndarray)
-            assert threshold in feature_values[1:]
-            assert target_groups.dtype == bool
-            assert default_direction_is_left in [True, False]
-            c += 1
-
-        assert c == 2 * (len(feature_values[1:]) - 1)
-
-
-class Test_get_column:
-    """
-    * method ascending just returns ascending integer list for columns
-    * method random returns random integer list for columns
-    * method largest_delta returns column indices with largest feature max-min differences first
-    * if n_columns_to_try is given it is used to shorted the returned list
-    """
-
-    def test_ascending(self):
-        n_columns = 10
-        n_trials = None
-        column_params = utils.ColumnSelectionParameters(
-            method=utils.ColumnSelectionMethod.ascending, n_trials=n_trials
-        )
-        X = np.random.normal(size=(100, n_columns))
-        rng = np.random.RandomState(42)
-
-        # line to test
-        columns = dtree.get_column(X, column_params, rng=rng)
-
-        assert columns == list(range(n_columns))
-
-    def test_ascending_first_n_trials_columns(self):
-        n_columns = 10
-        n_trials = 5
-        column_params = utils.ColumnSelectionParameters(
-            method=utils.ColumnSelectionMethod.ascending, n_trials=n_trials
-        )
-        X = np.random.normal(size=(100, n_columns))
-        rng = np.random.RandomState(42)
-
-        # line to test
-        columns = dtree.get_column(X, column_params, rng=rng)
-
-        assert columns == list(range(n_trials))
-
-    def test_random(self):
-        n_columns = 10
-        n_trials = None
-        column_params = utils.ColumnSelectionParameters(
-            method=utils.ColumnSelectionMethod.random, n_trials=n_trials
-        )
-        X = np.random.normal(size=(100, n_columns))
-        rng = np.random.RandomState(42)
-
-        # line to test
-        columns = dtree.get_column(X, column_params, rng=rng)
-
-        assert not all([i0 < i1 for i0, i1 in zip(columns[:-1], columns[1:])])
-        assert sorted(columns) == list(range(n_columns))
-
-    def test_random_is_reproducible(self):
-        n_columns = 10
-        n_trials = None
-        column_params = utils.ColumnSelectionParameters(
-            method=utils.ColumnSelectionMethod.random, n_trials=n_trials
-        )
-        X = np.random.normal(size=(100, n_columns))
-
-        # line to test
-        rng = np.random.RandomState(42)
-        columns0 = dtree.get_column(X, column_params, rng=rng)
-        rng = np.random.RandomState(42)
-        columns1 = dtree.get_column(X, column_params, rng=rng)
-
-        assert columns0 == columns1
-
-    def test_largest_delta(self):
-        n_columns = 5
-        n_trials = None
-        column_params = utils.ColumnSelectionParameters(
-            method=utils.ColumnSelectionMethod.largest_delta, n_trials=n_trials
-        )
-        rng = np.random.RandomState(42)
-        X = np.array([[0, 0.001], [0, 0.01], [0, 0.1], [0, 1.0], [0, 10.0]]).T
-
-        n_repetitions = 100
-        all_columns = np.zeros((n_repetitions, n_columns), dtype=int)
-
-        for i in range(n_repetitions):
-            # line to test
-            all_columns[i, :] = dtree.get_column(X, column_params, rng=rng)
-
-        assert np.allclose(stats.mode(all_columns, axis=0).mode, [4, 3, 2, 1, 0])
-
-
-class Test_find_best_split:
-    """
-    cases to test for all measure_name values:
-    * simple & 1d is split as expected
-        * classification: y = 1 class, y = 2 classes, y = 3 classes
-        * regression: y = 1 value, y = 2 values, y = 3 values where 2 are more similar
-    * simple & 2d is split as expected
-        * same as 1d but 1st column useless and 2nd contains the needed info
-    """
-
-    X_1D = np.array(
-        [
-            [
-                1,
-            ],
-            [
-                2,
-            ],
-            [
-                3,
-            ],
-            [
-                4,
-            ],
-        ]
-    )
-
-    X_1D_missing = np.array(
-        [
-            [
-                1,
-            ],
-            [
-                np.nan,
-            ],
-            [
-                3,
-            ],
-            [
-                4,
-            ],
-        ]
-    )
-
-    X_2D = np.hstack((np.ones_like(X_1D), X_1D))
-    X_2D_missing = np.hstack((np.ones_like(X_1D_missing), X_1D_missing))
-
-    y_1class = np.ones(X_1D.shape[0], dtype=bool)
-    y_2class = np.array([False, False, True, True])
-    y_3class = np.array([0, 0, 1, 2])
-
-    y_1reg = np.ones(X_1D.shape[0])
-    y_2reg = np.array([-1.0, -1.0, 1.0, 1.0])
-    y_3reg = np.array([-1.0, -0.9, 1.0, 2.0])
-
-    # xgboost - least squares
-    g_1reg = np.array([0.0, 0.0, 0.0, 0.0])
-    g_2reg = np.array([-1.0, -1.0, 1.0, 1.0])
-    g_3reg = np.array([-1.275, -1.175, 0.725, 1.725])
-
-    h_1reg = np.array([-1.0, -1.0, -1.0, -1.0])
-    h_2reg = np.array([-1.0, -1.0, -1.0, -1.0])
-    h_3reg = np.array([-1.0, -1.0, -1.0, -1.0])
-
-    # xgboost - binomial log-likelihood
-    g_2class = np.array([-1.0, -1.0, 1.0, 1.0])
-    h_2class = np.array([-1.0, -1.0, -1.0, -1.0])
-
-    @pytest.mark.parametrize(
-        "y,ix,measure_name,g,h",
-        [
-            (y_1class, None, "gini", None, None),
-            (y_2class, 2, "gini", None, None),
-            (y_3class, 2, "gini", None, None),
-            (y_1class, None, "entropy", None, None),
-            (y_2class, 2, "entropy", None, None),
-            (y_3class, 2, "entropy", None, None),
-            (y_1reg, None, "variance", None, None),
-            (y_2reg, 2, "variance", None, None),
-            (y_3reg, 2, "variance", None, None),
-            (y_1reg, None, "xgboost", g_1reg, h_1reg),
-            (y_2reg, 2, "xgboost", g_2reg, h_2reg),
-            (y_3reg, 2, "xgboost", g_3reg, h_3reg),
-            # (y_1class, None, "xgboost", g_1class, h_1class), # currently not handled
-            (y_2class, 2, "xgboost", g_2class, h_2class),
-            # (y_3class, 2, "xgboost", g_3class, h_3class), # currently not handled
-        ],
-    )
-    def test_1d(
-        self,
-        y: np.ndarray,
-        ix: int,
-        measure_name: str,
-        g: np.ndarray,
-        h: np.ndarray,
-    ):
-        is_homogenous = len(np.unique(y)) == 1
-        grow_params = utils.TreeGrowthParameters(max_depth=2)
-        try:
-            # line to test
-            best = dtree.find_best_split(
-                self.X_1D,
-                y,
-                measure_name=measure_name,
-                g=g,
-                h=h,
-                growth_params=grow_params,
-            )
-        except ValueError as ex:
-            if is_homogenous:
-                pytest.xfail("Splitting a homogneous y failed as expected")
-            else:
-                raise ex
-        else:
-            if is_homogenous:
-                pytest.fail("Splitting a homogneous y passed unexpectedly")
-
-            threshold_exp = float(self.X_1D[ix, 0])
-            assert best.threshold == threshold_exp
-
-    @pytest.mark.parametrize(
-        "y,ix,measure_name,g,h",
-        [
-            (y_1class, None, "gini", None, None),
-            (y_2class, 2, "gini", None, None),
-            (y_3class, 2, "gini", None, None),
-            (y_1class, None, "entropy", None, None),
-            (y_2class, 2, "entropy", None, None),
-            (y_3class, 2, "entropy", None, None),
-            (y_1reg, None, "variance", None, None),
-            (y_2reg, 2, "variance", None, None),
-            (y_3reg, 2, "variance", None, None),
-            (y_1reg, None, "xgboost", g_1reg, h_1reg),
-            (y_2reg, 2, "xgboost", g_2reg, h_2reg),
-            (y_3reg, 2, "xgboost", g_3reg, h_3reg),
-            # (y_1class, None, "xgboost", g_1class, h_1class), # currently not handled
-            (y_2class, 2, "xgboost", g_2class, h_2class),
-            # (y_3class, 2, "xgboost", g_3class, h_3class), # currently not handled
-        ],
-    )
-    def test_1d_missing(
-        self,
-        y: np.ndarray,
-        ix: int,
-        measure_name: str,
-        g: np.ndarray,
-        h: np.ndarray,
-    ):
-        is_homogenous = len(np.unique(y)) == 1
-        grow_params = utils.TreeGrowthParameters(max_depth=2)
-        try:
-            # line to test
-            best = dtree.find_best_split(
-                self.X_1D_missing,
-                y,
-                measure_name=measure_name,
-                g=g,
-                h=h,
-                growth_params=grow_params,
-            )
-        except ValueError as ex:
-            if is_homogenous:
-                pytest.xfail("Splitting a homogneous y failed as expected")
-            else:
-                raise ex
-        else:
-            if is_homogenous:
-                pytest.fail("Splitting a homogneous y passed unexpectedly")
-
-            threshold_exp = float(self.X_1D_missing[ix, 0])
-            assert best.threshold == threshold_exp
-
-    @pytest.mark.parametrize(
-        "y,ix,measure_name,g,h",
-        [
-            (y_1class, None, "gini", None, None),
-            (y_2class, 2, "gini", None, None),
-            (y_3class, 2, "gini", None, None),
-            (y_1class, None, "entropy", None, None),
-            (y_2class, 2, "entropy", None, None),
-            (y_3class, 2, "entropy", None, None),
-            (y_1reg, None, "variance", None, None),
-            (y_2reg, 2, "variance", None, None),
-            (y_3reg, 2, "variance", None, None),
-            (y_1reg, None, "xgboost", g_1reg, h_1reg),
-            (y_2reg, 2, "xgboost", g_2reg, h_2reg),
-            (y_3reg, 2, "xgboost", g_3reg, h_3reg),
-            # (y_1class, None, "xgboost", g_1class, h_1class), # currently not handled
-            (y_2class, 2, "xgboost", g_2class, h_2class),
-            # (y_3class, 2, "xgboost", g_3class, h_3class), # currently not handled
-        ],
-    )
-    def test_2d(
-        self,
-        y: np.ndarray,
-        ix: int,
-        measure_name: str,
-        g: np.ndarray,
-        h: np.ndarray,
-    ):
-        is_homogenous = len(np.unique(y)) == 1
-        growth_params = utils.TreeGrowthParameters(max_depth=2)
-        try:
-            # line to test
-            best = dtree.find_best_split(
-                self.X_2D,
-                y,
-                measure_name,
-                g=g,
-                h=h,
-                growth_params=growth_params,
-            )
-        except ValueError as ex:
-            if is_homogenous:
-                pytest.xfail("Splitting a homogneous y failed as expected")
-            else:
-                raise ex
-        else:
-            if is_homogenous:
-                pytest.fail("Splitting a homogneous y passed unexpectedly")
-
-            assert best.column == 1
-            threshold_exp = float(self.X_2D[ix, 1])
-            assert best.threshold == threshold_exp
-
-    @pytest.mark.parametrize(
-        "y,ix,measure_name,g,h",
-        [
-            (y_1class, None, "gini", None, None),
-            (y_2class, 2, "gini", None, None),
-            (y_3class, 2, "gini", None, None),
-            (y_1class, None, "entropy", None, None),
-            (y_2class, 2, "entropy", None, None),
-            (y_3class, 2, "entropy", None, None),
-            (y_1reg, None, "variance", None, None),
-            (y_2reg, 2, "variance", None, None),
-            (y_3reg, 2, "variance", None, None),
-            (y_1reg, None, "xgboost", g_1reg, h_1reg),
-            (y_2reg, 2, "xgboost", g_2reg, h_2reg),
-            (y_3reg, 2, "xgboost", g_3reg, h_3reg),
-            # (y_1class, None, "xgboost", g_1class, h_1class), # currently not handled
-            (y_2class, 2, "xgboost", g_2class, h_2class),
-            # (y_3class, 2, "xgboost", g_3class, h_3class), # currently not handled
-        ],
-    )
-    def test_2d_missing(
-        self,
-        y: np.ndarray,
-        ix: int,
-        measure_name: str,
-        g: np.ndarray,
-        h: np.ndarray,
-    ):
-        is_homogenous = len(np.unique(y)) == 1
-        growth_params = utils.TreeGrowthParameters(max_depth=2)
-        try:
-            # line to test
-            best = dtree.find_best_split(
-                self.X_2D_missing,
-                y,
-                measure_name,
-                g=g,
-                h=h,
-                growth_params=growth_params,
-            )
-        except ValueError as ex:
-            if is_homogenous:
-                pytest.xfail("Splitting a homogneous y failed as expected")
-            else:
-                raise ex
-        else:
-            if is_homogenous:
-                pytest.fail("Splitting a homogneous y passed unexpectedly")
-
-            assert best.column == 1
-            threshold_exp = float(self.X_2D_missing[ix, 1])
-            assert best.threshold == threshold_exp
-
-
-@pytest.mark.parametrize(
-    "best,parent_node,growth_params,is_no_sensible_split_exp",
-    [
-        # parent is None #1
-        (
-            dtree.BestSplit(
-                score=-1.0, column=0, threshold=0.0, target_groups=np.array([])
-            ),
-            None,
-            utils.TreeGrowthParameters(max_depth=2),
-            False,
-        ),
-        # parent is None #2
-        (
-            dtree.BestSplit(
-                score=-1.0, column=0, threshold=0.0, target_groups=np.array([])
-            ),
-            dtree.Node(measure=dtree.SplitScore("bla")),
-            utils.TreeGrowthParameters(max_depth=2),
-            False,
-        ),
-        # split is sufficient
-        (
-            dtree.BestSplit(
-                score=-1.0,
-                column=0,
-                threshold=0.0,
-                target_groups=np.array([False, True]),
-            ),
-            dtree.Node(measure=dtree.SplitScore("bla", value=-1.1)),
-            utils.TreeGrowthParameters(max_depth=2, min_improvement=0.01),
-            False,
-        ),
-        # split is insufficient - because min gain not exceeded
-        (
-            dtree.BestSplit(
-                score=-1.0,
-                column=0,
-                threshold=0.0,
-                target_groups=np.array([False, True]),
-            ),
-            dtree.Node(measure=dtree.SplitScore("bla", value=-1.1)),
-            utils.TreeGrowthParameters(max_depth=2, min_improvement=0.2),
-            True,
-        ),
-        # split is insufficient - because all items sorted left
-        (
-            dtree.BestSplit(
-                score=-1.0,
-                column=0,
-                threshold=0.0,
-                target_groups=np.array([True, True]),
-            ),
-            dtree.Node(measure=dtree.SplitScore("bla", value=-1.1)),
-            utils.TreeGrowthParameters(max_depth=2, min_improvement=0.0),
-            True,
-        ),
-        # split is insufficient - because all items sorted right
-        (
-            dtree.BestSplit(
-                score=-1.0,
-                column=0,
-                threshold=0.0,
-                target_groups=np.array([False, False]),
-            ),
-            dtree.Node(measure=dtree.SplitScore("bla", value=-1.1)),
-            utils.TreeGrowthParameters(max_depth=2, min_improvement=0.0),
-            True,
-        ),
-    ],
-)
-def test_check_if_split_sensible(
-    best: dtree.BestSplit,
-    parent_node: dtree.Node,
-    growth_params: utils.TreeGrowthParameters,
-    is_no_sensible_split_exp: bool,
-):
-    # line to test
-    is_not_sensible_split, gain = dtree.check_if_split_sensible(
-        best, parent_node, growth_params
-    )
-
-    assert is_not_sensible_split == is_no_sensible_split_exp
-    if parent_node is None or parent_node.measure.value is None:  # type: ignore
-        assert gain is None
-
-
-def test_calc_leaf_weight_and_split_score():
-    # calls leafweights.calc_leaf_weight and scoreing.SplitScoreMetrics
-    # and returns two floats
-    y = np.array([True, True, False])
-    measure_name = scoring.MetricNames.gini
-    growth_params = utils.TreeGrowthParameters(max_depth=2)
-    g = np.array([1, 2, 3])
-    h = np.array([4, 5, 6])
-    leaf_weight_exp = 1.0
-    score_exp = 42.0
-    with (
-        patch(
-            "random_tree_models.decisiontree.leafweights.calc_leaf_weight",
-            return_value=leaf_weight_exp,
-        ) as mock_calc_leaf_weight,
-        patch(
-            "random_tree_models.decisiontree.scoring.calc_split_score",
-            return_value=score_exp,
-        ) as mock_SplitScoreMetrics,
-    ):
-        # line to test
-        leaf_weight, split_score = dtree.calc_leaf_weight_and_split_score(
-            y, measure_name, growth_params, g, h
-        )
-
-    assert leaf_weight == leaf_weight_exp
-    assert split_score == score_exp
-    assert mock_calc_leaf_weight.call_count == 1
-    assert mock_SplitScoreMetrics.call_count == 1
-
-
-@pytest.mark.parametrize("go_left", [True, False])
-def test_select_arrays_for_child_node(go_left: bool):
-    best = dtree.BestSplit(
-        score=1.0,
-        column=0,
-        threshold=2.0,
-        target_groups=np.array([True, True, False]),
-    )
-
-    X = np.array([[1], [2], [3]])
-    y = np.array([True, True, False])
-    g = np.array([1, 2, 3])
-    h = np.array([4, 5, 6])
-
-    # line to test
-    _X, _y, _g, _h = dtree.select_arrays_for_child_node(
-        go_left=go_left,
-        best=best,
-        X=X,
-        y=y,
-        g=g,
-        h=h,
-    )
-    assert _g is not None
-    assert _h is not None
-    if go_left:
-        assert np.allclose(_X, X[:2])
-        assert np.allclose(_y, y[:2])
-        assert np.allclose(_g, g[:2])
-        assert np.allclose(_h, h[:2])
-    else:
-        assert np.allclose(_X, X[2:])
-        assert np.allclose(_y, y[2:])
-        assert np.allclose(_g, g[2:])
-        assert np.allclose(_h, h[2:])
-
-
-class Test_grow_tree:
-    X = np.array([[1], [2], [3]])
-    y = np.array([True, True, False])
-    target_groups = np.array([True, True, False])
-    measure_name = MetricNames.gini
-    depth_dummy = 0
-
-    def test_baselevel(self):
-        # test returned leaf node
-        growth_params = utils.TreeGrowthParameters(max_depth=2)
-        parent_node = None
-        is_baselevel = True
-        reason = "very custom leaf node comment"
-        with patch(
-            "random_tree_models.decisiontree.check_is_baselevel",
-            return_value=[is_baselevel, reason],
-        ) as mock_check_is_baselevel:
-            # line to test
-            leaf_node = dtree.grow_tree(
-                self.X,
-                self.y,
-                self.measure_name,
-                growth_params=growth_params,
-                parent_node=parent_node,
-                depth=self.depth_dummy,
-            )
-
-            mock_check_is_baselevel.assert_called_once()
-            assert leaf_node.is_leaf == True
-            assert leaf_node.reason == reason
-
-    def test_split_improvement_insufficient(self):
-        # test split improvement below minimum
-        growth_params = utils.TreeGrowthParameters(max_depth=2, min_improvement=0.2)
-        parent_score = -1.0
-        new_score = -0.9
-        best = dtree.BestSplit(
-            score=new_score,
-            column=0,
-            threshold=3.0,
-            target_groups=self.target_groups,
-        )
-        measure = dtree.SplitScore(self.measure_name, parent_score)
-        parent_node = dtree.Node(
-            array_column=0,
-            threshold=1.0,
-            prediction=0.9,
-            left=None,
-            right=None,
-            measure=measure,
-            n_obs=3,
-            reason="",
-        )
-        is_baselevel = False
-        leaf_reason = "very custom leaf node comment"
-        gain = new_score - parent_score
-        split_reason = f"gain due split ({gain=}) lower than {growth_params.min_improvement=} or all data points assigned to one side (is left {best.target_groups.mean()=:.2%})"
-        with (
-            patch(
-                "random_tree_models.decisiontree.check_is_baselevel",
-                return_value=[is_baselevel, leaf_reason],
-            ) as mock_check_is_baselevel,
-            patch(
-                "random_tree_models.decisiontree.find_best_split",
-                return_value=best,
-            ) as mock_find_best_split,
-        ):
-            # line to test
-            node = dtree.grow_tree(
-                self.X,
-                self.y,
-                self.measure_name,
-                growth_params=growth_params,
-                parent_node=parent_node,
-                depth=self.depth_dummy,
-            )
-
-            mock_check_is_baselevel.assert_called_once()
-            mock_find_best_split.assert_called_once()
-            assert node.reason == split_reason
-            assert node.prediction == np.mean(self.y)
-            assert node.n_obs == len(self.y)
-
-    def test_split_improvement_sufficient(self):
-        # test split improvement above minumum, leading to two leaf nodes
-        growth_params = utils.TreeGrowthParameters(max_depth=2, min_improvement=0.0)
-        parent_score = -1.0
-        new_score = -0.9
-        best = dtree.BestSplit(
-            score=new_score,
-            column=0,
-            threshold=3.0,
-            target_groups=self.target_groups,
-        )
-        measure = dtree.SplitScore(self.measure_name, parent_score)
-        parent_node = dtree.Node(
-            array_column=0,
-            threshold=1.0,
-            prediction=0.9,
-            left=None,
-            right=None,
-            measure=measure,
-            n_obs=3,
-            reason="",
-        )
-
-        leaf_reason = "very custom leaf node comment"
-
-        with (
-            patch(
-                "random_tree_models.decisiontree.check_is_baselevel",
-                side_effect=[
-                    (False, "bla"),
-                    (True, leaf_reason),
-                    (True, leaf_reason),
-                ],
-            ) as mock_check_is_baselevel,
-            patch(
-                "random_tree_models.decisiontree.find_best_split",
-                side_effect=[best],
-            ) as mock_find_best_split,
-        ):
-            # line to test
-            tree = dtree.grow_tree(
-                self.X,
-                self.y,
-                self.measure_name,
-                growth_params=growth_params,
-                parent_node=parent_node,
-                depth=self.depth_dummy,
-            )
-
-            assert mock_check_is_baselevel.call_count == 3
-            assert mock_find_best_split.call_count == 1
-
-            # parent
-            assert tree.reason == ""
-            assert tree.prediction == np.mean(self.y)
-            assert tree.n_obs == len(self.y)
-            assert tree.is_leaf == False
-
-            # left leaf
-            assert tree.left is not None
-            assert tree.left.reason == leaf_reason
-            assert tree.left.prediction == 1.0
-            assert tree.left.n_obs == 2
-            assert tree.left.is_leaf == True
-
-            # right leaf
-            assert tree.right is not None
-            assert tree.right.reason == leaf_reason
-            assert tree.right.prediction == 0.0
-            assert tree.right.n_obs == 1
-            assert tree.right.is_leaf == True
-
-
-@pytest.mark.parametrize(
-    "x,exp",
-    [
-        (np.array([-1, -1]), 0.0),
-        (np.array([1, -1]), 1.0),
-        (np.array([1, 1]), 2.0),
-        (np.array([-1, 1]), 3.0),
-    ],
-)
-def test_find_leaf_node(x: np.ndarray, exp: float):
-    tree = dtree.Node(
-        array_column=0,
-        threshold=0.0,
-        left=dtree.Node(
-            array_column=1,
-            threshold=0.0,
-            left=dtree.Node(prediction=0.0),
-            right=dtree.Node(prediction=3.0),
-        ),
-        right=dtree.Node(
-            array_column=1,
-            threshold=0.0,
-            left=dtree.Node(prediction=1.0),
-            right=dtree.Node(prediction=2.0),
-        ),
-    )
-    # line to test
-    leaf = dtree.find_leaf_node(tree, x)
-
-    assert leaf.prediction == exp
-
-
-def test_predict_with_tree():
-    X = np.array(
-        [
-            [-1.0, -1.0],
-            [1.0, -1.0],
-            [1.0, 1.0],
-            [-1.0, 1.0],
-        ]
-    )
-    tree = dtree.Node(
-        array_column=0,
-        threshold=0.0,
-        left=dtree.Node(
-            array_column=1,
-            threshold=0.0,
-            left=dtree.Node(prediction=0.0),
-            right=dtree.Node(prediction=3.0),
-        ),
-        right=dtree.Node(
-            array_column=1,
-            threshold=0.0,
-            left=dtree.Node(prediction=1.0),
-            right=dtree.Node(prediction=2.0),
-        ),
-    )
-
-    # line to test
-    predictions = dtree.predict_with_tree(tree, X)
-
-    assert np.allclose(predictions, np.arange(0, 4, 1))
-
-
-class TestDecisionTreeTemplate:
-    model = dtree.DecisionTreeTemplate(measure_name=MetricNames.entropy)
-    X = np.random.normal(size=(100, 10))
-    y = np.random.normal(size=(100,))
-
-    def test_tree_(self):
-        assert not hasattr(self.model, "tree_")
-
-    def test_growth_params_(self):
-        assert not hasattr(self.model, "growth_params_")
-
-        self.model._organize_growth_parameters()
-        assert isinstance(self.model.growth_params_, utils.TreeGrowthParameters)
-
-    def test_fit(self):
-        try:
-            self.model.fit(None, None)  # type: ignore
-        except NotImplementedError as ex:
-            pytest.xfail("DecisionTreeTemplate.fit expectedly refused call")
-
-    def test_predict(self):
-        try:
-            self.model.predict(None)  # type: ignore
-        except NotImplementedError as ex:
-            pytest.xfail("DecisionTreeTemplate.predict expectedly refused call")
-
-    def test_select_samples_and_features_no_sampling(self):
-        self.model.frac_features = 1.0
-        self.model.frac_subsamples = 1.0
-        self.model._organize_growth_parameters()
-
-        # line to test
-        X, y, ix_features = self.model._select_samples_and_features(self.X, self.y)
-
-        assert np.allclose(X, self.X)
-        assert np.allclose(y, self.y)
-        assert np.allclose(ix_features, np.arange(0, self.X.shape[1], 1))
-
-    def test_select_samples_and_features_with_column_sampling(self):
-        self.model.frac_features = 0.5
-        self.model.frac_subsamples = 1.0
-        self.model._organize_growth_parameters()
-
-        # line to test
-        X, y, ix_features = self.model._select_samples_and_features(self.X, self.y)
-
-        assert np.isclose(
-            X.shape[1], self.X.shape[1] * self.model.frac_features, atol=1
-        )
-        assert np.isclose(y.shape[0], self.y.shape[0])
-        assert all([ix in np.arange(0, self.X.shape[1], 1) for ix in ix_features])
-
-    def test_select_samples_and_features_with_row_sampling(self):
-        self.model.frac_features = 1.0
-        self.model.frac_subsamples = 0.5
-        self.model._organize_growth_parameters()
-
-        # line to test
-        X, y, ix_features = self.model._select_samples_and_features(self.X, self.y)
-
-        assert np.isclose(X.shape[0], self.X.shape[0] * self.model.frac_subsamples)
-        assert np.isclose(y.shape[0], self.y.shape[0] * self.model.frac_subsamples)
-        assert np.allclose(ix_features, np.arange(0, self.X.shape[1], 1))
-
-    def test_select_samples_and_features_with_column_and_row_sampling(self):
-        self.model.frac_features = 0.5
-        self.model.frac_subsamples = 0.5
-        self.model._organize_growth_parameters()
-
-        # line to test
-        X, y, ix_features = self.model._select_samples_and_features(self.X, self.y)
-
-        assert np.isclose(
-            X.shape[1], self.X.shape[1] * self.model.frac_features, atol=1
-        )
-        assert np.isclose(X.shape[0], self.X.shape[0] * self.model.frac_subsamples)
-        assert np.isclose(y.shape[0], self.y.shape[0] * self.model.frac_subsamples)
-        assert all([ix in np.arange(0, self.X.shape[1], 1) for ix in ix_features])
-
-    def test_select_samples_and_features_sampling_reproducibility(self):
-        self.model.frac_features = 0.5
-        self.model.frac_subsamples = 0.5
-        self.model._organize_growth_parameters()
-
-        # line to test
-        X0, y0, ix_features0 = self.model._select_samples_and_features(self.X, self.y)
-        X1, y1, ix_features1 = self.model._select_samples_and_features(self.X, self.y)
-
-        assert np.allclose(X0, X1)
-        assert np.allclose(y0, y1)
-        assert np.allclose(ix_features0, ix_features1)
-
-    def test_select_features(self):
-        ix_features = np.arange(0, self.X.shape[1], 1)
-        _X = self.model._select_features(self.X, ix_features)
-        assert np.allclose(_X, self.X)
-
-        ix_features = np.array([0, 1, 2])
-        _X = self.model._select_features(self.X, ix_features)
-        assert _X.shape[1] == 3
-
-
-class TestDecisionTreeRegressor:
-    model = dtree.DecisionTreeRegressor()
-
-    X = np.array(
-        [
-            [-1, -1],
-            [1, -1],
-            [1, 1],
-            [-1, 1],
-        ]
-    )
-    y = np.array([0.0, 0.0, 1.0, 1.0])
-
-    def test_fit(self):
-        model = dtree.DecisionTreeRegressor()
-        model.fit(self.X, self.y)
-        assert isinstance(model.tree_, dtree.Node)
-
-    def test_predict(self):
-        model = dtree.DecisionTreeRegressor()
-        model.fit(self.X, self.y)
-        predictions = model.predict(self.X)
-        assert np.allclose(predictions, self.y)
-
-
-class TestDecisionTreeClassifier:
-    model = dtree.DecisionTreeClassifier()
-
-    X = np.array(
-        [
-            [-1, -1],
-            [1, -1],
-            [1, 1],
-            [-1, 1],
-        ]
-    )
-    y = np.array([False, False, True, True])
-
-    def test_classes_(self):
-        assert not hasattr(self.model, "classes_")
-
-    def test_fit(self):
-        model = dtree.DecisionTreeClassifier()
-        model.fit(self.X, self.y)
-        assert not hasattr(self.model, "classes_")
-        assert isinstance(model.tree_, dtree.Node)
-
-    def test_predict(self):
-        model = dtree.DecisionTreeClassifier()
-        model.fit(self.X, self.y)
-        predictions = model.predict(self.X)
-        assert (predictions == self.y).all()
-
-
-@parametrize_with_checks(
-    [dtree.DecisionTreeRegressor(), dtree.DecisionTreeClassifier()],
-    expected_failed_checks=expected_failed_checks,  # type: ignore
-)
-def test_dtree_estimators_with_sklearn_checks(estimator, check):
-    """Test of estimators using scikit-learn test suite
-
-    Reference: https://scikit-learn.org/stable/modules/generated/sklearn.utils.estimator_checks.parametrize_with_checks.html#sklearn.utils.estimator_checks.parametrize_with_checks
-    """
-
-    check(estimator)
diff --git a/tests/test_extratrees.py b/tests/test_extratrees.py
index 7bbce23..ed82edf 100644
--- a/tests/test_extratrees.py
+++ b/tests/test_extratrees.py
@@ -4,7 +4,7 @@
 
 import random_tree_models.decisiontree as dtree
 import random_tree_models.extratrees as et
-from random_tree_models.scoring import MetricNames
+from random_tree_models.params import MetricNames
 from tests.conftest import expected_failed_checks
 
 
diff --git a/tests/test_isolationforest.py b/tests/test_isolationforest.py
index a1d30a8..c127bc0 100644
--- a/tests/test_isolationforest.py
+++ b/tests/test_isolationforest.py
@@ -1,7 +1,7 @@
 import numpy as np
 
 import random_tree_models.isolationforest as iforest
-from random_tree_models.utils import ThresholdSelectionMethod
+from random_tree_models.params import ThresholdSelectionMethod
 
 rng = np.random.RandomState(42)
 
diff --git a/tests/test_leafweights.py b/tests/test_leafweights.py
index d1f6b0d..2b03d68 100644
--- a/tests/test_leafweights.py
+++ b/tests/test_leafweights.py
@@ -2,8 +2,8 @@
 import pytest
 
 import random_tree_models.leafweights as leafweights
-import random_tree_models.utils as utils
-from random_tree_models.scoring import MetricNames
+import random_tree_models.params as utils
+from random_tree_models.params import MetricNames
 
 
 def test_leaf_weight_mean():
diff --git a/tests/test_scoring.py b/tests/test_scoring.py
index cb32b59..c0b9ca2 100644
--- a/tests/test_scoring.py
+++ b/tests/test_scoring.py
@@ -1,8 +1,8 @@
 import numpy as np
 import pytest
 
+import random_tree_models.params as utils
 import random_tree_models.scoring as scoring
-import random_tree_models.utils as utils
 from random_tree_models import rs_entropy, rs_gini_impurity
 
 
@@ -375,43 +375,41 @@ class TestSplitScoreMetrics:
     var_exp = -0.25
 
     def test_gini(self):
-        g = scoring.calc_split_score(
-            scoring.MetricNames.gini, self.y, self.target_groups
-        )
+        g = scoring.calc_split_score(utils.MetricNames.gini, self.y, self.target_groups)
         # g = scoring.SplitScoreMetrics["gini"](self.y, self.target_groups)
         assert g == self.g_exp
 
     def test_gini_rs(self):
         g = scoring.calc_split_score(
-            scoring.MetricNames.gini_rs, self.y, self.target_groups
+            utils.MetricNames.gini_rs, self.y, self.target_groups
         )
         # g = scoring.SplitScoreMetrics["gini_rs"](self.y, self.target_groups)
         assert g == self.g_exp
 
     def test_entropy(self):
         h = scoring.calc_split_score(
-            scoring.MetricNames.entropy, self.y, self.target_groups
+            utils.MetricNames.entropy, self.y, self.target_groups
         )
         # h = scoring.SplitScoreMetrics["entropy"](self.y, self.target_groups)
         assert h == self.h_exp
 
     def test_entropy_rs(self):
         h = scoring.calc_split_score(
-            scoring.MetricNames.entropy_rs, self.y, self.target_groups
+            utils.MetricNames.entropy_rs, self.y, self.target_groups
         )
         # h = scoring.SplitScoreMetrics["entropy_rs"](self.y, self.target_groups)
         assert h == self.h_exp
 
     def test_variance(self):
         var = scoring.calc_split_score(
-            scoring.MetricNames.variance, self.y, self.target_groups
+            utils.MetricNames.variance, self.y, self.target_groups
         )
         # var = scoring.SplitScoreMetrics["variance"](self.y, self.target_groups)
         assert var == self.var_exp
 
     def test_friedman_binary_classification(self):
         var = scoring.calc_split_score(
-            scoring.MetricNames.friedman_binary_classification,
+            utils.MetricNames.friedman_binary_classification,
             self.y,
             self.target_groups,
         )
diff --git a/tests/test_utils.py b/tests/test_utils.py
index dc78d71..5b143a0 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -3,26 +3,35 @@
 import pytest
 from pydantic import ValidationError
 
+import random_tree_models.params
 import random_tree_models.utils as utils
 
 
 def test_ColumnSelectionMethod():
     expected = ["ascending", "largest_delta", "random"]
-    assert list(utils.ColumnSelectionMethod.__members__.keys()) == expected
+    assert (
+        list(random_tree_models.params.ColumnSelectionMethod.__members__.keys())
+        == expected
+    )
 
 
 def test_ThresholdSelectionMethod():
     expected = ["bruteforce", "quantile", "random", "uniform"]
-    assert list(utils.ThresholdSelectionMethod.__members__.keys()) == expected
+    assert (
+        list(random_tree_models.params.ThresholdSelectionMethod.__members__.keys())
+        == expected
+    )
 
 
 # method, quantile, random_state, n_thresholds
 class TestThresholdSelectionParameters:
     def test_expected_okay(self):
-        params = utils.ThresholdSelectionParameters(
+        params = random_tree_models.params.ThresholdSelectionParameters(
             method="quantile", quantile=0.1, random_state=0, n_thresholds=100
         )
-        assert params.method == utils.ThresholdSelectionMethod.quantile
+        assert (
+            params.method == random_tree_models.params.ThresholdSelectionMethod.quantile
+        )
         assert params.quantile == 0.1
         assert params.random_state == 0
         assert params.n_thresholds == 100
@@ -30,7 +39,7 @@ def test_expected_okay(self):
 
     def test_method_fail(self):
         try:
-            _ = utils.ThresholdSelectionParameters(
+            _ = random_tree_models.params.ThresholdSelectionParameters(
                 method="wuppy", quantile=0.1, random_state=0, n_thresholds=100
             )
         except ValueError as ex:
@@ -44,7 +53,7 @@ def test_method_fail(self):
     )
     def test_quantile(self, q: float, fail: bool):
         try:
-            _ = utils.ThresholdSelectionParameters(
+            _ = random_tree_models.params.ThresholdSelectionParameters(
                 method="quantile", quantile=q, random_state=0, n_thresholds=100
             )
         except ValueError as ex:
@@ -66,7 +75,7 @@ def test_quantile(self, q: float, fail: bool):
     )
     def test_random_state(self, random_state: int, fail: bool):
         try:
-            _ = utils.ThresholdSelectionParameters(
+            _ = random_tree_models.params.ThresholdSelectionParameters(
                 method="quantile",
                 quantile=0.1,
                 random_state=random_state,
@@ -95,7 +104,7 @@ def test_random_state(self, random_state: int, fail: bool):
     )
     def test_n_thresholds(self, n_thresholds: int, fail: bool):
         try:
-            _ = utils.ThresholdSelectionParameters(
+            _ = random_tree_models.params.ThresholdSelectionParameters(
                 method="quantile",
                 quantile=0.1,
                 random_state=42,
@@ -112,27 +121,31 @@ def test_n_thresholds(self, n_thresholds: int, fail: bool):
 
 
 def test_ColumnSelectionParameters():
-    params = utils.ColumnSelectionParameters(method="random", n_trials=10)
-    assert params.method == utils.ColumnSelectionMethod.random
+    params = random_tree_models.params.ColumnSelectionParameters(
+        method="random", n_trials=10
+    )
+    assert params.method == random_tree_models.params.ColumnSelectionMethod.random
     assert params.n_trials == 10
 
 
 class TestTreeGrowthParameters:
     def test_expected_okay(self):
-        params = utils.TreeGrowthParameters(
+        params = random_tree_models.params.TreeGrowthParameters(
             max_depth=10,
             min_improvement=0.0,
             lam=0.0,
             frac_subsamples=1.0,
             frac_features=1.0,
             random_state=0,
-            threshold_params=utils.ThresholdSelectionParameters(
+            threshold_params=random_tree_models.params.ThresholdSelectionParameters(
                 method="quantile",
                 quantile=0.1,
                 random_state=0,
                 n_thresholds=100,
             ),
-            column_params=utils.ColumnSelectionParameters(method="random", n_trials=10),
+            column_params=random_tree_models.params.ColumnSelectionParameters(
+                method="random", n_trials=10
+            ),
         )
         assert params.max_depth == 10
         assert params.min_improvement == 0.0
@@ -140,8 +153,13 @@ def test_expected_okay(self):
         assert params.frac_subsamples == 1.0
         assert params.frac_features == 1.0
         assert params.random_state == 0
-        assert isinstance(params.threshold_params, utils.ThresholdSelectionParameters)
-        assert isinstance(params.column_params, utils.ColumnSelectionParameters)
+        assert isinstance(
+            params.threshold_params,
+            random_tree_models.params.ThresholdSelectionParameters,
+        )
+        assert isinstance(
+            params.column_params, random_tree_models.params.ColumnSelectionParameters
+        )
 
     @pytest.mark.parametrize(
         "frac_subsamples,fail",
@@ -155,7 +173,7 @@ def test_expected_okay(self):
     )
     def test_frac_subsamples(self, frac_subsamples: float, fail: bool):
         try:
-            _ = utils.TreeGrowthParameters(
+            _ = random_tree_models.params.TreeGrowthParameters(
                 max_depth=10,
                 frac_subsamples=frac_subsamples,
             )
@@ -180,7 +198,7 @@ def test_frac_subsamples(self, frac_subsamples: float, fail: bool):
     )
     def test_frac_features(self, frac_features: float, fail: bool):
         try:
-            _ = utils.TreeGrowthParameters(
+            _ = random_tree_models.params.TreeGrowthParameters(
                 max_depth=10,
                 frac_features=frac_features,
             )
@@ -195,7 +213,7 @@ def test_frac_features(self, frac_features: float, fail: bool):
 
     def test_fail_if_max_depth_missing(self):
         with pytest.raises(ValidationError):
-            _ = utils.TreeGrowthParameters()  # type: ignore
+            _ = random_tree_models.params.TreeGrowthParameters()  # type: ignore
 
 
 def test_get_logger():
diff --git a/uv.lock b/uv.lock
index b5eb83e..c61c2f2 100644
--- a/uv.lock
+++ b/uv.lock
@@ -699,6 +699,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" },
 ]
 
+[[package]]
+name = "dirty-equals"
+version = "0.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b0/99/133892f401ced5a27e641a473c547d5fbdb39af8f85dac8a9d633ea3e7a7/dirty_equals-0.9.0.tar.gz", hash = "sha256:17f515970b04ed7900b733c95fd8091f4f85e52f1fb5f268757f25c858eb1f7b", size = 50412, upload-time = "2025-01-11T23:23:40.491Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/77/0c/03cc99bf3b6328604b10829de3460f2b2ad3373200c45665c38508e550c6/dirty_equals-0.9.0-py3-none-any.whl", hash = "sha256:ff4d027f5cfa1b69573af00f7ba9043ea652dbdce3fe5cbe828e478c7346db9c", size = 28226, upload-time = "2025-01-11T23:23:37.489Z" },
+]
+
 [[package]]
 name = "distlib"
 version = "0.4.0"
@@ -877,6 +886,22 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
 ]
 
+[[package]]
+name = "inline-snapshot"
+version = "0.27.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "asttokens" },
+    { name = "executing" },
+    { name = "pytest" },
+    { name = "rich" },
+    { name = "tomli", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b9/93/3caece250cdf267fcb39e6a82ada0e7e8e8fb37207331309dbf6865d7497/inline_snapshot-0.27.2.tar.gz", hash = "sha256:5ecc7ccfdcbf8d9273d3fa9fb55b829720680ef51bb1db12795fd1b0f4a3783c", size = 347133, upload-time = "2025-08-11T07:49:55.134Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8f/7f/9e41fd793827af8cbe812fff625d62b3b47603d62145b718307ef4e381eb/inline_snapshot-0.27.2-py3-none-any.whl", hash = "sha256:7c11f78ad560669bccd38d6d3aa3ef33d6a8618d53bd959019dca3a452272b7e", size = 68004, upload-time = "2025-08-11T07:49:53.904Z" },
+]
+
 [[package]]
 name = "ipykernel"
 version = "6.30.1"
@@ -2717,6 +2742,8 @@ dependencies = [
 
 [package.dev-dependencies]
 dev = [
+    { name = "dirty-equals" },
+    { name = "inline-snapshot" },
     { name = "ipywidgets" },
     { name = "jupyter-contrib-nbextensions" },
     { name = "jupyterlab" },
@@ -2732,6 +2759,8 @@ nb = [
     { name = "jupyterlab" },
 ]
 test = [
+    { name = "dirty-equals" },
+    { name = "inline-snapshot" },
     { name = "pytest" },
 ]
 
@@ -2746,6 +2775,8 @@ requires-dist = [
 
 [package.metadata.requires-dev]
 dev = [
+    { name = "dirty-equals", specifier = ">=0.9.0" },
+    { name = "inline-snapshot", specifier = ">=0.27.2" },
     { name = "ipywidgets", specifier = ">=8.0.6" },
     { name = "jupyter-contrib-nbextensions", specifier = ">=0.7.0" },
     { name = "jupyterlab", specifier = ">=4.0.0" },
@@ -2760,7 +2791,11 @@ nb = [
     { name = "jupyter-contrib-nbextensions", specifier = ">=0.7.0" },
     { name = "jupyterlab", specifier = ">=4.0.0" },
 ]
-test = [{ name = "pytest", specifier = ">=7.3.1" }]
+test = [
+    { name = "dirty-equals", specifier = ">=0.9.0" },
+    { name = "inline-snapshot", specifier = ">=0.27.2" },
+    { name = "pytest", specifier = ">=7.3.1" },
+]
 
 [[package]]
 name = "referencing"