From afad757fad4fbb3a06c51e21503ae31e40714919 Mon Sep 17 00:00:00 2001
From: eschmidt42 <11818904+eschmidt42@users.noreply.github.com>
Date: Mon, 18 Aug 2025 15:32:11 +0200
Subject: [PATCH] feat: polished xgboost.py a bit to use similar naming as gradientboostedtrees.py, also got rid of some redundant code that both modules should share, re-located re-used functions to gradient.py and transform.py, added tests

---
 src/random_tree_models/gradient.py        |  72 ++++++++
 .../models/gradientboostedtrees.py         |  60 ++-----
 src/random_tree_models/models/xgboost.py   | 104 +++++-------
 src/random_tree_models/transform.py        |  21 +++
 src/random_tree_models/utils.py            |  15 --
 tests/models/test_gradientboostedtrees.py  |  84 ----------
 tests/models/test_xgboost.py               | 156 ++++++++----------
 tests/test_gradient.py                     | 147 +++++++++++++++++
 tests/test_transform.py                    |  63 +++++++
 tests/test_utils.py                        |  43 -----
 10 files changed, 426 insertions(+), 339 deletions(-)
 create mode 100644 src/random_tree_models/gradient.py
 create mode 100644 src/random_tree_models/transform.py
 create mode 100644 tests/test_gradient.py
 create mode 100644 tests/test_transform.py

diff --git a/src/random_tree_models/gradient.py b/src/random_tree_models/gradient.py
new file mode 100644
index 0000000..e7846c7
--- /dev/null
+++ b/src/random_tree_models/gradient.py
@@ -0,0 +1,72 @@
+import math
+
+import numpy as np
+
+
+def check_y_float(y_float: np.ndarray):
+    # expects y_float to consist only of the values -1 and 1
+    unexpected_values = np.abs(y_float) != 1
+    if np.sum(unexpected_values) > 0:
+        raise ValueError(
+            f"expected y_float to contain only -1 and 1, got {y_float[unexpected_values]}"
+        )
+
+
+def get_pseudo_residual_mse(
+    y: np.ndarray, current_estimates: np.ndarray, second_order: bool
+) -> tuple[np.ndarray, np.ndarray | None]:
+    """
+    mse loss = 0.5 * sum_i (y_i - estimate_i)^2
+    d mse loss(y,estimate) / d estimate_i = -(y_i - estimate_i)
+    since we want the negative gradient for steepest descent, we flip the sign: y_i - estimate_i
+    """
+    first_derivative = y - current_estimates
+
+    second_derivative = None
+    if second_order:
+        second_derivative = -1 * np.ones_like(first_derivative)
+
+    return first_derivative, second_derivative
+
+
+def get_pseudo_residual_log_odds(
+    y: np.ndarray, current_estimates: np.ndarray, second_order: bool
+) -> tuple[np.ndarray, np.ndarray | None]:
+    """
+    first derivative: -d loss / d current_estimates, the negative of g in the xgboost paper
+    second derivative: -d^2 loss / d current_estimates^2, the negative of h in the xgboost paper
+    (the sign flip gives the pseudo residual, consistent with get_pseudo_residual_mse)
+    """
+    check_y_float(y)
+
+    a = np.exp(2 * y * current_estimates)
+    first_derivative = 2 * y / (1 + a)
+
+    second_derivative = None
+    if second_order:
+        second_derivative = -(4 * y**2 * a / (1 + a) ** 2)
+
+    return first_derivative, second_derivative
+
+
+def get_start_estimate_mse(y: np.ndarray) -> float:
+    return float(np.mean(y))
+
+
+def get_start_estimate_log_odds(y: np.ndarray) -> float:
+    """
+    0.5 * log((1+ym)/(1-ym)) because ym is in [-1, 1]
+    equivalent to log(ym/(1-ym)) if ym were in [0, 1]
+    """
+    check_y_float(y)
+
+    ym = np.mean(y)
+
+    if ym == 1:
+        return math.inf
+    elif ym == -1:
+        return -math.inf
+
+    start_estimate = 0.5 * math.log((1 + ym) / (1 - ym))
+
+    return start_estimate
diff --git a/src/random_tree_models/models/gradientboostedtrees.py b/src/random_tree_models/models/gradientboostedtrees.py
index 9161ddf..0e60725 100644
--- a/src/random_tree_models/models/gradientboostedtrees.py
+++ b/src/random_tree_models/models/gradientboostedtrees.py
@@ -1,4 +1,3 @@
-import math
 import typing as T
 
 import numpy as np
@@ -14,11 +13,20 @@
     validate_data,  # type: ignore
 )
 
+from random_tree_models.gradient import (
+    get_pseudo_residual_log_odds,
+    get_pseudo_residual_mse,
+    get_start_estimate_log_odds,
+    get_start_estimate_mse,
+)
 from random_tree_models.models.decisiontree import (
     DecisionTreeRegressor,
 )
 from random_tree_models.params import MetricNames, is_greater_zero
-from random_tree_models.utils import vectorize_bool_to_float
+from random_tree_models.transform import (
+    get_probabilities_from_mapped_bools,
+    vectorize_bool_to_float,
+)
 
 
 class GradientBoostedTreesTemplate(base.BaseEstimator):
@@ -54,42 +62,6 @@ def predict(self, X: np.ndarray) -> np.ndarray:
         raise NotImplementedError()
 
 
-def get_pseudo_residual_mse(y: np.ndarray, current_estimates: np.ndarray) -> np.ndarray:
-    """
-    mse loss = sum_i (y_i - estimate_i)^2
-    pseudo residual_i = d mse loss(y,estimate) / d estimate_i = - (y_i - estimate_i)
-    since we want to apply it as the negative gradient for steepest descent we flip the sign
-    """
-    return y - current_estimates
-
-
-def get_pseudo_residual_log_odds(
-    y: np.ndarray, current_estimates: np.ndarray
-) -> np.ndarray:
-    """
-    # dloss/dyhat, g in the xgboost paper
-    """
-    return 2 * y / (1 + np.exp(2 * y * current_estimates))
-
-
-def get_start_estimate_mse(y: np.ndarray) -> float:
-    return float(np.mean(y))
-
-
-def get_start_estimate_log_odds(y: np.ndarray) -> float:
-    """
-    1/2 log(1+ym)/(1-ym) because ym is in [-1, 1]
-    equivalent to log(ym)/(1-ym) if ym were in [0, 1]
-    """
-    ym = np.mean(y)
-    if ym == 1:
-        return math.inf
-    elif ym == -1:
-        return -math.inf
-    start_estimate = 0.5 * math.log((1 + ym) / (1 - ym))
-    return start_estimate
-
-
 def find_step_size(
     y: np.ndarray, current_estimates: np.ndarray, h: np.ndarray
 ) -> float:
@@ -112,12 +84,6 @@ def loss(gamma: float) -> float:
     return 1.0
 
 
-def get_probabilities_from_mapped_bools(h: np.ndarray) -> np.ndarray:
-    proba = 1 / (1 + np.exp(-2.0 * h))
-    proba = np.array([1 - proba, proba]).T
-    return proba
-
-
 class GradientBoostedTreesRegressor(
     base.RegressorMixin,
     GradientBoostedTreesTemplate,
@@ -156,7 +122,7 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "GradientBoostedTreesRegressor":
         self.step_sizes_: list[float] = []
 
         for _ in track(range(self.n_trees), total=self.n_trees, description="tree"):
-            r = get_pseudo_residual_mse(y, current_estimates)
+            r, _ = get_pseudo_residual_mse(y, current_estimates, second_order=False)
 
             # train decision tree to predict differences
             new_tree = DecisionTreeRegressor(
@@ -311,7 +277,9 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "GradientBoostedTreesClassifier":
         self.step_sizes_: list[float] = []
 
         for _ in track(range(self.n_trees), description="tree", total=self.n_trees):
-            r = get_pseudo_residual_log_odds(y, current_estimates)
+            r, _ = get_pseudo_residual_log_odds(
+                y, current_estimates, second_order=False
+            )
 
             new_tree = DecisionTreeRegressor(
                 measure_name=self.measure_name,
diff --git a/src/random_tree_models/models/xgboost.py b/src/random_tree_models/models/xgboost.py
index 2d42d32..5e55fef 100644
--- a/src/random_tree_models/models/xgboost.py
+++ b/src/random_tree_models/models/xgboost.py
@@ -10,7 +10,6 @@
 * sparsity-aware split finding / "default direction" for missing values
 """
 
-import math
 import typing as T
 
 import numpy as np
@@ -26,9 +25,18 @@
     validate_data,  # type: ignore
 )
 
+from random_tree_models.gradient import (
+    get_pseudo_residual_log_odds,
+    get_pseudo_residual_mse,
+    get_start_estimate_log_odds,
+    get_start_estimate_mse,
+)
 from random_tree_models.models.decisiontree import DecisionTreeRegressor
 from random_tree_models.params import MetricNames, is_greater_zero
-from random_tree_models.utils import vectorize_bool_to_float
+from random_tree_models.transform import (
+    get_probabilities_from_mapped_bools,
+    vectorize_bool_to_float,
+)
 
 
 class XGBoostTemplate(base.BaseEstimator):
@@ -73,15 +81,6 @@ def predict(self, X: np.ndarray) -> np.ndarray:
         raise NotImplementedError()
 
 
-def compute_derivatives_negative_least_squares(
-    y: np.ndarray, start_estimate: float
-) -> T.Tuple[np.ndarray, np.ndarray]:
-    "loss = - mean |y-yhat|^2"
-    g = y - start_estimate  # 1st order derivative
-    h = -1 * np.ones_like(g)  # 2nd order derivative
-    return g, h
-
-
 # TODO: add tests:
 # * X_hist is integer based
 # * X_hist has the same shape as X
@@ -144,14 +143,19 @@ class XGBoostRegressor(base.RegressorMixin, XGBoostTemplate):
     """
 
     def fit(self, X: np.ndarray, y: np.ndarray) -> "XGBoostRegressor":
-        X, y = validate_data(self, X, y, ensure_all_finite=False)
+        X, y = validate_data(self, X, y, ensure_all_finite=self.ensure_all_finite)
 
         self.trees_: list[DecisionTreeRegressor] = []
 
-        self.start_estimate_: float = float(np.mean(y))
+        self.start_estimate_ = get_start_estimate_mse(y)
 
         # initial differences to predict using negative squared error loss
-        g, h = compute_derivatives_negative_least_squares(y, self.start_estimate_)
+        current_estimates = self.start_estimate_ * np.ones_like(y)
+        g, h = get_pseudo_residual_mse(y, current_estimates, second_order=True)
+
+        if h is None:
+            raise ValueError("h cannot be None beyond this stage.")
+
         if self.use_hist:
             X_hist, all_x_bin_edges = xgboost_histogrammify_with_h(
                 X, h, n_bins=self.n_bins
@@ -179,7 +183,9 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "XGBoostRegressor":
     def predict(self, X: np.ndarray) -> np.ndarray:
         check_is_fitted(self, ("trees_", "n_features_in_", "start_estimate_"))
 
-        X = validate_data(self, X, reset=False, ensure_all_finite=False)
+        X = validate_data(
+            self, X, reset=False, ensure_all_finite=self.ensure_all_finite
+        )
 
         # baseline estimate
         y = np.ones(X.shape[0]) * self.start_estimate_
@@ -197,41 +203,6 @@ def predict(self, X: np.ndarray) -> np.ndarray:
         return y
 
 
-def check_y_float(y_float: np.ndarray):
-    # expects y_float to consist only of the values -1 and 1
-    unexpected_values = np.abs(y_float) != 1
-    if np.sum(unexpected_values) > 0:
-        raise ValueError(
-            f"expected y_float to contain only -1 and 1, got {y_float[unexpected_values]}"
-        )
-
-
-def compute_start_estimate_binomial_loglikelihood(y_float: np.ndarray) -> float:
-    check_y_float(y_float)
-
-    ym = np.mean(y_float)
-    start_estimate = 0.5 * math.log((1 + ym) / (1 - ym))
-
-    return start_estimate
-
-
-def compute_derivatives_binomial_loglikelihood(
-    y_float: np.ndarray, yhat: np.ndarray
-) -> T.Tuple[np.ndarray, np.ndarray]:
-    "loss = - sum log(1+exp(2*y*yhat))"
-
-    check_y_float(y_float)
-
-    # differences to predict using binomial log-likelihood (yes, the negative of the negative :P)
-    exp_y_yhat = np.exp(2 * y_float * yhat)
-    g = 2 * y_float / (1 + exp_y_yhat)  # dloss/dyhat, g in the xgboost paper
-
-    # d^2loss/dyhat^2, h in the xgboost paper
-    h = -(4 * y_float**2 * exp_y_yhat / (1 + exp_y_yhat) ** 2)
-
-    return g, h
-
-
 class XGBoostClassifier(base.ClassifierMixin, XGBoostTemplate):
     """XGBoost classifier
 
@@ -246,7 +217,7 @@ def __sklearn_tags__(self):
         return tags
 
     def fit(self, X: np.ndarray, y: np.ndarray) -> "XGBoostClassifier":
-        X, y = validate_data(self, X, y, ensure_all_finite=False)
+        X, y = validate_data(self, X, y, ensure_all_finite=self.ensure_all_finite)
 
         check_classification_targets(y)
 
@@ -269,11 +240,16 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "XGBoostClassifier":
         y = vectorize_bool_to_float(y)
 
         # initial estimate
-        self.start_estimate_ = compute_start_estimate_binomial_loglikelihood(y)
-        yhat = np.ones_like(y) * self.start_estimate_
+        self.start_estimate_ = get_start_estimate_log_odds(y)
+        current_estimates = np.ones_like(y) * self.start_estimate_
 
         for _ in track(range(self.n_trees), description="tree", total=self.n_trees):
-            g, h = compute_derivatives_binomial_loglikelihood(y, yhat)
+            g, h = get_pseudo_residual_log_odds(
+                y, current_estimates, second_order=True
+            )
+
+            if h is None:
+                raise ValueError("h cannot be None beyond this stage.")
 
             if self.use_hist:
                 _X, all_x_bin_edges = xgboost_histogrammify_with_h(
@@ -293,16 +269,17 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "XGBoostClassifier":
             new_tree.fit(_X, y, g=g, h=h)
             self.trees_.append(new_tree)
 
-            # update _y
-            yhat += new_tree.predict(X)
+            current_estimates += new_tree.predict(X)
 
         return self
 
     def predict_proba(self, X: np.ndarray) -> np.ndarray:
         check_is_fitted(self, ("trees_", "classes_", "gammas_", "n_features_in_"))
-        X = validate_data(self, X, reset=False, ensure_all_finite=False)
+        X = validate_data(
+            self, X, reset=False, ensure_all_finite=self.ensure_all_finite
+        )
 
-        g = np.ones(X.shape[0]) * self.start_estimate_
+        h = np.ones(X.shape[0]) * self.start_estimate_
 
         for boost, tree in track(
             enumerate(self.trees_), description="tree", total=len(self.trees_)
@@ -314,16 +291,15 @@ def predict_proba(self, X: np.ndarray) -> np.ndarray:
             else:
                 _X = X
 
-            g += tree.predict(_X)
+            h += tree.predict(_X)
 
-        proba = 1 / (1 + np.exp(-2.0 * g))
-        proba = np.array([1 - proba, proba]).T
-        return proba
+        p = get_probabilities_from_mapped_bools(h)
+        return p
 
     def predict(self, X: np.ndarray) -> np.ndarray:
-        proba = self.predict_proba(X)
+        p = self.predict_proba(X)
 
-        ix = np.argmax(proba, axis=1)
+        ix = np.argmax(p, axis=1)
         y = self.classes_[ix]
 
         return y
diff --git a/src/random_tree_models/transform.py b/src/random_tree_models/transform.py
new file mode 100644
index 0000000..f3681a1
--- /dev/null
+++ b/src/random_tree_models/transform.py
@@ -0,0 +1,21 @@
+import numpy as np
+
+
+def bool_to_float(x: bool) -> float:
+    if x == True:  # equality on purpose (not `is`) so the ints 0 and 1 also map
+        return 1.0
+    elif x == False:
+        return -1.0
+    else:
+        raise ValueError(f"{x=}, expected bool")
+
+
+def vectorize_bool_to_float(y: np.ndarray) -> np.ndarray:
+    f = np.vectorize(bool_to_float)
+    return f(y)
+
+
+def get_probabilities_from_mapped_bools(h: np.ndarray) -> np.ndarray:
+    p = 1 / (1 + np.exp(-2.0 * h))
+    p = np.array([1 - p, p]).T
+    return p
diff --git a/src/random_tree_models/utils.py b/src/random_tree_models/utils.py
index 390eec5..313a9a3 100644
--- a/src/random_tree_models/utils.py
+++ b/src/random_tree_models/utils.py
@@ -1,6 +1,5 @@
 import logging
 
-import numpy as np
 from rich.logging import RichHandler
 
 
@@ -20,17 +19,3 @@ def _get_logger(level=logging.INFO):
 
 
 logger = _get_logger()
-
-
-def bool_to_float(x: bool) -> float:
-    if x == True:
-        return 1.0
-    elif x == False:
-        return -1.0
-    else:
-        raise ValueError(f"{x=}, expected bool")
-
-
-def vectorize_bool_to_float(y: np.ndarray) -> np.ndarray:
-    f = np.vectorize(bool_to_float)
-    return f(y)
diff --git a/tests/models/test_gradientboostedtrees.py b/tests/models/test_gradientboostedtrees.py
index f9d74dd..3c3f761 100644
--- a/tests/models/test_gradientboostedtrees.py
+++ b/tests/models/test_gradientboostedtrees.py
@@ -1,5 +1,3 @@
-import math
-
 import numpy as np
 import pytest
 from sklearn.utils.estimator_checks import parametrize_with_checks
@@ -88,85 +86,3 @@ def test_gbt_estimators_with_sklearn_checks(estimator, check):
     Reference: https://scikit-learn.org/stable/modules/generated/sklearn.utils.estimator_checks.parametrize_with_checks.html#sklearn.utils.estimator_checks.parametrize_with_checks
     """
     check(estimator)
-
-
-def test_get_pseudo_residual_mse():
-    y = np.array([1.0, 2.0, 3.0])
-    current_estimates = np.array([0.5, 1.0, 2.0])
-    expected_residuals = np.array([0.5, 1.0, 1.0])
-    actual_residuals = gbt.get_pseudo_residual_mse(y, current_estimates)
-    assert np.allclose(actual_residuals, expected_residuals)
-
-    # Test with negative values
-    y = np.array([-1.0, -2.0, -3.0])
-    current_estimates = np.array([-0.5, -1.0, -2.0])
-    expected_residuals = np.array([-0.5, -1.0, -1.0])
-    actual_residuals = gbt.get_pseudo_residual_mse(y, current_estimates)
-    assert np.allclose(actual_residuals, expected_residuals)
-
-    # Test with zero values
-    y = np.array([0.0, 0.0, 0.0])
-    current_estimates = np.array([0.0, 0.0, 0.0])
-    expected_residuals = np.array([0.0, 0.0, 0.0])
-    actual_residuals = gbt.get_pseudo_residual_mse(y, current_estimates)
-    assert np.allclose(actual_residuals, expected_residuals)
-
-
-def test_get_pseudo_residual_log_odds():
-    # Test case 1: Basic test with positive and negative values
-    y = np.array([1, -1, 1, -1])
-    current_estimates = np.array([0.1, 0.2, -0.1, -0.2])
-    expected_residuals = 2 * y / (1 + np.exp(2 * y * current_estimates))
-    actual_residuals = gbt.get_pseudo_residual_log_odds(y, current_estimates)
-    assert np.allclose(actual_residuals, expected_residuals)
-
-    # Test case 2: y close to zero
-    y = np.array([0.001, -0.001])
-    current_estimates = np.array([0.5, 0.5])
-    expected_residuals = 2 * y / (1 + np.exp(2 * y * current_estimates))
-    actual_residuals = gbt.get_pseudo_residual_log_odds(y, current_estimates)
-    assert np.allclose(actual_residuals, expected_residuals)
-
-    # Test case 3: current_estimates close to zero
-    y = np.array([1, -1])
-    current_estimates = np.array([0.001, -0.001])
-    expected_residuals = 2 * y / (1 + np.exp(2 * y * current_estimates))
-    actual_residuals = gbt.get_pseudo_residual_log_odds(y, current_estimates)
-    assert np.allclose(actual_residuals, expected_residuals)
-
-    # Test case 4: Larger current_estimates
-    y = np.array([1, -1])
-    current_estimates = np.array([2, -2])
-    expected_residuals = 2 * y / (1 + np.exp(2 * y * current_estimates))
-    actual_residuals = gbt.get_pseudo_residual_log_odds(y, current_estimates)
-    assert np.allclose(actual_residuals, expected_residuals)
-
-
-def test_get_start_estimate_log_odds():
-    # Test case 1: Balanced classes (mean close to 0)
-    y = np.array([1, -1, 1, -1])
-    actual_start_estimate = gbt.get_start_estimate_log_odds(y)
-    assert np.isclose(actual_start_estimate, 0.0)
-
-    # Test case 2: All positive class
-    y = np.array([1, 1, 1, 1])
-    actual_start_estimate = gbt.get_start_estimate_log_odds(y)
-    assert math.isinf(actual_start_estimate)
-
-    # Test case 3: All negative class
-    y = np.array([-1, -1, -1, -1])
-    actual_start_estimate = gbt.get_start_estimate_log_odds(y)
-    assert math.isinf(actual_start_estimate)
-
-    # Test case 4: Unbalanced classes
-    y = np.array([1, 1, 1, -1])
-    ym = np.mean(y)
-    actual_start_estimate = gbt.get_start_estimate_log_odds(y)
-    v = 0.5493061443340549
-    assert np.isclose(actual_start_estimate, v)
-
-    # Test case 5: Another set of unbalanced classes
-    y = np.array([1, -1, -1, -1])
-    ym = np.mean(y)
-    actual_start_estimate = gbt.get_start_estimate_log_odds(y)
-    assert np.isclose(actual_start_estimate, -v)
diff --git a/tests/models/test_xgboost.py b/tests/models/test_xgboost.py
index 83a84c1..a8aed26 100644
--- a/tests/models/test_xgboost.py
+++ b/tests/models/test_xgboost.py
@@ -86,90 +86,72 @@ def test_xgboost_estimators_with_sklearn_checks(estimator, check):
     check(estimator)
-
-
-@pytest.mark.parametrize(
-    "y_float, start_estimate_exp",
-    [
-        (np.array([-1.0, 1.0]), 0),
-        (np.array([-1.0, 1.0, 1.0, 1.0]), 0.5493061443340549),
-        (np.array([-1.0, -1.0, -1.0, 1.0]), -0.5493061443340549),
-        (np.array([True, True, False, False]), None),
-        (np.array([-2.0, -2.0, 2.0, 2.0]), None),
-    ],
-)
-def test_compute_start_estimate_binomial_loglikelihood(
-    y_float: np.ndarray, start_estimate_exp: float
-):
-    try:
-        # line to test
-        start_estimate = xgboost.compute_start_estimate_binomial_loglikelihood(y_float)
-    except ValueError as ex:
-        if start_estimate_exp is None:
-            pass  # expectedly failed for non -1 and 1 values
-        else:
-            raise ex
-    else:
-        if start_estimate_exp is None:
-            pytest.fail(f"unexpectedly passed for non -1 and 1 values")
-        assert np.isclose(start_estimate, start_estimate_exp)
-
-
-@pytest.mark.parametrize(
-    "y,start_estimate,g_exp",
-    [
-        (np.array([1.0]), 0.5, np.array([0.5])),
-        (np.array([1.0, 1.0]), 0.5, np.array([0.5, 0.5])),
-    ],
-)
-def test_compute_derivatives_negative_least_squares(
-    y: np.ndarray, start_estimate: float, g_exp: np.ndarray
-):
-    # line to test
-    g, h = xgboost.compute_derivatives_negative_least_squares(y, start_estimate)
-
-    assert g.shape == h.shape
-    assert np.allclose(g, g_exp)
-    assert np.allclose(h, -1)
-
-
-@pytest.mark.parametrize(
-    "y_float,start_estimate,g_exp,h_exp",
-    [
-        (
-            np.array([-1.0, 1.0]),
-            0.0,
-            np.array([-1.0, 1.0]),
-            np.array([-1.0, -1.0]),
-        ),
-        (
-            np.array([-1.0, -1.0, 1.0, 1.0]),
-            0.0,
-            np.array([-1.0, -1.0, 1.0, 1.0]),
-            np.array([-1.0, -1.0, -1.0, -1.0]),
-        ),
-        # failure cases
-        (np.array([False, True]), 0.0, None, None),
-        (np.array([-2.0, 2.0]), 0.0, None, None),
-    ],
-)
-def test_compute_derivatives_binomial_loglikelihood(
-    y_float: np.ndarray,
-    start_estimate: float,
-    g_exp: np.ndarray,
-    h_exp: np.ndarray,
-):
-    yhat = np.ones_like(y_float) * start_estimate
-    is_bad = g_exp is None and h_exp is None
-    try:
-        # line to test
-        g, h = xgboost.compute_derivatives_binomial_loglikelihood(y_float, yhat)
-    except ValueError as ex:
-        if is_bad:
-            pass  # Expectedly failed for incorrect y_float values"
-        else:
-            raise ex
-    else:
-        if is_bad:
-            pytest.fail("Unexpectedly passed for incorrect y_float values")
-        assert g.shape == h.shape
-        assert np.allclose(g, g_exp)
-        assert np.allclose(h, h_exp)
diff --git a/tests/test_gradient.py b/tests/test_gradient.py
new file mode 100644
index 0000000..c45c359
--- /dev/null
+++ b/tests/test_gradient.py
@@ -0,0 +1,147 @@
+import math
+
+import numpy as np
+import pytest
+
+from random_tree_models.gradient import (
+    check_y_float,
+    get_pseudo_residual_log_odds,
+    get_pseudo_residual_mse,
+    get_start_estimate_log_odds,
+)
+
+
+def test_check_y_float():
+    # Test case 1: Valid input with only -1 and 1
+    y_float = np.array([-1, 1, -1, 1])
+    check_y_float(y_float)  # Should not raise an error
+
+    # Test case 2: Valid input with only 1
+    y_float = np.array([1, 1, 1, 1])
+    check_y_float(y_float)  # Should not raise an error
+
+    # Test case 3: Valid input with only -1
+    y_float = np.array([-1, -1, -1, -1])
+    check_y_float(y_float)  # Should not raise an error
+
+    # Test case 4: Invalid input with 0
+    y_float = np.array([-1, 1, 0, 1])
+    with pytest.raises(ValueError):
+        check_y_float(y_float)
+
+    # Test case 5: Invalid input with values other than -1 and 1
+    y_float = np.array([-1, 1, -2, 2])
+    with pytest.raises(ValueError):
+        check_y_float(y_float)
+
+    # Test case 6: Invalid input with mixed values
+    y_float = np.array([-1, 1, 0.5, -0.5])
+    with pytest.raises(ValueError):
+        check_y_float(y_float)
+
+
+def test_get_start_estimate_log_odds():
+    # Test case 1: Balanced classes (mean close to 0)
+    y = np.array([1, -1, 1, -1])
+    actual_start_estimate = get_start_estimate_log_odds(y)
+    assert np.isclose(actual_start_estimate, 0.0)
+
+    # Test case 2: All positive class
+    y = np.array([1, 1, 1, 1])
+    actual_start_estimate = get_start_estimate_log_odds(y)
+    assert math.isinf(actual_start_estimate)
+
+    # Test case 3: All negative class
+    y = np.array([-1, -1, -1, -1])
+    actual_start_estimate = get_start_estimate_log_odds(y)
+    assert math.isinf(actual_start_estimate)
+
+    # Test case 4: Unbalanced classes
+    y = np.array([1, 1, 1, -1])
+    actual_start_estimate = get_start_estimate_log_odds(y)
+    v = 0.5493061443340549
+    assert np.isclose(actual_start_estimate, v)
+
+    # Test case 5: Another set of unbalanced classes
+    y = np.array([1, -1, -1, -1])
+    actual_start_estimate = get_start_estimate_log_odds(y)
+    assert np.isclose(actual_start_estimate, -v)
+
+
+def test_get_pseudo_residual_log_odds():
+    # Test case 1: Basic test with positive and negative values
+    y = np.array([1, -1, 1, -1])
+    current_estimates = np.array([0.1, 0.2, -0.1, -0.2])
+    expected_residuals_1st = np.array(
+        [0.90033201, -1.19737532, 1.09966799, -0.80262468]
+    )
+    expected_residuals_2nd = np.array(
+        [-0.99006629, -0.96104298, -0.99006629, -0.96104298]
+    )
+    actual_residuals_1st, actual_residuals_2nd = get_pseudo_residual_log_odds(
+        y, current_estimates, True
+    )
+    assert actual_residuals_2nd is not None
+    assert np.allclose(actual_residuals_1st, expected_residuals_1st)
+    assert np.allclose(actual_residuals_2nd, expected_residuals_2nd)
+
+    # Test case 2: current_estimates close to zero
+    y = np.array([1, -1])
+    current_estimates = np.array([0.001, -0.001])
+    expected_residuals_1st = np.array([0.999, -0.999])
+    expected_residuals_2nd = np.array([-0.999999, -0.999999])
+    actual_residuals_1st, actual_residuals_2nd = get_pseudo_residual_log_odds(
+        y, current_estimates, True
+    )
+    assert actual_residuals_2nd is not None
+    assert np.allclose(actual_residuals_1st, expected_residuals_1st)
+    assert np.allclose(actual_residuals_2nd, expected_residuals_2nd)
+
+    # Test case 3: Larger current_estimates
+    y = np.array([1, -1])
+    current_estimates = np.array([2, -2])
+    expected_residuals_1st = np.array([0.03597242, -0.03597242])
+    expected_residuals_2nd = np.array([-0.07065082, -0.07065082])
+    actual_residuals_1st, actual_residuals_2nd = get_pseudo_residual_log_odds(
+        y, current_estimates, True
+    )
+    assert actual_residuals_2nd is not None
+    assert np.allclose(actual_residuals_1st, expected_residuals_1st)
+    assert np.allclose(actual_residuals_2nd, expected_residuals_2nd)
+
+
+def test_get_pseudo_residual_mse():
+    y = np.array([1.0, 2.0, 3.0])
+    current_estimates = np.array([0.5, 1.0, 2.0])
+    expected_residuals_1st = np.array([0.5, 1.0, 1.0])
+    expected_residuals_2nd = np.array([-1.0, -1.0, -1.0])
+    actual_residuals_1st, actual_residuals_2nd = get_pseudo_residual_mse(
+        y, current_estimates, True
+    )
+    assert actual_residuals_2nd is not None
+    assert np.allclose(actual_residuals_1st, expected_residuals_1st)
+    assert np.allclose(actual_residuals_2nd, expected_residuals_2nd)
+
+    # Test with negative values
+    y = np.array([-1.0, -2.0, -3.0])
+    current_estimates = np.array([-0.5, -1.0, -2.0])
+    expected_residuals_1st = np.array([-0.5, -1.0, -1.0])
+    expected_residuals_2nd = np.array([-1.0, -1.0, -1.0])
+    actual_residuals_1st, actual_residuals_2nd = get_pseudo_residual_mse(
+        y, current_estimates, True
+    )
+    assert actual_residuals_2nd is not None
+    assert np.allclose(actual_residuals_1st, expected_residuals_1st)
+    assert np.allclose(actual_residuals_2nd, expected_residuals_2nd)
+
+    # Test with zero values
+    y = np.array([0.0, 0.0, 0.0])
+    current_estimates = np.array([0.0, 0.0, 0.0])
+    expected_residuals_1st = np.array([0.0, 0.0, 0.0])
+    expected_residuals_2nd = np.array([-1.0, -1.0, -1.0])
+    actual_residuals_1st, actual_residuals_2nd = get_pseudo_residual_mse(
+        y, current_estimates, True
+    )
+    assert actual_residuals_2nd is not None
+    assert np.allclose(actual_residuals_1st, expected_residuals_1st)
+    assert np.allclose(actual_residuals_2nd, expected_residuals_2nd)
diff --git a/tests/test_transform.py b/tests/test_transform.py
new file mode 100644
index 0000000..42cefd2
--- /dev/null
+++ b/tests/test_transform.py
@@ -0,0 +1,65 @@
+import numpy as np
+import pytest
+
+from random_tree_models.transform import (
+    bool_to_float,
+    get_probabilities_from_mapped_bools,
+    vectorize_bool_to_float,
+)
+
+
+def test_vectorize_bool_to_float():
+    y = np.array([True, False, True, False])
+    res = vectorize_bool_to_float(y)
+    assert np.all(res == np.array([1.0, -1.0, 1.0, -1.0]))
+
+    y = np.array([True, False, True, True])
+    res = vectorize_bool_to_float(y)
+    assert np.all(res == np.array([1.0, -1.0, 1.0, 1.0]))
+
+    y = np.array([False, False, True, False])
+    res = vectorize_bool_to_float(y)
+    assert np.all(res == np.array([-1.0, -1.0, 1.0, -1.0]))
+
+
+@pytest.mark.parametrize(
+    "x,exp,is_bad",
+    [
+        (True, 1, False),
+        (False, -1, False),
+        ("a", None, True),
+        (1, 1, False),
+        (0, -1, False),
+        (-1, None, True),
+        (None, None, True),
+    ],
+)
+def test_bool_to_float(x, exp, is_bad: bool):
+    try:
+        # line to test
+        res = bool_to_float(x)
+    except ValueError as ex:
+        if is_bad:
+            pass  # Failed expectedly to convert non-bool values
+        else:
+            raise ex
+    else:
+        if is_bad:
+            pytest.fail(f"Passed unexpectedly for non-bool value {x} returning {res}")
+        assert res == exp
+
+
+def test_get_probabilities_from_mapped_bools():
+    h = np.array([0.0, 1.0, -1.0])
+    actual = get_probabilities_from_mapped_bools(h)
+    expected = np.array(
+        [[0.5, 0.5], [0.11920292, 0.88079708], [0.88079708, 0.11920292]]
+    )
+    assert np.allclose(actual, expected)
+
+    h = np.array([0.5, -0.5, 0.2])
+    actual = get_probabilities_from_mapped_bools(h)
+    expected = np.array(
+        [[0.26894142, 0.73105858], [0.73105858, 0.26894142], [0.40131234, 0.59868766]]
+    )
+    assert np.allclose(actual, expected)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 2a11693..02c7e22 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,9 +1,5 @@
 import logging
 
-import numpy as np
-import pytest
-
-import random_tree_models.utils
 import random_tree_models.utils as utils
 
 
@@ -12,42 +8,3 @@ def test_get_logger():
     assert isinstance(logger, logging.Logger)
     assert logger.name == "rich"
     assert logger.level == logging.INFO
-
-
-@pytest.mark.parametrize(
-    "x,exp,is_bad",
-    [
-        (True, 1, False),
-        (False, -1, False),
-        ("a", None, True),
-        (1, 1, False),
-        (0, -1, False),
-        (-1, None, True),
-        (None, None, True),
-    ],
-)
-def test_bool_to_float(x, exp, is_bad: bool):
-    try:
-        # line to test
-        res = random_tree_models.utils.bool_to_float(x)
-    except ValueError as ex:
-        if is_bad:
-            pass  # Failed expectedly to convert non-bool values
-    else:
-        if is_bad:
-            pytest.fail(f"Passed unexpectedly for non-bool value {x} returning {res}")
-        assert res == exp
-
-
-def test_vectorize_bool_to_float():
-    y = np.array([True, False, True, False])
-    res = utils.vectorize_bool_to_float(y)
-    assert np.all(res == np.array([1.0, -1.0, 1.0, -1.0]))
-
-    y = np.array([True, False, True, True])
-    res = utils.vectorize_bool_to_float(y)
-    assert np.all(res == np.array([1.0, -1.0, 1.0, 1.0]))
-
-    y = np.array([False, False, True, False])
-    res = utils.vectorize_bool_to_float(y)
-    assert np.all(res == np.array([-1.0, -1.0, 1.0, -1.0]))
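---

Usage sketch: a minimal, illustrative example of how the relocated helpers fit together, assuming the module layout introduced by this patch; the toy arrays and variable names below are made up for illustration and are not part of the patch.

import numpy as np

from random_tree_models.gradient import (
    get_pseudo_residual_log_odds,
    get_pseudo_residual_mse,
    get_start_estimate_log_odds,
    get_start_estimate_mse,
)
from random_tree_models.transform import (
    get_probabilities_from_mapped_bools,
    vectorize_bool_to_float,
)

# regression: start from the mean of y, then fit each tree to the pseudo residuals
y_reg = np.array([1.0, 2.0, 3.0])
estimates = get_start_estimate_mse(y_reg) * np.ones_like(y_reg)
# g is the pseudo residual; h is None unless second_order=True
# (the xgboost estimators need h, the plain gradient boosted trees do not)
g, h = get_pseudo_residual_mse(y_reg, estimates, second_order=True)

# classification: map bools to {-1, 1}, start from half the log odds of the mean
y_clf = vectorize_bool_to_float(np.array([True, True, False]))
estimates = get_start_estimate_log_odds(y_clf) * np.ones_like(y_clf)
g, h = get_pseudo_residual_log_odds(y_clf, estimates, second_order=True)

# after boosting, the accumulated raw scores are mapped back to class probabilities,
# column 0 for the negative class and column 1 for the positive class
proba = get_probabilities_from_mapped_bools(estimates)  # shape (n_samples, 2)

This mirrors how the GradientBoostedTrees* estimators call the helpers with second_order=False, while the XGBoost estimators request both derivatives and raise if h is None.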