From 10d88171ed02ac6edd0e02d253578290e0562661 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen <simon@3450.dk>
Date: Mon, 26 May 2025 19:27:23 +0200
Subject: [PATCH 01/10] License update

---
 pyproject.toml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index b1701892..2c195b47 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,7 @@
+[project]
+license = "BSD-3-Clause"
+license-files = ["LICENSE"]
+
 [build-system]
 requires = ["setuptools", "Cython", "numpy", "wheel"]
 

From b1f2fb8988b4c646483ecb22d3d3b9a00cb7e687 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen <simon@3450.dk>
Date: Mon, 26 May 2025 20:04:49 +0200
Subject: [PATCH 02/10] Seems to be a bug in cython 3.1.1

This bug prevents the package from building, so pin Cython to 3.1.0.
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 1d98842e..efc2e9e4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 numpy>=1.25.0
 matplotlib>=3.7.1
-cython>=3.0.0
\ No newline at end of file
+cython==3.1.0

From 53d266dc8332b271144514753741c80fe8943172 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen <simon@3450.dk>
Date: Mon, 26 May 2025 20:19:45 +0200
Subject: [PATCH 03/10] Fix bug 1

---
 src/adaXT/decision_tree/_decision_tree.pyx | 8 +++++---
 src/adaXT/predictor/predictor.pxd          | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/adaXT/decision_tree/_decision_tree.pyx b/src/adaXT/decision_tree/_decision_tree.pyx
index 508fb81b..37287c19 100644
--- a/src/adaXT/decision_tree/_decision_tree.pyx
+++ b/src/adaXT/decision_tree/_decision_tree.pyx
@@ -44,9 +44,7 @@ cdef class refit_object(Node):
     def add_idx(self, idx: int) -> None:
         self.list_idx.append(idx)
 
-
-@cython.auto_pickle(True)
-cdef class _DecisionTree():
+cdef class _DecisionTree:
     cdef public:
         object criteria
         object splitter
@@ -343,6 +341,10 @@ cdef class _DecisionTree():
         # Now squash all the DecisionNodes not visited
         self.__squash_tree()
 
+        # Make sure that predictor_instance points to the same root, if we have
+        # changed it
+        self.predictor_instance.root = self.root
+
 
 # From below here, it is the DepthTreeBuilder
 class queue_obj:
diff --git a/src/adaXT/predictor/predictor.pxd b/src/adaXT/predictor/predictor.pxd
index 77c18433..82300fec 100644
--- a/src/adaXT/predictor/predictor.pxd
+++ b/src/adaXT/predictor/predictor.pxd
@@ -7,7 +7,7 @@ cdef class Predictor():
         cnp.ndarray X
         cnp.ndarray Y
         int n_features
-        Node root
+    cdef public Node root
 
     cpdef dict predict_leaf(self, double[:, ::1] X)
 

From 9cd34b224dfdae0f17f15b45694d3c4a28440888 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen <simon@3450.dk>
Date: Mon, 26 May 2025 20:20:58 +0200
Subject: [PATCH 04/10] Formatting random forest

---
 src/adaXT/random_forest/random_forest.py | 37 +++++++++---------------
 1 file changed, 14 insertions(+), 23 deletions(-)

diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py
index d0483e3b..ba5c8edd 100644
--- a/src/adaXT/random_forest/random_forest.py
+++ b/src/adaXT/random_forest/random_forest.py
@@ -64,8 +64,7 @@ def get_sample_indices(
             resample_size0 = sampling_args["size"]
             resample_size1 = sampling_args["size"]
         else:
-            resample_size0 = np.min(
-                [sampling_args["split"], sampling_args["size"]])
+            resample_size0 = np.min([sampling_args["split"], sampling_args["size"]])
             resample_size1 = np.min(
                 [X_n_rows - sampling_args["split"], sampling_args["size"]]
             )
@@ -75,7 +74,7 @@ def get_sample_indices(
             replace=sampling_args["replace"],
         )
         pred_indices = gen.choice(
-            indices[sampling_args["split"]:],
+            indices[sampling_args["split"] :],
             size=resample_size1,
             replace=sampling_args["replace"],
         )
@@ -86,8 +85,7 @@ def get_sample_indices(
             resample_size0 = sampling_args["size"]
             resample_size1 = sampling_args["size"]
         else:
-            resample_size0 = np.min(
-                [sampling_args["split"], sampling_args["size"]])
+            resample_size0 = np.min([sampling_args["split"], sampling_args["size"]])
             resample_size1 = np.min(
                 [X_n_rows - sampling_args["split"], sampling_args["size"]]
             )
@@ -97,7 +95,7 @@ def get_sample_indices(
             replace=sampling_args["replace"],
         )
         pred_indices = gen.choice(
-            indices[sampling_args["split"]:],
+            indices[sampling_args["split"] :],
             size=resample_size1,
             replace=sampling_args["replace"],
         )
@@ -153,18 +151,11 @@ def build_single_tree(
         predictor=predictor,
         splitter=splitter,
     )
-    tree.fit(
-        X=X,
-        Y=Y,
-        sample_indices=fitting_indices,
-        sample_weight=sample_weight)
+    tree.fit(X=X, Y=Y, sample_indices=fitting_indices, sample_weight=sample_weight)
     if honest_tree:
         tree.refit_leaf_nodes(
-            X=X,
-            Y=Y,
-            sample_weight=sample_weight,
-            sample_indices=prediction_indices)
-
+            X=X, Y=Y, sample_weight=sample_weight, sample_indices=prediction_indices
+        )
     return tree
 
 
@@ -349,8 +340,7 @@ def __get_sampling_parameter(self, sampling_args: dict | None) -> dict:
             if "size" not in sampling_args:
                 sampling_args["size"] = self.X_n_rows
             elif isinstance(sampling_args["size"], float):
-                sampling_args["size"] = int(
-                    sampling_args["size"] * self.X_n_rows)
+                sampling_args["size"] = int(sampling_args["size"] * self.X_n_rows)
             elif not isinstance(sampling_args["size"], int):
                 raise ValueError(
                     "The provided sampling_args['size'] is not an integer or float as required."
@@ -421,7 +411,8 @@ def __build_trees(self) -> None:
             sampling=self.sampling,
         )
         self.fitting_indices, self.prediction_indices, self.out_of_bag_indices = zip(
-            *indices)
+            *indices
+        )
         self.trees = self.parallel.starmap(
             build_single_tree,
             map_input=zip(self.fitting_indices, self.prediction_indices),
@@ -444,8 +435,9 @@ def __build_trees(self) -> None:
             n_jobs=self.n_jobs_fit,
         )
 
-    def fit(self, X: ArrayLike, Y: ArrayLike,
-            sample_weight: ArrayLike | None = None) -> None:
+    def fit(
+        self, X: ArrayLike, Y: ArrayLike, sample_weight: ArrayLike | None = None
+    ) -> None:
         """
         Fit the random forest with training data (X, Y).
 
@@ -477,8 +469,7 @@ def fit(self, X: ArrayLike, Y: ArrayLike,
         self.X = shared_numpy_array(X)
         self.Y = shared_numpy_array(Y)
         self.X_n_rows, self.n_features = self.X.shape
-        self.max_features = self._check_max_features(
-            self.max_features, X.shape[0])
+        self.max_features = self._check_max_features(self.max_features, X.shape[0])
         self.sample_weight = self._check_sample_weight(sample_weight)
         self.sampling_args = self.__get_sampling_parameter(self.sampling_args)
 

From fe0935b3ffefed4445d7e47d34ed7fc4987be516 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen <simon@3450.dk>
Date: Mon, 26 May 2025 20:21:07 +0200
Subject: [PATCH 05/10] License fix

---
 pyproject.toml | 4 ----
 setup.py       | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2c195b47..b1701892 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,3 @@
-[project]
-license = "BSD-3-Clause"
-license-files = ["LICENSE"]
-
 [build-system]
 requires = ["setuptools", "Cython", "numpy", "wheel"]
 
diff --git a/setup.py b/setup.py
index 3358c711..f6a0931d 100644
--- a/setup.py
+++ b/setup.py
@@ -140,6 +140,7 @@ def run_build():
         extensions = cythonize(extensions, **arg_dir)
     setup(
         name=NAME,
+        license="BSD-3-clause",
         version=VERSION,
         description=DESCRIPTION,
         long_description=LONG_DESCRIPTION,
@@ -160,7 +161,6 @@ def run_build():
         classifiers=[
             "Programming Language :: Python :: 3",
             "Intended Audience :: Science/Research",
-            "License :: OSI Approved :: BSD License",
             "Operating System :: OS Independent",
         ],
         extras_require=extras,

From 9d44b72fecc3c0acb9e70d20b6f01784323dc4fe Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen <simon@3450.dk>
Date: Mon, 26 May 2025 20:36:43 +0200
Subject: [PATCH 06/10] Fix bug 2

---
 src/adaXT/random_forest/random_forest.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py
index ba5c8edd..1b564e44 100644
--- a/src/adaXT/random_forest/random_forest.py
+++ b/src/adaXT/random_forest/random_forest.py
@@ -356,11 +356,11 @@ def __get_sampling_parameter(self, sampling_args: dict | None) -> dict:
                 sampling_args["split"] = np.min(
                     [int(0.5 * self.X_n_rows), self.X_n_rows - 1]
                 )
-            elif isinstance(sampling_args["size"], float):
+            elif isinstance(sampling_args["split"], float):
                 sampling_args["split"] = np.min(
                     [int(sampling_args["split"] * self.X_n_rows), self.X_n_rows - 1]
                 )
-            elif not isinstance(sampling_args["size"], int):
+            elif not isinstance(sampling_args["split"], (int, np.integer)):
                 raise ValueError(
                     "The provided sampling_args['split'] is not an integer or float as required."
                 )
@@ -370,7 +370,7 @@ def __get_sampling_parameter(self, sampling_args: dict | None) -> dict:
                 sampling_args["size"] = int(
                     sampling_args["size"] * sampling_args["split"]
                 )
-            elif not isinstance(sampling_args["size"], int):
+            elif not isinstance(sampling_args["size"], (np.integer, int)):
                 raise ValueError(
                     "The provided sampling_args['size'] is not an integer or float as required."
                 )

From 7154f074c075c1dd9790d0f3d14f032d24ccf134 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen <simon@3450.dk>
Date: Mon, 26 May 2025 20:45:28 +0200
Subject: [PATCH 07/10] Fix bug 3

---
 src/adaXT/decision_tree/_decision_tree.pyx | 6 +++---
 src/adaXT/random_forest/random_forest.py   | 7 +++----
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/adaXT/decision_tree/_decision_tree.pyx b/src/adaXT/decision_tree/_decision_tree.pyx
index 37287c19..dadc42af 100644
--- a/src/adaXT/decision_tree/_decision_tree.pyx
+++ b/src/adaXT/decision_tree/_decision_tree.pyx
@@ -3,7 +3,7 @@ import sys
 
 cimport numpy as cnp
 ctypedef cnp.float64_t DOUBLE_t
-ctypedef cnp.int64_t LONG_t
+ctypedef cnp.int32_t INT32_T
 from libcpp cimport bool
 
 
@@ -178,7 +178,7 @@ cdef class _DecisionTree:
     cdef void __fit_new_leaf_nodes(self, cnp.ndarray[DOUBLE_t, ndim=2] X,
                                    cnp.ndarray[DOUBLE_t, ndim=2] Y,
                                    cnp.ndarray[DOUBLE_t, ndim=1] sample_weight,
-                                   cnp.ndarray[LONG_t, ndim=1] sample_indices):
+                                   cnp.ndarray[INT32_T, ndim=1] sample_indices):
         cdef:
             int idx, n_objs, depth, cur_split_idx
             double cur_threshold
@@ -326,7 +326,7 @@ cdef class _DecisionTree:
                          cnp.ndarray[DOUBLE_t, ndim=2] X,
                          cnp.ndarray[DOUBLE_t, ndim=2] Y,
                          cnp.ndarray[DOUBLE_t, ndim=1] sample_weight,
-                         cnp.ndarray[LONG_t, ndim=1] sample_indices) -> None:
+                         cnp.ndarray[INT32_T, ndim=1] sample_indices) -> None:
 
         if self.root is None:
             raise ValueError("The tree has not been trained before trying to\
diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py
index 1b564e44..185ad4f7 100644
--- a/src/adaXT/random_forest/random_forest.py
+++ b/src/adaXT/random_forest/random_forest.py
@@ -48,17 +48,17 @@ def get_sample_indices(
     Assumes there has been a previous call to self.__get_sample_indices on the
     RandomForest.
     """
+    indices = np.arange(0, X_n_rows, dtype=np.int32)
     if sampling == "resampling":
         ret = (
             gen.choice(
-                np.arange(0, X_n_rows),
+                indices,
                 size=sampling_args["size"],
                 replace=sampling_args["replace"],
             ),
             None,
         )
     elif sampling == "honest_tree":
-        indices = np.arange(0, X_n_rows)
         gen.shuffle(indices)
         if sampling_args["replace"]:
             resample_size0 = sampling_args["size"]
@@ -80,7 +80,6 @@ def get_sample_indices(
         )
         ret = (fit_indices, pred_indices)
     elif sampling == "honest_forest":
-        indices = np.arange(0, X_n_rows)
         if sampling_args["replace"]:
             resample_size0 = sampling_args["size"]
             resample_size1 = sampling_args["size"]
@@ -101,7 +100,7 @@ def get_sample_indices(
         )
         ret = (fit_indices, pred_indices)
     else:
-        ret = (np.arange(0, X_n_rows), None)
+        ret = (indices, None)
 
     if sampling_args["OOB"]:
         # Only fitting indices

From a00920d7c03388cb0367fc47911c442fd13b0557 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen <simon@3450.dk>
Date: Mon, 26 May 2025 21:07:07 +0200
Subject: [PATCH 08/10] Fixed linting and set build_system to use cython 3.1.0
 not 3.1.1

---
 pyproject.toml                             | 2 +-
 src/adaXT/decision_tree/_decision_tree.pyx | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index b1701892..51c7fe31 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools", "Cython", "numpy", "wheel"]
+requires = ["setuptools", "Cython==3.1.0", "numpy", "wheel"]
 
 [tool.cython-lint]
 max-line-length = 127
diff --git a/src/adaXT/decision_tree/_decision_tree.pyx b/src/adaXT/decision_tree/_decision_tree.pyx
index dadc42af..cf86e3e4 100644
--- a/src/adaXT/decision_tree/_decision_tree.pyx
+++ b/src/adaXT/decision_tree/_decision_tree.pyx
@@ -16,7 +16,6 @@ from .nodes import DecisionNode
 
 # for c level definitions
 
-cimport cython
 from .nodes cimport DecisionNode, Node
 
 from ..utils cimport dsum
@@ -27,7 +26,7 @@ cdef double EPSILON = np.finfo('double').eps
 cdef class refit_object(Node):
     cdef public:
         list list_idx
-        bint is_left
+        bool is_left
 
     def __init__(
             self,

From 14449f09266f595cd9f16052d9389102de886cd0 Mon Sep 17 00:00:00 2001
From: Simon Vinding Brodersen <simon@3450.dk>
Date: Wed, 28 May 2025 08:17:02 +0200
Subject: [PATCH 09/10] Version update

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index f6a0931d..9fc04048 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@
 import os
 
 NAME = "adaXT"
-VERSION = "1.5.0"
+VERSION = "1.5.1"
 DESCRIPTION = "A Python package for tree-based regression and classification"
 PROJECT_URLS = {
     "Documentation": "https://NiklasPfister.github.io/adaXT/",

From 30ae70e7f55b85b9cf7912efc3c48eee366bd719 Mon Sep 17 00:00:00 2001
From: Niklas Andreas Pfister <np@math.ku.dk>
Date: Wed, 28 May 2025 06:19:01 +0000
Subject: [PATCH 10/10] Automated autopep8 fixes

---
 src/adaXT/random_forest/random_forest.py | 36 +++++++++++++++---------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/src/adaXT/random_forest/random_forest.py b/src/adaXT/random_forest/random_forest.py
index 185ad4f7..3c40fb6f 100644
--- a/src/adaXT/random_forest/random_forest.py
+++ b/src/adaXT/random_forest/random_forest.py
@@ -64,7 +64,8 @@ def get_sample_indices(
             resample_size0 = sampling_args["size"]
             resample_size1 = sampling_args["size"]
         else:
-            resample_size0 = np.min([sampling_args["split"], sampling_args["size"]])
+            resample_size0 = np.min(
+                [sampling_args["split"], sampling_args["size"]])
             resample_size1 = np.min(
                 [X_n_rows - sampling_args["split"], sampling_args["size"]]
             )
@@ -74,7 +75,7 @@ def get_sample_indices(
             replace=sampling_args["replace"],
         )
         pred_indices = gen.choice(
-            indices[sampling_args["split"] :],
+            indices[sampling_args["split"]:],
             size=resample_size1,
             replace=sampling_args["replace"],
         )
@@ -84,7 +85,8 @@ def get_sample_indices(
             resample_size0 = sampling_args["size"]
             resample_size1 = sampling_args["size"]
         else:
-            resample_size0 = np.min([sampling_args["split"], sampling_args["size"]])
+            resample_size0 = np.min(
+                [sampling_args["split"], sampling_args["size"]])
             resample_size1 = np.min(
                 [X_n_rows - sampling_args["split"], sampling_args["size"]]
             )
@@ -94,7 +96,7 @@ def get_sample_indices(
             replace=sampling_args["replace"],
         )
         pred_indices = gen.choice(
-            indices[sampling_args["split"] :],
+            indices[sampling_args["split"]:],
             size=resample_size1,
             replace=sampling_args["replace"],
         )
@@ -150,11 +152,17 @@ def build_single_tree(
         predictor=predictor,
         splitter=splitter,
     )
-    tree.fit(X=X, Y=Y, sample_indices=fitting_indices, sample_weight=sample_weight)
+    tree.fit(
+        X=X,
+        Y=Y,
+        sample_indices=fitting_indices,
+        sample_weight=sample_weight)
     if honest_tree:
         tree.refit_leaf_nodes(
-            X=X, Y=Y, sample_weight=sample_weight, sample_indices=prediction_indices
-        )
+            X=X,
+            Y=Y,
+            sample_weight=sample_weight,
+            sample_indices=prediction_indices)
     return tree
 
 
@@ -339,7 +347,8 @@ def __get_sampling_parameter(self, sampling_args: dict | None) -> dict:
             if "size" not in sampling_args:
                 sampling_args["size"] = self.X_n_rows
             elif isinstance(sampling_args["size"], float):
-                sampling_args["size"] = int(sampling_args["size"] * self.X_n_rows)
+                sampling_args["size"] = int(
+                    sampling_args["size"] * self.X_n_rows)
             elif not isinstance(sampling_args["size"], int):
                 raise ValueError(
                     "The provided sampling_args['size'] is not an integer or float as required."
@@ -410,8 +419,7 @@ def __build_trees(self) -> None:
             sampling=self.sampling,
         )
         self.fitting_indices, self.prediction_indices, self.out_of_bag_indices = zip(
-            *indices
-        )
+            *indices)
         self.trees = self.parallel.starmap(
             build_single_tree,
             map_input=zip(self.fitting_indices, self.prediction_indices),
@@ -434,9 +442,8 @@ def __build_trees(self) -> None:
             n_jobs=self.n_jobs_fit,
         )
 
-    def fit(
-        self, X: ArrayLike, Y: ArrayLike, sample_weight: ArrayLike | None = None
-    ) -> None:
+    def fit(self, X: ArrayLike, Y: ArrayLike,
+            sample_weight: ArrayLike | None = None) -> None:
         """
         Fit the random forest with training data (X, Y).
 
@@ -468,7 +475,8 @@ def fit(
         self.X = shared_numpy_array(X)
         self.Y = shared_numpy_array(Y)
         self.X_n_rows, self.n_features = self.X.shape
-        self.max_features = self._check_max_features(self.max_features, X.shape[0])
+        self.max_features = self._check_max_features(
+            self.max_features, X.shape[0])
         self.sample_weight = self._check_sample_weight(sample_weight)
         self.sampling_args = self.__get_sampling_parameter(self.sampling_args)