diff --git a/cpp/bench/sg/fil.cu b/cpp/bench/sg/fil.cu
index 7c9f0b6f7d..4444dd95ee 100644
--- a/cpp/bench/sg/fil.cu
+++ b/cpp/bench/sg/fil.cu
@@ -78,7 +78,7 @@ class FIL : public RegressionFixture<float> {
     ML::RandomForestRegressorF rf_model;
     auto* mPtr         = &rf_model;
     size_t train_nrows = std::min(params.nrows, 1000);
-    fit(*handle, mPtr, data.X.data(), train_nrows, params.ncols, data.y.data(), p_rest.rf);
+    fit(*handle, mPtr, data.X.data(), train_nrows, params.ncols, data.y.data(), p_rest.rf, nullptr);
     handle->sync_stream(stream);
 
     ML::build_treelite_forest(&model, &rf_model, params.ncols);
@@ -166,8 +166,7 @@ std::vector<Params> getInputs()
                  8, /* n_streams */
                  128 /* max_batch_size */,
                  0, /* minTreesPerGroupFold */
-                 0, /* foldGroupSize */
-                 -1 /* group_col_idx */
+                 0 /* foldGroupSize */
  );
 
  using ML::fil::algo_t;
diff --git a/cpp/bench/sg/filex.cu b/cpp/bench/sg/filex.cu
index ffc0d2b157..b64b35b7af 100644
--- a/cpp/bench/sg/filex.cu
+++ b/cpp/bench/sg/filex.cu
@@ -61,7 +61,7 @@ class FILEX : public RegressionFixture<float> {
     ML::RandomForestRegressorF rf_model;
     auto* mPtr       = &rf_model;
     auto train_nrows = std::min(params.nrows, 1000);
-    fit(*handle, mPtr, data.X.data(), train_nrows, params.ncols, data.y.data(), p_rest.rf);
+    fit(*handle, mPtr, data.X.data(), train_nrows, params.ncols, data.y.data(), p_rest.rf, nullptr);
     handle->sync_stream(stream);
 
     ML::build_treelite_forest(&model, &rf_model, params.ncols);
@@ -266,8 +266,7 @@ std::vector<Params> getInputs()
                  8,   /* n_streams */
                  128, /* max_batch_size */
                  0,   /* minTreesPerGroupFold */
-                 0,   /* foldGroupSize */
-                 -1   /* group_col_idx */
+                 0    /* foldGroupSize */
  );
 
  using ML::fil::algo_t;
diff --git a/cpp/bench/sg/rf_classifier.cu b/cpp/bench/sg/rf_classifier.cu
index f0936ee31e..8d466b4bb7 100644
--- a/cpp/bench/sg/rf_classifier.cu
+++ b/cpp/bench/sg/rf_classifier.cu
@@ -66,7 +66,8 @@ class RFClassifier : public BlobsFixture<D> {
             this->params.ncols,
             this->data.y.data(),
             this->params.nclasses,
-            rfParams);
+            rfParams,
+            nullptr /* groups */);
         this->handle->sync_stream(this->stream);
       });
   }
@@ -110,8 +111,7 @@ std::vector<Params> getInputs()
                  8,   /* n_streams */
                  128, /* max_batch_size */
                  0,   /* minTreesPerGroupFold */
-                 0,   /* foldGroupSize */
-                 -1   /* group_col_idx */
+                 0    /* foldGroupSize */
  );
 
  std::vector rowcols = {
diff --git a/cpp/include/cuml/ensemble/randomforest.hpp b/cpp/include/cuml/ensemble/randomforest.hpp
index d978339891..e3bf4caff8 100644
--- a/cpp/include/cuml/ensemble/randomforest.hpp
+++ b/cpp/include/cuml/ensemble/randomforest.hpp
@@ -136,12 +136,6 @@ struct RF_params {
    * entire fold of groups left out.
    */
   int foldGroupSize;
-
-  /**
-   * group_col_idx
-   * The numeric index of the column to be used for group processing
-   */
-  int group_col_idx;
 
   /**
    * Decision tree training hyper parameter struct.
@@ -196,6 +190,12 @@ void build_treelite_forest(ModelHandle* model,
                            const RandomForestMetaData<T, L>* forest,
                            int num_features);
 
+template <class T, class L>
+int get_tree_row_meta_info(
+  int ix_tree,
+  int ix_sample,
+  const RandomForestMetaData<T, L>* forest);
+
 ModelHandle concatenate_trees(std::vector<ModelHandle> treelite_handles);
 
 void compare_concat_forest_to_subforests(ModelHandle concat_tree_handle,
@@ -213,6 +213,7 @@ void fit(const raft::handle_t& user_handle,
          int* labels,
          int n_unique_labels,
          RF_params rf_params,
+         int* groups    = nullptr,
          int verbosity  = CUML_LEVEL_INFO);
 void fit(const raft::handle_t& user_handle,
          RandomForestClassifierD*& forest,
@@ -222,6 +223,7 @@ void fit(const raft::handle_t& user_handle,
          int* labels,
          int n_unique_labels,
          RF_params rf_params,
+         int* groups    = nullptr,
          int verbosity  = CUML_LEVEL_INFO);
 
 void predict(const raft::handle_t& user_handle,
@@ -271,8 +273,7 @@ RF_params set_rf_params(int max_depth,
                         int cfg_n_streams,
                         int max_batch_size,
                         int minTreesPerGroupFold,
-                        int foldGroupSize,
-                        int group_col_idx);
+                        int foldGroupSize);
 
 // ----------------------------- Regression ----------------------------------- //
 
@@ -286,7 +287,9 @@ void fit(const raft::handle_t& user_handle,
          int n_cols,
          float* labels,
          RF_params rf_params,
+         int* groups   = nullptr,
          int verbosity = CUML_LEVEL_INFO);
+
 void fit(const raft::handle_t& user_handle,
          RandomForestRegressorD*& forest,
          double* input,
@@ -294,6 +297,7 @@ void fit(const raft::handle_t& user_handle,
          int n_cols,
          double* labels,
          RF_params rf_params,
+         int* groups   = nullptr,
          int verbosity = CUML_LEVEL_INFO);
 
 void predict(const raft::handle_t& user_handle,
diff --git a/cpp/include/cuml/tree/decisiontree.hpp b/cpp/include/cuml/tree/decisiontree.hpp
index cfedb1e057..f710a54bde 100644
--- a/cpp/include/cuml/tree/decisiontree.hpp
+++ b/cpp/include/cuml/tree/decisiontree.hpp
@@ -117,6 +117,8 @@ void set_tree_params(DecisionTreeParams& params,
                      int cfg_max_batch_size = 4096,
                      bool cfg_oob_honesty   = false);
 
+enum SplitAvgUnusedEnum { unused = 0, split, avg, group_split_unselected, group_avg_unselected, invalid };
+
 template <class T, class L>
 struct TreeMetaDataNode {
   int treeid;
@@ -125,6 +127,7 @@ struct TreeMetaDataNode {
   double train_time;
   std::vector<L> vector_leaf;
   std::vector<SparseTreeNode<T, L>> sparsetree;
+  std::vector<SplitAvgUnusedEnum> split_avg_enums;
   int num_outputs;
 };
 
diff --git a/cpp/src/decisiontree/batched-levelalgo/builder.cuh b/cpp/src/decisiontree/batched-levelalgo/builder.cuh
index 0cc1b6f51f..6a7fb385d7 100644
--- a/cpp/src/decisiontree/batched-levelalgo/builder.cuh
+++ b/cpp/src/decisiontree/batched-levelalgo/builder.cuh
@@ -85,7 +85,7 @@ class NodeQueue {
     int nTrain = int(instance.count - instance.avg_count);
     if (nTrain < params.min_samples_split_splitting) return false;
-    if (params.oob_honesty and instance.avg_count < params.min_samples_split_averaging) return false;
+    if (params.oob_honesty and static_cast<int>(instance.avg_count) < params.min_samples_split_averaging) return false;
     if (params.max_leaves != -1 && tree->leaf_counter >= params.max_leaves) return false;
     return true;
   }
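The new split_avg_enums vector records, for every training row, which honesty role it played when a given tree was grown. A minimal host-side sketch of how a caller might consume it, assuming a trained TreeMetaDataNode and the enum layout above; the helper name is illustrative and not part of this change:

    // Hypothetical helper (not in this diff): tally how many rows fell into each
    // honesty role for one tree via the new split_avg_enums field.
    #include <array>
    #include <cstddef>

    template <class T, class L>
    std::array<std::size_t, 6> tally_row_roles(const ML::DT::TreeMetaDataNode<T, L>& tree)
    {
      std::array<std::size_t, 6> counts{};  // indexed by the SplitAvgUnusedEnum values 0..5
      for (auto e : tree.split_avg_enums) {
        counts[static_cast<std::size_t>(e)]++;
      }
      return counts;  // e.g. counts[ML::DT::SplitAvgUnusedEnum::split] == splitting rows
    }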
diff --git a/cpp/src/randomforest/randomforest.cu b/cpp/src/randomforest/randomforest.cu
index 3817bcef9a..bc7358724f 100644
--- a/cpp/src/randomforest/randomforest.cu
+++ b/cpp/src/randomforest/randomforest.cu
@@ -449,6 +449,7 @@ void fit(const raft::handle_t& user_handle,
          int* labels,
          int n_unique_labels,
          RF_params rf_params,
+         int* groups,
          int verbosity)
 {
   raft::common::nvtx::range fun_scope("RF::fit @randomforest.cu");
@@ -459,7 +460,18 @@ void fit(const raft::handle_t& user_handle,
   std::shared_ptr<RandomForest<float, int>> rf_classifier =
     std::make_shared<RandomForest<float, int>>(rf_params, RF_type::CLASSIFICATION);
-  rf_classifier->fit(user_handle, input, n_rows, n_cols, labels, n_unique_labels, forest);
+  rf_classifier->fit(user_handle, input, groups, n_rows, n_cols, labels, n_unique_labels, forest);
+}
+
+template <class T, class L>
+int get_tree_row_meta_info(
+  int ix_tree,
+  int ix_sample,
+  const RandomForestMetaData<T, L>* forest)
+{
+  // Out-of-range queries (including trees trained without honesty) report invalid.
+  if (ix_tree < 0 || ix_tree >= static_cast<int>(forest->trees.size()))
+    return SplitAvgUnusedEnum::invalid;
+  if (ix_sample < 0 || ix_sample >= static_cast<int>(forest->trees[ix_tree]->split_avg_enums.size()))
+    return SplitAvgUnusedEnum::invalid;
+  return forest->trees[ix_tree]->split_avg_enums[ix_sample];
 }
 
 void fit(const raft::handle_t& user_handle,
@@ -470,6 +482,7 @@ void fit(const raft::handle_t& user_handle,
          int* labels,
          int n_unique_labels,
          RF_params rf_params,
+         int* groups,
          int verbosity)
 {
   raft::common::nvtx::range fun_scope("RF::fit @randomforest.cu");
@@ -480,7 +493,7 @@ void fit(const raft::handle_t& user_handle,
   std::shared_ptr<RandomForest<double, int>> rf_classifier =
     std::make_shared<RandomForest<double, int>>(rf_params, RF_type::CLASSIFICATION);
-  rf_classifier->fit(user_handle, input, n_rows, n_cols, labels, n_unique_labels, forest);
+  rf_classifier->fit(user_handle, input, groups, n_rows, n_cols, labels, n_unique_labels, forest);
 }
 /** @} */
@@ -596,8 +609,7 @@ RF_params set_rf_params(int max_depth,
                         int cfg_n_streams,
                         int max_batch_size,
                         int minTreesPerGroupFold,
-                        int foldGroupSize,
-                        int group_col_idx)
+                        int foldGroupSize)
 {
   DT::DecisionTreeParams tree_params;
   DT::set_tree_params(tree_params,
@@ -625,7 +637,6 @@ RF_params set_rf_params(int max_depth,
   rf_params.tree_params          = tree_params;
   rf_params.minTreesPerGroupFold = minTreesPerGroupFold;
   rf_params.foldGroupSize        = foldGroupSize;
-  rf_params.group_col_idx        = group_col_idx;
   validity_check(rf_params);
   return rf_params;
 }
@@ -654,6 +665,7 @@ void fit(const raft::handle_t& user_handle,
          int n_cols,
          float* labels,
          RF_params rf_params,
+         int* groups,
          int verbosity)
 {
   raft::common::nvtx::range fun_scope("RF::fit @randomforest.cu");
@@ -664,7 +676,7 @@ void fit(const raft::handle_t& user_handle,
   std::shared_ptr<RandomForest<float, float>> rf_regressor =
     std::make_shared<RandomForest<float, float>>(rf_params, RF_type::REGRESSION);
-  rf_regressor->fit(user_handle, input, n_rows, n_cols, labels, 1, forest);
+  rf_regressor->fit(user_handle, input, groups, n_rows, n_cols, labels, 1, forest);
 }
 
 void fit(const raft::handle_t& user_handle,
@@ -674,6 +686,7 @@ void fit(const raft::handle_t& user_handle,
          int n_cols,
          double* labels,
          RF_params rf_params,
+         int* groups,
          int verbosity)
 {
   raft::common::nvtx::range fun_scope("RF::fit @randomforest.cu");
@@ -684,7 +697,7 @@ void fit(const raft::handle_t& user_handle,
   std::shared_ptr<RandomForest<double, double>> rf_regressor =
     std::make_shared<RandomForest<double, double>>(rf_params, RF_type::REGRESSION);
-  rf_regressor->fit(user_handle, input, n_rows, n_cols, labels, 1, forest);
+  rf_regressor->fit(user_handle, input, groups, n_rows, n_cols, labels, 1, forest);
 }
 /** @} */
@@ -776,6 +789,11 @@ template std::string get_rf_summary_text(const RandomForestClassifi
 template std::string get_rf_summary_text(const RandomForestRegressorF* forest);
 template std::string get_rf_summary_text(const RandomForestRegressorD* forest);
 
+template int get_tree_row_meta_info(int, int, const RandomForestClassifierF* forest);
+template int get_tree_row_meta_info(int, int, const RandomForestClassifierD* forest);
+template int get_tree_row_meta_info(int, int, const RandomForestRegressorF* forest);
+template int get_tree_row_meta_info(int, int, const RandomForestRegressorD* forest);
+
 template std::string get_rf_detailed_text(const RandomForestClassifierF* forest);
 template std::string get_rf_detailed_text(const RandomForestClassifierD* forest);
 template std::string get_rf_detailed_text(const RandomForestRegressorF* forest);
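For reference, a sketch of how the reworked C++ entry points fit together, assuming a raft::handle_t, an RF_params from set_rf_params, and caller-filled device buffers d_X (column-major, n_rows x n_cols), d_y, and d_groups (one int group id per row); the buffer names are illustrative, not part of this diff:

    // Train with an explicit per-row groups array instead of the removed
    // group_col_idx, then query one row's honesty role for tree 0.
    ML::RandomForestRegressorF* forest = new ML::RandomForestRegressorF();
    ML::fit(handle, forest, d_X, n_rows, n_cols, d_y, rf_params, d_groups);
    // Returns an int that maps onto ML::DT::SplitAvgUnusedEnum
    // (invalid when out of range or when honesty metadata was never filled).
    int role = ML::get_tree_row_meta_info(/*ix_tree=*/0, /*ix_sample=*/0, forest);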
diff --git a/cpp/src/randomforest/randomforest.cuh b/cpp/src/randomforest/randomforest.cuh
index 2419f7f265..7e1fa77dfc 100644
--- a/cpp/src/randomforest/randomforest.cuh
+++ b/cpp/src/randomforest/randomforest.cuh
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include
 
 #include
@@ -61,18 +62,79 @@ struct set_mask_functor {
 
 namespace {
 
-__global__ void log10(int* array) {
-  for (int ix = 0; ix < 10; ++ix) {
+using ML::DT::SplitAvgUnusedEnum;
+
+__global__ void log(int* array, int n_samples) {
+  for (int ix = 0; ix < n_samples; ++ix) {
     printf("array %d = %d\n", ix, array[ix]);
   }
 }
 
-__global__ void log10groups(const int* row_ids, const int* group_ids) {
-  for (int ix = 0; ix < 10; ++ix) {
+__global__ void loggroups(const int* row_ids, const int* group_ids, const int n_samples) {
+  for (int ix = 0; ix < n_samples; ++ix) {
     printf("group ix %d, row %d = %d\n", ix, row_ids[ix], group_ids[row_ids[ix]]);
   }
 }
 
+// Mark each selected row as splitting or averaging: the first n_splitting_rows
+// entries of selected_rows are the splitting sample, the rest the averaging sample.
+__global__ void assign_standard_honesty_vec(
+  SplitAvgUnusedEnum* split_enums,
+  const int n_total_selected_rows,
+  const int* selected_rows,
+  const int n_splitting_rows)
+{
+  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_total_selected_rows) return;
+
+  if (idx < n_splitting_rows) {
+    split_enums[selected_rows[idx]] = SplitAvgUnusedEnum::split;
+  } else {
+    split_enums[selected_rows[idx]] = SplitAvgUnusedEnum::avg;
+  }
+}
+
+template <typename T, typename U>
+__device__ int lower_bound(const T search_val, const U* array, int count) {
+  int it, step;
+  int first = 0;
+  while (count > 0) {
+    step = count / 2;
+    it   = first + step;
+    if (array[it] < search_val) {
+      first = ++it;
+      count -= step + 1;
+    } else {
+      count = step;
+    }
+  }
+  return first;
+}
+
+template <typename T, typename U>
+__device__ int lower_bound_search(const T search_val, const U* array, int count) {
+  int res = lower_bound(search_val, array, count);
+  return res < count and array[res] == search_val;
+}
+
+// Run once after each group-sampling stage: any row still marked "unused" whose
+// group is in the considered set gets enum_val, recording that its group took part
+// in that stage even though the row itself was not selected.
+__global__ void assign_group_based_standard_honesty_vec_stage1(
+  SplitAvgUnusedEnum* split_enums,
+  const int n_rows,
+  const int* sample_group_ids,
+  const int* considered_group_ids,
+  const int considered_group_id_count,
+  SplitAvgUnusedEnum enum_val)
+{
+  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n_rows) return;
+
+  const int group_id = sample_group_ids[idx];
+
+  if (lower_bound_search(group_id, considered_group_ids, considered_group_id_count)) {
+    split_enums[idx] = enum_val;
+  }
+}
+
 void assign_groups_to_folds(
   int n_groups,
   int n_folds,
@@ -82,39 +144,28 @@
 {
   std::vector<int> group_indices(n_groups);
   std::iota(group_indices.begin(), group_indices.end(), 0);
 
   std::shuffle(group_indices.begin(), group_indices.end(), rng);
-
-  for (int ix_fold = 0; ix_fold < n_folds - 1; ++ix_fold) {
+
+  int ix_fold = 0;
+  for (; ix_fold < n_folds - 1; ++ix_fold) {
+    fold_memberships[ix_fold].resize(fold_size);
     std::copy(group_indices.begin() + ix_fold*fold_size,
               group_indices.begin() + (ix_fold+1)*fold_size,
               fold_memberships[ix_fold].begin());
+
+    std::sort(fold_memberships[ix_fold].begin(), fold_memberships[ix_fold].end());
   }
 
   // Last fold could be smaller
-  const int last_fold_start = (n_folds - 1) * fold_size;
+  const int last_fold_start = (ix_fold) * fold_size;
   const int last_fold_size = n_groups - last_fold_start;
-  fold_memberships[n_folds - 1].resize(last_fold_size);
+  fold_memberships[ix_fold].resize(last_fold_size);
+
   for (int ix = 0; ix < last_fold_size; ++ix) {
-    fold_memberships[n_folds - 1][ix] = group_indices[last_fold_start + ix];
+    fold_memberships[ix_fold][ix] = group_indices[last_fold_start + ix];
   }
-}
 
-template <typename T, typename U>
-__device__ int lower_bound(const T search_val, const U* array, int count) {
-  int it, step;
-  int first = 0;
-  while (count > 0) {
-    step = count / 2;
-    it   = first + step;
-    if (array[it] < search_val) {
-      first = ++it;
-      count -= step + 1;
-    } else {
-      count = step;
-    }
-  }
-  return first;
+
+  std::sort(fold_memberships[ix_fold].begin(), fold_memberships[ix_fold].end());
 }
 
 template
@@ -148,8 +199,7 @@ struct LeaveOutSamplesCopyIfFunctor {
   __device__ bool operator()(const int ix_sample)
   {
     // Do a quick lower_bound search
     const int group_id = sample_group_ids[ix_sample];
-    int it = lower_bound(group_id, remaining_groups, num_rem_groups);
-    return remaining_groups[it] == group_id;
+    return lower_bound_search(group_id, remaining_groups, num_rem_groups);
   }
 };
 
@@ -211,7 +261,6 @@ void leave_groups_out_sample(
   raft::update_device(remaining_groups->data(), remaining_groups_host.data(),
                       remaining_groups_host.size(), stream);
   remaining_groups->resize(remaining_groups_host.size(), stream);
-
   generate_row_indices_from_remaining_groups(
     remaining_groups, remaining_samples, sample_group_ids, num_samples, stream, rng);
 
@@ -278,10 +327,16 @@ class RandomForest {
                      const int n_groups,
                      const int group_tree_count,
                      const std::vector<std::vector<int>>& fold_memberships,  // each group belongs to one fold
+                     rmm::device_uvector<SplitAvgUnusedEnum>* split_avg_enums,
                      const cudaStream_t stream)
   {
     // Todo: split group_fold_rng across threads
     raft::common::nvtx::range fun_scope("bootstrapping row IDs @randomforest.cuh");
+    if (split_avg_enums) {
+      // Initialize to unused; the memset below relies on unused == 0
+      static_assert(SplitAvgUnusedEnum::unused == 0, "SplitAvgUnusedEnum::unused must be 0");
+      cudaMemsetAsync(split_avg_enums->data(), 0, sizeof(SplitAvgUnusedEnum) * n_rows, stream);
+    }
 
     // Hash these together so they are uncorrelated
     auto random_seed = DT::fnv1a32_basis;
@@ -298,6 +353,8 @@ class RandomForest {
     std::vector<std::vector<int>> honest_group_assignments(2);
     auto& splitting_groups = honest_group_assignments[0];
     auto& averaging_groups = honest_group_assignments[1];
+
+
     if (n_groups > 0) {
       // Special handling for groups. We don't support split ratio honesty
 
       const std::vector<int>* current_fold_groups;
@@ -328,7 +385,6 @@ class RandomForest {
       splitting_groups.resize(honest_split_size);
       averaging_groups.resize(restricted_ix_size - honest_split_size);
-
       assign_groups_to_folds(
         restricted_ix_size,
         2,
@@ -338,10 +394,12 @@ class RandomForest {
 
       // Replace indices with the actual groups
       for (int ix_group = 0; ix_group < honest_split_size; ix_group++) {
         honest_group_assignments[0][ix_group] =
           restricted_group_ixs_diff[honest_group_assignments[0][ix_group]];
       }
       for (int ix_group = 0; ix_group < restricted_ix_size - honest_split_size; ix_group++) {
         honest_group_assignments[1][ix_group] =
           restricted_group_ixs_diff[honest_group_assignments[1][ix_group]];
       }
     } else {
@@ -366,11 +424,33 @@ class RandomForest {
           group_fold_rng);
       }
 
+      const int block_dim = 256;
       leave_groups_out_sample(remaining_groups, remaining_samples, selected_rows, workspace,
                               groups, splitting_groups, n_sampled_rows, 0, stream, rng);
-
+      const int stage1_grid_dim = (n_rows + block_dim - 1) / block_dim;
+      assign_group_based_standard_honesty_vec_stage1<<<stage1_grid_dim, block_dim, 0, stream>>>(
+        split_avg_enums->data(),
+        n_rows,
+        groups,
+        remaining_groups->data(),
+        remaining_groups->size(),
+        SplitAvgUnusedEnum::group_split_unselected);
+
+      cudaStreamSynchronize(stream);
       leave_groups_out_sample(remaining_groups, remaining_samples, selected_rows, workspace,
                               groups, averaging_groups, n_sampled_rows, n_sampled_rows, stream, rng);
+      assign_group_based_standard_honesty_vec_stage1<<<stage1_grid_dim, block_dim, 0, stream>>>(
+        split_avg_enums->data(),
+        n_rows,
+        groups,
+        remaining_groups->data(),
+        remaining_groups->size(),
+        SplitAvgUnusedEnum::group_avg_unselected);
+
+      const int total_sampled_rows = 2 * n_sampled_rows;
+      const int standard_grid_dim  = (total_sampled_rows + block_dim - 1) / block_dim;
+      assign_standard_honesty_vec<<<standard_grid_dim, block_dim, 0, stream>>>(
+        split_avg_enums->data(), total_sampled_rows, selected_rows->data(), n_sampled_rows);
 
       update_averaging_mask(split_row_mask, n_sampled_rows, stream);
 
@@ -380,6 +460,7 @@ class RandomForest {
       // Just don't use samples from the current fold for splitting. No averaging.
       leave_groups_out_sample(remaining_groups, remaining_samples, selected_rows, workspace,
                               groups, restricted_group_ixs_diff, n_sampled_rows, 0, stream, rng);
+      return 0;  // no averaging samples
     }
   }
@@ -418,11 +499,20 @@ class RandomForest {
       // Get the avg selected rows either as the remaining data, or bootstrapped again
       selected_rows->resize(n_sampled_rows * 2, stream);
+      int total_sampled_rows = n_sampled_rows;
       if (rf_params.double_bootstrap) {
         sample_rows_from_remaining_rows(remaining_samples, selected_rows, workspace,
                                         n_sampled_rows, n_sampled_rows, stream, rng);
       } else {
+        num_avg_samples = remaining_samples->size();
         thrust::copy(thrust::cuda::par.on(stream), remaining_samples->begin(),
                      remaining_samples->end(), selected_rows->begin() + n_sampled_rows);
       }
+
+      total_sampled_rows += num_avg_samples;
+
+      const int block_dim = 256;
+      const int grid_dim  = (total_sampled_rows + block_dim - 1) / block_dim;
+      assign_standard_honesty_vec<<<grid_dim, block_dim, 0, stream>>>(
+        split_avg_enums->data(), total_sampled_rows, selected_rows->data(), n_sampled_rows);
 
       update_averaging_mask(split_row_mask, n_sampled_rows, stream);
     }
@@ -475,6 +565,7 @@ class RandomForest {
    */
   void fit(const raft::handle_t& user_handle,
            const T* input,
+           int* groups,
            int n_rows,
            int n_cols,
            L* labels,
@@ -507,38 +598,33 @@ class RandomForest {
     std::vector<std::vector<int>> foldMemberships;
     const int foldGroupSize        = this->rf_params.foldGroupSize;
     const int minTreesPerGroupFold = this->rf_params.minTreesPerGroupFold;
-    std::unique_ptr<rmm::device_uvector<int>> groups;
     int n_groups = 0;
-    if (this->rf_params.group_col_idx >= 0) {
+    if (groups != nullptr) {
       // Here we'll do a unique on the group array, then build a vector of indices into the unique vector
       cudaStream_t stream = handle.get_stream_from_stream_pool(0);
-      rmm::device_uvector<T> input_groups(n_rows, stream);
-      rmm::device_uvector<T> input_groups_unique(n_rows, stream);
-      groups = std::make_unique<rmm::device_uvector<int>>(n_rows, stream);
-      cudaMemcpyAsync(input_groups.data(),
-                      input + n_rows * this->rf_params.group_col_idx,
-                      n_rows * sizeof(T), cudaMemcpyDefault, stream);
+      rmm::device_uvector<int> input_groups_unique(n_rows, stream);
       cudaMemcpyAsync(input_groups_unique.data(),
-                      input + n_rows * this->rf_params.group_col_idx,
-                      n_rows * sizeof(T), cudaMemcpyDefault, stream);
+                      groups,
+                      n_rows * sizeof(int), cudaMemcpyDefault, stream);
 
       // Sadly we have to sort the entire array for unique to work. Is there
       // a way to just unique the unsorted array?
       thrust::sort(thrust::cuda::par.on(stream), input_groups_unique.data(),
                    input_groups_unique.data() + n_rows);
-      T* new_end = thrust::unique(thrust::cuda::par.on(stream),
+      int* new_end = thrust::unique(thrust::cuda::par.on(stream),
                                     input_groups_unique.data(),
                                     input_groups_unique.data() + n_rows);
+
       // Now we'll have n_groups and can use some iterator to find the values for each group
       n_groups = new_end - input_groups_unique.data();
       UniqueTransformFunctor transform_fn{input_groups_unique.data(), n_groups};
+      // NOTE: remaps the caller-provided group ids in place to dense [0, n_groups) indices.
       thrust::transform(
         thrust::cuda::par.on(stream),
-        input_groups.data(),
-        input_groups.data() + n_rows,
-        groups->data(),
+        groups,
+        groups + n_rows,
+        groups,
         transform_fn);
     }
@@ -586,6 +672,7 @@ class RandomForest {
     std::deque<rmm::device_uvector<int>> workspaces;
     std::deque<rmm::device_uvector<int>> remaining_groups_vec;
     std::deque<rmm::device_uvector<int>> remaining_samples_vec;
+    std::deque<rmm::device_uvector<SplitAvgUnusedEnum>> split_avg_enum_vec;
     const bool use_extra_vecs =
       this->rf_params.oob_honesty or this->rf_params.minTreesPerGroupFold > 0;
     size_t max_sample_row_size = this->rf_params.oob_honesty ? n_sampled_rows * 2 : n_sampled_rows;
@@ -596,7 +683,9 @@ class RandomForest {
       split_row_masks.emplace_back(max_sample_row_size, s);
       workspaces.emplace_back(n_rows, s);
       remaining_samples_vec.emplace_back(n_rows, s);
+      split_avg_enum_vec.emplace_back(n_rows, s);
     }
+
     if (n_groups > 0) {
       remaining_groups_vec.emplace_back(n_groups, s);
     }
@@ -611,7 +700,7 @@ class RandomForest {
       rmm::device_uvector<int>* workspace        = use_extra_vecs ? &workspaces[stream_id] : nullptr;
       rmm::device_uvector<int>* remaining_groups = n_groups > 0 ? &remaining_groups_vec[stream_id] : nullptr;
       rmm::device_uvector<int>* split_row_mask   = use_extra_vecs ? &split_row_masks[stream_id] : nullptr;
-      int* this_groups = n_groups > 0 ? groups->data() : nullptr;
+      rmm::device_uvector<SplitAvgUnusedEnum>* split_avg_enums =
+        use_extra_vecs ? &split_avg_enum_vec[stream_id] : nullptr;
 
       auto n_avg_samples = this->get_row_sample(i, n_rows, n_sampled_rows, &selected_rows[stream_id],
@@ -619,10 +708,11 @@ class RandomForest {
                                                 remaining_groups,
                                                 remaining_samples,
                                                 workspace,
-                                                this_groups,
+                                                groups,
                                                 n_groups,
                                                 group_tree_count,
                                                 foldMemberships,
+                                                split_avg_enums,
                                                 s);
 
       /* Build individual tree in the forest.
@@ -634,6 +724,8 @@ class RandomForest {
          (b) a pointer to a list of row numbers w.r.t original data.
       */
       if (this->rf_params.oob_honesty) {
+        // The per-row honesty roles land in split_avg_enums on device; they are
+        // copied into this tree's metadata right after fit below.
         forest->trees[i] = DT::DecisionTree::fit(handle,
                                                  s,
                                                  input,
@@ -648,6 +740,9 @@ class RandomForest {
                                                  this->rf_params.seed,
                                                  quantiles,
                                                  i);
+        forest->trees[i]->split_avg_enums = std::vector<SplitAvgUnusedEnum>(n_rows);
+        raft::update_host(forest->trees[i]->split_avg_enums.data(),
+                          split_avg_enums->data(), n_rows, s);
       } else {
         forest->trees[i] = DT::DecisionTree::fit(handle,
                                                  s,
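The device-side lower_bound/lower_bound_search pair above is meant to match std::lower_bound and std::binary_search over a sorted id array. A small host-side check of that equivalence (test scaffolding only, not part of this diff):

    #include <algorithm>
    #include <cassert>

    int main()
    {
      const int sorted_ids[] = {2, 5, 5, 9};
      // lower_bound(): index of the first element not less than the key.
      assert(std::lower_bound(sorted_ids, sorted_ids + 4, 5) - sorted_ids == 1);
      // lower_bound_search(): membership test on the sorted ids.
      assert(std::binary_search(sorted_ids, sorted_ids + 4, 5));
      assert(!std::binary_search(sorted_ids, sorted_ids + 4, 3));
      return 0;
    }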
diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu
index e2f185d30f..5479f7f2d8 100644
--- a/cpp/test/sg/rf_test.cu
+++ b/cpp/test/sg/rf_test.cu
@@ -241,8 +241,7 @@ auto TrainScore(
                 params.n_streams,
                 128,
                 0,
-                0,
-                -1);
+                0);
 
   auto forest     = std::make_shared<RandomForestMetaData<DataT, LabelT>>();
   auto forest_ptr = forest.get();
@@ -576,7 +575,7 @@ TEST(RfTests, IntegerOverflow)
   auto stream_pool = std::make_shared<rmm::cuda_stream_pool>(4);
   raft::handle_t handle(rmm::cuda_stream_per_thread, stream_pool);
   RF_params rf_params =
-    set_rf_params(3, 100, 1.0, 256, 1, 0, 2, 0, 0.0, false, false, false, 1, 1.0, 0, CRITERION::MSE, 4, 128, 0, 0, -1);
+    set_rf_params(3, 100, 1.0, 256, 1, 0, 2, 0, 0.0, false, false, false, 1, 1.0, 0, CRITERION::MSE, 4, 128, 0, 0);
   fit(handle, forest_ptr, X.data().get(), m, n, y.data().get(), rf_params);
   // Check we have actually learned something
   EXPECT_GT(forest->trees[0]->leaf_counter, 1);
@@ -602,35 +601,26 @@ TEST(RfTests, IntegerOverflow)
   fil::predict(handle, fil_forest, pred.data().get(), X.data().get(), m, false);
 }
 
-namespace {
-  struct TransformFunctor {
-    __device__ float operator()(float input) {
-      return roundf(input);
-    }
-  };
-}
-
-
 TEST(RfTests, Honesty)
 {
   std::size_t m = 10000;
   std::size_t n = 2150;
   thrust::device_vector<float> X(m * n);
   thrust::device_vector<float> y(m);
+
   raft::random::Rng r(4);
   r.normal(X.data().get(), X.size(), 0.0f, 2.0f, nullptr);
   cudaStream_t stream;
   cudaStreamCreate(&stream);
-  thrust::transform(thrust::cuda::par.on(stream), X.data(),
-                    X.data() + m, X.data(), TransformFunctor{});
-  // quantize the first column so that we can use it for meaningful groups
+
   r.normal(y.data().get(), y.size(), 0.0f, 2.0f, nullptr);
+
   auto forest      = std::make_shared<RandomForestMetaData<float, float>>();
   auto forest_ptr  = forest.get();
   auto stream_pool = std::make_shared<rmm::cuda_stream_pool>(4);
   raft::handle_t handle(rmm::cuda_stream_per_thread, stream_pool);
   RF_params rf_params =
-    set_rf_params(3, 100, 1.0, 256, 1, 1, 2, 2, 0.0, true, true, true, 5, 1.0, 0, CRITERION::MSE, 1, 128, 0, 0, -1);
+    set_rf_params(3, 100, 1.0, 256, 1, 1, 2, 2, 0.0, true, true, true, 5, 1.0, 0, CRITERION::MSE, 1, 128, 0, 0);
   fit(handle, forest_ptr, X.data().get(), m, n, y.data().get(), rf_params);
   // Check we have actually learned something
   EXPECT_GT(forest->trees[0]->leaf_counter, 1);
@@ -658,7 +648,7 @@ TEST(RfTests, Honesty)
 
 TEST(RfTests, SmallHonestFolds)
 {
-  std::size_t m = 1000;
+  std::size_t m = 100;
   std::size_t n = 10;
   thrust::device_vector<float> X(m * n);
   thrust::device_vector<float> y(m);
@@ -666,20 +656,29 @@ TEST(RfTests, SmallHonestFolds)
   r.normal(X.data().get(), X.size(), 0.0f, 1.0f, nullptr);
   cudaStream_t stream;
   cudaStreamCreate(&stream);
-  thrust::transform(thrust::cuda::par.on(stream), X.data(),
-                    X.data() + m, X.data(), TransformFunctor{});
-  // quantize the first column so that we can use it for meaningful groups
+
+  thrust::device_vector<int> groups(m);
+  r.uniformInt(groups.data().get(), m, 0, 10, nullptr);
+
   r.normal(y.data().get(), y.size(), 0.0f, 2.0f, nullptr);
 
   auto forest      = std::make_shared<RandomForestMetaData<float, float>>();
   auto forest_ptr  = forest.get();
   auto stream_pool = std::make_shared<rmm::cuda_stream_pool>(4);
   raft::handle_t handle(rmm::cuda_stream_per_thread, stream_pool);
+  int n_trees = 5;
   RF_params rf_params =
-    set_rf_params(3, 100, 1.0, 256, 1, 1, 2, 2, 0.0, true, true, true, 1, 1.0, 0, CRITERION::MSE, 1, 128, 1, 5, 0);
-  fit(handle, forest_ptr, X.data().get(), m, n, y.data().get(), rf_params);
+    set_rf_params(3, 100, 1.0, 256, 1, 1, 2, 2, 0.0, true, true, true, n_trees, 1.0, 0, CRITERION::MSE, 1, 128, 1, 5);
+  fit(handle, forest_ptr, X.data().get(), m, n, y.data().get(), rf_params, groups.data().get());
   // Check we have actually learned something
   EXPECT_GT(forest->trees[0]->leaf_counter, 1);
 
   // See if fil overflows
   thrust::device_vector<float> pred(m);
   ModelHandle model;
@@ -704,7 +703,7 @@ TEST(RfTests, SmallHonestFolds)
 
 TEST(RfTests, SmallHonestFoldsWithFallback)
 {
-  std::size_t m = 1000;
+  std::size_t m = 100;
   std::size_t n = 10;
   thrust::device_vector<float> X(m * n);
   thrust::device_vector<float> y(m);
@@ -712,17 +711,19 @@ TEST(RfTests, SmallHonestFoldsWithFallback)
   r.normal(X.data().get(), X.size(), 0.0f, 1.0f, nullptr);
   cudaStream_t stream;
   cudaStreamCreate(&stream);
-  thrust::transform(thrust::cuda::par.on(stream), X.data(),
-                    X.data() + m, X.data(), TransformFunctor{});
-  // quantize the first column so that we can use it for meaningful groups
+
+  thrust::device_vector<int> groups(m);
+  r.uniformInt(groups.data().get(), m, 0, 10, nullptr);
+
   r.normal(y.data().get(), y.size(), 0.0f, 2.0f, nullptr);
 
   auto forest      = std::make_shared<RandomForestMetaData<float, float>>();
   auto forest_ptr  = forest.get();
   auto stream_pool = std::make_shared<rmm::cuda_stream_pool>(4);
   raft::handle_t handle(rmm::cuda_stream_per_thread, stream_pool);
   RF_params rf_params =
-    set_rf_params(3, 100, 1.0, 256, 1, 1, 2, 2, 0.0, true, true, true, 100, 1.0, 0, CRITERION::MSE, 1, 128, 1, 5, 0);
-  fit(handle, forest_ptr, X.data().get(), m, n, y.data().get(), rf_params);
+    set_rf_params(3, 100, 1.0, 256, 1, 1, 2, 2, 0.0, true, true, true, 100, 1.0, 0, CRITERION::MSE, 1, 128, 1, 5);
+  fit(handle, forest_ptr, X.data().get(), m, n, y.data().get(), rf_params, groups.data().get());
   // Check we have actually learned something
   EXPECT_GT(forest->trees[0]->leaf_counter, 1);
@@ -757,17 +758,18 @@ TEST(RfTests, SmallDishonestFoldsWithFallback)
   r.normal(X.data().get(), X.size(), 0.0f, 1.0f, nullptr);
   cudaStream_t stream;
   cudaStreamCreate(&stream);
-  thrust::transform(thrust::cuda::par.on(stream), X.data(),
-                    X.data() + m, X.data(), TransformFunctor{});
-  // quantize the first column so that we can use it for meaningful groups
+
+  thrust::device_vector<int> groups(m);
+  r.uniformInt(groups.data().get(), m, 0, 10, nullptr);
+
   r.normal(y.data().get(), y.size(), 0.0f, 2.0f, nullptr);
 
   auto forest      = std::make_shared<RandomForestMetaData<float, float>>();
   auto forest_ptr  = forest.get();
   auto stream_pool = std::make_shared<rmm::cuda_stream_pool>(4);
   raft::handle_t handle(rmm::cuda_stream_per_thread, stream_pool);
   RF_params rf_params =
-    set_rf_params(3, 100, 1.0, 256, 1, 1, 2, 2, 0.0, true, false, false, 100, 1.0, 0, CRITERION::MSE, 1, 128, 1, 5, 0);
-  fit(handle, forest_ptr, X.data().get(), m, n, y.data().get(), rf_params);
+    set_rf_params(3, 100, 1.0, 256, 1, 1, 2, 2, 0.0, true, false, false, 100, 1.0, 0, CRITERION::MSE, 1, 128, 1, 5);
+  fit(handle, forest_ptr, X.data().get(), m, n, y.data().get(), rf_params, groups.data().get());
   // Check we have actually learned something
   EXPECT_GT(forest->trees[0]->leaf_counter, 1);
@@ -802,17 +804,18 @@ TEST(RfTests, HonestFolds)
   r.normal(X.data().get(), X.size(), 0.0f, 2.0f, nullptr);
   cudaStream_t stream;
   cudaStreamCreate(&stream);
-  thrust::transform(thrust::cuda::par.on(stream), X.data(),
-                    X.data() + m, X.data(), TransformFunctor{});
-  // quantize the first column so that we can use it for meaningful groups
+
+  thrust::device_vector<int> groups(m);
+  r.uniformInt(groups.data().get(), m, 0, 10, nullptr);
+
   r.normal(y.data().get(), y.size(), 0.0f, 2.0f, nullptr);
 
   auto forest      = std::make_shared<RandomForestMetaData<float, float>>();
   auto forest_ptr  = forest.get();
   auto stream_pool = std::make_shared<rmm::cuda_stream_pool>(4);
   raft::handle_t handle(rmm::cuda_stream_per_thread, stream_pool);
   RF_params rf_params =
-    set_rf_params(3, 100, 1.0, 256, 1, 1, 2, 2, 0.0, true, true, true, 5, 1.0, 0, CRITERION::MSE, 4, 128, 2, 2, 0);
-  fit(handle, forest_ptr, X.data().get(), m, n, y.data().get(), rf_params);
+    set_rf_params(3, 100, 1.0, 256, 1, 1, 2, 2, 0.0, true, true, true, 5, 1.0, 0, CRITERION::MSE, 1, 128, 2, 2);
+  fit(handle, forest_ptr, X.data().get(), m, n, y.data().get(), rf_params, groups.data().get());
   // Check we have actually learned something
   EXPECT_GT(forest->trees[0]->leaf_counter, 1);
@@ -847,17 +850,18 @@ TEST(RfTests, HonestGroups)
   r.normal(X.data().get(), X.size(), 0.0f, 2.0f, nullptr);
   cudaStream_t stream;
   cudaStreamCreate(&stream);
-  thrust::transform(thrust::cuda::par.on(stream), X.data(),
-                    X.data() + m, X.data(), TransformFunctor{});
-  // quantize the first column so that we can use it for meaningful groups
+
+  thrust::device_vector<int> groups(m);
+  r.uniformInt(groups.data().get(), m, 0, 10, nullptr);
+
   r.normal(y.data().get(), y.size(), 0.0f, 2.0f, nullptr);
 
   auto forest      = std::make_shared<RandomForestMetaData<float, float>>();
   auto forest_ptr  = forest.get();
   auto stream_pool = std::make_shared<rmm::cuda_stream_pool>(4);
   raft::handle_t handle(rmm::cuda_stream_per_thread, stream_pool);
   RF_params rf_params =
-    set_rf_params(3, 100, 1.0, 256, 1, 1, 2, 2, 0.0, true, true, true, 5, 1.0, 0, CRITERION::MSE, 4, 128, 0, 0, 0);
-  fit(handle, forest_ptr, X.data().get(), m, n, y.data().get(), rf_params);
+    set_rf_params(3, 100, 1.0, 256, 1, 1, 2, 2, 0.0, true, true, true, 5, 1.0, 0, CRITERION::MSE, 4, 128, 0, 0);
+  fit(handle, forest_ptr, X.data().get(), m, n, y.data().get(), rf_params, groups.data().get());
   // Check we have actually learned something
   EXPECT_GT(forest->trees[0]->leaf_counter, 1);
@@ -892,17 +896,17 @@ TEST(RfTests, DishonestFolds)
   r.normal(X.data().get(), X.size(), 0.0f, 2.0f, nullptr);
   cudaStream_t stream;
   cudaStreamCreate(&stream);
-  thrust::transform(thrust::cuda::par.on(stream), X.data(),
-                    X.data() + m, X.data(), TransformFunctor{});
-  // quantize the first column so that we can use it for meaningful groups
+
+  thrust::device_vector<int> groups(m);
+  r.uniformInt(groups.data().get(), m, 0, 10, nullptr);
   r.normal(y.data().get(), y.size(), 0.0f, 2.0f, nullptr);
 
   auto forest      = std::make_shared<RandomForestMetaData<float, float>>();
   auto forest_ptr  = forest.get();
   auto stream_pool = std::make_shared<rmm::cuda_stream_pool>(4);
   raft::handle_t handle(rmm::cuda_stream_per_thread, stream_pool);
   RF_params rf_params =
-    set_rf_params(3, 100, 1.0, 256, 1, 1, 2, 2, 0.0, true, false, false, 5, 1.0, 0, CRITERION::MSE, 4, 128, 2, 2, 0);
-  fit(handle, forest_ptr, X.data().get(), m, n, y.data().get(), rf_params);
+    set_rf_params(3, 100, 1.0, 256, 1, 1, 2, 2, 0.0, true, false, false, 5, 1.0, 0, CRITERION::MSE, 4, 128, 2, 2);
+  fit(handle, forest_ptr, X.data().get(), m, n, y.data().get(), rf_params, groups.data().get());
   // Check we have actually learned something
   EXPECT_GT(forest->trees[0]->leaf_counter, 1);
@@ -1143,7 +1147,7 @@ INSTANTIATE_TEST_CASE_P(RfTests, RFQuantileVariableBinsTestD, ::testing::ValuesI
 
 TEST(RfTest, TextDump)
 {
-  RF_params rf_params = set_rf_params(2, 2, 1.0, 2, 1, 0, 2, 0, 0.0, false, false, false, 1, 1.0, 0, GINI, 1, 128, 0, 0, -1);
+  RF_params rf_params = set_rf_params(2, 2, 1.0, 2, 1, 0, 2, 0, 0.0, false, false, false, 1, 1.0, 0, GINI, 1, 128, 0, 0);
 
   auto forest = std::make_shared<RandomForestMetaData<float, int>>();
   std::vector<float> X_host = {1, 2, 3, 6, 7, 8};
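When checking the assign_standard_honesty_vec kernel in tests like the ones above, a serial reference is handy. This sketch assumes, as the kernel does, that selected_rows holds the splitting sample followed by the averaging sample (reference implementation only, not part of this diff):

    #include <cstddef>
    #include <vector>

    using ML::DT::SplitAvgUnusedEnum;

    void assign_standard_honesty_vec_ref(std::vector<SplitAvgUnusedEnum>& split_enums,
                                         const std::vector<int>& selected_rows,
                                         int n_splitting_rows)
    {
      // First n_splitting_rows entries are the splitting sample, the rest averaging.
      for (std::size_t ix = 0; ix < selected_rows.size(); ++ix) {
        split_enums[selected_rows[ix]] = static_cast<int>(ix) < n_splitting_rows
                                           ? SplitAvgUnusedEnum::split
                                           : SplitAvgUnusedEnum::avg;
      }
    }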
diff --git a/honesty_test.py b/honesty_test.py
index 1d6c044cf9..ed508cbbfe 100755
--- a/honesty_test.py
+++ b/honesty_test.py
@@ -30,6 +31,7 @@
 input = input.astype('float32')
 input = input.dropna()
 
+groups = input["state"].astype('int')
 
 # Choose how many index include for random selection
 num_rows = input.shape[0]
@@ -41,20 +43,27 @@
 x_train = x.iloc[ix_train, :]
 y_train = y.iloc[ix_train]
+groups_train = groups.iloc[ix_train]
 
 x_test = x.iloc[ix_test, :]
 y_test = y.iloc[ix_test]
 
 n_trees = 100
 
+
 # Start group call -- note we're not using groups to specify OOB predictions
-# Note, here we specify a column index to use for groups. Then the fit() function
+# Note, here we pass an explicit per-row groups array to fit(). The fit() function
 # will use the GPU to compute unique group ids for every sample.
-group_col_idx = x.columns.get_loc("state")
 random_forest_regress = RFR(n_estimators=n_trees, oob_honesty=True, split_criterion=2,
-                            random_state=42, minTreesPerGroupFold=5, foldGroupSize=1, group_col_idx=group_col_idx)
+                            random_state=42, minTreesPerGroupFold=20, foldGroupSize=3)
 start = time.time()
-trainedRFR = random_forest_regress.fit(x_train, y_train)
+trainedRFR = random_forest_regress.fit(x_train, y_train, groups_train)
+
+# Show the full honesty metadata. The accessor lets you pull per-tree, per-row
+# values and fill your own arrays as needed
+for ix_tree in range(10):
+    for ix_row in range(3):
+        print(f"tree {ix_tree} row {ix_row} enum {random_forest_regress.get_tree_sample_honesty_group_meta(ix_tree, ix_row)}")
 
 end = time.time()
 pred_test_regress = trainedRFR.predict(x_test)
 mse = cuml.metrics.mean_squared_error(y_test, pred_test_regress)
@@ -66,6 +75,12 @@
 end = time.time()
 pred_test_regress = trainedRFR.predict(x_test)
 mse = cuml.metrics.mean_squared_error(y_test, pred_test_regress)
+
+# Note this prints "invalid" -- the metadata isn't filled in when honesty is disabled
+for ix_tree in range(10):
+    for ix_row in range(3):
+        print(f"tree {ix_tree} row {ix_row} enum {random_forest_regress.get_tree_sample_honesty_group_meta(ix_tree, ix_row)}")
+
 print(f"No honesty {mse} time {end-start}")
 
 random_forest_regress = RFR(n_estimators=n_trees, oob_honesty=True, split_criterion=2, random_state=42)
@@ -74,5 +89,12 @@
 end = time.time()
 pred_test_regress = trainedRFR.predict(x_test)
 mse = cuml.metrics.mean_squared_error(y_test, pred_test_regress)
+
+# Show the metadata without groups
+for ix_tree in range(10):
+    for ix_row in range(3):
+        print(f"tree {ix_tree} row {ix_row} enum {random_forest_regress.get_tree_sample_honesty_group_meta(ix_tree, ix_row)}")
+
+
 print(f"Honesty {mse} time {end-start}")
diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py
index 6bb0c34a89..c3845f13cb 100755
--- a/python/cuml/dask/ensemble/randomforestclassifier.py
+++ b/python/cuml/dask/ensemble/randomforestclassifier.py
@@ -197,10 +197,6 @@ class RandomForestClassifier(
         as it will contain the remaining groups). Then minTreesPerGroupFold
         are grown with each entire fold of groups left out.
 
-    group_col_idx : int (default = -1)
-        The numeric index of the column to be used for group processing
-
-
     Examples
     --------
     For usage examples, please see the RAPIDS notebooks repository:
diff --git a/python/cuml/dask/ensemble/randomforestregressor.py b/python/cuml/dask/ensemble/randomforestregressor.py
index 1ea049b678..e2cea52b80 100755
--- a/python/cuml/dask/ensemble/randomforestregressor.py
+++ b/python/cuml/dask/ensemble/randomforestregressor.py
@@ -187,10 +187,6 @@ class RandomForestRegressor(
         (if foldGroupSize doesn't evenly divide the number of groups, a single fold
         will be smaller, as it will contain the remaining groups). Then
         minTreesPerGroupFold are grown with each entire fold of groups left out.
-
-    group_col_idx : int (default = -1)
-        The numeric index of the column to be used for group processing
-
     """
 
     def __init__(
diff --git a/python/cuml/ensemble/randomforest_common.pyx b/python/cuml/ensemble/randomforest_common.pyx
index 52ca2f3ceb..6739e7346e 100644
--- a/python/cuml/ensemble/randomforest_common.pyx
+++ b/python/cuml/ensemble/randomforest_common.pyx
@@ -20,6 +20,7 @@ import math
 import warnings
 import typing
 from inspect import signature
+from enum import Enum
 
 from cuml.internals.safe_imports import cpu_only_import
 np = cpu_only_import('numpy')
@@ -40,6 +41,13 @@ from cuml.common import input_to_cuml_array
 from cuml.common.array_descriptor import CumlArrayDescriptor
 from cuml.prims.label.classlabels import make_monotonic, check_labels
 
+class SplitAvgUnusedEnum(Enum):
+    unused = 0
+    split = 1
+    avg = 2
+    group_split_unselected = 3
+    group_avg_unselected = 4
+    invalid = 5
 
 class BaseRandomForestModel(Base):
     _param_names = ['n_estimators', 'max_depth', 'handle',
@@ -58,8 +66,7 @@ class BaseRandomForestModel(Base):
                     'output_type', 'min_weight_fraction_leaf', 'n_jobs',
                     'max_leaf_nodes', 'min_impurity_split', 'oob_score',
                     'random_state', 'warm_start', 'class_weight',
-                    'criterion', 'minTreesPerGroupFold', 'foldGroupSize',
-                    'group_col_idx']
+                    'criterion', 'minTreesPerGroupFold', 'foldGroupSize']
 
     criterion_dict = {'0': GINI, 'gini': GINI,
                       '1': ENTROPY, 'entropy': ENTROPY,
@@ -87,7 +94,6 @@ class BaseRandomForestModel(Base):
                  max_batch_size=4096,
                  minTreesPerGroupFold=0,
                  foldGroupSize=1,
-                 group_col_idx=-1,
                  **kwargs):
 
         sklearn_params = {"criterion": criterion,
@@ -159,6 +165,7 @@ class BaseRandomForestModel(Base):
         self.double_bootstrap = double_bootstrap
         self.n_bins = n_bins
         self.n_cols = None
+        self.tree_group_metas = None
         self.dtype = dtype
         self.accuracy_metric = accuracy_metric
         self.max_batch_size = max_batch_size
@@ -171,7 +178,6 @@ class BaseRandomForestModel(Base):
         self.treelite_serialized_model = None
         self.minTreesPerGroupFold = minTreesPerGroupFold
         self.foldGroupSize = foldGroupSize
-        self.group_col_idx = group_col_idx
 
     def _get_max_feat_val(self) -> float:
         if type(self.max_features) == int:
@@ -277,6 +283,7 @@ class BaseRandomForestModel(Base):
                                    get_output_type=False)
     def _dataset_setup_for_fit(
-            self, X, y,
-            convert_dtype) -> typing.Tuple[CumlArray, CumlArray, float]:
+            self, X, y,
+            groups,
+            convert_dtype) -> typing.Tuple[
+                CumlArray, CumlArray, typing.Optional[CumlArray], float]:
         # Reset the old tree data for new fit call
         self._reset_forest_data()
@@ -314,6 +321,14 @@ class BaseRandomForestModel(Base):
                 convert_to_dtype=(self.dtype if convert_dtype else None),
                 check_rows=self.n_rows, check_cols=1)
 
+        group_m = None
+        if groups is not None:
+            # The C++ layer expects int32 group ids
+            group_m, _, _, group_dtype = \
+                input_to_cuml_array(
+                    groups,
+                    convert_to_dtype=(np.int32 if convert_dtype else None),
+                    check_rows=self.n_rows, check_cols=1)
+
         if self.dtype == np.float64:
             warnings.warn("To use pickling first train using float32 data "
@@ -332,7 +347,7 @@ class BaseRandomForestModel(Base):
         if type(self.min_samples_split_averaging) == float:
             self.min_samples_split_averaging = \
                 max(2, math.ceil(self.min_samples_split_averaging * self.n_rows))
-        return X_m, y_m, max_feature_val
+        return X_m, y_m, group_m, max_feature_val
 
     def _tl_handle_from_bytes(self, treelite_serialized_model):
         if not treelite_serialized_model:
diff --git a/python/cuml/ensemble/randomforest_shared.pxd b/python/cuml/ensemble/randomforest_shared.pxd
index f4d03d89ca..5ee34b5fd8 100644
--- a/python/cuml/ensemble/randomforest_shared.pxd
+++ b/python/cuml/ensemble/randomforest_shared.pxd
@@ -83,6 +83,14 @@ cdef extern from "cuml/ensemble/randomforest.hpp" namespace "ML":
                       int
                       ) except +
 
+    #
+    # Extra function for querying the per-tree, per-row honesty metadata
+    #
+    cdef int get_tree_row_meta_info[T, L](int,
+                                          int,
+                                          RandomForestMetaData[T, L]*
+                                          ) except +
+
     cdef void delete_rf_metadata[T, L](RandomForestMetaData[T, L]*) except +
 
     #
@@ -112,7 +120,6 @@ cdef extern from "cuml/ensemble/randomforest.hpp" namespace "ML":
                   int,
                   int,
                   int,
-                  int,
                   int) except +
 
     cdef vector[unsigned char] save_model(ModelHandle)
diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx
index 284f811a7c..c04ddccedf 100644
--- a/python/cuml/ensemble/randomforestclassifier.pyx
+++ b/python/cuml/ensemble/randomforestclassifier.pyx
@@ -39,7 +39,7 @@
 from cuml.common.doc_utils import insert_into_docstring
 from pylibraft.common.handle import Handle
 from cuml.common import input_to_cuml_array
-from cuml.ensemble.randomforest_common import BaseRandomForestModel
+from cuml.ensemble.randomforest_common import BaseRandomForestModel, SplitAvgUnusedEnum
 from cuml.ensemble.randomforest_common import _obtain_fil_model
 from cuml.ensemble.randomforest_shared cimport *
@@ -72,6 +72,7 @@ cdef extern from "cuml/ensemble/randomforest.hpp" namespace "ML":
                   int*,
                   int,
                   RF_params,
+                  int*,
                   int) except +
 
     cdef void fit(handle_t& handle,
@@ -82,6 +83,7 @@ cdef extern from "cuml/ensemble/randomforest.hpp" namespace "ML":
                   int*,
                   int,
                   RF_params,
+                  int*,
                   int) except +
 
     cdef void predict(handle_t& handle,
@@ -283,9 +285,6 @@ class RandomForestClassifier(BaseRandomForestModel,
         as it will contain the remaining groups). Then minTreesPerGroupFold
         are grown with each entire fold of groups left out.
 
-    group_col_idx : int (default = -1)
-        The numeric index of the column to be used for group processing
-
     Notes
     -----
     **Known Limitations**\n
@@ -475,7 +474,7 @@ class RandomForestClassifier(BaseRandomForestModel,
     @cuml.internals.api_base_return_any(set_output_type=False,
                                         set_output_dtype=True,
                                         set_n_features_in=False)
-    def fit(self, X, y, convert_dtype=True):
+    def fit(self, X, y, groups=None, convert_dtype=True):
         """
         Perform Random Forest Classification on the input data
 
         y to be of dtype int32. This will increase memory used for
         the method.
""" - - X_m, y_m, max_feature_val = self._dataset_setup_for_fit(X, y, + X_m, y_m, groups_m, max_feature_val = self._dataset_setup_for_fit(X, y, groups, convert_dtype) # Track the labels to see if update is necessary self.update_labels = not check_labels(y_m, self.classes_) - cdef uintptr_t X_ptr, y_ptr + cdef uintptr_t X_ptr, y_ptr, groups_ptr X_ptr = X_m.ptr y_ptr = y_m.ptr + if groups is not None: + groups_ptr = groups_m.ptr + cdef handle_t* handle_ =\ self.handle.getHandle() @@ -530,8 +531,7 @@ class RandomForestClassifier(BaseRandomForestModel, self.n_streams, self.max_batch_size, self.minTreesPerGroupFold, - self.foldGroupSize, - self.group_col_idx) + self.foldGroupSize) if self.dtype == np.float32: fit(handle_[0], @@ -542,6 +542,7 @@ class RandomForestClassifier(BaseRandomForestModel, y_ptr, self.num_classes, rf_params, + groups_ptr, self.verbose) elif self.dtype == np.float64: @@ -554,19 +555,33 @@ class RandomForestClassifier(BaseRandomForestModel, y_ptr, self.num_classes, rf_params64, + groups_ptr, self.verbose) else: raise TypeError("supports only np.float32 and np.float64 input," " but input of type '%s' passed." % (str(self.dtype))) + # make sure that the `fit` is complete before the following delete # call happens self.handle.sync() del X_m del y_m + del groups_m return self + def get_tree_sample_honesty_group_meta(self, tree_id, row_id): + cdef RandomForestMetaData[float, int] *rf_forest = \ + self.rf_forest + cdef RandomForestMetaData[double, int] *rf_forest64 = \ + self.rf_forest64 + + if self.dtype == np.float32: + return SplitAvgUnusedEnum(get_tree_row_meta_info(tree_id, row_id, rf_forest)) + else: + return SplitAvgUnusedEnum(get_tree_row_meta_info(tree_id, row_id, rf_forest64)) + @cuml.internals.api_base_return_array(get_output_dtype=True) def _predict_model_on_cpu(self, X, convert_dtype) -> CumlArray: cdef uintptr_t X_ptr diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx index 85d25b1fde..42ea83ef08 100644 --- a/python/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/ensemble/randomforestregressor.pyx @@ -39,9 +39,10 @@ from cuml.common.doc_utils import insert_into_docstring from pylibraft.common.handle import Handle from cuml.common import input_to_cuml_array -from cuml.ensemble.randomforest_common import BaseRandomForestModel +from cuml.ensemble.randomforest_common import BaseRandomForestModel, SplitAvgUnusedEnum from cuml.ensemble.randomforest_common import _obtain_fil_model from cuml.ensemble.randomforest_shared cimport * +from cuml.internals.mem_type import MemoryType from cuml.fil.fil import TreeliteModel @@ -70,6 +71,7 @@ cdef extern from "cuml/ensemble/randomforest.hpp" namespace "ML": int, float*, RF_params, + int*, int) except + cdef void fit(handle_t& handle, @@ -79,6 +81,7 @@ cdef extern from "cuml/ensemble/randomforest.hpp" namespace "ML": int, double*, RF_params, + int*, int) except + cdef void predict(handle_t& handle, @@ -286,9 +289,6 @@ class RandomForestRegressor(BaseRandomForestModel, as it will contain the remaining groups). Then minTreesPerGroupFold are grown with each entire fold of groups left out. 
-
-    group_col_idx : int (default = -1)
-        The numeric index of the column to be used for group processing
-
     Notes
     -----
     **Known Limitations**\n
@@ -468,20 +468,24 @@ class RandomForestRegressor(BaseRandomForestModel,
                             domain="cuml_python")
     @generate_docstring()
     @cuml.internals.api_base_return_any_skipall
-    def fit(self, X, y, convert_dtype=True):
+    def fit(self, X, y, groups=None, convert_dtype=True):
         """
         Perform Random Forest Regression on the input data
 
         """
-
-        X_m, y_m, max_feature_val = self._dataset_setup_for_fit(X, y,
-                                                                convert_dtype)
+        X_m, y_m, groups_m, max_feature_val = \
+            self._dataset_setup_for_fit(X, y, groups, convert_dtype)
 
         # Reset the old tree data for new fit call
-        cdef uintptr_t X_ptr, y_ptr
+        cdef uintptr_t X_ptr, y_ptr, groups_ptr
         X_ptr = X_m.ptr
         y_ptr = y_m.ptr
 
+        if groups is not None:
+            groups_ptr = groups_m.ptr
+        else:
+            groups_ptr = <uintptr_t> NULL
+
         cdef handle_t* handle_ =\
             self.handle.getHandle()
@@ -515,8 +519,8 @@ class RandomForestRegressor(BaseRandomForestModel,
                                   self.n_streams,
                                   self.max_batch_size,
                                   self.minTreesPerGroupFold,
-                                  self.foldGroupSize,
-                                  self.group_col_idx)
+                                  self.foldGroupSize)
+
         if self.dtype == np.float32:
             fit(handle_[0],
@@ -526,6 +530,7 @@ class RandomForestRegressor(BaseRandomForestModel,
                 self.n_cols,
                 <float*> y_ptr,
                 rf_params,
+                <int*> groups_ptr,
                 self.verbose)
 
         else:
@@ -537,14 +542,30 @@ class RandomForestRegressor(BaseRandomForestModel,
                 self.n_cols,
                 <double*> y_ptr,
                 rf_params64,
+                <int*> groups_ptr,
                 self.verbose)
 
         # make sure that the `fit` is complete before the following delete
         # call happens
         self.handle.sync()
         del X_m
         del y_m
+        del groups_m
         return self
 
+    def get_tree_sample_honesty_group_meta(self, tree_id, row_id):
+        cdef RandomForestMetaData[float, float] *rf_forest = \
+            <RandomForestMetaData[float, float]*><uintptr_t> self.rf_forest
+        cdef RandomForestMetaData[double, double] *rf_forest64 = \
+            <RandomForestMetaData[double, double]*><uintptr_t> self.rf_forest64
+
+        if self.dtype == np.float32:
+            return SplitAvgUnusedEnum(get_tree_row_meta_info(tree_id, row_id, rf_forest))
+        else:
+            return SplitAvgUnusedEnum(get_tree_row_meta_info(tree_id, row_id, rf_forest64))
+
     def _predict_model_on_cpu(self, X, convert_dtype) -> CumlArray:
         cdef uintptr_t X_ptr
         X_m, n_rows, n_cols, dtype = \