5 changes: 2 additions & 3 deletions cpp/bench/sg/fil.cu
@@ -78,7 +78,7 @@ class FIL : public RegressionFixture<float> {
ML::RandomForestRegressorF rf_model;
auto* mPtr = &rf_model;
size_t train_nrows = std::min(params.nrows, 1000);
- fit(*handle, mPtr, data.X.data(), train_nrows, params.ncols, data.y.data(), p_rest.rf);
+ fit(*handle, mPtr, data.X.data(), train_nrows, params.ncols, data.y.data(), p_rest.rf, nullptr);
handle->sync_stream(stream);

ML::build_treelite_forest(&model, &rf_model, params.ncols);
@@ -166,8 +166,7 @@ std::vector<Params> getInputs()
8, /* n_streams */
128 /* max_batch_size */,
0, /* minTreesPerGroupFold */
- 0, /* foldGroupSize */
- -1 /* group_col_idx */
+ 0 /* foldGroupSize */
);

using ML::fil::algo_t;
5 changes: 2 additions & 3 deletions cpp/bench/sg/filex.cu
@@ -61,7 +61,7 @@ class FILEX : public RegressionFixture<float> {
ML::RandomForestRegressorF rf_model;
auto* mPtr = &rf_model;
auto train_nrows = std::min(params.nrows, 1000);
- fit(*handle, mPtr, data.X.data(), train_nrows, params.ncols, data.y.data(), p_rest.rf);
+ fit(*handle, mPtr, data.X.data(), train_nrows, params.ncols, data.y.data(), p_rest.rf, nullptr);
handle->sync_stream(stream);

ML::build_treelite_forest(&model, &rf_model, params.ncols);
@@ -266,8 +266,7 @@ std::vector<Params> getInputs()
8, /* n_streams */
128, /* max_batch_size */
0, /* minTreesPerGroupFold */
- 0, /* foldGroupSize */
- -1 /* group_col_idx */
+ 0 /* foldGroupSize */
);

using ML::fil::algo_t;
6 changes: 3 additions & 3 deletions cpp/bench/sg/rf_classifier.cu
@@ -66,7 +66,8 @@ class RFClassifier : public BlobsFixture<D> {
this->params.ncols,
this->data.y.data(),
this->params.nclasses,
- rfParams);
+ rfParams,
+ nullptr /* groups */);
this->handle->sync_stream(this->stream);
});
}
@@ -110,8 +111,7 @@ std::vector<Params> getInputs()
8, /* n_streams */
128, /* max_batch_size */
0, /* minTreesPerGroupFold */
- 0, /* foldGroupSize */
- -1 /* group_col_idx */
+ 0 /* foldGroupSize */
);

std::vector<Triplets> rowcols = {
20 changes: 12 additions & 8 deletions cpp/include/cuml/ensemble/randomforest.hpp
@@ -136,12 +136,6 @@ struct RF_params {
* entire fold of groups left out.
*/
int foldGroupSize;

- /**
-  * group_col_idx
-  * The numeric index of the column to be used for group processing
-  */
- int group_col_idx;

/**
* Decision tree training hyper parameter struct.
@@ -196,6 +190,12 @@ void build_treelite_forest(ModelHandle* model,
const RandomForestMetaData<T, L>* forest,
int num_features);

+ template <class T, class L>
+ int get_tree_row_meta_info(
+   int ix_tree,
+   int ix_sample,
+   const RandomForestMetaData<T,L>* forest);

ModelHandle concatenate_trees(std::vector<ModelHandle> treelite_handles);

void compare_concat_forest_to_subforests(ModelHandle concat_tree_handle,
@@ -213,6 +213,7 @@ void fit(const raft::handle_t& user_handle,
int* labels,
int n_unique_labels,
RF_params rf_params,
+ int* groups = nullptr,
int verbosity = CUML_LEVEL_INFO);
void fit(const raft::handle_t& user_handle,
RandomForestClassifierD*& forest,
@@ -222,6 +223,7 @@ void fit(const raft::handle_t& user_handle,
int* labels,
int n_unique_labels,
RF_params rf_params,
+ int* groups = nullptr,
int verbosity = CUML_LEVEL_INFO);

void predict(const raft::handle_t& user_handle,
@@ -271,8 +273,7 @@ RF_params set_rf_params(int max_depth,
int cfg_n_streams,
int max_batch_size,
int minTreesPerGroupFold,
- int foldGroupSize,
- int group_col_idx);
+ int foldGroupSize);

// ----------------------------- Regression ----------------------------------- //

@@ -286,14 +287,17 @@ void fit(const raft::handle_t& user_handle,
int n_cols,
float* labels,
RF_params rf_params,
+ int* groups = nullptr,
int verbosity = CUML_LEVEL_INFO);

void fit(const raft::handle_t& user_handle,
RandomForestRegressorD*& forest,
double* input,
int n_rows,
int n_cols,
double* labels,
RF_params rf_params,
+ int* groups = nullptr,
int verbosity = CUML_LEVEL_INFO);

void predict(const raft::handle_t& user_handle,
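Taken together, the header changes above replace the removed group_col_idx hyperparameter with an explicit per-row `groups` buffer on every fit() overload; the parameter defaults to nullptr, so existing call sites (see the benchmark updates above) compile unchanged. Below is a minimal sketch of a caller supplying group labels; the function name and buffer setup are illustrative assumptions, not code from this PR:

#include <cuml/ensemble/randomforest.hpp>

// Sketch only: X, labels and groups are assumed to be device buffers laid
// out as the existing fit() overloads already expect, with one label and
// one group id per row.
void train_with_groups(const raft::handle_t& handle,
                       float* X, int* labels, int* groups,
                       int n_rows, int n_cols, int n_unique_labels,
                       ML::RF_params params)
{
  ML::RandomForestClassifierF model;
  auto* forest = &model;
  // Passing nullptr instead of `groups` preserves the previous behaviour.
  ML::fit(handle, forest, X, n_rows, n_cols, labels, n_unique_labels,
          params, groups);
  handle.sync_stream(handle.get_stream());
}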
3 changes: 3 additions & 0 deletions cpp/include/cuml/tree/decisiontree.hpp
@@ -117,6 +117,8 @@ void set_tree_params(DecisionTreeParams& params,
int cfg_max_batch_size = 4096,
bool cfg_oob_honesty = false);

+ enum SplitAvgUnusedEnum {unused = 0, split, avg, group_split_unselected, group_avg_unselected, invalid};

template <class T, class L>
struct TreeMetaDataNode {
int treeid;
@@ -125,6 +127,7 @@ struct TreeMetaDataNode {
double train_time;
std::vector<T> vector_leaf;
std::vector<SparseTreeNode<T, L>> sparsetree;
+ std::vector<SplitAvgUnusedEnum> split_avg_enums;
int num_outputs;
};

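The new SplitAvgUnusedEnum tags each training row's role within a tree: used for splitting, used for leaf averaging (the honesty machinery referenced in builder.cuh below), deselected by a group fold, or unused entirely; these semantics are inferred here from the enumerator names and the rest of this PR. A short sketch of consuming the per-tree split_avg_enums metadata:

#include <array>
#include <cstddef>

// Sketch: tally how one tree used its rows. TreeMetaDataNode is the struct
// extended above; namespace qualification follows decisiontree.hpp.
template <class T, class L>
std::array<std::size_t, 6> count_row_roles(const TreeMetaDataNode<T, L>& tree)
{
  std::array<std::size_t, 6> counts{};  // one slot per SplitAvgUnusedEnum value
  for (SplitAvgUnusedEnum role : tree.split_avg_enums) {
    counts[static_cast<std::size_t>(role)]++;
  }
  return counts;
}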
2 changes: 1 addition & 1 deletion cpp/src/decisiontree/batched-levelalgo/builder.cuh
@@ -85,7 +85,7 @@ class NodeQueue {

int nTrain = int(instance.count - instance.avg_count);
if (nTrain < params.min_samples_split_splitting) return false;
- if (params.oob_honesty and instance.avg_count < params.min_samples_split_averaging) return false;
+ if (params.oob_honesty and static_cast<int>(instance.avg_count) < params.min_samples_split_averaging) return false;
if (params.max_leaves != -1 && tree->leaf_counter >= params.max_leaves) return false;
return true;
}
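The static_cast added here fixes a signed/unsigned comparison: instance.avg_count is an unsigned counter (note the explicit int() on the subtraction two lines up), while min_samples_split_averaging is a signed int, so the old code converted the threshold to unsigned before comparing. A standalone illustration of the failure mode, under the assumption that a negative threshold (for instance a "disabled" sentinel) can occur:

#include <cstdint>
#include <iostream>

int main()
{
  std::uint32_t avg_count   = 3;   // unsigned, like instance.avg_count
  int           min_samples = -1;  // hypothetical "no minimum" sentinel

  // Pre-fix semantics: the signed threshold converts to unsigned, -1 wraps
  // to 4294967295, the test is true, and the node would be wrongly rejected.
  std::cout << (avg_count < static_cast<std::uint32_t>(min_samples)) << '\n';  // prints 1

  // Post-fix semantics: both operands are signed, so the test is false.
  std::cout << (static_cast<int>(avg_count) < min_samples) << '\n';            // prints 0
  return 0;
}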
32 changes: 25 additions & 7 deletions cpp/src/randomforest/randomforest.cu
@@ -449,6 +449,7 @@ void fit(const raft::handle_t& user_handle,
int* labels,
int n_unique_labels,
RF_params rf_params,
+ int* groups,
int verbosity)
{
raft::common::nvtx::range fun_scope("RF::fit @randomforest.cu");
@@ -459,7 +460,18 @@

std::shared_ptr<RandomForest<float, int>> rf_classifier =
std::make_shared<RandomForest<float, int>>(rf_params, RF_type::CLASSIFICATION);
- rf_classifier->fit(user_handle, input, n_rows, n_cols, labels, n_unique_labels, forest);
+ rf_classifier->fit(user_handle, input, groups, n_rows, n_cols, labels, n_unique_labels, forest);
}

+ template<class T, class L>
+ int get_tree_row_meta_info(
+   int ix_tree,
+   int ix_sample,
+   const RandomForestMetaData<T,L>* forest)
+ {
+   if (ix_tree >= forest->trees.size()) return SplitAvgUnusedEnum::invalid;
+   if (ix_sample >= forest->trees[ix_tree]->split_avg_enums.size()) return SplitAvgUnusedEnum::invalid;
+   return forest->trees[ix_tree]->split_avg_enums[ix_sample];
+ }

void fit(const raft::handle_t& user_handle,
@@ -470,6 +482,7 @@ void fit(const raft::handle_t& user_handle,
int* labels,
int n_unique_labels,
RF_params rf_params,
+ int* groups,
int verbosity)
{
raft::common::nvtx::range fun_scope("RF::fit @randomforest.cu");
@@ -480,7 +493,7 @@

std::shared_ptr<RandomForest<double, int>> rf_classifier =
std::make_shared<RandomForest<double, int>>(rf_params, RF_type::CLASSIFICATION);
- rf_classifier->fit(user_handle, input, n_rows, n_cols, labels, n_unique_labels, forest);
+ rf_classifier->fit(user_handle, input, groups, n_rows, n_cols, labels, n_unique_labels, forest);
}
/** @} */

@@ -596,8 +609,7 @@ RF_params set_rf_params(int max_depth,
int cfg_n_streams,
int max_batch_size,
int minTreesPerGroupFold,
- int foldGroupSize,
- int group_col_idx)
+ int foldGroupSize)
{
DT::DecisionTreeParams tree_params;
DT::set_tree_params(tree_params,
@@ -625,7 +637,6 @@
rf_params.tree_params = tree_params;
rf_params.minTreesPerGroupFold = minTreesPerGroupFold;
rf_params.foldGroupSize = foldGroupSize;
- rf_params.group_col_idx = group_col_idx;
validity_check(rf_params);
return rf_params;
}
@@ -654,6 +665,7 @@ void fit(const raft::handle_t& user_handle,
int n_cols,
float* labels,
RF_params rf_params,
+ int* groups,
int verbosity)
{
raft::common::nvtx::range fun_scope("RF::fit @randomforest.cu");
@@ -664,7 +676,7 @@

std::shared_ptr<RandomForest<float, float>> rf_regressor =
std::make_shared<RandomForest<float, float>>(rf_params, RF_type::REGRESSION);
- rf_regressor->fit(user_handle, input, n_rows, n_cols, labels, 1, forest);
+ rf_regressor->fit(user_handle, input, groups, n_rows, n_cols, labels, 1, forest);
}

void fit(const raft::handle_t& user_handle,
@@ -674,6 +686,7 @@
int n_cols,
double* labels,
RF_params rf_params,
+ int* groups,
int verbosity)
{
raft::common::nvtx::range fun_scope("RF::fit @randomforest.cu");
@@ -684,7 +697,7 @@

std::shared_ptr<RandomForest<double, double>> rf_regressor =
std::make_shared<RandomForest<double, double>>(rf_params, RF_type::REGRESSION);
- rf_regressor->fit(user_handle, input, n_rows, n_cols, labels, 1, forest);
+ rf_regressor->fit(user_handle, input, groups, n_rows, n_cols, labels, 1, forest);
}
/** @} */

@@ -776,6 +789,11 @@ template std::string get_rf_summary_text<double, int>(const RandomForestClassifierD* forest);
template std::string get_rf_summary_text<float, float>(const RandomForestRegressorF* forest);
template std::string get_rf_summary_text<double, double>(const RandomForestRegressorD* forest);

+ template int get_tree_row_meta_info<float, int>(int, int, const RandomForestClassifierF* forest);
+ template int get_tree_row_meta_info<double, int>(int, int, const RandomForestClassifierD* forest);
+ template int get_tree_row_meta_info<float, float>(int, int, const RandomForestRegressorF* forest);
+ template int get_tree_row_meta_info<double, double>(int, int, const RandomForestRegressorD* forest);

template std::string get_rf_detailed_text<float, int>(const RandomForestClassifierF* forest);
template std::string get_rf_detailed_text<double, int>(const RandomForestClassifierD* forest);
template std::string get_rf_detailed_text<float, float>(const RandomForestRegressorF* forest);
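Finally, a sketch of reading the new per-row metadata back after training. Here `forest` is assumed to be a RandomForestClassifierF* already populated by one of the fit() overloads above, and the ML:: and enum qualifications follow the headers in this PR:

// Sketch: ask how tree 0 used training row 42. Out-of-range indices return
// SplitAvgUnusedEnum::invalid, per the implementation above.
int role = ML::get_tree_row_meta_info<float, int>(/*ix_tree=*/0,
                                                  /*ix_sample=*/42,
                                                  forest);
if (role == SplitAvgUnusedEnum::avg) {
  // Row 42 fell in the averaging (honest estimation) set of tree 0.
}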