Skip to content

Commit

Permalink
min bucket for Poisson splitting
Browse files Browse the repository at this point in the history
  • Loading branch information
mnwright committed May 16, 2024
1 parent 4ac8f57 commit ed2b73d
Showing 1 changed file with 13 additions and 4 deletions.
17 changes: 13 additions & 4 deletions src/TreeRegression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1001,10 +1001,14 @@ bool TreeRegression::findBestSplitPoisson(size_t nodeID, std::vector<size_t>& po
// Compute sum of responses in node
double sum_node = sumNodeResponse(nodeID);

// For all possible split variables find best split value
for (auto& varID : possible_split_varIDs) {
findBestSplitValuePoissonSmallQ(nodeID, varID, sum_node, num_samples_node, best_value, best_varID,
best_decrease);
// Stop early if no split possible
if (num_samples_node >= 2 * (*min_bucket)[0]) {

// For all possible split variables find best split value
for (auto& varID : possible_split_varIDs) {
findBestSplitValuePoissonSmallQ(nodeID, varID, sum_node, num_samples_node, best_value, best_varID,
best_decrease);
}
}

// Stop if no good split found
Expand Down Expand Up @@ -1088,6 +1092,11 @@ void TreeRegression::findBestSplitValuePoissonSmallQ(size_t nodeID, size_t varID
break;
}

// Skip this candidate split if either child would be smaller than the minimal bucket size
if (n_left < (*min_bucket)[0] || n_right < (*min_bucket)[0]) {
continue;
}

// Compute mean
double sum_right = sum_node - sum_left;
double mean_right = sum_right / (double) n_right;
Expand Down

0 comments on commit ed2b73d

Please sign in to comment.