From aacd4fd1d4f61d0fefca26a6c18847f4acbe849a Mon Sep 17 00:00:00 2001 From: "Marvin N. Wright" Date: Tue, 20 Aug 2024 10:33:54 +0200 Subject: [PATCH] skip values for sampling in ascending order; fix bug --- DESCRIPTION | 6 +++--- NEWS.md | 3 +++ cpp_version/src/version.h | 2 +- src/Forest.cpp | 4 ++-- src/utility.cpp | 4 ++-- src/utility.h | 4 +++- 6 files changed, 14 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index edb4ca13..868565a7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: ranger Type: Package Title: A Fast Implementation of Random Forests -Version: 0.16.2 -Date: 2024-05-16 +Version: 0.16.3 +Date: 2024-08-20 Author: Marvin N. Wright [aut, cre], Stefan Wager [ctb], Philipp Probst [ctb] Maintainer: Marvin N. Wright Description: A fast implementation of Random Forests, particularly suited for high @@ -19,7 +19,7 @@ Suggests: survival, testthat Encoding: UTF-8 -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 URL: https://imbs-hl.github.io/ranger/, https://github.com/imbs-hl/ranger BugReports: https://github.com/imbs-hl/ranger/issues diff --git a/NEWS.md b/NEWS.md index 708b2316..85ece7f1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,7 @@ +# ranger 0.16.3 +* Fix a bug for always.split.variables (for some settings) + # ranger 0.16.2 * Add Poisson splitting rule for regression trees diff --git a/cpp_version/src/version.h b/cpp_version/src/version.h index 673de5f2..4aeaffdf 100644 --- a/cpp_version/src/version.h +++ b/cpp_version/src/version.h @@ -1,3 +1,3 @@ #ifndef RANGER_VERSION -#define RANGER_VERSION "0.16.2" +#define RANGER_VERSION "0.16.3" #endif diff --git a/src/Forest.cpp b/src/Forest.cpp index f4999015..95edd6f7 100644 --- a/src/Forest.cpp +++ b/src/Forest.cpp @@ -953,8 +953,8 @@ void Forest::setAlwaysSplitVariables(const std::vector& always_spli } } - // Sort in reverse order for removing with erase later - std::sort(deterministic_varIDs.rbegin(), deterministic_varIDs.rend()); + // Sort for removing later + std::sort(deterministic_varIDs.begin(), deterministic_varIDs.end()); } void Forest::showProgress(std::string operation, size_t max_progress) { diff --git a/src/utility.cpp b/src/utility.cpp index c3450122..14138a5b 100644 --- a/src/utility.cpp +++ b/src/utility.cpp @@ -171,8 +171,8 @@ void drawWithoutReplacementFisherYates(std::vector& result, std::mt19937 std::iota(result.begin(), result.end(), 0); // Skip indices - for (size_t i = 0; i < skip.size(); ++i) { - result.erase(result.begin() + skip[i]); + for (auto it = skip.rbegin(); it != skip.rend(); it++) { + result.erase(result.begin() + *it); } // Draw without replacement using Fisher Yates algorithm diff --git a/src/utility.h b/src/utility.h index 24b2b263..ae72842f 100644 --- a/src/utility.h +++ b/src/utility.h @@ -181,7 +181,8 @@ void drawWithoutReplacementSimple(std::vector& result, std::mt19937_64& size_t num_samples); /** - * Simple algorithm for sampling without replacement (skip values), faster for smaller num_samples + * Simple algorithm for sampling without replacement (skip values), faster for smaller num_samples. + * skip values are expected to be sorted in ascending order. * @param result Vector to add results to. Will not be cleaned before filling. * @param random_number_generator Random number generator * @param range_length Length of range. Interval to draw from: 0..max-1 @@ -203,6 +204,7 @@ void drawWithoutReplacementFisherYates(std::vector& result, std::mt19937 /** * Fisher Yates algorithm for sampling without replacement (skip values). + * skip values are expected to be sorted in ascending order. * @param result Vector to add results to. Will not be cleaned before filling. * @param random_number_generator Random number generator * @param max Length of range. Interval to draw from: 0..max-1