From 8a71b9831cc2d5701f5dbbe0229a5c677b032e6f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 9 Sep 2024 12:15:18 -0700 Subject: [PATCH 01/28] Add support for Python 3.12 (#2428) Contributes to https://github.com/rapidsai/build-planning/issues/40 This PR adds support for Python 3.12. ## Notes for Reviewers This is part of ongoing work to add Python 3.12 support across RAPIDS. It temporarily introduces a build/test matrix including Python 3.12, from https://github.com/rapidsai/shared-workflows/pull/213. A follow-up PR will revert back to pointing at the `branch-24.10` branch of `shared-workflows` once all RAPIDS repos have added Python 3.12 support. ### This will fail until all dependencies have been updates to Python 3.12 CI here is expected to fail until all of this project's upstream dependencies support Python 3.12. This can be merged whenever all CI jobs are passing. Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/raft/pull/2428 --- .github/workflows/build.yaml | 16 ++++++++-------- .github/workflows/pr.yaml | 26 +++++++++++++------------- .github/workflows/test.yaml | 10 +++++----- dependencies.yaml | 6 +++++- python/pylibraft/pyproject.toml | 1 + python/raft-ann-bench/pyproject.toml | 1 + python/raft-dask/pyproject.toml | 1 + 7 files changed, 34 insertions(+), 27 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 2b0ae5099c..037d7cc05f 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@python-3.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.12 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: package-name: pylibraft wheel-build-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -98,7 +98,7 @@ jobs: wheel-publish-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 381ca6b378..26c6928f77 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,29 +25,29 @@ jobs: - wheel-tests-raft-dask - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@python-3.12 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@python-3.12 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.12 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@python-3.12 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@python-3.12 with: build_type: pull-request enable_check_symbols: true @@ -55,19 +55,19 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.12 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@python-3.12 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -77,34 +77,34 @@ jobs: wheel-build-pylibraft: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.12 with: build_type: pull-request script: ci/build_wheel_pylibraft.sh wheel-tests-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.12 with: build_type: pull-request script: ci/test_wheel_pylibraft.sh wheel-build-raft-dask: needs: wheel-tests-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.12 with: build_type: pull-request script: "ci/build_wheel_raft_dask.sh" wheel-tests-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.12 with: build_type: pull-request script: ci/test_wheel_raft_dask.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@python-3.12 with: arch: '["amd64"]' cuda: '["12.5"]' diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index ad0456d526..90518c1132 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@python-3.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: raft_cutlass conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@python-3.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@python-3.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: script: ci/test_wheel_pylibraft.sh wheel-tests-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.12 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/dependencies.yaml b/dependencies.yaml index e4e361548f..8f5c69245f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -463,8 +463,12 @@ dependencies: packages: - python=3.11 - matrix: + py: "3.12" packages: - - python>=3.10,<3.12 + - python=3.12 + - matrix: + packages: + - python>=3.10,<3.13 run_pylibraft: common: - output_types: [conda, pyproject] diff --git a/python/pylibraft/pyproject.toml b/python/pylibraft/pyproject.toml index 14f2ba7d2f..a540915585 100644 --- a/python/pylibraft/pyproject.toml +++ b/python/pylibraft/pyproject.toml @@ -44,6 +44,7 @@ classifiers = [ "Programming Language :: Python", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] [project.optional-dependencies] diff --git a/python/raft-ann-bench/pyproject.toml b/python/raft-ann-bench/pyproject.toml index fa5781893b..0e4fda1f00 100644 --- a/python/raft-ann-bench/pyproject.toml +++ b/python/raft-ann-bench/pyproject.toml @@ -27,6 +27,7 @@ classifiers = [ "Programming Language :: Python", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] [project.urls] diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index 44012b5f10..d1f577120f 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -44,6 +44,7 @@ classifiers = [ "Programming Language :: Python", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] [project.optional-dependencies] From ac53a0fdc35fe36a11a3ac4debd4cc9a4076fe7f Mon Sep 17 00:00:00 2001 From: Micka Date: Tue, 10 Sep 2024 17:20:52 +0200 Subject: [PATCH 02/28] [BUG] Fix bitset function visibility (#2429) `raft::ceildiv` is also being replaced with `raft::div_rounding_up_safe` to avoid including CUDA headers when not needed. Authors: - Micka (https://github.com/lowener) Approvers: - rhdong (https://github.com/rhdong) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2429 --- cpp/include/raft/core/bitmap.cuh | 6 +++--- cpp/include/raft/core/bitset.cuh | 24 ++++++------------------ cpp/include/raft/core/bitset.hpp | 11 +++++++++-- 3 files changed, 18 insertions(+), 23 deletions(-) diff --git a/cpp/include/raft/core/bitmap.cuh b/cpp/include/raft/core/bitmap.cuh index cafd1977ab..024b1244a6 100644 --- a/cpp/include/raft/core/bitmap.cuh +++ b/cpp/include/raft/core/bitmap.cuh @@ -35,9 +35,9 @@ _RAFT_HOST_DEVICE inline bool bitmap_view::test(const index_t } template -_RAFT_HOST_DEVICE void bitmap_view::set(const index_t row, - const index_t col, - bool new_value) const +_RAFT_DEVICE void bitmap_view::set(const index_t row, + const index_t col, + bool new_value) const { set(row * cols_ + col, new_value); } diff --git a/cpp/include/raft/core/bitset.cuh b/cpp/include/raft/core/bitset.cuh index 0cdb4c1fb6..b6e6128eca 100644 --- a/cpp/include/raft/core/bitset.cuh +++ b/cpp/include/raft/core/bitset.cuh @@ -46,8 +46,8 @@ _RAFT_HOST_DEVICE bool bitset_view::operator[](const index_t } template -_RAFT_HOST_DEVICE void bitset_view::set(const index_t sample_index, - bool set_value) const +_RAFT_DEVICE void bitset_view::set(const index_t sample_index, + bool set_value) const { const index_t bit_element = sample_index / bitset_element_size; const index_t bit_index = sample_index % bitset_element_size; @@ -60,18 +60,12 @@ _RAFT_HOST_DEVICE void bitset_view::set(const index_t sample_ } } -template -_RAFT_HOST_DEVICE inline index_t bitset_view::n_elements() const -{ - return raft::ceildiv(bitset_len_, bitset_element_size); -} - template bitset::bitset(const raft::resources& res, raft::device_vector_view mask_index, index_t bitset_len, bool default_value) - : bitset_{std::size_t(raft::ceildiv(bitset_len, bitset_element_size)), + : bitset_{std::size_t(raft::div_rounding_up_safe(bitset_len, bitset_element_size)), raft::resource::get_cuda_stream(res)}, bitset_len_{bitset_len} { @@ -83,26 +77,20 @@ template bitset::bitset(const raft::resources& res, index_t bitset_len, bool default_value) - : bitset_{std::size_t(raft::ceildiv(bitset_len, bitset_element_size)), + : bitset_{std::size_t(raft::div_rounding_up_safe(bitset_len, bitset_element_size)), raft::resource::get_cuda_stream(res)}, bitset_len_{bitset_len} { reset(res, default_value); } -template -index_t bitset::n_elements() const -{ - return raft::ceildiv(bitset_len_, bitset_element_size); -} - template void bitset::resize(const raft::resources& res, index_t new_bitset_len, bool default_value) { - auto old_size = raft::ceildiv(bitset_len_, bitset_element_size); - auto new_size = raft::ceildiv(new_bitset_len, bitset_element_size); + auto old_size = raft::div_rounding_up_safe(bitset_len_, bitset_element_size); + auto new_size = raft::div_rounding_up_safe(new_bitset_len, bitset_element_size); bitset_.resize(new_size); bitset_len_ = new_bitset_len; if (old_size < new_size) { diff --git a/cpp/include/raft/core/bitset.hpp b/cpp/include/raft/core/bitset.hpp index 0df12f25e6..3608ee43fa 100644 --- a/cpp/include/raft/core/bitset.hpp +++ b/cpp/include/raft/core/bitset.hpp @@ -20,6 +20,7 @@ #include #include #include +#include namespace raft::core { /** @@ -89,7 +90,10 @@ struct bitset_view { /** * @brief Get the number of elements used by the bitset representation. */ - inline _RAFT_HOST_DEVICE auto n_elements() const -> index_t; + inline _RAFT_HOST_DEVICE auto n_elements() const -> index_t + { + return raft::div_rounding_up_safe(bitset_len_, bitset_element_size); + } inline auto to_mdspan() -> raft::device_vector_view { @@ -173,7 +177,10 @@ struct bitset { /** * @brief Get the number of elements used by the bitset representation. */ - inline auto n_elements() const -> index_t; + inline auto n_elements() const -> index_t + { + return raft::div_rounding_up_safe(bitset_len_, bitset_element_size); + } /** @brief Get an mdspan view of the current bitset */ inline auto to_mdspan() -> raft::device_vector_view From 20718412dfa2026b94de2de2e822b73fe26a3bbc Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 17 Sep 2024 11:56:06 -0700 Subject: [PATCH 03/28] Allow coo_sort to work on int64_t indices (#2432) Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/raft/pull/2432 --- cpp/include/raft/sparse/op/detail/sort.h | 10 +++++----- cpp/include/raft/sparse/op/sort.cuh | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cpp/include/raft/sparse/op/detail/sort.h b/cpp/include/raft/sparse/op/detail/sort.h index 85ae825035..02287c2367 100644 --- a/cpp/include/raft/sparse/op/detail/sort.h +++ b/cpp/include/raft/sparse/op/detail/sort.h @@ -68,8 +68,8 @@ struct TupleComp { * @param vals vals array from coo matrix * @param stream: cuda stream to use */ -template -void coo_sort(int m, int n, int nnz, int* rows, int* cols, T* vals, cudaStream_t stream) +template +void coo_sort(IdxT m, IdxT n, IdxT nnz, IdxT* rows, IdxT* cols, T* vals, cudaStream_t stream) { auto coo_indices = thrust::make_zip_iterator(thrust::make_tuple(rows, cols)); @@ -83,10 +83,10 @@ void coo_sort(int m, int n, int nnz, int* rows, int* cols, T* vals, cudaStream_t * @param in: COO to sort by row * @param stream: the cuda stream to use */ -template -void coo_sort(COO* const in, cudaStream_t stream) +template +void coo_sort(COO* const in, cudaStream_t stream) { - coo_sort(in->n_rows, in->n_cols, in->nnz, in->rows(), in->cols(), in->vals(), stream); + coo_sort(in->n_rows, in->n_cols, in->nnz, in->rows(), in->cols(), in->vals(), stream); } /** diff --git a/cpp/include/raft/sparse/op/sort.cuh b/cpp/include/raft/sparse/op/sort.cuh index c6c3c2e220..5b8a792429 100644 --- a/cpp/include/raft/sparse/op/sort.cuh +++ b/cpp/include/raft/sparse/op/sort.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,8 +37,8 @@ namespace op { * @param vals vals array from coo matrix * @param stream: cuda stream to use */ -template -void coo_sort(int m, int n, int nnz, int* rows, int* cols, T* vals, cudaStream_t stream) +template +void coo_sort(IdxT m, IdxT n, IdxT nnz, IdxT* rows, IdxT* cols, T* vals, cudaStream_t stream) { detail::coo_sort(m, n, nnz, rows, cols, vals, stream); } @@ -49,10 +49,10 @@ void coo_sort(int m, int n, int nnz, int* rows, int* cols, T* vals, cudaStream_t * @param in: COO to sort by row * @param stream: the cuda stream to use */ -template -void coo_sort(COO* const in, cudaStream_t stream) +template +void coo_sort(COO* const in, cudaStream_t stream) { - coo_sort(in->n_rows, in->n_cols, in->nnz, in->rows(), in->cols(), in->vals(), stream); + coo_sort(in->n_rows, in->n_cols, in->nnz, in->rows(), in->cols(), in->vals(), stream); } /** @@ -75,4 +75,4 @@ void coo_sort_by_weight( }; // end NAMESPACE sparse }; // end NAMESPACE raft -#endif \ No newline at end of file +#endif From e60cd7d063c3ab7830959d5aef465461f2daf99f Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 18 Sep 2024 03:05:44 -0500 Subject: [PATCH 04/28] Update to flake8 7.1.1. (#2435) We need to update flake8 to fix a false-positive that appears with older flake8 versions on Python 3.12. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/raft/pull/2435 --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 88be628e55..458d8b1b51 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # Explicitly specify the pyproject.toml at the repo root, not per-project. args: ["--config", "pyproject.toml"] - repo: https://github.com/PyCQA/flake8 - rev: 5.0.4 + rev: 7.1.1 hooks: - id: flake8 args: ["--config=.flake8"] From c5dcba0333baa4daf41292a8901044442c8d79c8 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 18 Sep 2024 14:43:38 -0500 Subject: [PATCH 05/28] Use CI workflow branch 'branch-24.10' again [skip ci] (#2437) --- .github/workflows/build.yaml | 16 ++++++++-------- .github/workflows/pr.yaml | 26 +++++++++++++------------- .github/workflows/test.yaml | 10 +++++----- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 037d7cc05f..2b0ae5099c 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: package-name: pylibraft wheel-build-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -98,7 +98,7 @@ jobs: wheel-publish-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 26c6928f77..381ca6b378 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,29 +25,29 @@ jobs: - wheel-tests-raft-dask - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 with: build_type: pull-request enable_check_symbols: true @@ -55,19 +55,19 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -77,34 +77,34 @@ jobs: wheel-build-pylibraft: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: build_type: pull-request script: ci/build_wheel_pylibraft.sh wheel-tests-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: build_type: pull-request script: ci/test_wheel_pylibraft.sh wheel-build-raft-dask: needs: wheel-tests-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: build_type: pull-request script: "ci/build_wheel_raft_dask.sh" wheel-tests-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: build_type: pull-request script: ci/test_wheel_raft_dask.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.10 with: arch: '["amd64"]' cuda: '["12.5"]' diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 90518c1132..ad0456d526 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: raft_cutlass conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: script: ci/test_wheel_pylibraft.sh wheel-tests-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} From 12537c57aadc7436962fbf3109444cb7d50b4103 Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Thu, 19 Sep 2024 12:04:39 -0400 Subject: [PATCH 06/28] DOC v24.12 Updates [skip ci] --- .../cuda11.8-conda/devcontainer.json | 6 ++-- .devcontainer/cuda11.8-pip/devcontainer.json | 8 ++--- .../cuda12.5-conda/devcontainer.json | 6 ++-- .devcontainer/cuda12.5-pip/devcontainer.json | 8 ++--- .github/workflows/build.yaml | 16 +++++----- .github/workflows/pr.yaml | 26 ++++++++-------- .github/workflows/test.yaml | 10 +++---- README.md | 2 +- VERSION | 2 +- .../all_cuda-118_arch-aarch64.yaml | 14 ++++----- .../all_cuda-118_arch-x86_64.yaml | 14 ++++----- .../all_cuda-125_arch-aarch64.yaml | 14 ++++----- .../all_cuda-125_arch-x86_64.yaml | 14 ++++----- .../bench_ann_cuda-118_arch-aarch64.yaml | 4 +-- .../bench_ann_cuda-118_arch-x86_64.yaml | 4 +-- .../bench_ann_cuda-120_arch-aarch64.yaml | 4 +-- .../bench_ann_cuda-120_arch-x86_64.yaml | 4 +-- .../recipes/raft-dask/conda_build_config.yaml | 4 +-- .../cmake/thirdparty/fetch_rapids.cmake | 2 +- dependencies.yaml | 30 +++++++++---------- docs/source/build.md | 2 +- docs/source/developer_guide.md | 6 ++-- docs/source/raft_ann_benchmarks.md | 12 ++++---- python/pylibraft/pyproject.toml | 4 +-- .../raft-dask/cmake/thirdparty/get_ucxx.cmake | 4 +-- python/raft-dask/pyproject.toml | 10 +++---- 26 files changed, 115 insertions(+), 115 deletions(-) diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index a2b12764c5..008bf8730a 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.10-cpp-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index f319536b18..75aed80f9f 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,24 +5,24 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.10-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda11.8-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:24.10": { + "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { "version": "11.8", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.5-conda/devcontainer.json index adc79408a3..240ba02131 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.5-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.10-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.5-pip/devcontainer.json index 26b9a56e48..c23c79017a 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.5-pip/devcontainer.json @@ -5,24 +5,24 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.10-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:24.10": { + "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { "version": "12.5", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 2b0ae5099c..db379c9d47 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: package-name: pylibraft wheel-build-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -98,7 +98,7 @@ jobs: wheel-publish-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 381ca6b378..844f9f9441 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,29 +25,29 @@ jobs: - wheel-tests-raft-dask - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 with: build_type: pull-request enable_check_symbols: true @@ -55,19 +55,19 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -77,34 +77,34 @@ jobs: wheel-build-pylibraft: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: pull-request script: ci/build_wheel_pylibraft.sh wheel-tests-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 with: build_type: pull-request script: ci/test_wheel_pylibraft.sh wheel-build-raft-dask: needs: wheel-tests-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: pull-request script: "ci/build_wheel_raft_dask.sh" wheel-tests-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 with: build_type: pull-request script: ci/test_wheel_raft_dask.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.12 with: arch: '["amd64"]' cuda: '["12.5"]' diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index ad0456d526..2bee8a3d1d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: raft_cutlass conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: script: ci/test_wheel_pylibraft.sh wheel-tests-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/README.md b/README.md index 25ce059630..2db45cf749 100755 --- a/README.md +++ b/README.md @@ -293,7 +293,7 @@ You can also install the conda packages individually using the `mamba` command a mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.5 ``` -If installing the C++ APIs please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.10/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. +If installing the C++ APIs please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.12/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. ### Installing Python through Pip diff --git a/VERSION b/VERSION index 7c7ba04436..af28c42b52 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.10.00 +24.12.00 diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 462874a7e7..12ce2afcb6 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -20,8 +20,8 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.10.*,>=0.0.0a0 -- distributed-ucxx==0.40.*,>=0.0.0a0 +- dask-cuda==24.12.*,>=0.0.0a0 +- distributed-ucxx==0.41.*,>=0.0.0a0 - doxygen>=1.8.20 - gcc_linux-aarch64=11.* - graphviz @@ -35,7 +35,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- libucxx==0.40.*,>=0.0.0a0 +- libucxx==0.41.*,>=0.0.0a0 - nccl>=2.9.9 - ninja - numba>=0.57 @@ -44,18 +44,18 @@ dependencies: - nvcc_linux-aarch64=11.8 - pre-commit - pydata-sphinx-theme -- pylibraft==24.10.*,>=0.0.0a0 +- pylibraft==24.12.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- rapids-dask-dependency==24.12.*,>=0.0.0a0 - recommonmark -- rmm==24.10.*,>=0.0.0a0 +- rmm==24.12.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn - scipy - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 -- ucx-py==0.40.*,>=0.0.0a0 +- ucx-py==0.41.*,>=0.0.0a0 name: all_cuda-118_arch-aarch64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index cfd974a6a8..0f178a648d 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -20,8 +20,8 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.10.*,>=0.0.0a0 -- distributed-ucxx==0.40.*,>=0.0.0a0 +- dask-cuda==24.12.*,>=0.0.0a0 +- distributed-ucxx==0.41.*,>=0.0.0a0 - doxygen>=1.8.20 - gcc_linux-64=11.* - graphviz @@ -35,7 +35,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- libucxx==0.40.*,>=0.0.0a0 +- libucxx==0.41.*,>=0.0.0a0 - nccl>=2.9.9 - ninja - numba>=0.57 @@ -44,18 +44,18 @@ dependencies: - nvcc_linux-64=11.8 - pre-commit - pydata-sphinx-theme -- pylibraft==24.10.*,>=0.0.0a0 +- pylibraft==24.12.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- rapids-dask-dependency==24.12.*,>=0.0.0a0 - recommonmark -- rmm==24.10.*,>=0.0.0a0 +- rmm==24.12.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn - scipy - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-64==2.17 -- ucx-py==0.40.*,>=0.0.0a0 +- ucx-py==0.41.*,>=0.0.0a0 name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index 82e391e9ae..8e985df433 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -21,8 +21,8 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.10.*,>=0.0.0a0 -- distributed-ucxx==0.40.*,>=0.0.0a0 +- dask-cuda==24.12.*,>=0.0.0a0 +- distributed-ucxx==0.41.*,>=0.0.0a0 - doxygen>=1.8.20 - gcc_linux-aarch64=11.* - graphviz @@ -32,7 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libucxx==0.40.*,>=0.0.0a0 +- libucxx==0.41.*,>=0.0.0a0 - nccl>=2.9.9 - ninja - numba>=0.57 @@ -40,18 +40,18 @@ dependencies: - numpydoc - pre-commit - pydata-sphinx-theme -- pylibraft==24.10.*,>=0.0.0a0 +- pylibraft==24.12.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- rapids-dask-dependency==24.12.*,>=0.0.0a0 - recommonmark -- rmm==24.10.*,>=0.0.0a0 +- rmm==24.12.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn - scipy - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 -- ucx-py==0.40.*,>=0.0.0a0 +- ucx-py==0.41.*,>=0.0.0a0 name: all_cuda-125_arch-aarch64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 0389427d13..9bf6d83f1f 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -21,8 +21,8 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.10.*,>=0.0.0a0 -- distributed-ucxx==0.40.*,>=0.0.0a0 +- dask-cuda==24.12.*,>=0.0.0a0 +- distributed-ucxx==0.41.*,>=0.0.0a0 - doxygen>=1.8.20 - gcc_linux-64=11.* - graphviz @@ -32,7 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libucxx==0.40.*,>=0.0.0a0 +- libucxx==0.41.*,>=0.0.0a0 - nccl>=2.9.9 - ninja - numba>=0.57 @@ -40,18 +40,18 @@ dependencies: - numpydoc - pre-commit - pydata-sphinx-theme -- pylibraft==24.10.*,>=0.0.0a0 +- pylibraft==24.12.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- rapids-dask-dependency==24.12.*,>=0.0.0a0 - recommonmark -- rmm==24.10.*,>=0.0.0a0 +- rmm==24.12.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn - scipy - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-64==2.17 -- ucx-py==0.40.*,>=0.0.0a0 +- ucx-py==0.41.*,>=0.0.0a0 name: all_cuda-125_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index eff1c56840..b182d5ae1a 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -30,7 +30,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- libucxx==0.40.*,>=0.0.0a0 +- libucxx==0.41.*,>=0.0.0a0 - matplotlib - nccl>=2.9.9 - ninja @@ -40,7 +40,7 @@ dependencies: - pandas - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rmm==24.10.*,>=0.0.0a0 +- rmm==24.12.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - sysroot_linux-aarch64==2.17 name: bench_ann_cuda-118_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 87b19d2952..92f61150b7 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -30,7 +30,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- libucxx==0.40.*,>=0.0.0a0 +- libucxx==0.41.*,>=0.0.0a0 - matplotlib - nccl>=2.9.9 - ninja @@ -40,7 +40,7 @@ dependencies: - pandas - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rmm==24.10.*,>=0.0.0a0 +- rmm==24.12.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - sysroot_linux-64==2.17 name: bench_ann_cuda-118_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml index ff3451c15c..207680b788 100644 --- a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml @@ -27,7 +27,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libucxx==0.40.*,>=0.0.0a0 +- libucxx==0.41.*,>=0.0.0a0 - matplotlib - nccl>=2.9.9 - ninja @@ -36,7 +36,7 @@ dependencies: - pandas - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rmm==24.10.*,>=0.0.0a0 +- rmm==24.12.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - sysroot_linux-aarch64==2.17 name: bench_ann_cuda-120_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml index 085e099ae8..510d82eb60 100644 --- a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml @@ -27,7 +27,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libucxx==0.40.*,>=0.0.0a0 +- libucxx==0.41.*,>=0.0.0a0 - matplotlib - nccl>=2.9.9 - ninja @@ -36,7 +36,7 @@ dependencies: - pandas - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rmm==24.10.*,>=0.0.0a0 +- rmm==24.12.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - sysroot_linux-64==2.17 name: bench_ann_cuda-120_arch-x86_64 diff --git a/conda/recipes/raft-dask/conda_build_config.yaml b/conda/recipes/raft-dask/conda_build_config.yaml index e6afed2890..ffff76e378 100644 --- a/conda/recipes/raft-dask/conda_build_config.yaml +++ b/conda/recipes/raft-dask/conda_build_config.yaml @@ -17,10 +17,10 @@ c_stdlib_version: - "2.17" ucx_py_version: - - "0.40.*" + - "0.41.*" ucxx_version: - - "0.40.*" + - "0.41.*" cmake_version: - ">=3.26.4,!=3.30.0" diff --git a/cpp/template/cmake/thirdparty/fetch_rapids.cmake b/cpp/template/cmake/thirdparty/fetch_rapids.cmake index f64a924cf5..6f4c627ed4 100644 --- a/cpp/template/cmake/thirdparty/fetch_rapids.cmake +++ b/cpp/template/cmake/thirdparty/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. # Use this variable to update RAPIDS and RAFT versions -set(RAPIDS_VERSION "24.10") +set(RAPIDS_VERSION "24.12") if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake diff --git a/dependencies.yaml b/dependencies.yaml index 8f5c69245f..857532237e 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -171,7 +171,7 @@ dependencies: - c-compiler - cxx-compiler - nccl>=2.9.9 - - libucxx==0.40.*,>=0.0.0a0 + - libucxx==0.41.*,>=0.0.0a0 specific: - output_types: conda matrices: @@ -210,7 +210,7 @@ dependencies: common: - output_types: [conda] packages: - - &rmm_unsuffixed rmm==24.10.*,>=0.0.0a0 + - &rmm_unsuffixed rmm==24.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -237,12 +237,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - &rmm_cu12 rmm-cu12==24.10.*,>=0.0.0a0 + - &rmm_cu12 rmm-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - &rmm_cu11 rmm-cu11==24.10.*,>=0.0.0a0 + - &rmm_cu11 rmm-cu11==24.12.*,>=0.0.0a0 - {matrix: null, packages: [*rmm_unsuffixed] } checks: common: @@ -510,14 +510,14 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - dask-cuda==24.10.*,>=0.0.0a0 + - dask-cuda==24.12.*,>=0.0.0a0 - joblib>=0.11 - numba>=0.57 - - rapids-dask-dependency==24.10.*,>=0.0.0a0 + - rapids-dask-dependency==24.12.*,>=0.0.0a0 - output_types: conda packages: - - &pylibraft_unsuffixed pylibraft==24.10.*,>=0.0.0a0 - - &ucx_py_unsuffixed ucx-py==0.40.*,>=0.0.0a0 + - &pylibraft_unsuffixed pylibraft==24.12.*,>=0.0.0a0 + - &ucx_py_unsuffixed ucx-py==0.41.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -531,14 +531,14 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - &pylibraft_cu12 pylibraft-cu12==24.10.*,>=0.0.0a0 - - &ucx_py_cu12 ucx-py-cu12==0.40.*,>=0.0.0a0 + - &pylibraft_cu12 pylibraft-cu12==24.12.*,>=0.0.0a0 + - &ucx_py_cu12 ucx-py-cu12==0.41.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - &pylibraft_cu11 pylibraft-cu11==24.10.*,>=0.0.0a0 - - &ucx_py_cu11 ucx-py-cu11==0.40.*,>=0.0.0a0 + - &pylibraft_cu11 pylibraft-cu11==24.12.*,>=0.0.0a0 + - &ucx_py_cu11 ucx-py-cu11==0.41.*,>=0.0.0a0 - {matrix: null, packages: [*pylibraft_unsuffixed, *ucx_py_unsuffixed]} test_python_common: common: @@ -558,7 +558,7 @@ dependencies: packages: # UCXX is not currently a hard-dependency thus only installed during tests, # this will change in the future. - - &distributed_ucxx_unsuffixed distributed-ucxx==0.40.*,>=0.0.0a0 + - &distributed_ucxx_unsuffixed distributed-ucxx==0.41.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -571,12 +571,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - distributed-ucxx-cu12==0.40.*,>=0.0.0a0 + - distributed-ucxx-cu12==0.41.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - distributed-ucxx-cu11==0.40.*,>=0.0.0a0 + - distributed-ucxx-cu11==0.41.*,>=0.0.0a0 - {matrix: null, packages: [*distributed_ucxx_unsuffixed]} depends_on_ucx_build: common: diff --git a/docs/source/build.md b/docs/source/build.md index 4ba087e68d..b9a1832b02 100644 --- a/docs/source/build.md +++ b/docs/source/build.md @@ -56,7 +56,7 @@ You can also install the conda packages individually using the `mamba` command a mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.0 ``` -If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.10/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. +If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.12/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. ## Installing Python through Pip diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md index 516819b1c1..c4a099fabb 100644 --- a/docs/source/developer_guide.md +++ b/docs/source/developer_guide.md @@ -187,7 +187,7 @@ RAFT relies on `clang-format` to enforce code style across all C++ and CUDA sour 1. Do not split empty functions/records/namespaces. 2. Two-space indentation everywhere, including the line continuations. 3. Disable reflowing of comments. - The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.10/cpp/.clang-format). + The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.12/cpp/.clang-format). [`doxygen`](https://doxygen.nl/) is used as documentation generator and also as a documentation linter. In order to run doxygen as a linter on C++/CUDA code, run @@ -205,7 +205,7 @@ you can run `codespell -i 3 -w .` from the repository root directory. This will bring up an interactive prompt to select which spelling fixes to apply. ### #include style -[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.10/cpp/scripts/include_checker.py) is used to enforce the include style as follows: +[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.12/cpp/scripts/include_checker.py) is used to enforce the include style as follows: 1. `#include "..."` should be used for referencing local files only. It is acceptable to be used for referencing files in a sub-folder/parent-folder of the same algorithm, but should never be used to include files in other algorithms or between algorithms and the primitives or other dependencies. 2. `#include <...>` should be used for referencing everything else @@ -230,7 +230,7 @@ Call CUDA APIs via the provided helper macros `RAFT_CUDA_TRY`, `RAFT_CUBLAS_TRY` ## Logging ### Introduction -Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.10/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. +Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.12/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. ### Usage ```cpp diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index fc11a56ac8..faea9189c6 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -62,7 +62,7 @@ Nightly images are located in [dockerhub](https://hub.docker.com/r/rapidsai/raft - The following command pulls the nightly container for python version 10, cuda version 12, and RAFT version 23.10: ```bash -docker pull rapidsai/raft-ann-bench:24.10a-cuda12.0-py3.10 #substitute raft-ann-bench for the exact desired container. +docker pull rapidsai/raft-ann-bench:24.12a-cuda12.0-py3.10 #substitute raft-ann-bench for the exact desired container. ``` The CUDA and python versions can be changed for the supported values: @@ -83,7 +83,7 @@ You can see the exact versions as well in the dockerhub site: [//]: # () [//]: # (```bash) -[//]: # (docker pull nvcr.io/nvidia/rapidsai/raft-ann-bench:24.10-cuda11.8-py3.10 #substitute raft-ann-bench for the exact desired container.) +[//]: # (docker pull nvcr.io/nvidia/rapidsai/raft-ann-bench:24.12-cuda11.8-py3.10 #substitute raft-ann-bench for the exact desired container.) [//]: # (```) @@ -344,7 +344,7 @@ For GPU-enabled systems, the `DATA_FOLDER` variable should be a local folder whe export DATA_FOLDER=path/to/store/datasets/and/results docker run --gpus all --rm -it -u $(id -u) \ -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench:24.10a-cuda11.8-py3.10 \ + rapidsai/raft-ann-bench:24.12a-cuda11.8-py3.10 \ "--dataset deep-image-96-angular" \ "--normalize" \ "--algorithms raft_cagra,raft_ivf_pq --batch-size 10 -k 10" \ @@ -355,7 +355,7 @@ Usage of the above command is as follows: | Argument | Description | |-----------------------------------------------------------|----------------------------------------------------------------------------------------------------| -| `rapidsai/raft-ann-bench:24.10a-cuda11.8-py3.10` | Image to use. Can be either `raft-ann-bench` or `raft-ann-bench-datasets` | +| `rapidsai/raft-ann-bench:24.12a-cuda11.8-py3.10` | Image to use. Can be either `raft-ann-bench` or `raft-ann-bench-datasets` | | `"--dataset deep-image-96-angular"` | Dataset name | | `"--normalize"` | Whether to normalize the dataset | | `"--algorithms raft_cagra,hnswlib --batch-size 10 -k 10"` | Arguments passed to the `run` script, such as the algorithms to benchmark, the batch size, and `k` | @@ -372,7 +372,7 @@ The container arguments in the above section also be used for the CPU-only conta export DATA_FOLDER=path/to/store/datasets/and/results docker run --rm -it -u $(id -u) \ -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench-cpu:24.10a-py3.10 \ + rapidsai/raft-ann-bench-cpu:24.12a-py3.10 \ "--dataset deep-image-96-angular" \ "--normalize" \ "--algorithms hnswlib --batch-size 10 -k 10" \ @@ -389,7 +389,7 @@ docker run --gpus all --rm -it -u $(id -u) \ --entrypoint /bin/bash \ --workdir /data/benchmarks \ -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench:24.10a-cuda11.8-py3.10 + rapidsai/raft-ann-bench:24.12a-cuda11.8-py3.10 ``` This will drop you into a command line in the container, with the `raft-ann-bench` python package ready to use, as described in the [Running the benchmarks](#running-the-benchmarks) section above: diff --git a/python/pylibraft/pyproject.toml b/python/pylibraft/pyproject.toml index a540915585..f0f3849c6d 100644 --- a/python/pylibraft/pyproject.toml +++ b/python/pylibraft/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "nvidia-curand", "nvidia-cusolver", "nvidia-cusparse", - "rmm==24.10.*,>=0.0.0a0", + "rmm==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -125,7 +125,7 @@ requires = [ "cuda-python", "cython>=3.0.0", "ninja", - "rmm==24.10.*,>=0.0.0a0", + "rmm==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" diff --git a/python/raft-dask/cmake/thirdparty/get_ucxx.cmake b/python/raft-dask/cmake/thirdparty/get_ucxx.cmake index db6039393a..db9b5c6b4d 100644 --- a/python/raft-dask/cmake/thirdparty/get_ucxx.cmake +++ b/python/raft-dask/cmake/thirdparty/get_ucxx.cmake @@ -47,9 +47,9 @@ endfunction() # Change pinned tag here to test a commit in CI # To use a different RAFT locally, set the CMake variable # CPM_raft_SOURCE=/path/to/local/raft -find_and_configure_ucxx(VERSION 0.40 +find_and_configure_ucxx(VERSION 0.41 FORK rapidsai - PINNED_TAG branch-0.40 + PINNED_TAG branch-0.41 EXCLUDE_FROM_ALL YES UCXX_STATIC ${RAFT_DASK_UCXX_STATIC} ) diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index d1f577120f..d71f89085b 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -31,13 +31,13 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "dask-cuda==24.10.*,>=0.0.0a0", - "distributed-ucxx==0.40.*,>=0.0.0a0", + "dask-cuda==24.12.*,>=0.0.0a0", + "distributed-ucxx==0.41.*,>=0.0.0a0", "joblib>=0.11", "numba>=0.57", - "pylibraft==24.10.*,>=0.0.0a0", - "rapids-dask-dependency==24.10.*,>=0.0.0a0", - "ucx-py==0.40.*,>=0.0.0a0", + "pylibraft==24.12.*,>=0.0.0a0", + "rapids-dask-dependency==24.12.*,>=0.0.0a0", + "ucx-py==0.41.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", From cd02a8f2b8dc22ec2115e80e559a4ab98d402c6e Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Fri, 20 Sep 2024 13:05:44 -0400 Subject: [PATCH 07/28] Fix sed syntax [skip-ci] (#2441) --- ci/release/update-version.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 075eb896f2..5bb98511cf 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -58,7 +58,7 @@ for FILE in dependencies.yaml conda/environments/*.yaml; do sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}" done for DEP in "${UCXX_DEPENDENCIES[@]}"; do - sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_UCXX_SHORT_TAG_PEP440}.*/,>=0.0.0a0/g" "${FILE}" + sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_UCXX_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}" done done for FILE in python/*/pyproject.toml; do From f49567e1fb9f722f8864bc84880745b9b776eb61 Mon Sep 17 00:00:00 2001 From: Micka Date: Sat, 21 Sep 2024 01:09:00 +0200 Subject: [PATCH 08/28] Use runtime check of cudart version for eig (#2430) Authors: - Micka (https://github.com/lowener) Approvers: - Bradley Dice (https://github.com/bdice) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2430 --- cpp/include/raft/linalg/detail/eig.cuh | 31 ++++++++++++++------------ 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/cpp/include/raft/linalg/detail/eig.cuh b/cpp/include/raft/linalg/detail/eig.cuh index ba7ed3dcdf..561187178c 100644 --- a/cpp/include/raft/linalg/detail/eig.cuh +++ b/cpp/include/raft/linalg/detail/eig.cuh @@ -95,16 +95,19 @@ void eigDC(raft::resources const& handle, return; #endif -#if CUDART_VERSION <= 12040 - // Use a new stream instead of `cudaStreamPerThread` to avoid cusolver bug # 4580093. + int cudart_version = 0; + RAFT_CUDA_TRY(cudaRuntimeGetVersion(&cudart_version)); + cudaStream_t stream_new; + cudaEvent_t sync_event = resource::detail::get_cuda_stream_sync_event(handle); rmm::cuda_stream stream_new_wrapper; - cudaStream_t stream_new = stream_new_wrapper.value(); - cudaEvent_t sync_event = resource::detail::get_cuda_stream_sync_event(handle); - RAFT_CUDA_TRY(cudaEventRecord(sync_event, stream)); - RAFT_CUDA_TRY(cudaStreamWaitEvent(stream_new, sync_event)); -#else - cudaStream_t stream_new = stream; -#endif + if (cudart_version < 12050) { + // Use a new stream instead of `cudaStreamPerThread` to avoid cusolver bug # 4580093. + stream_new = stream_new_wrapper.value(); + RAFT_CUDA_TRY(cudaEventRecord(sync_event, stream)); + RAFT_CUDA_TRY(cudaStreamWaitEvent(stream_new, sync_event)); + } else { + stream_new = stream; + } cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle); cusolverDnParams_t dn_params = nullptr; @@ -152,11 +155,11 @@ void eigDC(raft::resources const& handle, "eig.cuh: eigensolver couldn't converge to a solution. " "This usually occurs when some of the features do not vary enough."); -#if CUDART_VERSION <= 12040 - // Synchronize the created stream with the original stream before return - RAFT_CUDA_TRY(cudaEventRecord(sync_event, stream_new)); - RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, sync_event)); -#endif + if (cudart_version < 12050) { + // Synchronize the created stream with the original stream before return + RAFT_CUDA_TRY(cudaEventRecord(sync_event, stream_new)); + RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, sync_event)); + } } enum EigVecMemUsage { OVERWRITE_INPUT, COPY_INPUT }; From b79f15d2f229849bc02425b2e4ffd7bd3db89d4c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 23 Sep 2024 16:09:01 -0500 Subject: [PATCH 09/28] Update fmt (to 11.0.2) and spdlog (to 1.14.1). (#2433) * Update fmt (to 11.0.2) and spdlog (to 1.14.1). * use rmm and ucxx CI artifacts * try using librmm wheels * try again to use rmm wheels * rapids-get-pr-wheel-artifact was missing RAPIDS_PY_WHEEL_NAME * ok you do not need to provide the Python slug yourself for rapids-get-pr-wheel-artifact * constraints need 'file://' protocol * try suppressing unreachable-code diagnostics from nvcc (this should be narrowed down / upstreamed before merging) * fix checks * Revert "try suppressing unreachable-code diagnostics from nvcc (this should be narrowed down / upstreamed before merging)" This reverts commit 3ba2201d678cc8befdfcaa0e89630a95a00a78b2. * copyright * move rapids-cmake overrides [skip ci] * kick off a build * fix dependency graph * devcontainer * run all CI * remove testing-only changes [skip ci] --- conda/recipes/libraft/conda_build_config.yaml | 4 ++-- .../raft-ann-bench-cpu/conda_build_config.yaml | 4 ++-- cpp/cmake/thirdparty/get_spdlog.cmake | 11 +---------- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/conda/recipes/libraft/conda_build_config.yaml b/conda/recipes/libraft/conda_build_config.yaml index 00b133c821..5c0047fb9c 100644 --- a/conda/recipes/libraft/conda_build_config.yaml +++ b/conda/recipes/libraft/conda_build_config.yaml @@ -73,7 +73,7 @@ cuda11_cuda_profiler_api_run_version: - ">=11.4.240,<12" spdlog_version: - - ">=1.12.0,<1.13" + - ">=1.14.1,<1.15" fmt_version: - - ">=10.1.1,<11" + - ">=11.0.2,<12" diff --git a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml index 70d1f0490e..ed6f708e14 100644 --- a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml +++ b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml @@ -23,7 +23,7 @@ nlohmann_json_version: - ">=3.11.2" spdlog_version: - - ">=1.12.0,<1.13" + - ">=1.14.1,<1.15" fmt_version: - - ">=10.1.1,<11" + - ">=11.0.2,<12" diff --git a/cpp/cmake/thirdparty/get_spdlog.cmake b/cpp/cmake/thirdparty/get_spdlog.cmake index 7be7804c7e..57e38c2638 100644 --- a/cpp/cmake/thirdparty/get_spdlog.cmake +++ b/cpp/cmake/thirdparty/get_spdlog.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -19,15 +19,6 @@ function(find_and_configure_spdlog) rapids_cpm_spdlog(FMT_OPTION "EXTERNAL_FMT_HO" INSTALL_EXPORT_SET rmm-exports) rapids_export_package(BUILD spdlog rmm-exports) - if(spdlog_ADDED) - rapids_export( - BUILD spdlog - EXPORT_SET spdlog - GLOBAL_TARGETS spdlog spdlog_header_only - NAMESPACE spdlog::) - include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root(BUILD spdlog [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET rmm-exports) - endif() endfunction() find_and_configure_spdlog() \ No newline at end of file From 878cefc923f25df07fc362126d1c269bdff30c0c Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Tue, 24 Sep 2024 14:10:57 -0500 Subject: [PATCH 10/28] update update-version.sh to use packaging lib (#2447) --- ci/release/update-version.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 5bb98511cf..032b88b4aa 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -25,8 +25,8 @@ NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} NEXT_UCXX_SHORT_TAG="$(curl -sL https://version.gpuci.io/rapids/${NEXT_SHORT_TAG})" # Need to distutils-normalize the original version -NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))") -NEXT_UCXX_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_UCXX_SHORT_TAG}'))") +NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))") +NEXT_UCXX_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_UCXX_SHORT_TAG}'))") echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG" From 17757b84c38dfa671abe5e8794fd2bd9bc636772 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 25 Sep 2024 13:49:36 -0500 Subject: [PATCH 11/28] Switch traceback to `native` (#2446) In cudf we have observed a ~10% speed up of pytest suite execution by switching pytest traceback to `--native`: ``` currently: 102474 passed, 2117 skipped, 902 xfailed in 892.16s (0:14:52) --tb=short: 102474 passed, 2117 skipped, 902 xfailed in 898.99s (0:14:58) --tb=no: 102474 passed, 2117 skipped, 902 xfailed in 815.98s (0:13:35) --tb=native: 102474 passed, 2117 skipped, 902 xfailed in 820.92s (0:13:40) ``` This PR makes a similar change to `raft` repo. xref: https://github.com/rapidsai/cudf/pull/16851 Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2446 --- python/pylibraft/pylibraft/test/pytest.ini | 5 +++++ python/raft-dask/pytest.ini | 1 + python/raft-dask/raft_dask/test/pytest.ini | 5 +++++ 3 files changed, 11 insertions(+) create mode 100644 python/pylibraft/pylibraft/test/pytest.ini create mode 100644 python/raft-dask/raft_dask/test/pytest.ini diff --git a/python/pylibraft/pylibraft/test/pytest.ini b/python/pylibraft/pylibraft/test/pytest.ini new file mode 100644 index 0000000000..bf70c06f84 --- /dev/null +++ b/python/pylibraft/pylibraft/test/pytest.ini @@ -0,0 +1,5 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +[pytest] +addopts = --tb=native + diff --git a/python/raft-dask/pytest.ini b/python/raft-dask/pytest.ini index 2467e2089a..e09c2b173d 100644 --- a/python/raft-dask/pytest.ini +++ b/python/raft-dask/pytest.ini @@ -10,3 +10,4 @@ markers = nccl: marks a test as using NCCL ucx: marks a test as using UCX-Py ucxx: marks a test as using UCXX +addopts = --tb=native diff --git a/python/raft-dask/raft_dask/test/pytest.ini b/python/raft-dask/raft_dask/test/pytest.ini new file mode 100644 index 0000000000..bf70c06f84 --- /dev/null +++ b/python/raft-dask/raft_dask/test/pytest.ini @@ -0,0 +1,5 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +[pytest] +addopts = --tb=native + From 0f8b09720054e5cf7a65dddfa02a2f06e7ba8adb Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Wed, 25 Sep 2024 20:53:54 -0400 Subject: [PATCH 12/28] Disable NN Descent Batch tests temporarily (#2453) Linked issue https://github.com/rapidsai/raft/issues/2450 Authors: - Divye Gala (https://github.com/divyegala) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2453 --- cpp/test/CMakeLists.txt | 4 +++- cpp/test/neighbors/ann_nn_descent.cuh | 18 ++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index a497e6d3ba..5d504d2100 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -440,7 +440,9 @@ if(BUILD_TESTS) neighbors/ann_nn_descent/test_float_uint32_t.cu neighbors/ann_nn_descent/test_int8_t_uint32_t.cu neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu - neighbors/ann_nn_descent/test_batch_float_uint32_t.cu + # TODO: Investigate why this test is failing Reference issue + # https://github.com/rapidsai/raft/issues/2450 + # neighbors/ann_nn_descent/test_batch_float_uint32_t.cu LIB EXPLICIT_INSTANTIATE_ONLY GPUS diff --git a/cpp/test/neighbors/ann_nn_descent.cuh b/cpp/test/neighbors/ann_nn_descent.cuh index 2f9d4e252b..5070d83b15 100644 --- a/cpp/test/neighbors/ann_nn_descent.cuh +++ b/cpp/test/neighbors/ann_nn_descent.cuh @@ -318,13 +318,15 @@ const std::vector inputs = raft::util::itertools::product inputsBatch = - raft::util::itertools::product( - {std::make_pair(0.9, 3lu), std::make_pair(0.9, 2lu)}, // min_recall, n_clusters - {4000, 5000}, // n_rows - {192, 512}, // dim - {32, 64}, // graph_degree - {raft::distance::DistanceType::L2Expanded}, - {false, true}); +// TODO: Investigate why this test is failing +// Reference issue https://github.com/rapidsai/raft/issues/2450 +// const std::vector inputsBatch = +// raft::util::itertools::product( +// {std::make_pair(0.9, 3lu), std::make_pair(0.9, 2lu)}, // min_recall, n_clusters +// {4000, 5000}, // n_rows +// {192, 512}, // dim +// {32, 64}, // graph_degree +// {raft::distance::DistanceType::L2Expanded}, +// {false, true}); } // namespace raft::neighbors::experimental::nn_descent From 2e705fbf5bfa9af148871ba509ac55593da9a098 Mon Sep 17 00:00:00 2001 From: Paul Taylor <178183+trxcllnt@users.noreply.github.com> Date: Wed, 25 Sep 2024 23:01:05 -0700 Subject: [PATCH 13/28] Add missing `cuda_suffixed: true` (#2440) This change ensures `rmm-cuXX` is not in the package dependencies when building the pylibraft wheel with `-C "rapidsai.matrix-entry=cuda_suffixed=false`. Authors: - Paul Taylor (https://github.com/trxcllnt) - James Lamb (https://github.com/jameslamb) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/raft/pull/2440 --- dependencies.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 8f5c69245f..91fb176b56 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -499,10 +499,14 @@ dependencies: - *cuda_python - output_types: [requirements, pyproject] matrices: - - matrix: {cuda: "12.*"} + - matrix: + cuda: "12.*" + cuda_suffixed: "true" packages: - *rmm_cu12 - - matrix: {cuda: "11.*"} + - matrix: + cuda: "11.*" + cuda_suffixed: "true" packages: - *rmm_cu11 - {matrix: null, packages: [*rmm_unsuffixed]} From 0284b424367f07220de133715043a963b341485a Mon Sep 17 00:00:00 2001 From: rhdong Date: Thu, 26 Sep 2024 07:42:29 -0700 Subject: [PATCH 14/28] [Feat] add `repeat`, `sparsity`, `eval_n_elements` APIs to `bitset` (#2439) - This PR is a part of the feature that applies the prefilter brute-force in Cagra. Authors: - rhdong (https://github.com/rhdong) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Micka (https://github.com/lowener) URL: https://github.com/rapidsai/raft/pull/2439 --- cpp/bench/prims/util/popc.cu | 7 +- cpp/include/raft/core/bitset.cuh | 107 +++++++++++++++++++++++- cpp/include/raft/core/bitset.hpp | 76 +++++++++++++++++ cpp/include/raft/util/detail/popc.cuh | 6 +- cpp/include/raft/util/popc.cuh | 4 +- cpp/test/core/bitset.cu | 114 +++++++++++++++++++++++--- cpp/test/util/popc.cu | 22 +++-- 7 files changed, 311 insertions(+), 25 deletions(-) diff --git a/cpp/bench/prims/util/popc.cu b/cpp/bench/prims/util/popc.cu index 249dc13d1e..c6249fb2bd 100644 --- a/cpp/bench/prims/util/popc.cu +++ b/cpp/bench/prims/util/popc.cu @@ -89,10 +89,9 @@ struct popc_bench : public fixture { auto bits_view = raft::make_device_vector_view(bits_d.data_handle(), bits_d.size()); - index_t max_len = params.n_rows * params.n_cols; - auto max_len_view = raft::make_host_scalar_view(&max_len); - auto nnz_actual_view = - nnz_actual_d.view(); // raft::make_device_scalar_view(nnz_actual_d.data_handle()); + index_t max_len = params.n_rows * params.n_cols; + auto max_len_view = raft::make_host_scalar_view(&max_len); + auto nnz_actual_view = nnz_actual_d.view(); raft::popc(this->handle, bits_view, max_len_view, nnz_actual_view); }); } diff --git a/cpp/include/raft/core/bitset.cuh b/cpp/include/raft/core/bitset.cuh index b6e6128eca..d1bffdb81e 100644 --- a/cpp/include/raft/core/bitset.cuh +++ b/cpp/include/raft/core/bitset.cuh @@ -26,6 +26,8 @@ #include #include +#include + #include namespace raft::core { @@ -60,6 +62,109 @@ _RAFT_DEVICE void bitset_view::set(const index_t sample_index } } +template +void bitset_view::count(const raft::resources& res, + raft::device_scalar_view count_gpu_scalar) const +{ + auto max_len = raft::make_host_scalar_view(&bitset_len_); + auto values = raft::make_device_vector_view(bitset_ptr_, n_elements()); + raft::popc(res, values, max_len, count_gpu_scalar); +} + +template +RAFT_KERNEL bitset_repeat_kernel(const bitset_t* src, + bitset_t* output, + index_t src_bit_len, + index_t repeat_times) +{ + constexpr index_t bits_per_element = sizeof(bitset_t) * 8; + int output_idx = blockIdx.x * blockDim.x + threadIdx.x; + + index_t total_bits = src_bit_len * repeat_times; + index_t output_size = (total_bits + bits_per_element - 1) / bits_per_element; + index_t src_size = (src_bit_len + bits_per_element - 1) / bits_per_element; + + if (output_idx < output_size) { + bitset_t result = 0; + index_t bit_written = 0; + + index_t start_bit = output_idx * bits_per_element; + + while (bit_written < bits_per_element && start_bit + bit_written < total_bits) { + index_t bit_idx = (start_bit + bit_written) % src_bit_len; + index_t src_word_idx = bit_idx / bits_per_element; + index_t src_offset = bit_idx % bits_per_element; + + index_t remaining_bits = min(bits_per_element - bit_written, src_bit_len - bit_idx); + + bitset_t src_value = (src[src_word_idx] >> src_offset); + + if (src_offset + remaining_bits > bits_per_element) { + bitset_t next_value = src[(src_word_idx + 1) % src_size]; + src_value |= (next_value << (bits_per_element - src_offset)); + } + src_value &= ((bitset_t{1} << remaining_bits) - 1); + result |= (src_value << bit_written); + bit_written += remaining_bits; + } + output[output_idx] = result; + } +} + +template +void bitset_repeat(raft::resources const& handle, + const bitset_t* d_src, + bitset_t* d_output, + index_t src_bit_len, + index_t repeat_times) +{ + if (src_bit_len == 0 || repeat_times == 0) return; + auto stream = resource::get_cuda_stream(handle); + + constexpr index_t bits_per_element = sizeof(bitset_t) * 8; + const index_t total_bits = src_bit_len * repeat_times; + const index_t output_size = (total_bits + bits_per_element - 1) / bits_per_element; + + int threadsPerBlock = 128; + int blocksPerGrid = (output_size + threadsPerBlock - 1) / threadsPerBlock; + bitset_repeat_kernel<<>>( + d_src, d_output, src_bit_len, repeat_times); + + return; +} + +template +void bitset_view::repeat(const raft::resources& res, + index_t times, + bitset_t* output_device_ptr) const +{ + auto thrust_policy = raft::resource::get_thrust_policy(res); + constexpr index_t bits_per_element = sizeof(bitset_t) * 8; + + if (bitset_len_ % bits_per_element == 0) { + index_t num_elements_to_copy = bitset_len_ / bits_per_element; + + for (index_t i = 0; i < times; ++i) { + raft::copy(output_device_ptr + i * num_elements_to_copy, + bitset_ptr_, + num_elements_to_copy, + raft::resource::get_cuda_stream(res)); + } + } else { + bitset_repeat(res, bitset_ptr_, output_device_ptr, bitset_len_, times); + } +} + +template +double bitset_view::sparsity(const raft::resources& res) const +{ + index_t size_h = this->size(); + if (0 == size_h) { return static_cast(1.0); } + index_t count_h = this->count(res); + + return static_cast((1.0 * (size_h - count_h)) / (1.0 * size_h)); +} + template bitset::bitset(const raft::resources& res, raft::device_vector_view mask_index, @@ -155,7 +260,7 @@ template void bitset::count(const raft::resources& res, raft::device_scalar_view count_gpu_scalar) { - auto max_len = raft::make_host_scalar_view(&bitset_len_); + auto max_len = raft::make_host_scalar_view(&bitset_len_); auto values = raft::make_device_vector_view(bitset_.data(), n_elements()); raft::popc(res, values, max_len, count_gpu_scalar); diff --git a/cpp/include/raft/core/bitset.hpp b/cpp/include/raft/core/bitset.hpp index 3608ee43fa..be828def87 100644 --- a/cpp/include/raft/core/bitset.hpp +++ b/cpp/include/raft/core/bitset.hpp @@ -22,6 +22,8 @@ #include #include +#include + namespace raft::core { /** * @defgroup bitset Bitset @@ -103,6 +105,80 @@ struct bitset_view { { return raft::make_device_vector_view(bitset_ptr_, n_elements()); } + /** + * @brief Returns the number of bits set to true in count_gpu_scalar. + * + * @param[in] res RAFT resources + * @param[out] count_gpu_scalar Device scalar to store the count + */ + void count(const raft::resources& res, raft::device_scalar_view count_gpu_scalar) const; + /** + * @brief Returns the number of bits set to true. + * + * @param res RAFT resources + * @return index_t Number of bits set to true + */ + auto count(const raft::resources& res) const -> index_t + { + auto count_gpu_scalar = raft::make_device_scalar(res, 0.0); + count(res, count_gpu_scalar.view()); + index_t count_cpu = 0; + raft::update_host( + &count_cpu, count_gpu_scalar.data_handle(), 1, resource::get_cuda_stream(res)); + resource::sync_stream(res); + return count_cpu; + } + + /** + * @brief Repeats the bitset data and copies it to the output device pointer. + * + * This function takes the original bitset data stored in the device memory + * and repeats it a specified number of times into a new location in the device memory. + * The bits are copied bit-by-bit to ensure that even if the number of bits (bitset_len_) + * is not a multiple of the bitset element size (e.g., 32 for uint32_t), the bits are + * tightly packed without any gaps between rows. + * + * @param res RAFT resources for managing CUDA streams and execution policies. + * @param times Number of times the bitset data should be repeated in the output. + * @param output_device_ptr Device pointer where the repeated bitset data will be stored. + * + * The caller must ensure that the output device pointer has enough memory allocated + * to hold `times * bitset_len` bits, where `bitset_len` is the number of bits in the original + * bitset. This function uses Thrust parallel algorithms to efficiently perform the operation on + * the GPU. + */ + void repeat(const raft::resources& res, index_t times, bitset_t* output_device_ptr) const; + + /** + * @brief Calculate the sparsity (fraction of 0s) of the bitset. + * + * This function computes the sparsity of the bitset, defined as the ratio of unset bits (0s) + * to the total number of bits in the set. If the total number of bits is zero, the function + * returns 1.0, indicating the set is fully sparse. + * + * @param res RAFT resources for managing CUDA streams and execution policies. + * @return double The sparsity of the bitset, i.e., the fraction of unset bits. + * + * This API will synchronize on the stream of `res`. + */ + double sparsity(const raft::resources& res) const; + + /** + * @brief Calculates the number of `bitset_t` elements required to store a bitset. + * + * This function computes the number of `bitset_t` elements needed to store a bitset, ensuring + * that all bits are accounted for. If the bitset length is not a multiple of the `bitset_t` size + * (in bits), the calculation rounds up to include the remaining bits in an additional `bitset_t` + * element. + * + * @param bitset_len The total length of the bitset in bits. + * @return size_t The number of `bitset_t` elements required to store the bitset. + */ + static inline size_t eval_n_elements(size_t bitset_len) + { + const size_t bits_per_element = sizeof(bitset_t) * 8; + return (bitset_len + bits_per_element - 1) / bits_per_element; + } private: bitset_t* bitset_ptr_; diff --git a/cpp/include/raft/util/detail/popc.cuh b/cpp/include/raft/util/detail/popc.cuh index 20b4814216..f335be6fd0 100644 --- a/cpp/include/raft/util/detail/popc.cuh +++ b/cpp/include/raft/util/detail/popc.cuh @@ -36,12 +36,12 @@ namespace raft::detail { */ template void popc(const raft::resources& res, - device_vector_view values, - raft::host_scalar_view max_len, + device_vector_view values, + raft::host_scalar_view max_len, raft::device_scalar_view counter) { auto values_size = values.size(); - auto values_matrix = raft::make_device_matrix_view( + auto values_matrix = raft::make_device_matrix_view( values.data_handle(), values_size, 1); auto counter_vector = raft::make_device_vector_view(counter.data_handle(), 1); diff --git a/cpp/include/raft/util/popc.cuh b/cpp/include/raft/util/popc.cuh index 153694e45e..d4bc01e274 100644 --- a/cpp/include/raft/util/popc.cuh +++ b/cpp/include/raft/util/popc.cuh @@ -31,8 +31,8 @@ namespace raft { */ template void popc(const raft::resources& res, - device_vector_view values, - raft::host_scalar_view max_len, + device_vector_view values, + raft::host_scalar_view max_len, raft::device_scalar_view counter) { detail::popc(res, values, max_len, counter); diff --git a/cpp/test/core/bitset.cu b/cpp/test/core/bitset.cu index b799297e8c..ac601274c1 100644 --- a/cpp/test/core/bitset.cu +++ b/cpp/test/core/bitset.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,12 +32,13 @@ struct test_spec_bitset { uint64_t bitset_len; uint64_t mask_len; uint64_t query_len; + uint64_t repeat_times; }; auto operator<<(std::ostream& os, const test_spec_bitset& ss) -> std::ostream& { os << "bitset{bitset_len: " << ss.bitset_len << ", mask_len: " << ss.mask_len - << ", query_len: " << ss.query_len << "}"; + << ", query_len: " << ss.query_len << ", repeat_times: " << ss.repeat_times << "}"; return os; } @@ -80,6 +81,48 @@ void flip_cpu_bitset(std::vector& bitset) } } +template +void repeat_cpu_bitset(std::vector& input, + size_t input_bits, + size_t repeat, + std::vector& output) +{ + const size_t output_bits = input_bits * repeat; + const size_t output_units = (output_bits + sizeof(bitset_t) * 8 - 1) / (sizeof(bitset_t) * 8); + + std::memset(output.data(), 0, output_units * sizeof(bitset_t)); + + size_t output_bit_index = 0; + + for (size_t r = 0; r < repeat; ++r) { + for (size_t i = 0; i < input_bits; ++i) { + size_t input_unit_index = i / (sizeof(bitset_t) * 8); + size_t input_bit_offset = i % (sizeof(bitset_t) * 8); + bool bit = (input[input_unit_index] >> input_bit_offset) & 1; + + size_t output_unit_index = output_bit_index / (sizeof(bitset_t) * 8); + size_t output_bit_offset = output_bit_index % (sizeof(bitset_t) * 8); + + output[output_unit_index] |= (static_cast(bit) << output_bit_offset); + + ++output_bit_index; + } + } +} + +template +double sparsity_cpu_bitset(std::vector& data, size_t total_bits) +{ + size_t one_count = 0; + for (size_t i = 0; i < total_bits; ++i) { + size_t unit_index = i / (sizeof(bitset_t) * 8); + size_t bit_offset = i % (sizeof(bitset_t) * 8); + bool bit = (data[unit_index] >> bit_offset) & 1; + if (bit == 1) { ++one_count; } + } + return static_cast((total_bits - one_count) / (1.0 * total_bits)); +} + template class BitsetTest : public testing::TestWithParam { protected: @@ -87,13 +130,19 @@ class BitsetTest : public testing::TestWithParam { const test_spec_bitset spec; std::vector bitset_result; std::vector bitset_ref; + std::vector bitset_repeat_ref; + std::vector bitset_repeat_result; raft::resources res; public: explicit BitsetTest() : spec(testing::TestWithParam::GetParam()), bitset_result(raft::ceildiv(spec.bitset_len, uint64_t(bitset_element_size))), - bitset_ref(raft::ceildiv(spec.bitset_len, uint64_t(bitset_element_size))) + bitset_ref(raft::ceildiv(spec.bitset_len, uint64_t(bitset_element_size))), + bitset_repeat_ref( + raft::ceildiv(spec.bitset_len * spec.repeat_times, uint64_t(bitset_element_size))), + bitset_repeat_result( + raft::ceildiv(spec.bitset_len * spec.repeat_times, uint64_t(bitset_element_size))) { } @@ -145,6 +194,50 @@ class BitsetTest : public testing::TestWithParam { resource::sync_stream(res, stream); ASSERT_TRUE(hostVecMatch(bitset_ref, bitset_result, raft::Compare())); + // test sparsity, repeat and eval_n_elements + { + auto my_bitset_view = my_bitset.view(); + auto sparsity_result = my_bitset_view.sparsity(res); + auto sparsity_ref = sparsity_cpu_bitset(bitset_ref, size_t(spec.bitset_len)); + ASSERT_EQ(sparsity_result, sparsity_ref); + + auto eval_n_elements = + bitset_view::eval_n_elements(spec.bitset_len * spec.repeat_times); + ASSERT_EQ(bitset_repeat_ref.size(), eval_n_elements); + + auto repeat_device = raft::make_device_vector(res, eval_n_elements); + RAFT_CUDA_TRY(cudaMemsetAsync( + repeat_device.data_handle(), 0, eval_n_elements * sizeof(bitset_t), stream)); + repeat_cpu_bitset( + bitset_ref, size_t(spec.bitset_len), size_t(spec.repeat_times), bitset_repeat_ref); + + my_bitset_view.repeat(res, index_t(spec.repeat_times), repeat_device.data_handle()); + + ASSERT_EQ(bitset_repeat_ref.size(), repeat_device.size()); + update_host( + bitset_repeat_result.data(), repeat_device.data_handle(), repeat_device.size(), stream); + ASSERT_EQ(bitset_repeat_ref.size(), bitset_repeat_result.size()); + + index_t errors = 0; + static constexpr index_t len_per_item = sizeof(bitset_t) * 8; + bitset_t tail_len = (index_t(spec.bitset_len * spec.repeat_times) % len_per_item); + bitset_t tail_mask = + tail_len ? (bitset_t)((bitset_t{1} << tail_len) - bitset_t{1}) : ~bitset_t{0}; + for (index_t i = 0; i < bitset_repeat_ref.size(); i++) { + if (i == bitset_repeat_ref.size() - 1) { + errors += (bitset_repeat_ref[i] & tail_mask) != (bitset_repeat_result[i] & tail_mask); + } else { + errors += (bitset_repeat_ref[i] != bitset_repeat_result[i]); + } + } + ASSERT_EQ(errors, 0); + + // recheck the sparsity after repeat + sparsity_result = + sparsity_cpu_bitset(bitset_repeat_result, size_t(spec.bitset_len * spec.repeat_times)); + ASSERT_EQ(sparsity_result, sparsity_ref); + } + // Flip the bitset and re-test auto bitset_count = my_bitset.count(res); my_bitset.flip(res); @@ -167,13 +260,14 @@ class BitsetTest : public testing::TestWithParam { } }; -auto inputs_bitset = ::testing::Values(test_spec_bitset{32, 5, 10}, - test_spec_bitset{100, 30, 10}, - test_spec_bitset{1024, 55, 100}, - test_spec_bitset{10000, 1000, 1000}, - test_spec_bitset{1 << 15, 1 << 3, 1 << 12}, - test_spec_bitset{1 << 15, 1 << 24, 1 << 13}, - test_spec_bitset{1 << 25, 1 << 23, 1 << 14}); +auto inputs_bitset = ::testing::Values(test_spec_bitset{32, 5, 10, 101}, + test_spec_bitset{100, 30, 10, 13}, + test_spec_bitset{1024, 55, 100, 1}, + test_spec_bitset{10000, 1000, 1000, 100}, + test_spec_bitset{1 << 15, 1 << 3, 1 << 12, 5}, + test_spec_bitset{1 << 15, 1 << 24, 1 << 13, 3}, + test_spec_bitset{1 << 25, 1 << 23, 1 << 14, 3}, + test_spec_bitset{1 << 25, 1 << 23, 1 << 14, 21}); using Uint16_32 = BitsetTest; TEST_P(Uint16_32, Run) { run(); } diff --git a/cpp/test/util/popc.cu b/cpp/test/util/popc.cu index c08faacb07..28eaad2fcb 100644 --- a/cpp/test/util/popc.cu +++ b/cpp/test/util/popc.cu @@ -76,7 +76,7 @@ class PopcTest : public ::testing::TestWithParam> { index_t bit_position = index % (8 * sizeof(bits_t)); if (((element >> bit_position) & 1) == 0) { - element |= (static_cast(1) << bit_position); + element |= (static_cast(1) << bit_position); num_ones--; } } @@ -101,7 +101,7 @@ class PopcTest : public ::testing::TestWithParam> { raft::make_device_vector_view(bits_d.data(), bits_d.size()); index_t max_len = params.n_rows * params.n_cols; - auto max_len_view = raft::make_host_scalar_view(&max_len); + auto max_len_view = raft::make_host_scalar_view(&max_len); index_t nnz_actual_h = 0; rmm::device_scalar nnz_actual_d(0, stream); @@ -123,8 +123,17 @@ class PopcTest : public ::testing::TestWithParam> { index_t nnz_expected; }; -using PopcTestI32 = PopcTest; -TEST_P(PopcTestI32, Result) { Run(); } +using PopcTestI32_U32 = PopcTest; +TEST_P(PopcTestI32_U32, Result) { Run(); } + +using PopcTestI32_U64 = PopcTest; +TEST_P(PopcTestI32_U64, Result) { Run(); } + +using PopcTestI32_U16 = PopcTest; +TEST_P(PopcTestI32_U16, Result) { Run(); } + +using PopcTestI32_U8 = PopcTest; +TEST_P(PopcTestI32_U8, Result) { Run(); } template const std::vector> popc_inputs = { @@ -154,6 +163,9 @@ const std::vector> popc_inputs = { {2, 33, 0.2}, }; -INSTANTIATE_TEST_CASE_P(PopcTest, PopcTestI32, ::testing::ValuesIn(popc_inputs)); +INSTANTIATE_TEST_CASE_P(PopcTest, PopcTestI32_U32, ::testing::ValuesIn(popc_inputs)); +INSTANTIATE_TEST_CASE_P(PopcTest, PopcTestI32_U64, ::testing::ValuesIn(popc_inputs)); +INSTANTIATE_TEST_CASE_P(PopcTest, PopcTestI32_U16, ::testing::ValuesIn(popc_inputs)); +INSTANTIATE_TEST_CASE_P(PopcTest, PopcTestI32_U8, ::testing::ValuesIn(popc_inputs)); } // namespace raft From f37c41c54fc64a4e3689e5a61851ba3821800fee Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 26 Sep 2024 09:45:11 -0500 Subject: [PATCH 15/28] bump NCCL floor to 2.18.1.1 (#2443) Contributes to https://github.com/rapidsai/build-planning/issues/102 Some RAPIDS libraries are using `ncclCommSplit()`, which was introduced in `nccl==2.18.1.1`. This is part of a series of PRs across RAPIDS updating libraries' pins to `nccl>=2.18.1.1` to ensure they get a new-enough version that supports that. Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/raft/pull/2443 --- conda/environments/all_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/environments/bench_ann_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/bench_ann_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/bench_ann_cuda-120_arch-aarch64.yaml | 2 +- conda/environments/bench_ann_cuda-120_arch-x86_64.yaml | 2 +- conda/recipes/libraft/conda_build_config.yaml | 2 +- conda/recipes/raft-ann-bench/conda_build_config.yaml | 2 +- conda/recipes/raft-dask/conda_build_config.yaml | 3 +++ conda/recipes/raft-dask/meta.yaml | 4 ++-- dependencies.yaml | 2 +- 13 files changed, 16 insertions(+), 13 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 462874a7e7..de4eb7e690 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -36,7 +36,7 @@ dependencies: - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 - libucxx==0.40.*,>=0.0.0a0 -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - numba>=0.57 - numpy>=1.23,<3.0a0 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index cfd974a6a8..26f4c1efaa 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -36,7 +36,7 @@ dependencies: - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 - libucxx==0.40.*,>=0.0.0a0 -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - numba>=0.57 - numpy>=1.23,<3.0a0 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index 82e391e9ae..692956502b 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -33,7 +33,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libucxx==0.40.*,>=0.0.0a0 -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - numba>=0.57 - numpy>=1.23,<3.0a0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 0389427d13..133d42bfee 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -33,7 +33,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libucxx==0.40.*,>=0.0.0a0 -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - numba>=0.57 - numpy>=1.23,<3.0a0 diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index eff1c56840..f99cedd627 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -32,7 +32,7 @@ dependencies: - libcusparse=11.7.5.86 - libucxx==0.40.*,>=0.0.0a0 - matplotlib -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - nlohmann_json>=3.11.2 - nvcc_linux-aarch64=11.8 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 87b19d2952..08aea32ab1 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -32,7 +32,7 @@ dependencies: - libcusparse=11.7.5.86 - libucxx==0.40.*,>=0.0.0a0 - matplotlib -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - nlohmann_json>=3.11.2 - nvcc_linux-64=11.8 diff --git a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml index ff3451c15c..572ad85ab5 100644 --- a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml @@ -29,7 +29,7 @@ dependencies: - libcusparse-dev - libucxx==0.40.*,>=0.0.0a0 - matplotlib -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - nlohmann_json>=3.11.2 - openblas diff --git a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml index 085e099ae8..9fd23edb03 100644 --- a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml @@ -29,7 +29,7 @@ dependencies: - libcusparse-dev - libucxx==0.40.*,>=0.0.0a0 - matplotlib -- nccl>=2.9.9 +- nccl>=2.18.1.1 - ninja - nlohmann_json>=3.11.2 - openblas diff --git a/conda/recipes/libraft/conda_build_config.yaml b/conda/recipes/libraft/conda_build_config.yaml index 5c0047fb9c..6dc06648bd 100644 --- a/conda/recipes/libraft/conda_build_config.yaml +++ b/conda/recipes/libraft/conda_build_config.yaml @@ -20,7 +20,7 @@ cmake_version: - ">=3.26.4,!=3.30.0" nccl_version: - - ">=2.9.9" + - ">=2.18.1.1" glog_version: - ">=0.6.0" diff --git a/conda/recipes/raft-ann-bench/conda_build_config.yaml b/conda/recipes/raft-ann-bench/conda_build_config.yaml index db0083b583..bdb4e883ea 100644 --- a/conda/recipes/raft-ann-bench/conda_build_config.yaml +++ b/conda/recipes/raft-ann-bench/conda_build_config.yaml @@ -20,7 +20,7 @@ cmake_version: - ">=3.26.4,!=3.30.0" nccl_version: - - ">=2.9.9" + - ">=2.18.1.1" glog_version: - ">=0.6.0" diff --git a/conda/recipes/raft-dask/conda_build_config.yaml b/conda/recipes/raft-dask/conda_build_config.yaml index e6afed2890..58e8ec3c9e 100644 --- a/conda/recipes/raft-dask/conda_build_config.yaml +++ b/conda/recipes/raft-dask/conda_build_config.yaml @@ -24,3 +24,6 @@ ucxx_version: cmake_version: - ">=3.26.4,!=3.30.0" + +nccl_version: + - ">=2.18.1.1" diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml index 74b26b5935..bc13d352b7 100644 --- a/conda/recipes/raft-dask/meta.yaml +++ b/conda/recipes/raft-dask/meta.yaml @@ -50,7 +50,7 @@ requirements: {% endif %} - cuda-version ={{ cuda_version }} - cython >=3.0.0 - - nccl >=2.9.9 + - nccl {{ nccl_version }} - pylibraft {{ version }} - python x.x - rmm ={{ minor_version }} @@ -68,7 +68,7 @@ requirements: - dask-cuda ={{ minor_version }} - rapids-dask-dependency ={{ minor_version }} - joblib >=0.11 - - nccl >=2.9.9 + - nccl {{ nccl_version }} - pylibraft {{ version }} - python x.x - rmm ={{ minor_version }} diff --git a/dependencies.yaml b/dependencies.yaml index 91fb176b56..e833e8519a 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -170,7 +170,7 @@ dependencies: packages: - c-compiler - cxx-compiler - - nccl>=2.9.9 + - nccl>=2.18.1.1 - libucxx==0.40.*,>=0.0.0a0 specific: - output_types: conda From 704feb1a0249e73bb0cab96968b892e8d5823b46 Mon Sep 17 00:00:00 2001 From: rhdong Date: Thu, 26 Sep 2024 09:32:41 -0700 Subject: [PATCH 16/28] [Opt] Enforce the UT Coverity and add benchmark for `transpose` (#2438) Authors: - rhdong (https://github.com/rhdong) Approvers: - Artem M. Chirkin (https://github.com/achirkin) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2438 --- cpp/bench/prims/CMakeLists.txt | 1 + cpp/bench/prims/linalg/transpose.cu | 85 ++++ cpp/include/raft/linalg/detail/transpose.cuh | 67 ++- cpp/test/linalg/transpose.cu | 445 ++++++++++++++----- 4 files changed, 465 insertions(+), 133 deletions(-) create mode 100644 cpp/bench/prims/linalg/transpose.cu diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt index 9d80cbaac2..52c63ad73b 100644 --- a/cpp/bench/prims/CMakeLists.txt +++ b/cpp/bench/prims/CMakeLists.txt @@ -132,6 +132,7 @@ if(BUILD_PRIMS_BENCH) linalg/reduce_rows_by_key.cu linalg/reduce.cu linalg/sddmm.cu + linalg/transpose.cu main.cpp ) diff --git a/cpp/bench/prims/linalg/transpose.cu b/cpp/bench/prims/linalg/transpose.cu new file mode 100644 index 0000000000..e60e50c125 --- /dev/null +++ b/cpp/bench/prims/linalg/transpose.cu @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace raft::bench::linalg { + +template +struct transpose_input { + IdxT rows, cols; +}; + +template +inline auto operator<<(std::ostream& os, const transpose_input& p) -> std::ostream& +{ + os << p.rows << "#" << p.cols; + return os; +} + +template +struct TransposeBench : public fixture { + TransposeBench(const transpose_input& p) + : params(p), in(p.rows * p.cols, stream), out(p.rows * p.cols, stream) + { + raft::random::RngState rng{1234}; + raft::random::uniform(handle, rng, in.data(), p.rows * p.cols, (T)-10.0, (T)10.0); + } + + void run_benchmark(::benchmark::State& state) override + { + std::ostringstream label_stream; + label_stream << params; + state.SetLabel(label_stream.str()); + + loop_on_state(state, [this]() { + auto input_view = + raft::make_device_matrix_view(in.data(), params.rows, params.cols); + auto output_view = raft::make_device_vector_view(out.data(), params.rows); + raft::linalg::transpose(handle, + input_view.data_handle(), + output_view.data_handle(), + params.rows, + params.cols, + handle.get_stream()); + }); + } + + private: + transpose_input params; + rmm::device_uvector in, out; +}; // struct TransposeBench + +const std::vector> transpose_inputs_i32 = + raft::util::itertools::product>({10, 128, 256, 512, 1024}, + {10000, 100000, 1000000}); + +RAFT_BENCH_REGISTER((TransposeBench), "", transpose_inputs_i32); +RAFT_BENCH_REGISTER((TransposeBench), "", transpose_inputs_i32); + +RAFT_BENCH_REGISTER((TransposeBench), "", transpose_inputs_i32); +RAFT_BENCH_REGISTER((TransposeBench), "", transpose_inputs_i32); + +} // namespace raft::bench::linalg diff --git a/cpp/include/raft/linalg/detail/transpose.cuh b/cpp/include/raft/linalg/detail/transpose.cuh index ec60aacc9c..c5f0544b5c 100644 --- a/cpp/include/raft/linalg/detail/transpose.cuh +++ b/cpp/include/raft/linalg/detail/transpose.cuh @@ -38,7 +38,9 @@ template RAFT_KERNEL transpose_half_kernel(IndexType n_rows, IndexType n_cols, const half* __restrict__ in, - half* __restrict__ out) + half* __restrict__ out, + const IndexType stride_in, + const IndexType stride_out) { __shared__ half tile[TILE_DIM][TILE_DIM + 1]; @@ -49,7 +51,7 @@ RAFT_KERNEL transpose_half_kernel(IndexType n_rows, for (int j = 0; j < TILE_DIM; j += BLOCK_ROWS) { if (x < n_cols && (y + j) < n_rows) { - tile[threadIdx.y + j][threadIdx.x] = __ldg(&in[(y + j) * n_cols + x]); + tile[threadIdx.y + j][threadIdx.x] = __ldg(&in[(y + j) * stride_in + x]); } } __syncthreads(); @@ -59,7 +61,7 @@ RAFT_KERNEL transpose_half_kernel(IndexType n_rows, for (int j = 0; j < TILE_DIM; j += BLOCK_ROWS) { if (x < n_rows && (y + j) < n_cols) { - out[(y + j) * n_rows + x] = tile[threadIdx.x][threadIdx.y + j]; + out[(y + j) * stride_out + x] = tile[threadIdx.x][threadIdx.y + j]; } } __syncthreads(); @@ -67,9 +69,33 @@ RAFT_KERNEL transpose_half_kernel(IndexType n_rows, } } +/** + * @brief Transposes a matrix stored in row-major order. + * + * This function transposes a matrix of half-precision floating-point numbers (`half`). + * Both the input (`in`) and output (`out`) matrices are assumed to be stored in row-major order. + * + * @tparam IndexType The type used for indexing the matrix dimensions (e.g., int). + * @param handle The RAFT resource handle which contains resources. + * @param n_rows The number of rows in the input matrix. + * @param n_cols The number of columns in the input matrix. + * @param in Pointer to the input matrix in row-major order. + * @param out Pointer to the output matrix in row-major order, where the transposed matrix will be + * stored. + * @param stride_in The stride (number of elements between consecutive rows) for the input matrix. + * Default is 1, which means the input matrix is contiguous in memory. + * @param stride_out The stride (number of elements between consecutive rows) for the output matrix. + * Default is 1, which means the output matrix is contiguous in memory. + */ + template -void transpose_half( - raft::resources const& handle, IndexType n_rows, IndexType n_cols, const half* in, half* out) +void transpose_half(raft::resources const& handle, + IndexType n_rows, + IndexType n_cols, + const half* in, + half* out, + const IndexType stride_in = 1, + const IndexType stride_out = 1) { if (n_cols == 0 || n_rows == 0) return; auto stream = resource::get_cuda_stream(handle); @@ -100,8 +126,13 @@ void transpose_half( dim3 grids(adjusted_grid_x, adjusted_grid_y); - transpose_half_kernel - <<>>(n_rows, n_cols, in, out); + if (stride_in > 1 || stride_out > 1) { + transpose_half_kernel + <<>>(n_rows, n_cols, in, out, stride_in, stride_out); + } else { + transpose_half_kernel + <<>>(n_rows, n_cols, in, out, n_cols, n_rows); + } RAFT_CUDA_TRY(cudaPeekAtLastError()); } @@ -118,7 +149,7 @@ void transpose(raft::resources const& handle, int out_n_cols = n_rows; if constexpr (std::is_same_v) { - transpose_half(handle, out_n_rows, out_n_cols, in, out); + transpose_half(handle, n_cols, n_rows, in, out); } else { cublasHandle_t cublas_h = resource::get_cublas_handle(handle); RAFT_CUBLAS_TRY(cublasSetStream(cublas_h, stream)); @@ -195,9 +226,13 @@ void transpose_row_major_impl( raft::mdspan, LayoutPolicy, AccessorPolicy> in, raft::mdspan, LayoutPolicy, AccessorPolicy> out) { - auto out_n_rows = in.extent(1); - auto out_n_cols = in.extent(0); - transpose_half(handle, out_n_cols, out_n_rows, in.data_handle(), out.data_handle()); + transpose_half(handle, + in.extent(0), + in.extent(1), + in.data_handle(), + out.data_handle(), + in.stride(0), + out.stride(0)); } template @@ -233,9 +268,13 @@ void transpose_col_major_impl( raft::mdspan, LayoutPolicy, AccessorPolicy> in, raft::mdspan, LayoutPolicy, AccessorPolicy> out) { - auto out_n_rows = in.extent(1); - auto out_n_cols = in.extent(0); - transpose_half(handle, out_n_rows, out_n_cols, in.data_handle(), out.data_handle()); + transpose_half(handle, + in.extent(1), + in.extent(0), + in.data_handle(), + out.data_handle(), + in.stride(1), + out.stride(1)); } }; // end namespace detail diff --git a/cpp/test/linalg/transpose.cu b/cpp/test/linalg/transpose.cu index cbe869a9a5..22fc1c1d60 100644 --- a/cpp/test/linalg/transpose.cu +++ b/cpp/test/linalg/transpose.cu @@ -29,48 +29,104 @@ #include +#include + +namespace std { +template <> +struct is_floating_point : std::true_type {}; +} // namespace std + namespace raft { namespace linalg { template -struct TranposeInputs { +void initialize_array(T* data_h, size_t size) +{ + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<> dis(0.0, 1.0); + + for (size_t i = 0; i < size; ++i) { + if constexpr (std::is_same_v) { + data_h[i] = __float2half(static_cast(dis(gen))); + } else { + data_h[i] = static_cast(dis(gen)); + } + } +} + +template +void cpu_transpose_row_major( + const T* input, T* output, int rows, int cols, int stride_in = -1, int stride_out = -1) +{ + stride_in = stride_in == -1 ? cols : stride_in; + stride_out = stride_out == -1 ? rows : stride_out; + if (stride_in) + for (int i = 0; i < rows; ++i) { + for (int j = 0; j < cols; ++j) { + output[j * stride_out + i] = input[i * stride_in + j]; + } + } +} + +template +void cpu_transpose_col_major( + const T* input, T* output, int rows, int cols, int stride_in = -1, int stride_out = -1) +{ + cpu_transpose_row_major(input, output, cols, rows, stride_in, stride_out); +} + +bool validate_half(const half* h_ref, const half* h_result, half tolerance, int len) +{ + bool success = true; + for (int i = 0; i < len; ++i) { + if (raft::abs(__half2float(h_result[i]) - __half2float(h_ref[i])) >= __half2float(tolerance)) { + success = false; + break; + } + if (!success) break; + } + return success; +} + +namespace transpose_regular_test { + +template +struct TransposeInputs { T tolerance; - int len; int n_row; int n_col; unsigned long long int seed; }; template -::std::ostream& operator<<(::std::ostream& os, const TranposeInputs& dims) -{ - return os; -} - -template -class TransposeTest : public ::testing::TestWithParam> { +class TransposeTest : public ::testing::TestWithParam> { public: TransposeTest() - : params(::testing::TestWithParam>::GetParam()), + : params(::testing::TestWithParam>::GetParam()), stream(resource::get_cuda_stream(handle)), - data(params.len, stream), - data_trans_ref(params.len, stream), - data_trans(params.len, stream) + data(params.n_row * params.n_col, stream), + data_trans_ref(params.n_row * params.n_col, stream), + data_trans(params.n_row * params.n_col, stream) { } protected: void SetUp() override { - int len = params.len; - ASSERT(params.len == 9, "This test works only with len=9!"); - T data_h[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; - raft::update_device(data.data(), data_h, len, stream); - T data_ref_h[] = {1.0, 4.0, 7.0, 2.0, 5.0, 8.0, 3.0, 6.0, 9.0}; - raft::update_device(data_trans_ref.data(), data_ref_h, len, stream); + int len = params.n_row * params.n_col; + std::vector data_h(len); + std::vector data_ref_h(len); + + initialize_array(data_h.data(), len); + + cpu_transpose_col_major(data_h.data(), data_ref_h.data(), params.n_row, params.n_col); + + raft::update_device(data.data(), data_h.data(), len, stream); + raft::update_device(data_trans_ref.data(), data_ref_h.data(), len, stream); transpose(handle, data.data(), data_trans.data(), params.n_row, params.n_col, stream); - transpose(data.data(), params.n_row, stream); + if (params.n_row == params.n_col) { transpose(data.data(), params.n_col, stream); } resource::sync_stream(handle, stream); } @@ -78,28 +134,45 @@ class TransposeTest : public ::testing::TestWithParam> { raft::resources handle; cudaStream_t stream; - TranposeInputs params; + TransposeInputs params; rmm::device_uvector data, data_trans, data_trans_ref; }; -const std::vector> inputsf2 = {{0.1f, 3 * 3, 3, 3, 1234ULL}}; - -const std::vector> inputsd2 = {{0.1, 3 * 3, 3, 3, 1234ULL}}; - -const std::vector> inputsh2 = {{0.1, 3 * 3, 3, 3, 1234ULL}}; +const std::vector> inputsf2 = {{0.1f, 3, 3, 1234ULL}, + {0.1f, 3, 4, 1234ULL}, + {0.1f, 300, 300, 1234ULL}, + {0.1f, 300, 4100, 1234ULL}, + {0.1f, 1, 13000, 1234ULL}, + {0.1f, 3, 130001, 1234ULL}}; + +const std::vector> inputsd2 = {{0.1f, 3, 3, 1234ULL}, + {0.1f, 3, 4, 1234ULL}, + {0.1f, 300, 300, 1234ULL}, + {0.1f, 300, 4100, 1234ULL}, + {0.1f, 1, 13000, 1234ULL}, + {0.1f, 3, 130001, 1234ULL}}; + +const std::vector> inputsh2 = {{0.1f, 3, 3, 1234ULL}, + {0.1f, 3, 4, 1234ULL}, + {0.1f, 300, 300, 1234ULL}, + {0.1f, 300, 4100, 1234ULL}, + {0.1f, 1, 13000, 1234ULL}, + {0.1f, 3, 130001, 1234ULL}}; typedef TransposeTest TransposeTestValF; TEST_P(TransposeTestValF, Result) { ASSERT_TRUE(raft::devArrMatch(data_trans_ref.data(), data_trans.data(), - params.len, + params.n_row * params.n_col, raft::CompareApproxAbs(params.tolerance))); - ASSERT_TRUE(raft::devArrMatch(data_trans_ref.data(), - data.data(), - params.len, - raft::CompareApproxAbs(params.tolerance))); + if (params.n_row == params.n_col) { + ASSERT_TRUE(raft::devArrMatch(data_trans_ref.data(), + data.data(), + params.n_row * params.n_col, + raft::CompareApproxAbs(params.tolerance))); + } } typedef TransposeTest TransposeTestValD; @@ -107,59 +180,47 @@ TEST_P(TransposeTestValD, Result) { ASSERT_TRUE(raft::devArrMatch(data_trans_ref.data(), data_trans.data(), - params.len, - raft::CompareApproxAbs(params.tolerance))); - - ASSERT_TRUE(raft::devArrMatch(data_trans_ref.data(), - data.data(), - params.len, + params.n_row * params.n_col, raft::CompareApproxAbs(params.tolerance))); -} - -bool validate_half(const half* h_ref, const half* h_result, half tolerance, int len) -{ - bool success = true; - for (int i = 0; i < len; ++i) { - if (raft::abs(__half2float(h_result[i]) - __half2float(h_ref[i])) >= __half2float(tolerance)) { - success = false; - break; - } - if (!success) break; + if (params.n_row == params.n_col) { + ASSERT_TRUE(raft::devArrMatch(data_trans_ref.data(), + data.data(), + params.n_row * params.n_col, + raft::CompareApproxAbs(params.tolerance))); } - return success; } typedef TransposeTest TransposeTestValH; TEST_P(TransposeTestValH, Result) { - half data_trans_ref_h[params.len]; - half data_trans_h[params.len]; - half data_h[params.len]; + auto len = params.n_row * params.n_col; - RAFT_CUDA_TRY(cudaMemcpyAsync(data_trans_ref_h, - data_trans_ref.data(), - params.len * sizeof(half), - cudaMemcpyDeviceToHost, - stream)); - - RAFT_CUDA_TRY(cudaMemcpyAsync( - data_trans_h, data_trans.data(), params.len * sizeof(half), cudaMemcpyDeviceToHost, stream)); - RAFT_CUDA_TRY(cudaMemcpyAsync( - data_h, data.data(), params.len * sizeof(half), cudaMemcpyDeviceToHost, stream)); + std::vector data_trans_ref_h(len); + std::vector data_trans_h(len); + std::vector data_h(len); + raft::copy( + data_trans_ref_h.data(), data_trans_ref.data(), len, resource::get_cuda_stream(handle)); + raft::copy(data_trans_h.data(), data_trans.data(), len, resource::get_cuda_stream(handle)); + raft::copy(data_h.data(), data.data(), len, resource::get_cuda_stream(handle)); resource::sync_stream(handle, stream); - ASSERT_TRUE(validate_half(data_trans_ref_h, data_trans_h, params.tolerance, params.len)); - ASSERT_TRUE(validate_half(data_trans_ref_h, data_h, params.tolerance, params.len)); + ASSERT_TRUE(validate_half( + data_trans_ref_h.data(), data_trans_h.data(), params.tolerance, params.n_row * params.n_col)); + + if (params.n_row == params.n_col) { + ASSERT_TRUE(validate_half( + data_trans_ref_h.data(), data_h.data(), params.tolerance, params.n_row * params.n_col)); + } } INSTANTIATE_TEST_SUITE_P(TransposeTests, TransposeTestValF, ::testing::ValuesIn(inputsf2)); - INSTANTIATE_TEST_SUITE_P(TransposeTests, TransposeTestValD, ::testing::ValuesIn(inputsd2)); - INSTANTIATE_TEST_SUITE_P(TransposeTests, TransposeTestValH, ::testing::ValuesIn(inputsh2)); +} // namespace transpose_regular_test + +namespace transpose_extra_test { -namespace { /** * We hide these functions in tests for now until we have a heterogeneous mdarray * implementation. @@ -230,79 +291,225 @@ template } } +template +struct TransposeMdspanInputs { + int n_row; + int n_col; + T tolerance = T{0.01}; +}; + template -void test_transpose_with_mdspan() +void test_transpose_with_mdspan(const TransposeMdspanInputs& param) { + auto len = param.n_row * param.n_col; + std::vector in_h(len); + std::vector out_ref_h(len); + + initialize_array(in_h.data(), len); + raft::resources handle; - auto v = make_device_matrix(handle, 32, 3); - T k{0}; - for (size_t i = 0; i < v.extent(0); ++i) { - for (size_t j = 0; j < v.extent(1); ++j) { - v(i, j) = k++; - } + auto stream = resource::get_cuda_stream(handle); + auto in = make_device_matrix(handle, param.n_row, param.n_col); + auto out_ref = make_device_matrix(handle, param.n_row, param.n_col); + resource::sync_stream(handle, stream); + if constexpr (std::is_same_v) { + cpu_transpose_row_major(in_h.data(), out_ref_h.data(), param.n_row, param.n_col); + } else { + cpu_transpose_col_major(in_h.data(), out_ref_h.data(), param.n_row, param.n_col); } - auto out = transpose(handle, v.view()); - static_assert(std::is_same_v); - ASSERT_EQ(out.extent(0), v.extent(1)); - ASSERT_EQ(out.extent(1), v.extent(0)); + raft::copy(in.data_handle(), in_h.data(), len, resource::get_cuda_stream(handle)); + raft::copy(out_ref.data_handle(), out_ref_h.data(), len, resource::get_cuda_stream(handle)); - k = 0; - for (size_t i = 0; i < out.extent(1); ++i) { - for (size_t j = 0; j < out.extent(0); ++j) { - ASSERT_EQ(out(j, i), k++); - } + auto out = transpose(handle, in.view()); + static_assert(std::is_same_v); + ASSERT_EQ(out.extent(0), in.extent(1)); + ASSERT_EQ(out.extent(1), in.extent(0)); + if constexpr (std::is_same_v) { + std::vector out_h(len); + raft::copy(out_h.data(), out.data_handle(), len, resource::get_cuda_stream(handle)); + ASSERT_TRUE(validate_half(out_ref_h.data(), out_h.data(), param.tolerance, len)); + } else { + ASSERT_TRUE(raft::devArrMatch( + out_ref.data_handle(), out.data_handle(), len, raft::CompareApproxAbs(param.tolerance))); } } -} // namespace -TEST(TransposeTest, MDSpan) +const std::vector> inputs_mdspan_f = {{3, 3}, + {3, 4}, + {300, 300}, + {300, 4100}, + {1, 13000}, + {3, 130001}, + {4100, 300}, + {13000, 1}, + {130001, 3}}; +const std::vector> inputs_mdspan_d = {{3, 3}, + {3, 4}, + {300, 300}, + {300, 4100}, + {1, 13000}, + {3, 130001}, + {4100, 300}, + {13000, 1}, + {130001, 3}}; +const std::vector> inputs_mdspan_h = {{3, 3}, + {3, 4}, + {300, 300}, + {300, 4100}, + {1, 13000}, + {3, 130001}, + {4100, 300}, + {13000, 1}, + {130001, 3}}; + +TEST(TransposeTest, MDSpanFloat) { - test_transpose_with_mdspan(); - test_transpose_with_mdspan(); - - test_transpose_with_mdspan(); - test_transpose_with_mdspan(); + for (const auto& p : inputs_mdspan_f) { + test_transpose_with_mdspan(p); + test_transpose_with_mdspan(p); + } +} +TEST(TransposeTest, MDSpanDouble) +{ + for (const auto& p : inputs_mdspan_d) { + test_transpose_with_mdspan(p); + test_transpose_with_mdspan(p); + } +} +TEST(TransposeTest, MDSpanHalf) +{ + for (const auto& p : inputs_mdspan_h) { + test_transpose_with_mdspan(p); + test_transpose_with_mdspan(p); + } } -namespace { +template +struct TransposeSubmatrixInputs { + int n_row; + int n_col; + int row_beg; + int row_end; + int col_beg; + int col_end; + T tolerance = T{0.01}; +}; + template -void test_transpose_submatrix() +void test_transpose_submatrix(const TransposeSubmatrixInputs& param) { + auto len = param.n_row * param.n_col; + auto sub_len = (param.row_end - param.row_beg) * (param.col_end - param.col_beg); + + std::vector in_h(len); + std::vector out_ref_h(sub_len); + + initialize_array(in_h.data(), len); + raft::resources handle; - auto v = make_device_matrix(handle, 32, 33); - T k{0}; - size_t row_beg{3}, row_end{13}, col_beg{2}, col_end{11}; - for (size_t i = row_beg; i < row_end; ++i) { - for (size_t j = col_beg; j < col_end; ++j) { - v(i, j) = k++; - } + auto stream = resource::get_cuda_stream(handle); + + auto in = make_device_matrix(handle, param.n_row, param.n_col); + auto out_ref = make_device_matrix( + handle, (param.row_end - param.row_beg), (param.col_end - param.col_beg)); + + if constexpr (std::is_same_v) { + auto offset = param.row_beg * param.n_col + param.col_beg; + cpu_transpose_row_major(in_h.data() + offset, + out_ref_h.data(), + (param.row_end - param.row_beg), + (param.col_end - param.col_beg), + in.extent(1), + (param.row_end - param.row_beg)); + } else { + auto offset = param.col_beg * param.n_row + param.row_beg; + cpu_transpose_col_major(in_h.data() + offset, + out_ref_h.data(), + (param.row_end - param.row_beg), + (param.col_end - param.col_beg), + in.extent(0), + (param.col_end - param.col_beg)); } - auto vv = v.view(); - auto submat = std::experimental::submdspan( - vv, std::make_tuple(row_beg, row_end), std::make_tuple(col_beg, col_end)); - static_assert(std::is_same_v); + raft::copy(in.data_handle(), in_h.data(), len, resource::get_cuda_stream(handle)); + raft::copy(out_ref.data_handle(), out_ref_h.data(), sub_len, resource::get_cuda_stream(handle)); + resource::sync_stream(handle, stream); - auto out = transpose(handle, submat); - ASSERT_EQ(out.extent(0), submat.extent(1)); - ASSERT_EQ(out.extent(1), submat.extent(0)); + auto in_submat = std::experimental::submdspan(in.view(), + std::make_tuple(param.row_beg, param.row_end), + std::make_tuple(param.col_beg, param.col_end)); - k = 0; - for (size_t i = 0; i < out.extent(1); ++i) { - for (size_t j = 0; j < out.extent(0); ++j) { - ASSERT_EQ(out(j, i), k++); - } + static_assert(std::is_same_v); + auto out = transpose(handle, in_submat); + + ASSERT_EQ(out.extent(0), in_submat.extent(1)); + ASSERT_EQ(out.extent(1), in_submat.extent(0)); + + if constexpr (std::is_same_v) { + std::vector out_h(sub_len); + + raft::copy(out_h.data(), out.data_handle(), sub_len, resource::get_cuda_stream(handle)); + ASSERT_TRUE(validate_half(out_ref_h.data(), out_h.data(), param.tolerance, sub_len)); + } else { + ASSERT_TRUE(raft::devArrMatch(out_ref.data_handle(), + out.data_handle(), + sub_len, + raft::CompareApproxAbs(param.tolerance))); } } -} // namespace - -TEST(TransposeTest, SubMatrix) +const std::vector> inputs_submatrix_f = { + {3, 3, 1, 2, 0, 2}, + {3, 4, 1, 3, 2, 3}, + {300, 300, 1, 299, 2, 239}, + {300, 4100, 3, 299, 101, 4001}, + {2, 13000, 0, 1, 3, 13000}, + {3, 130001, 0, 3, 3999, 129999}, + {4100, 300, 159, 4001, 125, 300}, + {13000, 5, 0, 11111, 0, 3}, + {130001, 3, 19, 130000, 2, 3}}; +const std::vector> inputs_submatrix_d = { + {3, 3, 1, 2, 0, 2}, + {3, 4, 1, 3, 2, 3}, + {300, 300, 1, 299, 2, 239}, + {300, 4100, 3, 299, 101, 4001}, + {2, 13000, 0, 1, 3, 13000}, + {3, 130001, 0, 3, 3999, 129999}, + {4100, 300, 159, 4001, 125, 300}, + {13000, 5, 0, 11111, 0, 3}, + {130001, 3, 19, 130000, 2, 3}}; +const std::vector> inputs_submatrix_h = { + {3, 3, 1, 2, 0, 2}, + {3, 4, 1, 3, 2, 3}, + {300, 300, 1, 299, 2, 239}, + {300, 4100, 3, 299, 101, 4001}, + {2, 13000, 0, 1, 3, 13000}, + {3, 130001, 0, 3, 3999, 129999}, + {4100, 300, 159, 4001, 125, 300}, + {13000, 5, 0, 11111, 0, 3}, + {130001, 3, 19, 130000, 2, 3}}; + +TEST(TransposeTest, SubMatrixFloat) { - test_transpose_submatrix(); - test_transpose_submatrix(); - - test_transpose_submatrix(); - test_transpose_submatrix(); + for (const auto& p : inputs_submatrix_f) { + test_transpose_submatrix(p); + test_transpose_submatrix(p); + } } +TEST(TransposeTest, SubMatrixDouble) +{ + for (const auto& p : inputs_submatrix_d) { + test_transpose_submatrix(p); + test_transpose_submatrix(p); + } +} +TEST(TransposeTest, SubMatrixHalf) +{ + for (const auto& p : inputs_submatrix_h) { + test_transpose_submatrix(p); + test_transpose_submatrix(p); + } +} + +} // namespace transpose_extra_test } // end namespace linalg } // end namespace raft From b78a07d9d2a117c12f396f330526a8e8ed23a9ba Mon Sep 17 00:00:00 2001 From: Victor Lafargue Date: Fri, 27 Sep 2024 00:36:44 +0200 Subject: [PATCH 17/28] Adding NCCL clique to the RAFT handle (#2431) Authors: - Victor Lafargue (https://github.com/viclafargue) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2431 --- cpp/include/raft/comms/detail/std_comms.hpp | 8 +- cpp/include/raft/comms/nccl_clique.hpp | 156 ++++++++++++++++++ .../raft/core/resource/nccl_clique.hpp | 66 ++++++++ .../raft/core/resource/resource_types.hpp | 1 + 4 files changed, 227 insertions(+), 4 deletions(-) create mode 100644 cpp/include/raft/comms/nccl_clique.hpp create mode 100644 cpp/include/raft/core/resource/nccl_clique.hpp diff --git a/cpp/include/raft/comms/detail/std_comms.hpp b/cpp/include/raft/comms/detail/std_comms.hpp index c5d64f6a29..ed869e6cae 100644 --- a/cpp/include/raft/comms/detail/std_comms.hpp +++ b/cpp/include/raft/comms/detail/std_comms.hpp @@ -310,13 +310,13 @@ class std_comms : public comms_iface { // Wait for a UCXX progress thread roundtrip, prevent waiting for longer // than 10ms for each operation, will retry in next iteration. ucxx::utils::CallbackNotifier callbackNotifierPre{}; - worker->registerGenericPre([&callbackNotifierPre]() { callbackNotifierPre.set(); }, - 10000000 /* 10ms */); + (void)worker->registerGenericPre( + [&callbackNotifierPre]() { callbackNotifierPre.set(); }, 10000000 /* 10ms */); callbackNotifierPre.wait(); ucxx::utils::CallbackNotifier callbackNotifierPost{}; - worker->registerGenericPost([&callbackNotifierPost]() { callbackNotifierPost.set(); }, - 10000000 /* 10ms */); + (void)worker->registerGenericPost( + [&callbackNotifierPost]() { callbackNotifierPost.set(); }, 10000000 /* 10ms */); callbackNotifierPost.wait(); } else { // Causes UCXX to progress through the send/recv message queue diff --git a/cpp/include/raft/comms/nccl_clique.hpp b/cpp/include/raft/comms/nccl_clique.hpp new file mode 100644 index 0000000000..c6520af753 --- /dev/null +++ b/cpp/include/raft/comms/nccl_clique.hpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include + +/** + * @brief Error checking macro for NCCL runtime API functions. + * + * Invokes a NCCL runtime API function call, if the call does not return ncclSuccess, throws an + * exception detailing the NCCL error that occurred + */ +#define RAFT_NCCL_TRY(call) \ + do { \ + ncclResult_t const status = (call); \ + if (ncclSuccess != status) { \ + std::string msg{}; \ + SET_ERROR_MSG(msg, \ + "NCCL error encountered at: ", \ + "call='%s', Reason=%d:%s", \ + #call, \ + status, \ + ncclGetErrorString(status)); \ + throw raft::logic_error(msg); \ + } \ + } while (0); + +namespace raft::comms { +void build_comms_nccl_only(raft::resources* handle, ncclComm_t nccl_comm, int num_ranks, int rank); +} + +namespace raft::comms { + +struct nccl_clique { + using pool_mr = rmm::mr::pool_memory_resource; + + /** + * Instantiates a NCCL clique with all available GPUs + * + * @param[in] percent_of_free_memory percentage of device memory to pre-allocate as memory pool + * + */ + nccl_clique(int percent_of_free_memory = 80) + : root_rank_(0), + percent_of_free_memory_(percent_of_free_memory), + per_device_pools_(0), + device_resources_(0) + { + cudaGetDeviceCount(&num_ranks_); + device_ids_.resize(num_ranks_); + std::iota(device_ids_.begin(), device_ids_.end(), 0); + nccl_comms_.resize(num_ranks_); + nccl_clique_init(); + } + + /** + * Instantiates a NCCL clique + * + * Usage example: + * @code{.cpp} + * int n_devices; + * cudaGetDeviceCount(&n_devices); + * std::vector device_ids(n_devices); + * std::iota(device_ids.begin(), device_ids.end(), 0); + * cuvs::neighbors::mg::nccl_clique& clique(device_ids); // first device is the root rank + * @endcode + * + * @param[in] device_ids list of device IDs to be used to initiate the clique + * @param[in] percent_of_free_memory percentage of device memory to pre-allocate as memory pool + * + */ + nccl_clique(const std::vector& device_ids, int percent_of_free_memory = 80) + : root_rank_(0), + num_ranks_(device_ids.size()), + percent_of_free_memory_(percent_of_free_memory), + device_ids_(device_ids), + nccl_comms_(device_ids.size()), + per_device_pools_(0), + device_resources_(0) + { + nccl_clique_init(); + } + + void nccl_clique_init() + { + RAFT_NCCL_TRY(ncclCommInitAll(nccl_comms_.data(), num_ranks_, device_ids_.data())); + + for (int rank = 0; rank < num_ranks_; rank++) { + RAFT_CUDA_TRY(cudaSetDevice(device_ids_[rank])); + + // create a pool memory resource for each device + auto old_mr = rmm::mr::get_current_device_resource(); + per_device_pools_.push_back(std::make_unique( + old_mr, rmm::percent_of_free_device_memory(percent_of_free_memory_))); + rmm::cuda_device_id id(device_ids_[rank]); + rmm::mr::set_per_device_resource(id, per_device_pools_.back().get()); + + // create a device resource handle for each device + device_resources_.emplace_back(); + + // add NCCL communications to the device resource handle + raft::comms::build_comms_nccl_only( + &device_resources_[rank], nccl_comms_[rank], num_ranks_, rank); + } + + for (int rank = 0; rank < num_ranks_; rank++) { + RAFT_CUDA_TRY(cudaSetDevice(device_ids_[rank])); + raft::resource::sync_stream(device_resources_[rank]); + } + } + + const raft::device_resources& set_current_device_to_root_rank() const + { + int root_device_id = device_ids_[root_rank_]; + RAFT_CUDA_TRY(cudaSetDevice(root_device_id)); + return device_resources_[root_rank_]; + } + + ~nccl_clique() + { +#pragma omp parallel for // necessary to avoid hangs + for (int rank = 0; rank < num_ranks_; rank++) { + cudaSetDevice(device_ids_[rank]); + ncclCommDestroy(nccl_comms_[rank]); + rmm::cuda_device_id id(device_ids_[rank]); + rmm::mr::set_per_device_resource(id, nullptr); + } + } + + int root_rank_; + int num_ranks_; + int percent_of_free_memory_; + std::vector device_ids_; + std::vector nccl_comms_; + std::vector> per_device_pools_; + std::vector device_resources_; +}; + +} // namespace raft::comms diff --git a/cpp/include/raft/core/resource/nccl_clique.hpp b/cpp/include/raft/core/resource/nccl_clique.hpp new file mode 100644 index 0000000000..edda5043ae --- /dev/null +++ b/cpp/include/raft/core/resource/nccl_clique.hpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include + +namespace raft::resource { + +class nccl_clique_resource : public resource { + public: + nccl_clique_resource() : clique_(std::make_unique()) {} + ~nccl_clique_resource() override {} + void* get_resource() override { return clique_.get(); } + + private: + std::unique_ptr clique_; +}; + +/** Factory that knows how to construct a specific raft::resource to populate the res_t. */ +class nccl_clique_resource_factory : public resource_factory { + public: + resource_type get_resource_type() override { return resource_type::NCCL_CLIQUE; } + resource* make_resource() override { return new nccl_clique_resource(); } +}; + +/** + * @defgroup nccl_clique_resource resource functions + * @{ + */ + +/** + * Retrieves a NCCL clique from raft res if it exists, otherwise initializes it and return it. + * + * @param[in] res the raft resources object + * @return NCCL clique + */ +inline const raft::comms::nccl_clique& get_nccl_clique(resources const& res) +{ + if (!res.has_resource_factory(resource_type::NCCL_CLIQUE)) { + res.add_resource_factory(std::make_shared()); + } + return *res.get_resource(resource_type::NCCL_CLIQUE); +}; + +/** + * @} + */ + +} // namespace raft::resource diff --git a/cpp/include/raft/core/resource/resource_types.hpp b/cpp/include/raft/core/resource/resource_types.hpp index d9126251c9..4fa84c3bdb 100644 --- a/cpp/include/raft/core/resource/resource_types.hpp +++ b/cpp/include/raft/core/resource/resource_types.hpp @@ -46,6 +46,7 @@ enum resource_type { CUBLASLT_HANDLE, // cublasLt handle CUSTOM, // runtime-shared default-constructible resource LARGE_WORKSPACE_RESOURCE, // rmm device memory resource for somewhat large temporary allocations + NCCL_CLIQUE, // nccl clique LAST_KEY // reserved for the last key }; From 5ee0e79a7aa033d1f312e805fca1194cdf0483b3 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 26 Sep 2024 18:22:10 -0500 Subject: [PATCH 18/28] bump NCCL floor to 2.19 (#2458) Follow-up to #2443 As part of the work to support NumPy 2 across RAPIDS, we found reason to upgrade some libraries like `cugraph` to slightly newer NCCL (`>=2.19`). Context: https://github.com/rapidsai/build-planning/issues/102#issuecomment-2375595743 This applies that same bump here, to keep the range of NCCL versions consistent across RAPIDS. Authors: - James Lamb (https://github.com/jameslamb) Approvers: - https://github.com/jakirkham - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2458 --- conda/environments/all_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/environments/bench_ann_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/bench_ann_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/bench_ann_cuda-120_arch-aarch64.yaml | 2 +- conda/environments/bench_ann_cuda-120_arch-x86_64.yaml | 2 +- conda/recipes/libraft/conda_build_config.yaml | 2 +- conda/recipes/raft-ann-bench/conda_build_config.yaml | 2 +- conda/recipes/raft-dask/conda_build_config.yaml | 2 +- dependencies.yaml | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index de4eb7e690..0b84772fad 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -36,7 +36,7 @@ dependencies: - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 - libucxx==0.40.*,>=0.0.0a0 -- nccl>=2.18.1.1 +- nccl>=2.19 - ninja - numba>=0.57 - numpy>=1.23,<3.0a0 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 26f4c1efaa..d1c01f1b16 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -36,7 +36,7 @@ dependencies: - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 - libucxx==0.40.*,>=0.0.0a0 -- nccl>=2.18.1.1 +- nccl>=2.19 - ninja - numba>=0.57 - numpy>=1.23,<3.0a0 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index 692956502b..4c506f5297 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -33,7 +33,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libucxx==0.40.*,>=0.0.0a0 -- nccl>=2.18.1.1 +- nccl>=2.19 - ninja - numba>=0.57 - numpy>=1.23,<3.0a0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 133d42bfee..a123950e3a 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -33,7 +33,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libucxx==0.40.*,>=0.0.0a0 -- nccl>=2.18.1.1 +- nccl>=2.19 - ninja - numba>=0.57 - numpy>=1.23,<3.0a0 diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index f99cedd627..864eb2130b 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -32,7 +32,7 @@ dependencies: - libcusparse=11.7.5.86 - libucxx==0.40.*,>=0.0.0a0 - matplotlib -- nccl>=2.18.1.1 +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - nvcc_linux-aarch64=11.8 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 08aea32ab1..5da6eaf17e 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -32,7 +32,7 @@ dependencies: - libcusparse=11.7.5.86 - libucxx==0.40.*,>=0.0.0a0 - matplotlib -- nccl>=2.18.1.1 +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - nvcc_linux-64=11.8 diff --git a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml index 572ad85ab5..65de97c170 100644 --- a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml @@ -29,7 +29,7 @@ dependencies: - libcusparse-dev - libucxx==0.40.*,>=0.0.0a0 - matplotlib -- nccl>=2.18.1.1 +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas diff --git a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml index 9fd23edb03..7e1adbc483 100644 --- a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml @@ -29,7 +29,7 @@ dependencies: - libcusparse-dev - libucxx==0.40.*,>=0.0.0a0 - matplotlib -- nccl>=2.18.1.1 +- nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas diff --git a/conda/recipes/libraft/conda_build_config.yaml b/conda/recipes/libraft/conda_build_config.yaml index 6dc06648bd..bc0ff1fae7 100644 --- a/conda/recipes/libraft/conda_build_config.yaml +++ b/conda/recipes/libraft/conda_build_config.yaml @@ -20,7 +20,7 @@ cmake_version: - ">=3.26.4,!=3.30.0" nccl_version: - - ">=2.18.1.1" + - ">=2.19" glog_version: - ">=0.6.0" diff --git a/conda/recipes/raft-ann-bench/conda_build_config.yaml b/conda/recipes/raft-ann-bench/conda_build_config.yaml index bdb4e883ea..47bd730daf 100644 --- a/conda/recipes/raft-ann-bench/conda_build_config.yaml +++ b/conda/recipes/raft-ann-bench/conda_build_config.yaml @@ -20,7 +20,7 @@ cmake_version: - ">=3.26.4,!=3.30.0" nccl_version: - - ">=2.18.1.1" + - ">=2.19" glog_version: - ">=0.6.0" diff --git a/conda/recipes/raft-dask/conda_build_config.yaml b/conda/recipes/raft-dask/conda_build_config.yaml index 58e8ec3c9e..65c589fc0c 100644 --- a/conda/recipes/raft-dask/conda_build_config.yaml +++ b/conda/recipes/raft-dask/conda_build_config.yaml @@ -26,4 +26,4 @@ cmake_version: - ">=3.26.4,!=3.30.0" nccl_version: - - ">=2.18.1.1" + - ">=2.19" diff --git a/dependencies.yaml b/dependencies.yaml index e833e8519a..d0991f4d04 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -170,7 +170,7 @@ dependencies: packages: - c-compiler - cxx-compiler - - nccl>=2.18.1.1 + - nccl>=2.19 - libucxx==0.40.*,>=0.0.0a0 specific: - output_types: conda From 6c4fdfb3bfa32c3f08d03adad0d461c167f277ce Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 26 Sep 2024 21:25:22 -0400 Subject: [PATCH 19/28] Deprecating vector search APIs and updating README accordingly (#2448) I opted to deprecate just the necessary pieces, such as the `index` classes instead of deprecating every single function. Authors: - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/raft/pull/2448 --- README.md | 111 +-------------- cpp/include/raft/cluster/kmeans.cuh | 63 +++++---- cpp/include/raft/cluster/kmeans_balanced.cuh | 68 ++++----- cpp/include/raft/cluster/single_linkage.cuh | 33 ++--- cpp/include/raft/distance/distance-ext.cuh | 133 +++++++++--------- cpp/include/raft/neighbors/ball_cover.cuh | 1 + .../raft/neighbors/brute_force_types.hpp | 32 +++-- cpp/include/raft/neighbors/cagra_types.hpp | 14 +- cpp/include/raft/neighbors/hnsw_types.hpp | 6 +- cpp/include/raft/neighbors/ivf_flat_types.hpp | 16 ++- cpp/include/raft/neighbors/ivf_pq_types.hpp | 20 +-- .../raft/neighbors/nn_descent_types.hpp | 16 ++- cpp/include/raft/neighbors/refine-ext.cuh | 32 ++--- docs/source/conf.py | 2 +- docs/source/raft_ann_benchmarks.md | 4 + 15 files changed, 239 insertions(+), 312 deletions(-) diff --git a/README.md b/README.md index 25ce059630..af2219fdd1 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ #
 RAFT: Reusable Accelerated Functions and Tools for Vector Search and More
> [!IMPORTANT] -> The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called [cuVS](https://github.com/rapidsai/cuvs). We will continue to support the vector search algorithms in RAFT during this move, but will no longer update them after the RAPIDS 24.06 (June) release. We plan to complete the migration by RAPIDS 24.08 (August) release. +> The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called [cuVS](https://github.com/rapidsai/cuvs). We will continue to support the vector search algorithms in RAFT during this move, but will no longer update them after the RAPIDS 24.06 (June) release. We plan to complete the migration by RAPIDS 24.10 (October) release and will be removing them altogether in the 24.12 (December) release. ![RAFT tech stack](img/raft-tech-stack-vss.png) @@ -36,7 +36,7 @@ ## What is RAFT? -RAFT contains fundamental widely-used algorithms and primitives for machine learning and information retrieval. The algorithms are CUDA-accelerated and form building blocks for more easily writing high performance applications. +RAFT contains fundamental widely-used algorithms and primitives for machine learning and data mining. The algorithms are CUDA-accelerated and form building blocks for more easily writing high performance applications. By taking a primitives-based approach to algorithm development, RAFT - accelerates algorithm construction time @@ -47,12 +47,10 @@ While not exhaustive, the following general categories help summarize the accele ##### | Category | Accelerated Functions in RAFT | |-----------------------|-----------------------------------------------------------------------------------------------------------------------------------| -| **Nearest Neighbors** | vector search, neighborhood graph construction, epsilon neighborhoods, pairwise distances | -| **Basic Clustering** | spectral clustering, hierarchical clustering, k-means | -| **Solvers** | combinatorial optimization, iterative solvers | | **Data Formats** | sparse & dense, conversions, data generation | | **Dense Operations** | linear algebra, matrix and vector operations, reductions, slicing, norms, factorization, least squares, svd & eigenvalue problems | | **Sparse Operations** | linear algebra, eigenvalue problems, slicing, norms, reductions, factorization, symmetrization, components & labeling | +| **Solvers** | combinatorial optimization, iterative solvers | | **Statistics** | sampling, moments and summary statistics, metrics, model evaluation | | **Tools & Utilities** | common tools and utilities for developing CUDA applications, multi-node multi-gpu infrastructure | @@ -67,42 +65,6 @@ In addition being a C++ library, RAFT also provides 2 Python libraries: ![RAFT is a C++ header-only template library with optional shared library and lightweight Python wrappers](img/arch.png) -## Use cases - -### Vector Similarity Search - -RAFT contains state-of-the-art implementations of approximate nearest neighbors search (ANNS) algorithms on the GPU, such as: - -* [Brute force](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#brute-force). Performs a brute force nearest neighbors search without an index. -* [IVF-Flat](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#ivf-flat) and [IVF-PQ](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#ivf-pq). Use an inverted file index structure to map contents to their locations. IVF-PQ additionally uses product quantization to reduce the memory usage of vectors. These methods were originally popularized by the [FAISS](https://github.com/facebookresearch/faiss) library. -* [CAGRA](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#cagra) (Cuda Anns GRAph-based). Uses a fast ANNS graph construction and search implementation optimized for the GPU. CAGRA outperforms state-of-the art CPU methods (i.e. HNSW) for large batch queries, single queries, and graph construction time. - -Projects that use the RAFT ANNS algorithms for accelerating vector search include: [Milvus](https://milvus.io/), [Redis](https://redis.io/), and [Faiss](https://github.com/facebookresearch/faiss). - -Please see the example [Jupyter notebook](https://github.com/rapidsai/raft/blob/HEAD/notebooks/VectorSearch_QuestionRetrieval.ipynb) to get started RAFT for vector search in Python. - - - -### Information Retrieval - -RAFT contains a catalog of reusable primitives for composing algorithms that require fast neighborhood computations, such as - -1. Computing distances between vectors and computing kernel gramm matrices -2. Performing ball radius queries for constructing epsilon neighborhoods -3. Clustering points to partition a space for smaller and faster searches -4. Constructing neighborhood "connectivities" graphs from dense vectors - -### Machine Learning - -RAFT's primitives are used in several RAPIDS libraries, including [cuML](https://github.com/rapidsai/cuml), [cuGraph](https://github.com/rapidsai/cugraph), and [cuOpt](https://github.com/rapidsai/cuopt) to build many end-to-end machine learning algorithms that span a large spectrum of different applications, including -- data generation -- model evaluation -- classification and regression -- clustering -- manifold learning -- dimensionality reduction. - -RAFT is also used by the popular collaborative filtering library [implicit](https://github.com/benfred/implicit) for recommender systems. ## Is RAFT right for me? @@ -327,70 +289,3 @@ When citing RAFT generally, please consider referencing this Github project. year={2022} } ``` -If citing the sparse pairwise distances API, please consider using the following bibtex: -```bibtex -@article{nolet2021semiring, - title={Semiring primitives for sparse neighborhood methods on the gpu}, - author={Nolet, Corey J and Gala, Divye and Raff, Edward and Eaton, Joe and Rees, Brad and Zedlewski, John and Oates, Tim}, - journal={arXiv preprint arXiv:2104.06357}, - year={2021} -} -``` - -If citing the single-linkage agglomerative clustering APIs, please consider the following bibtex: -```bibtex -@misc{nolet2023cuslink, - title={cuSLINK: Single-linkage Agglomerative Clustering on the GPU}, - author={Corey J. Nolet and Divye Gala and Alex Fender and Mahesh Doijade and Joe Eaton and Edward Raff and John Zedlewski and Brad Rees and Tim Oates}, - year={2023}, - eprint={2306.16354}, - archivePrefix={arXiv}, - primaryClass={cs.LG} -} -``` - -If citing CAGRA, please consider the following bibtex: -```bibtex -@misc{ootomo2023cagra, - title={CAGRA: Highly Parallel Graph Construction and Approximate Nearest Neighbor Search for GPUs}, - author={Hiroyuki Ootomo and Akira Naruse and Corey Nolet and Ray Wang and Tamas Feher and Yong Wang}, - year={2024}, - series = {ICDE '24} -} -``` - -If citing the k-selection routines, please consider the following bibtex: - -```bibtex -@proceedings{10.1145/3581784, - title = {Parallel Top-K Algorithms on GPU: A Comprehensive Study and New Methods}, - author={Jingrong Zhang, Akira Naruse, Xipeng Li, and Yong Wang}, - year = {2023}, - isbn = {9798400701092}, - publisher = {Association for Computing Machinery}, - address = {New York, NY, USA}, - location = {Denver, CO, USA}, - series = {SC '23} -} -``` - -If citing the nearest neighbors descent API, please consider the following bibtex: -```bibtex -@inproceedings{10.1145/3459637.3482344, - author = {Wang, Hui and Zhao, Wan-Lei and Zeng, Xiangxiang and Yang, Jianye}, - title = {Fast K-NN Graph Construction by GPU Based NN-Descent}, - year = {2021}, - isbn = {9781450384469}, - publisher = {Association for Computing Machinery}, - address = {New York, NY, USA}, - url = {https://doi.org/10.1145/3459637.3482344}, - doi = {10.1145/3459637.3482344}, - abstract = {NN-Descent is a classic k-NN graph construction approach. It is still widely employed in machine learning, computer vision, and information retrieval tasks due to its efficiency and genericness. However, the current design only works well on CPU. In this paper, NN-Descent has been redesigned to adapt to the GPU architecture. A new graph update strategy called selective update is proposed. It reduces the data exchange between GPU cores and GPU global memory significantly, which is the processing bottleneck under GPU computation architecture. This redesign leads to full exploitation of the parallelism of the GPU hardware. In the meantime, the genericness, as well as the simplicity of NN-Descent, are well-preserved. Moreover, a procedure that allows to k-NN graph to be merged efficiently on GPU is proposed. It makes the construction of high-quality k-NN graphs for out-of-GPU-memory datasets tractable. Our approach is 100-250\texttimes{} faster than the single-thread NN-Descent and is 2.5-5\texttimes{} faster than the existing GPU-based approaches as we tested on million as well as billion scale datasets.}, - booktitle = {Proceedings of the 30th ACM International Conference on Information \& Knowledge Management}, - pages = {1929–1938}, - numpages = {10}, - keywords = {high-dimensional, nn-descent, gpu, k-nearest neighbor graph}, - location = {Virtual Event, Queensland, Australia}, - series = {CIKM '21} -} -``` diff --git a/cpp/include/raft/cluster/kmeans.cuh b/cpp/include/raft/cluster/kmeans.cuh index eb28cc1626..38318e8ec8 100644 --- a/cpp/include/raft/cluster/kmeans.cuh +++ b/cpp/include/raft/cluster/kmeans.cuh @@ -86,13 +86,14 @@ using KeyValueIndexOp = detail::KeyValueIndexOp; * @param[out] n_iter Number of iterations run. */ template -void fit(raft::resources const& handle, - const KMeansParams& params, - raft::device_matrix_view X, - std::optional> sample_weight, - raft::device_matrix_view centroids, - raft::host_scalar_view inertia, - raft::host_scalar_view n_iter) +[[deprecated("Use cuVS instead")]] void fit( + raft::resources const& handle, + const KMeansParams& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter) { detail::kmeans_fit(handle, params, X, sample_weight, centroids, inertia, n_iter); } @@ -150,14 +151,15 @@ void fit(raft::resources const& handle, * their closest cluster center. */ template -void predict(raft::resources const& handle, - const KMeansParams& params, - raft::device_matrix_view X, - std::optional> sample_weight, - raft::device_matrix_view centroids, - raft::device_vector_view labels, - bool normalize_weight, - raft::host_scalar_view inertia) +[[deprecated("Use cuVS instead")]] void predict( + raft::resources const& handle, + const KMeansParams& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::device_vector_view labels, + bool normalize_weight, + raft::host_scalar_view inertia) { detail::kmeans_predict( handle, params, X, sample_weight, centroids, labels, normalize_weight, inertia); @@ -213,14 +215,15 @@ void predict(raft::resources const& handle, * @param[out] n_iter Number of iterations run. */ template -void fit_predict(raft::resources const& handle, - const KMeansParams& params, - raft::device_matrix_view X, - std::optional> sample_weight, - std::optional> centroids, - raft::device_vector_view labels, - raft::host_scalar_view inertia, - raft::host_scalar_view n_iter) +[[deprecated("Use cuVS instead")]] void fit_predict( + raft::resources const& handle, + const KMeansParams& params, + raft::device_matrix_view X, + std::optional> sample_weight, + std::optional> centroids, + raft::device_vector_view labels, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter) { detail::kmeans_fit_predict( handle, params, X, sample_weight, centroids, labels, inertia, n_iter); @@ -252,13 +255,13 @@ void transform(raft::resources const& handle, } template -void transform(raft::resources const& handle, - const KMeansParams& params, - const DataT* X, - const DataT* centroids, - IndexT n_samples, - IndexT n_features, - DataT* X_new) +[[deprecated("Use cuVS instead")]] void transform(raft::resources const& handle, + const KMeansParams& params, + const DataT* X, + const DataT* centroids, + IndexT n_samples, + IndexT n_features, + DataT* X_new) { detail::kmeans_transform( handle, params, X, centroids, n_samples, n_features, X_new); diff --git a/cpp/include/raft/cluster/kmeans_balanced.cuh b/cpp/include/raft/cluster/kmeans_balanced.cuh index a1a182608b..7479047fce 100644 --- a/cpp/include/raft/cluster/kmeans_balanced.cuh +++ b/cpp/include/raft/cluster/kmeans_balanced.cuh @@ -73,11 +73,11 @@ namespace raft::cluster::kmeans_balanced { * datatype. If DataT == MathT, this must be the identity. */ template -void fit(const raft::resources& handle, - kmeans_balanced_params const& params, - raft::device_matrix_view X, - raft::device_matrix_view centroids, - MappingOpT mapping_op = raft::identity_op()) +[[deprecated("Use cuVS instead")]] void fit(const raft::resources& handle, + kmeans_balanced_params const& params, + raft::device_matrix_view X, + raft::device_matrix_view centroids, + MappingOpT mapping_op = raft::identity_op()) { RAFT_EXPECTS(X.extent(1) == centroids.extent(1), "Number of features in dataset and centroids are different"); @@ -131,12 +131,13 @@ template -void predict(const raft::resources& handle, - kmeans_balanced_params const& params, - raft::device_matrix_view X, - raft::device_matrix_view centroids, - raft::device_vector_view labels, - MappingOpT mapping_op = raft::identity_op()) +[[deprecated("Use cuVS instead")]] void predict( + const raft::resources& handle, + kmeans_balanced_params const& params, + raft::device_matrix_view X, + raft::device_matrix_view centroids, + raft::device_vector_view labels, + MappingOpT mapping_op = raft::identity_op()) { RAFT_EXPECTS(X.extent(0) == labels.extent(0), "Number of rows in dataset and labels are different"); @@ -196,12 +197,13 @@ template -void fit_predict(const raft::resources& handle, - kmeans_balanced_params const& params, - raft::device_matrix_view X, - raft::device_matrix_view centroids, - raft::device_vector_view labels, - MappingOpT mapping_op = raft::identity_op()) +[[deprecated("Use cuVS instead")]] void fit_predict( + const raft::resources& handle, + kmeans_balanced_params const& params, + raft::device_matrix_view X, + raft::device_matrix_view centroids, + raft::device_vector_view labels, + MappingOpT mapping_op = raft::identity_op()) { auto centroids_const = raft::make_device_matrix_view( centroids.data_handle(), centroids.extent(0), centroids.extent(1)); @@ -255,14 +257,15 @@ template -void build_clusters(const raft::resources& handle, - const kmeans_balanced_params& params, - raft::device_matrix_view X, - raft::device_matrix_view centroids, - raft::device_vector_view labels, - raft::device_vector_view cluster_sizes, - MappingOpT mapping_op = raft::identity_op(), - std::optional> X_norm = std::nullopt) +[[deprecated("Use cuVS instead")]] void build_clusters( + const raft::resources& handle, + const kmeans_balanced_params& params, + raft::device_matrix_view X, + raft::device_matrix_view centroids, + raft::device_vector_view labels, + raft::device_vector_view cluster_sizes, + MappingOpT mapping_op = raft::identity_op(), + std::optional> X_norm = std::nullopt) { RAFT_EXPECTS(X.extent(0) == labels.extent(0), "Number of rows in dataset and labels are different"); @@ -334,13 +337,14 @@ template -void calc_centers_and_sizes(const raft::resources& handle, - raft::device_matrix_view X, - raft::device_vector_view labels, - raft::device_matrix_view centroids, - raft::device_vector_view cluster_sizes, - bool reset_counters = true, - MappingOpT mapping_op = raft::identity_op()) +[[deprecated("Use cuVS instead")]] void calc_centers_and_sizes( + const raft::resources& handle, + raft::device_matrix_view X, + raft::device_vector_view labels, + raft::device_matrix_view centroids, + raft::device_vector_view cluster_sizes, + bool reset_counters = true, + MappingOpT mapping_op = raft::identity_op()) { RAFT_EXPECTS(X.extent(0) == labels.extent(0), "Number of rows in dataset and labels are different"); diff --git a/cpp/include/raft/cluster/single_linkage.cuh b/cpp/include/raft/cluster/single_linkage.cuh index d9eba6edc5..067445c542 100644 --- a/cpp/include/raft/cluster/single_linkage.cuh +++ b/cpp/include/raft/cluster/single_linkage.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,14 +50,14 @@ namespace raft::cluster { template -void single_linkage(raft::resources const& handle, - const value_t* X, - size_t m, - size_t n, - raft::distance::DistanceType metric, - linkage_output* out, - int c, - size_t n_clusters) +[[deprecated("Use cuVS instead")]] void single_linkage(raft::resources const& handle, + const value_t* X, + size_t m, + size_t n, + raft::distance::DistanceType metric, + linkage_output* out, + int c, + size_t n_clusters) { detail::single_linkage( handle, X, m, n, metric, out, c, n_clusters); @@ -87,13 +87,14 @@ constexpr int DEFAULT_CONST_C = 15; control of k. The algorithm will set `k = log(n) + c` */ template -void single_linkage(raft::resources const& handle, - raft::device_matrix_view X, - raft::device_matrix_view dendrogram, - raft::device_vector_view labels, - raft::distance::DistanceType metric, - size_t n_clusters, - std::optional c = std::make_optional(DEFAULT_CONST_C)) +[[deprecated("Use cuVS instead")]] void single_linkage( + raft::resources const& handle, + raft::device_matrix_view X, + raft::device_matrix_view dendrogram, + raft::device_vector_view labels, + raft::distance::DistanceType metric, + size_t n_clusters, + std::optional c = std::make_optional(DEFAULT_CONST_C)) { linkage_output out_arrs; out_arrs.children = dendrogram.data_handle(); diff --git a/cpp/include/raft/distance/distance-ext.cuh b/cpp/include/raft/distance/distance-ext.cuh index 2d41e029fe..dcbfbfdbc3 100644 --- a/cpp/include/raft/distance/distance-ext.cuh +++ b/cpp/include/raft/distance/distance-ext.cuh @@ -35,42 +35,43 @@ template -void distance(raft::resources const& handle, - const DataT* x, - const DataT* y, - OutT* dist, - IdxT m, - IdxT n, - IdxT k, - void* workspace, - size_t worksize, - FinalLambda fin_op, - bool isRowMajor = true, - DataT metric_arg = 2.0f) RAFT_EXPLICIT; +[[deprecated("Use cuVS instead")]] void distance(raft::resources const& handle, + const DataT* x, + const DataT* y, + OutT* dist, + IdxT m, + IdxT n, + IdxT k, + void* workspace, + size_t worksize, + FinalLambda fin_op, + bool isRowMajor = true, + DataT metric_arg = 2.0f) RAFT_EXPLICIT; template -void distance(raft::resources const& handle, - const DataT* x, - const DataT* y, - OutT* dist, - IdxT m, - IdxT n, - IdxT k, - void* workspace, - size_t worksize, - bool isRowMajor = true, - DataT metric_arg = 2.0f) RAFT_EXPLICIT; +[[deprecated("Use cuVS instead")]] void distance(raft::resources const& handle, + const DataT* x, + const DataT* y, + OutT* dist, + IdxT m, + IdxT n, + IdxT k, + void* workspace, + size_t worksize, + bool isRowMajor = true, + DataT metric_arg = 2.0f) RAFT_EXPLICIT; template -size_t getWorkspaceSize(const DataT* x, const DataT* y, IdxT m, IdxT n, IdxT k) RAFT_EXPLICIT; +[[deprecated("Use cuVS instead")]] size_t getWorkspaceSize( + const DataT* x, const DataT* y, IdxT m, IdxT n, IdxT k) RAFT_EXPLICIT; template -void distance(raft::resources const& handle, - const DataT* x, - const DataT* y, - OutT* dist, - IdxT m, - IdxT n, - IdxT k, - bool isRowMajor = true, - DataT metric_arg = 2.0f) RAFT_EXPLICIT; +[[deprecated("Use cuVS instead")]] void distance(raft::resources const& handle, + const DataT* x, + const DataT* y, + OutT* dist, + IdxT m, + IdxT n, + IdxT k, + bool isRowMajor = true, + DataT metric_arg = 2.0f) RAFT_EXPLICIT; template -void pairwise_distance(raft::resources const& handle, - const Type* x, - const Type* y, - Type* dist, - IdxT m, - IdxT n, - IdxT k, - rmm::device_uvector& workspace, - raft::distance::DistanceType metric, - bool isRowMajor = true, - Type metric_arg = 2.0f) RAFT_EXPLICIT; +[[deprecated("Use cuVS instead")]] void pairwise_distance(raft::resources const& handle, + const Type* x, + const Type* y, + Type* dist, + IdxT m, + IdxT n, + IdxT k, + rmm::device_uvector& workspace, + raft::distance::DistanceType metric, + bool isRowMajor = true, + Type metric_arg = 2.0f) RAFT_EXPLICIT; template -void pairwise_distance(raft::resources const& handle, - const Type* x, - const Type* y, - Type* dist, - IdxT m, - IdxT n, - IdxT k, - raft::distance::DistanceType metric, - bool isRowMajor = true, - Type metric_arg = 2.0f) RAFT_EXPLICIT; +[[deprecated("Use cuVS instead")]] void pairwise_distance(raft::resources const& handle, + const Type* x, + const Type* y, + Type* dist, + IdxT m, + IdxT n, + IdxT k, + raft::distance::DistanceType metric, + bool isRowMajor = true, + Type metric_arg = 2.0f) RAFT_EXPLICIT; template -void distance(raft::resources const& handle, - raft::device_matrix_view const x, - raft::device_matrix_view const y, - raft::device_matrix_view dist, - DataT metric_arg = 2.0f) RAFT_EXPLICIT; +[[deprecated("Use cuVS instead")]] void distance( + raft::resources const& handle, + raft::device_matrix_view const x, + raft::device_matrix_view const y, + raft::device_matrix_view dist, + DataT metric_arg = 2.0f) RAFT_EXPLICIT; template -void pairwise_distance(raft::resources const& handle, - device_matrix_view const x, - device_matrix_view const y, - device_matrix_view dist, - raft::distance::DistanceType metric, - Type metric_arg = 2.0f) RAFT_EXPLICIT; +[[deprecated("Use cuVS instead")]] void pairwise_distance( + raft::resources const& handle, + device_matrix_view const x, + device_matrix_view const y, + device_matrix_view dist, + raft::distance::DistanceType metric, + Type metric_arg = 2.0f) RAFT_EXPLICIT; }; // namespace distance }; // namespace raft diff --git a/cpp/include/raft/neighbors/ball_cover.cuh b/cpp/include/raft/neighbors/ball_cover.cuh index 20c88f3318..09938020b9 100644 --- a/cpp/include/raft/neighbors/ball_cover.cuh +++ b/cpp/include/raft/neighbors/ball_cover.cuh @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #pragma once #ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY #include "ball_cover-inl.cuh" diff --git a/cpp/include/raft/neighbors/brute_force_types.hpp b/cpp/include/raft/neighbors/brute_force_types.hpp index a8f073edc6..4511f8d8ba 100644 --- a/cpp/include/raft/neighbors/brute_force_types.hpp +++ b/cpp/include/raft/neighbors/brute_force_types.hpp @@ -94,12 +94,14 @@ struct index : ann::index { * the dataset. If the dataset is in host memory, it will be copied to the device and the * index will own the device memory. */ + template - index(raft::resources const& res, - mdspan, row_major, data_accessor> dataset, - std::optional>&& norms, - raft::distance::DistanceType metric, - T metric_arg = 0.0) + [[deprecated("Use cuVS instead")]] index( + raft::resources const& res, + mdspan, row_major, data_accessor> dataset, + std::optional>&& norms, + raft::distance::DistanceType metric, + T metric_arg = 0.0) : ann::index(), metric_(metric), dataset_(make_device_matrix(res, 0, 0)), @@ -116,11 +118,12 @@ struct index : ann::index { * This class stores a non-owning reference to the dataset and norms here. * Having precomputed norms gives us a performance advantage at query time. */ - index(raft::resources const& res, - raft::device_matrix_view dataset_view, - std::optional> norms_view, - raft::distance::DistanceType metric, - T metric_arg = 0.0) + [[deprecated("Use cuVS instead")]] index( + raft::resources const& res, + raft::device_matrix_view dataset_view, + std::optional> norms_view, + raft::distance::DistanceType metric, + T metric_arg = 0.0) : ann::index(), metric_(metric), dataset_(make_device_matrix(res, 0, 0)), @@ -131,10 +134,11 @@ struct index : ann::index { } template - index(raft::resources const& res, - index_params const& params, - mdspan, row_major, data_accessor> dataset, - std::optional>&& norms = std::nullopt) + [[deprecated("Use cuVS instead")]] index( + raft::resources const& res, + index_params const& params, + mdspan, row_major, data_accessor> dataset, + std::optional>&& norms = std::nullopt) : ann::index(), metric_(params.metric), dataset_(make_device_matrix(res, 0, 0)), diff --git a/cpp/include/raft/neighbors/cagra_types.hpp b/cpp/include/raft/neighbors/cagra_types.hpp index 97c9c0d098..bc7c380db1 100644 --- a/cpp/include/raft/neighbors/cagra_types.hpp +++ b/cpp/include/raft/neighbors/cagra_types.hpp @@ -201,8 +201,9 @@ struct index : ann::index { ~index() = default; /** Construct an empty index. */ - index(raft::resources const& res, - raft::distance::DistanceType metric = raft::distance::DistanceType::L2Expanded) + [[deprecated("Use cuVS instead")]] index( + raft::resources const& res, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Expanded) : ann::index(), metric_(metric), graph_(make_device_matrix(res, 0, 0)), @@ -265,10 +266,11 @@ struct index : ann::index { * */ template - index(raft::resources const& res, - raft::distance::DistanceType metric, - mdspan, row_major, data_accessor> dataset, - mdspan, row_major, graph_accessor> knn_graph) + [[deprecated("Use cuVS instead")]] index( + raft::resources const& res, + raft::distance::DistanceType metric, + mdspan, row_major, data_accessor> dataset, + mdspan, row_major, graph_accessor> knn_graph) : ann::index(), metric_(metric), graph_(make_device_matrix(res, 0, 0)), diff --git a/cpp/include/raft/neighbors/hnsw_types.hpp b/cpp/include/raft/neighbors/hnsw_types.hpp index f90de6f01b..f78571f491 100644 --- a/cpp/include/raft/neighbors/hnsw_types.hpp +++ b/cpp/include/raft/neighbors/hnsw_types.hpp @@ -38,7 +38,6 @@ struct search_params : ann::search_params { int num_threads = 0; // number of host threads to use for concurrent searches. Value of 0 // automatically maximizes parallelism }; - template struct index : ann::index { public: @@ -51,7 +50,10 @@ struct index : ann::index { * @param[in] dim dimensions of the training dataset * @param[in] metric distance metric to search. Supported metrics ("L2Expanded", "InnerProduct") */ - index(int dim, raft::distance::DistanceType metric) : dim_{dim}, metric_{metric} {} + [[deprecated("Use cuVS instead")]] index(int dim, raft::distance::DistanceType metric) + : dim_{dim}, metric_{metric} + { + } /** @brief Get underlying index diff --git a/cpp/include/raft/neighbors/ivf_flat_types.hpp b/cpp/include/raft/neighbors/ivf_flat_types.hpp index 7605bd82a3..2cafceb512 100644 --- a/cpp/include/raft/neighbors/ivf_flat_types.hpp +++ b/cpp/include/raft/neighbors/ivf_flat_types.hpp @@ -261,12 +261,12 @@ struct index : ann::index { ~index() = default; /** Construct an empty index. It needs to be trained and then populated. */ - index(raft::resources const& res, - raft::distance::DistanceType metric, - uint32_t n_lists, - bool adaptive_centers, - bool conservative_memory_allocation, - uint32_t dim) + [[deprecated("Use cuVS instead")]] index(raft::resources const& res, + raft::distance::DistanceType metric, + uint32_t n_lists, + bool adaptive_centers, + bool conservative_memory_allocation, + uint32_t dim) : ann::index(), veclen_(calculate_veclen(dim)), metric_(metric), @@ -285,7 +285,9 @@ struct index : ann::index { } /** Construct an empty index. It needs to be trained and then populated. */ - index(raft::resources const& res, const index_params& params, uint32_t dim) + [[deprecated("Use cuVS instead")]] index(raft::resources const& res, + const index_params& params, + uint32_t dim) : index(res, params.metric, params.n_lists, diff --git a/cpp/include/raft/neighbors/ivf_pq_types.hpp b/cpp/include/raft/neighbors/ivf_pq_types.hpp index 3ee350c6fb..d5906d621c 100644 --- a/cpp/include/raft/neighbors/ivf_pq_types.hpp +++ b/cpp/include/raft/neighbors/ivf_pq_types.hpp @@ -361,14 +361,14 @@ struct index : ann::index { ~index() = default; /** Construct an empty index. It needs to be trained and then populated. */ - index(raft::resources const& handle, - raft::distance::DistanceType metric, - codebook_gen codebook_kind, - uint32_t n_lists, - uint32_t dim, - uint32_t pq_bits = 8, - uint32_t pq_dim = 0, - bool conservative_memory_allocation = false) + [[deprecated("Use cuVS instead")]] index(raft::resources const& handle, + raft::distance::DistanceType metric, + codebook_gen codebook_kind, + uint32_t n_lists, + uint32_t dim, + uint32_t pq_bits = 8, + uint32_t pq_dim = 0, + bool conservative_memory_allocation = false) : ann::index(), metric_(metric), codebook_kind_(codebook_kind), @@ -391,7 +391,9 @@ struct index : ann::index { } /** Construct an empty index. It needs to be trained and then populated. */ - index(raft::resources const& handle, const index_params& params, uint32_t dim) + [[deprecated("Use cuVS instead")]] index(raft::resources const& handle, + const index_params& params, + uint32_t dim) : index(handle, params.metric, params.codebook_kind, diff --git a/cpp/include/raft/neighbors/nn_descent_types.hpp b/cpp/include/raft/neighbors/nn_descent_types.hpp index eb01a423be..9decf47f39 100644 --- a/cpp/include/raft/neighbors/nn_descent_types.hpp +++ b/cpp/include/raft/neighbors/nn_descent_types.hpp @@ -101,7 +101,10 @@ struct index : ann::index { * @param n_cols number of cols in knn-graph * @param return_distances whether to allocate and get distances information */ - index(raft::resources const& res, int64_t n_rows, int64_t n_cols, bool return_distances = false) + [[deprecated("Use cuVS instead")]] index(raft::resources const& res, + int64_t n_rows, + int64_t n_cols, + bool return_distances = false) : ann::index(), res_{res}, metric_{raft::distance::DistanceType::L2Expanded}, @@ -128,11 +131,12 @@ struct index : ann::index { * storing knn-graph distances * @param return_distances whether to allocate and get distances information */ - index(raft::resources const& res, - raft::host_matrix_view graph_view, - std::optional> distances_view = - std::nullopt, - bool return_distances = false) + [[deprecated("Use cuVS instead")]] index( + raft::resources const& res, + raft::host_matrix_view graph_view, + std::optional> distances_view = + std::nullopt, + bool return_distances = false) : ann::index(), res_{res}, metric_{raft::distance::DistanceType::L2Expanded}, diff --git a/cpp/include/raft/neighbors/refine-ext.cuh b/cpp/include/raft/neighbors/refine-ext.cuh index 7948a0e4f2..216e1b9ab5 100644 --- a/cpp/include/raft/neighbors/refine-ext.cuh +++ b/cpp/include/raft/neighbors/refine-ext.cuh @@ -29,24 +29,24 @@ namespace raft::neighbors { template -void refine(raft::resources const& handle, - raft::device_matrix_view dataset, - raft::device_matrix_view queries, - raft::device_matrix_view neighbor_candidates, - raft::device_matrix_view indices, - raft::device_matrix_view distances, - raft::distance::DistanceType metric = distance::DistanceType::L2Unexpanded) - RAFT_EXPLICIT; +[[deprecated("Use cuVS instead")]] void refine( + raft::resources const& handle, + raft::device_matrix_view dataset, + raft::device_matrix_view queries, + raft::device_matrix_view neighbor_candidates, + raft::device_matrix_view indices, + raft::device_matrix_view distances, + raft::distance::DistanceType metric = distance::DistanceType::L2Unexpanded) RAFT_EXPLICIT; template -void refine(raft::resources const& handle, - raft::host_matrix_view dataset, - raft::host_matrix_view queries, - raft::host_matrix_view neighbor_candidates, - raft::host_matrix_view indices, - raft::host_matrix_view distances, - raft::distance::DistanceType metric = distance::DistanceType::L2Unexpanded) - RAFT_EXPLICIT; +[[deprecated("Use cuVS instead")]] void refine( + raft::resources const& handle, + raft::host_matrix_view dataset, + raft::host_matrix_view queries, + raft::host_matrix_view neighbor_candidates, + raft::host_matrix_view indices, + raft::host_matrix_view distances, + raft::distance::DistanceType metric = distance::DistanceType::L2Unexpanded) RAFT_EXPLICIT; } // namespace raft::neighbors diff --git a/docs/source/conf.py b/docs/source/conf.py index 8b2040baa2..7a287b689f 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -71,7 +71,7 @@ .. attention:: - The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called `cuVS `_. We will continue to support the vector search algorithms in RAFT during this move, but will no longer update them after the RAPIDS 24.06 (June) release. We plan to complete the migration by RAPIDS 24.08 (August) release. + The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called `cuVS `_. We will continue to support the vector search algorithms in RAFT during this move, but will no longer update them after the RAPIDS 24.06 (June) release. We plan to complete the migration by RAPIDS 24.10 (October) release and they will be removed from RAFT altogether in the 24.12 (December) release. """ diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index fc11a56ac8..7bac2047fc 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -2,6 +2,10 @@ This project provides a benchmark program for various ANN search implementations. It's especially suitable for comparing GPU implementations as well as comparing GPU against CPU. +> [!IMPORTANT] +> The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called [cuVS](https://github.com/rapidsai/cuvs). As a result, `raft-ann-bench` is being migrated to `cuvs-bench` and will be removed from RAFT altogether in the 24.12 (December) release. + + ## Table of Contents - [Installing the benchmarks](#installing-the-benchmarks) From 94cc2c23bba3ca38514a3a5c8b736a968e5aa48e Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Fri, 4 Oct 2024 10:23:58 -0400 Subject: [PATCH 20/28] Prune workflows based on changed files (#2466) Contributes to https://github.com/rapidsai/build-planning/issues/94 Authors: - Kyle Edwards (https://github.com/KyleFromNVIDIA) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/raft/pull/2466 --- .github/workflows/pr.yaml | 49 +++++++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 844f9f9441..fe8e730921 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -12,6 +12,7 @@ concurrency: jobs: pr-builder: needs: + - changed-files - checks - conda-cpp-build - conda-cpp-tests @@ -26,6 +27,42 @@ jobs: - devcontainer secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12 + if: always() + with: + needs: ${{ toJSON(needs) }} + changed-files: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-24.12 + with: + files_yaml: | + test_cpp: + - '**' + - '!.devcontainer/**' + - '!.pre-commit-config.yaml' + - '!CONTRIBUTING.md' + - '!README.md' + - '!docs/**' + - '!img/**' + - '!notebooks/**' + - '!python/**' + - '!thirdparty/LICENSES/**' + test_notebooks: + - '**' + - '!.devcontainer/**' + - '!.pre-commit-config.yaml' + - '!CONTRIBUTING.md' + - '!README.md' + - '!thirdparty/LICENSES/**' + test_python: + - '**' + - '!.devcontainer/**' + - '!.pre-commit-config.yaml' + - '!CONTRIBUTING.md' + - '!README.md' + - '!docs/**' + - '!img/**' + - '!notebooks/**' + - '!thirdparty/LICENSES/**' checks: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 @@ -39,9 +76,10 @@ jobs: build_type: pull-request node_type: cpu16 conda-cpp-tests: - needs: conda-cpp-build + needs: [conda-cpp-build, changed-files] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request conda-cpp-checks: @@ -59,9 +97,10 @@ jobs: with: build_type: pull-request conda-python-tests: - needs: conda-python-build + needs: [conda-python-build, changed-files] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request docs-build: @@ -82,9 +121,10 @@ jobs: build_type: pull-request script: ci/build_wheel_pylibraft.sh wheel-tests-pylibraft: - needs: wheel-build-pylibraft + needs: [wheel-build-pylibraft, changed-files] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_pylibraft.sh @@ -96,9 +136,10 @@ jobs: build_type: pull-request script: "ci/build_wheel_raft_dask.sh" wheel-tests-raft-dask: - needs: wheel-build-raft-dask + needs: [wheel-build-raft-dask, changed-files] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_raft_dask.sh From b7d0e986d843b28e7e8be4b4b99fca02dfa62f91 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Mon, 7 Oct 2024 22:04:26 -0400 Subject: [PATCH 21/28] Update all rmm imports to use pylibrmm/librmm (#2451) This PR updates all the RMM imports to use pylibrmm/librmm now that `rmm._lib` is deprecated . It should be merged after [rmm/1676](https://github.com/rapidsai/rmm/pull/1676). Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/raft/pull/2451 --- python/pylibraft/pylibraft/common/handle.pxd | 6 +++--- python/pylibraft/pylibraft/common/handle.pyx | 7 +++++-- python/pylibraft/pylibraft/common/interruptible.pxd | 4 ++-- python/pylibraft/pylibraft/common/interruptible.pyx | 4 ++-- python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx | 6 ++---- .../pylibraft/pylibraft/neighbors/cagra/cpp/c_cagra.pxd | 2 +- python/pylibraft/pylibraft/neighbors/cpp/brute_force.pxd | 2 +- python/pylibraft/pylibraft/neighbors/cpp/rbc.pxd | 2 +- .../pylibraft/neighbors/ivf_flat/cpp/c_ivf_flat.pxd | 4 ++-- .../pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx | 8 +++----- .../pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd | 2 +- python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx | 6 ++---- 12 files changed, 25 insertions(+), 28 deletions(-) diff --git a/python/pylibraft/pylibraft/common/handle.pxd b/python/pylibraft/pylibraft/common/handle.pxd index c090663547..78a07ac8bf 100644 --- a/python/pylibraft/pylibraft/common/handle.pxd +++ b/python/pylibraft/pylibraft/common/handle.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,8 +22,8 @@ from libcpp.memory cimport shared_ptr, unique_ptr -from rmm._lib.cuda_stream_pool cimport cuda_stream_pool -from rmm._lib.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.cuda_stream_pool cimport cuda_stream_pool +from rmm.librmm.cuda_stream_view cimport cuda_stream_view # Keeping `handle_t` around for backwards compatibility at the diff --git a/python/pylibraft/pylibraft/common/handle.pyx b/python/pylibraft/pylibraft/common/handle.pyx index 7e3dc289e0..d256e671bf 100644 --- a/python/pylibraft/pylibraft/common/handle.pyx +++ b/python/pylibraft/pylibraft/common/handle.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,7 +24,10 @@ import functools from cuda.ccudart cimport cudaStream_t from libc.stdint cimport uintptr_t -from rmm._lib.cuda_stream_view cimport cuda_stream_per_thread, cuda_stream_view +from rmm.librmm.cuda_stream_view cimport ( + cuda_stream_per_thread, + cuda_stream_view, +) from .cuda cimport Stream diff --git a/python/pylibraft/pylibraft/common/interruptible.pxd b/python/pylibraft/pylibraft/common/interruptible.pxd index aaccf8aeab..27259571ea 100644 --- a/python/pylibraft/pylibraft/common/interruptible.pxd +++ b/python/pylibraft/pylibraft/common/interruptible.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ from libcpp.memory cimport shared_ptr -from rmm._lib.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view cdef extern from "raft/core/interruptible.hpp" namespace "raft" nogil: diff --git a/python/pylibraft/pylibraft/common/interruptible.pyx b/python/pylibraft/pylibraft/common/interruptible.pyx index bb5415428f..c489f2ee20 100644 --- a/python/pylibraft/pylibraft/common/interruptible.pyx +++ b/python/pylibraft/pylibraft/common/interruptible.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ import signal from cuda.ccudart cimport cudaStream_t from cython.operator cimport dereference -from rmm._lib.cuda_stream_view cimport cuda_stream_view +from rmm.librmm.cuda_stream_view cimport cuda_stream_view from .cuda cimport Stream diff --git a/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx b/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx index 0e488a51ca..9b376f5f0a 100644 --- a/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx +++ b/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx @@ -52,10 +52,8 @@ from pylibraft.common.handle cimport device_resources from pylibraft.common.handle import auto_sync_handle from pylibraft.common.input_validation import is_c_contiguous -from rmm._lib.memory_resource cimport ( - DeviceMemoryResource, - device_memory_resource, -) +from rmm.librmm.memory_resource cimport device_memory_resource +from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource cimport pylibraft.neighbors.cagra.cpp.c_cagra as c_cagra from pylibraft.common.optional cimport make_optional, optional diff --git a/python/pylibraft/pylibraft/neighbors/cagra/cpp/c_cagra.pxd b/python/pylibraft/pylibraft/neighbors/cagra/cpp/c_cagra.pxd index 1dffd40186..75ace7f1a8 100644 --- a/python/pylibraft/pylibraft/neighbors/cagra/cpp/c_cagra.pxd +++ b/python/pylibraft/pylibraft/neighbors/cagra/cpp/c_cagra.pxd @@ -27,7 +27,7 @@ from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uint64_t from libcpp cimport bool, nullptr from libcpp.string cimport string -from rmm._lib.memory_resource cimport device_memory_resource +from rmm.librmm.memory_resource cimport device_memory_resource from pylibraft.common.cpp.mdspan cimport ( device_matrix_view, diff --git a/python/pylibraft/pylibraft/neighbors/cpp/brute_force.pxd b/python/pylibraft/pylibraft/neighbors/cpp/brute_force.pxd index 5f6a83a9dc..f513517868 100644 --- a/python/pylibraft/pylibraft/neighbors/cpp/brute_force.pxd +++ b/python/pylibraft/pylibraft/neighbors/cpp/brute_force.pxd @@ -28,7 +28,7 @@ from libcpp cimport bool, nullptr from libcpp.string cimport string from libcpp.vector cimport vector -from rmm._lib.memory_resource cimport device_memory_resource +from rmm.librmm.memory_resource cimport device_memory_resource from pylibraft.common.cpp.mdspan cimport ( device_matrix_view, diff --git a/python/pylibraft/pylibraft/neighbors/cpp/rbc.pxd b/python/pylibraft/pylibraft/neighbors/cpp/rbc.pxd index 531c0dc2c1..d544797119 100644 --- a/python/pylibraft/pylibraft/neighbors/cpp/rbc.pxd +++ b/python/pylibraft/pylibraft/neighbors/cpp/rbc.pxd @@ -28,7 +28,7 @@ from libcpp cimport bool, nullptr from libcpp.string cimport string from libcpp.vector cimport vector -from rmm._lib.memory_resource cimport device_memory_resource +from rmm.librmm.memory_resource cimport device_memory_resource from pylibraft.common.cpp.mdspan cimport ( device_matrix_view, diff --git a/python/pylibraft/pylibraft/neighbors/ivf_flat/cpp/c_ivf_flat.pxd b/python/pylibraft/pylibraft/neighbors/ivf_flat/cpp/c_ivf_flat.pxd index a281d33310..22b08b3f19 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_flat/cpp/c_ivf_flat.pxd +++ b/python/pylibraft/pylibraft/neighbors/ivf_flat/cpp/c_ivf_flat.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -27,7 +27,7 @@ from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uintptr_t from libcpp cimport bool, nullptr from libcpp.string cimport string -from rmm._lib.memory_resource cimport device_memory_resource +from rmm.librmm.memory_resource cimport device_memory_resource from pylibraft.common.cpp.mdspan cimport ( device_matrix_view, diff --git a/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx b/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx index d8fbdc74da..6826b2bc59 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -51,10 +51,8 @@ from pylibraft.common.handle cimport device_resources from pylibraft.common.handle import auto_sync_handle from pylibraft.common.input_validation import is_c_contiguous -from rmm._lib.memory_resource cimport ( - DeviceMemoryResource, - device_memory_resource, -) +from rmm.librmm.memory_resource cimport device_memory_resource +from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource cimport pylibraft.neighbors.ivf_flat.cpp.c_ivf_flat as c_ivf_flat from pylibraft.common.cpp.optional cimport optional diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd index 895abbadca..18319bf452 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd @@ -27,7 +27,7 @@ from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uintptr_t from libcpp cimport bool, nullptr from libcpp.string cimport string -from rmm._lib.memory_resource cimport device_memory_resource +from rmm.librmm.memory_resource cimport device_memory_resource from pylibraft.common.cpp.mdspan cimport ( device_matrix_view, diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx index 5b89f0d9a5..f467957fd6 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx @@ -44,10 +44,8 @@ from pylibraft.common.handle cimport device_resources from pylibraft.common.handle import auto_sync_handle from pylibraft.common.input_validation import is_c_contiguous -from rmm._lib.memory_resource cimport ( - DeviceMemoryResource, - device_memory_resource, -) +from rmm.librmm.memory_resource cimport device_memory_resource +from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource cimport pylibraft.neighbors.ivf_flat.cpp.c_ivf_flat as c_ivf_flat cimport pylibraft.neighbors.ivf_pq.cpp.c_ivf_pq as c_ivf_pq From ec12aba62032e2075ba10bdd40cf863937eca7fd Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Wed, 9 Oct 2024 09:39:25 -0400 Subject: [PATCH 22/28] Update Changelog [skip ci] --- CHANGELOG.md | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0685145dca..9caa5ef571 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,48 @@ +# raft 24.10.00 (9 Oct 2024) + +## 🚨 Breaking Changes + +- [Feat] add `repeat`, `sparsity`, `eval_n_elements` APIs to `bitset` ([#2439](https://github.com/rapidsai/raft/pull/2439)) [@rhdong](https://github.com/rhdong) + +## 🐛 Bug Fixes + +- Disable NN Descent Batch tests temporarily ([#2453](https://github.com/rapidsai/raft/pull/2453)) [@divyegala](https://github.com/divyegala) +- Fix sed syntax in `update-version.sh` ([#2441](https://github.com/rapidsai/raft/pull/2441)) [@raydouglass](https://github.com/raydouglass) +- Use runtime check of cudart version for eig ([#2430](https://github.com/rapidsai/raft/pull/2430)) [@lowener](https://github.com/lowener) +- [BUG] Fix bitset function visibility ([#2429](https://github.com/rapidsai/raft/pull/2429)) [@lowener](https://github.com/lowener) +- Exclude any kernel symbol that uses cutlass ([#2425](https://github.com/rapidsai/raft/pull/2425)) [@robertmaynard](https://github.com/robertmaynard) + +## 🚀 New Features + +- [Feat] add `repeat`, `sparsity`, `eval_n_elements` APIs to `bitset` ([#2439](https://github.com/rapidsai/raft/pull/2439)) [@rhdong](https://github.com/rhdong) +- [Opt] Enforce the UT Coverity and add benchmark for `transpose` ([#2438](https://github.com/rapidsai/raft/pull/2438)) [@rhdong](https://github.com/rhdong) +- [FEA] Support for half-float mixed precise in brute-force ([#2382](https://github.com/rapidsai/raft/pull/2382)) [@rhdong](https://github.com/rhdong) + +## 🛠️ Improvements + +- bump NCCL floor to 2.19 ([#2458](https://github.com/rapidsai/raft/pull/2458)) [@jameslamb](https://github.com/jameslamb) +- Deprecating vector search APIs and updating README accordingly ([#2448](https://github.com/rapidsai/raft/pull/2448)) [@cjnolet](https://github.com/cjnolet) +- Update update-version.sh to use packaging lib ([#2447](https://github.com/rapidsai/raft/pull/2447)) [@AyodeAwe](https://github.com/AyodeAwe) +- Switch traceback to `native` ([#2446](https://github.com/rapidsai/raft/pull/2446)) [@galipremsagar](https://github.com/galipremsagar) +- bump NCCL floor to 2.18.1.1 ([#2443](https://github.com/rapidsai/raft/pull/2443)) [@jameslamb](https://github.com/jameslamb) +- Add missing `cuda_suffixed: true` ([#2440](https://github.com/rapidsai/raft/pull/2440)) [@trxcllnt](https://github.com/trxcllnt) +- Use CI workflow branch 'branch-24.10' again ([#2437](https://github.com/rapidsai/raft/pull/2437)) [@jameslamb](https://github.com/jameslamb) +- Update to flake8 7.1.1. ([#2435](https://github.com/rapidsai/raft/pull/2435)) [@bdice](https://github.com/bdice) +- Update fmt (to 11.0.2) and spdlog (to 1.14.1). ([#2433](https://github.com/rapidsai/raft/pull/2433)) [@jameslamb](https://github.com/jameslamb) +- Allow coo_sort to work on int64_t indices ([#2432](https://github.com/rapidsai/raft/pull/2432)) [@benfred](https://github.com/benfred) +- Adding NCCL clique to the RAFT handle ([#2431](https://github.com/rapidsai/raft/pull/2431)) [@viclafargue](https://github.com/viclafargue) +- Add support for Python 3.12 ([#2428](https://github.com/rapidsai/raft/pull/2428)) [@jameslamb](https://github.com/jameslamb) +- Update rapidsai/pre-commit-hooks ([#2420](https://github.com/rapidsai/raft/pull/2420)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Drop Python 3.9 support ([#2417](https://github.com/rapidsai/raft/pull/2417)) [@jameslamb](https://github.com/jameslamb) +- Use CUDA math wheels ([#2415](https://github.com/rapidsai/raft/pull/2415)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Remove NumPy <2 pin ([#2414](https://github.com/rapidsai/raft/pull/2414)) [@seberg](https://github.com/seberg) +- Update pre-commit hooks ([#2409](https://github.com/rapidsai/raft/pull/2409)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Improve update-version.sh ([#2408](https://github.com/rapidsai/raft/pull/2408)) [@bdice](https://github.com/bdice) +- Use tool.scikit-build.cmake.version, set scikit-build-core minimum-version ([#2406](https://github.com/rapidsai/raft/pull/2406)) [@jameslamb](https://github.com/jameslamb) +- [FEA] Batching NN Descent ([#2403](https://github.com/rapidsai/raft/pull/2403)) [@jinsolp](https://github.com/jinsolp) +- Update pip devcontainers to UCX v1.17.0 ([#2401](https://github.com/rapidsai/raft/pull/2401)) [@jameslamb](https://github.com/jameslamb) +- Merge branch-24.08 into branch-24.10 ([#2397](https://github.com/rapidsai/raft/pull/2397)) [@jameslamb](https://github.com/jameslamb) + # raft 24.08.00 (7 Aug 2024) ## 🚨 Breaking Changes From 907edb8261a94a06f8857d0a10f2ec9abcd1ae42 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 11 Oct 2024 08:53:52 -0500 Subject: [PATCH 23/28] make package installations in CI stricter (#2467) Contributes to https://github.com/rapidsai/build-planning/issues/106 Proposes specifying the RAPIDS version in `conda install` calls in CI that install CI artifacts, to reduce the risk of CI jobs picking up artifacts from other releases. Also proposes combining together successive `pip install` calls. `pip install AB` is safer than `pip install A; pip install B` because `pip` doesn't take the current set of installed packages into consideration when it installs new packages. Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) URL: https://github.com/rapidsai/raft/pull/2467 --- ci/build_docs.sh | 16 ++++++++-------- ci/test_cpp.sh | 6 +++++- ci/test_python.sh | 7 ++++++- ci/test_wheel_raft_dask.sh | 5 +++-- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index a2447f5f06..aff7674892 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -6,6 +6,9 @@ set -euo pipefail rapids-logger "Create test conda environment" . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION="$(rapids-version)" +export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" + rapids-dependency-file-generator \ --output conda \ --file-key docs \ @@ -23,14 +26,11 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ - libraft \ - libraft-headers \ - pylibraft \ - raft-dask + "libraft=${RAPIDS_VERSION}" \ + "libraft-headers=${RAPIDS_VERSION}" \ + "pylibraft=${RAPIDS_VERSION}" \ + "raft-dask=${RAPIDS_VERSION}" -export RAPIDS_VERSION="$(rapids-version)" -export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" -export RAPIDS_VERSION_NUMBER="$RAPIDS_VERSION_MAJOR_MINOR" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build CPP docs" @@ -45,4 +45,4 @@ mkdir -p "${RAPIDS_DOCS_DIR}/raft/"html mv _html/* "${RAPIDS_DOCS_DIR}/raft/html" popd -rapids-upload-docs +RAPIDS_VERSION_NUMBER="${RAPIDS_VERSION_MAJOR_MINOR}" rapids-upload-docs diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 05323e4f5d..9d0edc6b21 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -8,6 +8,8 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION="$(rapids-version)" + rapids-logger "Generate C++ testing dependencies" rapids-dependency-file-generator \ --output conda \ @@ -29,7 +31,9 @@ rapids-print-env rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ - libraft-headers libraft libraft-tests + "libraft-headers=${RAPIDS_VERSION}" \ + "libraft=${RAPIDS_VERSION}" \ + "libraft-tests=${RAPIDS_VERSION}" rapids-logger "Check GPU usage" nvidia-smi diff --git a/ci/test_python.sh b/ci/test_python.sh index 01e5ac9456..af93d2e04b 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -8,6 +8,8 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION="$(rapids-version)" + rapids-logger "Generate Python testing dependencies" rapids-dependency-file-generator \ --output conda \ @@ -34,7 +36,10 @@ rapids-print-env rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ - libraft libraft-headers pylibraft raft-dask + "libraft=${RAPIDS_VERSION}" \ + "libraft-headers=${RAPIDS_VERSION}" \ + "pylibraft=${RAPIDS_VERSION}" \ + "raft-dask=${RAPIDS_VERSION}" rapids-logger "Check GPU usage" nvidia-smi diff --git a/ci/test_wheel_raft_dask.sh b/ci/test_wheel_raft_dask.sh index 9b1187592d..a778a3ec51 100755 --- a/ci/test_wheel_raft_dask.sh +++ b/ci/test_wheel_raft_dask.sh @@ -9,10 +9,11 @@ RAPIDS_PY_WHEEL_NAME="raft_dask_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels # Download the pylibraft built in the previous step RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-pylibraft-dep -python -m pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl # echo to expand wildcard before adding `[extra]` requires for pip -python -m pip install -v "$(echo ./dist/raft_dask_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" +python -m pip install -v \ + ./local-pylibraft-dep/pylibraft*.whl \ + "$(echo ./dist/raft_dask_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" test_dir="python/raft-dask/raft_dask/test" From 714e07b3124e41180a88041402a341ccab519f10 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 21 Oct 2024 14:24:25 -0500 Subject: [PATCH 24/28] Use Python for computation. (#2474) --- build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.sh b/build.sh index a77dd188f4..bd60914517 100755 --- a/build.sh +++ b/build.sh @@ -463,14 +463,14 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has if [[ ${CACHE_TOOL} == "sccache" && -x "$(command -v sccache)" ]]; then COMPILE_REQUESTS=$(sccache -s | grep "Compile requests \+ [0-9]\+$" | awk '{ print $NF }') CACHE_HITS=$(sccache -s | grep "Cache hits \+ [0-9]\+$" | awk '{ print $NF }') - HIT_RATE=$(echo - | awk "{printf \"%.2f\n\", $CACHE_HITS / $COMPILE_REQUESTS * 100}") + HIT_RATE=$(python3 -c "print(f'{${CACHE_HITS} / ${COMPILE_REQUESTS}:.2f}' if ${COMPILE_REQUESTS} else 'nan')") MSG="${MSG}
cache hit rate ${HIT_RATE} %" elif [[ ${CACHE_TOOL} == "ccache" && -x "$(command -v ccache)" ]]; then CACHE_STATS_LINE=$(ccache -s | grep "Hits: \+ [0-9]\+ / [0-9]\+" | tail -n1) if [[ ! -z "$CACHE_STATS_LINE" ]]; then CACHE_HITS=$(echo "$CACHE_STATS_LINE" - | awk '{ print $2 }') COMPILE_REQUESTS=$(echo "$CACHE_STATS_LINE" - | awk '{ print $4 }') - HIT_RATE=$(echo - | awk "{printf \"%.2f\n\", $CACHE_HITS / $COMPILE_REQUESTS * 100}") + HIT_RATE=$(python3 -c "print(f'{${CACHE_HITS} / ${COMPILE_REQUESTS}:.2f}' if ${COMPILE_REQUESTS} else 'nan')") MSG="${MSG}
cache hit rate ${HIT_RATE} %" fi fi From 8dc8245be55a757c4ef04565562c134c6455063d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 22 Oct 2024 11:02:27 -0500 Subject: [PATCH 25/28] Use environment variables in cache hit rate computation. (#2475) Follow-up PR to address feedback: https://github.com/rapidsai/raft/pull/2474#discussion_r1809398110 Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) URL: https://github.com/rapidsai/raft/pull/2475 --- build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.sh b/build.sh index bd60914517..feb2d7256e 100755 --- a/build.sh +++ b/build.sh @@ -463,14 +463,14 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has if [[ ${CACHE_TOOL} == "sccache" && -x "$(command -v sccache)" ]]; then COMPILE_REQUESTS=$(sccache -s | grep "Compile requests \+ [0-9]\+$" | awk '{ print $NF }') CACHE_HITS=$(sccache -s | grep "Cache hits \+ [0-9]\+$" | awk '{ print $NF }') - HIT_RATE=$(python3 -c "print(f'{${CACHE_HITS} / ${COMPILE_REQUESTS}:.2f}' if ${COMPILE_REQUESTS} else 'nan')") + HIT_RATE=$(COMPILE_REQUESTS="${COMPILE_REQUESTS}" CACHE_HITS="${CACHE_HITS}" python3 -c "import os; print(f'{int(os.getenv(\"CACHE_HITS\")) / int(os.getenv(\"COMPILE_REQUESTS\")):.2f}' if int(os.getenv(\"COMPILE_REQUESTS\")) else 'nan')") MSG="${MSG}
cache hit rate ${HIT_RATE} %" elif [[ ${CACHE_TOOL} == "ccache" && -x "$(command -v ccache)" ]]; then CACHE_STATS_LINE=$(ccache -s | grep "Hits: \+ [0-9]\+ / [0-9]\+" | tail -n1) if [[ ! -z "$CACHE_STATS_LINE" ]]; then CACHE_HITS=$(echo "$CACHE_STATS_LINE" - | awk '{ print $2 }') COMPILE_REQUESTS=$(echo "$CACHE_STATS_LINE" - | awk '{ print $4 }') - HIT_RATE=$(python3 -c "print(f'{${CACHE_HITS} / ${COMPILE_REQUESTS}:.2f}' if ${COMPILE_REQUESTS} else 'nan')") + HIT_RATE=$(COMPILE_REQUESTS="${COMPILE_REQUESTS}" CACHE_HITS="${CACHE_HITS}" python3 -c "import os; print(f'{int(os.getenv(\"CACHE_HITS\")) / int(os.getenv(\"COMPILE_REQUESTS\")):.2f}' if int(os.getenv(\"COMPILE_REQUESTS\")) else 'nan')") MSG="${MSG}
cache hit rate ${HIT_RATE} %" fi fi From 673a5bbd5c968d7768e1677ac8e7f4dc40dfaf8b Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 24 Oct 2024 17:25:25 -0400 Subject: [PATCH 26/28] Allow compilation with CUDA 12.6.1 (#2469) The 12.6.1 CUDA compiler has issues with enable_if inside the template arguments of some kernels. We can simplify kernel logic and remove the usage of enable_if. Authors: - Robert Maynard (https://github.com/robertmaynard) - Paul Taylor (https://github.com/trxcllnt) Approvers: - Dante Gama Dessavre (https://github.com/dantegd) URL: https://github.com/rapidsai/raft/pull/2469 --- .../raft/matrix/detail/columnWiseSort.cuh | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/cpp/include/raft/matrix/detail/columnWiseSort.cuh b/cpp/include/raft/matrix/detail/columnWiseSort.cuh index a02621d054..9e94724d3d 100644 --- a/cpp/include/raft/matrix/detail/columnWiseSort.cuh +++ b/cpp/include/raft/matrix/detail/columnWiseSort.cuh @@ -72,12 +72,11 @@ RAFT_KERNEL devOffsetKernel(T* in, T value, int n_times) } // block level radix sort - can only sort as much data we can fit within shared memory -template < - typename InType, - typename OutType, - int BLOCK_SIZE, - int ITEMS_PER_THREAD, - typename std::enable_if::IsValid, InType>::type* = nullptr> +template ::IsValid, bool> = true> RAFT_KERNEL __launch_bounds__(1024, 1) devKeyValSortColumnPerRow(const InType* inputKeys, InType* outputKeys, OutType* inputVals, @@ -120,12 +119,11 @@ RAFT_KERNEL __launch_bounds__(1024, 1) devKeyValSortColumnPerRow(const InType* i } } -template < - typename InType, - typename OutType, - int BLOCK_SIZE, - int ITEMS_PER_THREAD, - typename std::enable_if::IsValid), InType>::type* = nullptr> +template ::IsValid, bool> = true> RAFT_KERNEL devKeyValSortColumnPerRow(const InType* inputKeys, InType* outputKeys, OutType* inputVals, From c877dc7e570fe39f26b82230c51fc72379424f26 Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Fri, 25 Oct 2024 19:19:53 +0200 Subject: [PATCH 27/28] devcontainer: replace `VAULT_HOST` with `AWS_ROLE_ARN` (#2472) This PR is replacing the `VAULT_HOST` variable with `AWS_ROLE_ARN`. This is required to use the new token service to get AWS credentials. Authors: - Jordan Jacobelli (https://github.com/jjacobelli) Approvers: - Paul Taylor (https://github.com/trxcllnt) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/raft/pull/2472 --- .devcontainer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 594ba8c3c4..dc12ab2ade 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -33,5 +33,5 @@ ENV PYTHONDONTWRITEBYTECODE="1" ENV SCCACHE_REGION="us-east-2" ENV SCCACHE_BUCKET="rapids-sccache-devs" -ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai" +ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs" ENV HISTFILE="/home/coder/.cache/._bash_history" From 41ddbfefa5058d206cc0aeed3f9667782869a94e Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 31 Oct 2024 17:14:51 -0500 Subject: [PATCH 28/28] print sccache stats in builds (#2470) Contributes to https://github.com/rapidsai/build-planning/issues/111 Proposes some small packaging/CI changes, matching similar changes being made across RAPIDS. * printing `sccache` stats to CI logs * reducing `pip`'s verbosity in wheel building scripts * updating to the latest `rapids-dependency-file-generator` (v1.16.0) * always explicitly specifying `cpp` / `python` in calls to `rapids-upload-wheels-to-s3` ## Notes for Reviewers This originally also ran wheel builds with `--no-build-isolation`, but I reverted that based on https://github.com/rapidsai/build-planning/issues/108#issuecomment-2436764212. Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/raft/pull/2470 --- .pre-commit-config.yaml | 2 +- ci/build_cpp.sh | 4 ++++ ci/build_python.sh | 14 ++++++++++++++ ci/build_wheel.sh | 18 ++++++++++++++---- 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 458d8b1b51..5a5342a74e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -117,7 +117,7 @@ repos: cpp/cmake/modules/FindAVX[.]cmake - id: verify-alpha-spec - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.13.11 + rev: v1.16.0 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index c456bcae80..92586c7c0a 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -15,6 +15,10 @@ rapids-print-env rapids-logger "Begin cpp build" +sccache --zero-stats + RAPIDS_PACKAGE_VERSION=$(rapids-generate-version) rapids-conda-retry mambabuild conda/recipes/libraft +sccache --show-adv-stats + rapids-upload-conda-to-s3 cpp diff --git a/ci/build_python.sh b/ci/build_python.sh index 80d37b5ae3..dc303de4f5 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -22,6 +22,8 @@ git_commit=$(git rev-parse HEAD) export RAPIDS_PACKAGE_VERSION=${version} echo "${version}" > VERSION +sccache --zero-stats + # TODO: Remove `--no-test` flags once importing on a CPU # node works correctly rapids-conda-retry mambabuild \ @@ -29,12 +31,18 @@ rapids-conda-retry mambabuild \ --channel "${CPP_CHANNEL}" \ conda/recipes/pylibraft +sccache --show-adv-stats +sccache --zero-stats + rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/raft-dask +sccache --show-adv-stats +sccache --zero-stats + # Build ann-bench for each cuda and python version rapids-conda-retry mambabuild \ --no-test \ @@ -42,15 +50,21 @@ rapids-conda-retry mambabuild \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/raft-ann-bench +sccache --show-adv-stats + # Build ann-bench-cpu only in CUDA 11 jobs since it only depends on python # version RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then + sccache --zero-stats + rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/raft-ann-bench-cpu + + sccache --show-adv-stats fi rapids-upload-conda-to-s3 python diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index e7ae52f33a..326ee9a4c7 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -16,7 +16,7 @@ source rapids-date-string RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -rapids-generate-version > VERSION +rapids-generate-version > ./VERSION cd "${package_dir}" @@ -39,10 +39,20 @@ case "${RAPIDS_CUDA_VERSION}" in ;; esac -# Hardcode the output dir -python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check +sccache --zero-stats + +rapids-logger "Building '${package_name}' wheel" + +python -m pip wheel \ + -w dist \ + -v \ + --no-deps \ + --disable-pip-version-check \ + . + +sccache --show-adv-stats mkdir -p final_dist python -m auditwheel repair -w final_dist "${EXCLUDE_ARGS[@]}" dist/* -RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist +RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist