diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index d7e81bcd3a6..093063c21e7 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -52,3 +52,45 @@ jobs: branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} + wheel-build-cudf: + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@main + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: cudf + package-dir: python/cudf + skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" + wheel-publish-cudf: + needs: wheel-build-cudf + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-publish.yml@main + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: cudf + wheel-build-dask-cudf: + needs: wheel-publish-cudf + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@main + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: dask_cudf + package-dir: python/dask_cudf + wheel-publish-dask-cudf: + needs: wheel-build-dask-cudf + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-publish.yml@main + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: dask_cudf diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index fd910e68854..cb83aab31cd 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -20,6 +20,10 @@ jobs: - conda-python-other-tests - conda-java-tests - conda-notebook-tests + - wheel-build-cudf + - wheel-tests-cudf + - wheel-build-dask-cudf + - wheel-tests-dask-cudf secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@cuda-118 checks: @@ -78,3 +82,40 @@ jobs: arch: "amd64" container_image: "rapidsai/ci:latest" run_script: "ci/test_notebooks.sh" + wheel-build-cudf: + needs: checks + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-build.yml@main + with: + build_type: pull-request + package-name: cudf + package-dir: python/cudf + skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" + wheel-tests-cudf: + needs: wheel-build-cudf + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@main + with: + build_type: pull-request + package-name: cudf + test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64" + test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" + test-smoketest: "python ./ci/wheel_smoke_test_cudf.py" + wheel-build-dask-cudf: + needs: wheel-build-cudf + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-build.yml@main + with: + build_type: pull-request + package-name: dask_cudf + package-dir: python/dask_cudf + before-wheel: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf && pip install --no-deps ./local-cudf/cudf*.whl" + wheel-tests-dask-cudf: + needs: wheel-build-dask-cudf + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@main + with: + build_type: pull-request + package-name: dask_cudf + test-before: "RAPIDS_PY_WHEEL_NAME=cudf_cu11 rapids-download-wheels-from-s3 ./local-cudf-dep && pip install --no-deps ./local-cudf-dep/cudf*.whl" + test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index d29c30cdb58..2b583773e05 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -65,3 +65,24 @@ jobs: arch: "amd64" container_image: "rapidsai/ci:latest" run_script: "ci/test_notebooks.sh" + wheel-tests-cudf: + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@main + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + package-name: cudf + test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64" + test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" + wheel-tests-dask-cudf: + secrets: inherit + uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure-test.yml@main + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + package-name: dask_cudf + test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml deleted file mode 100644 index 7f1c708c9a7..00000000000 --- a/.github/workflows/wheels.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: cuDF wheels - -on: - workflow_call: - inputs: - versioneer-override: - type: string - default: '' - build-tag: - type: string - default: '' - branch: - required: true - type: string - date: - required: true - type: string - sha: - required: true - type: string - build-type: - type: string - default: nightly - -concurrency: - group: "cudf-${{ github.workflow }}-${{ github.ref }}" - cancel-in-progress: true - -jobs: - cudf-wheels: - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@main - with: - repo: rapidsai/cudf - - build-type: ${{ inputs.build-type }} - branch: ${{ inputs.branch }} - sha: ${{ inputs.sha }} - date: ${{ inputs.date }} - - package-dir: python/cudf - package-name: cudf - - python-package-versioneer-override: ${{ inputs.versioneer-override }} - python-package-build-tag: ${{ inputs.build-tag }} - - skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF" - - test-extras: test - - # Have to manually specify the cupy install location on arm. - # Have to also manually install tokenizers==0.10.2, which is the last tokenizers - # to have a binary aarch64 wheel available on PyPI - # Otherwise, the tokenizers sdist is used, which needs a Rust compiler - test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64" - - test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests" - secrets: inherit - dask_cudf-wheel: - needs: cudf-wheels - uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@main - with: - repo: rapidsai/cudf - - build-type: ${{ inputs.build-type }} - branch: ${{ inputs.branch }} - sha: ${{ inputs.sha }} - date: ${{ inputs.date }} - - package-dir: python/dask_cudf - package-name: dask_cudf - - python-package-versioneer-override: ${{ inputs.versioneer-override }} - python-package-build-tag: ${{ inputs.build-tag }} - - test-extras: test - test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests" - secrets: inherit diff --git a/ci/wheel_smoke_test_cudf.py b/ci/wheel_smoke_test_cudf.py new file mode 100644 index 00000000000..a11a97039af --- /dev/null +++ b/ci/wheel_smoke_test_cudf.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +import cudf +import pyarrow as pa + +if __name__ == '__main__': + n_legs = pa.array([2, 4, 5, 100]) + animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + names = ["n_legs", "animals"] + foo = pa.table([n_legs, animals], names=names) + df = cudf.DataFrame.from_arrow(foo) + assert df.loc[df["animals"] == "Centipede"]["n_legs"].iloc[0] == 100 + assert df.loc[df["animals"] == "Flamingo"]["n_legs"].iloc[0] == 2 diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 67a9e0c206e..943b89238e0 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -25,8 +25,14 @@ include_guard(GLOBAL) # Generate a FindArrow module for the case where we need to search for arrow within a pip install # pyarrow. function(find_libarrow_in_python_wheel PYARROW_VERSION) - string(REPLACE "." "" PYARROW_SO_VER "${PYARROW_VERSION}") - set(PYARROW_LIB libarrow.so.${PYARROW_SO_VER}) + string(REPLACE "." ";" PYARROW_VER_COMPONENTS "${PYARROW_VERSION}") + list(GET PYARROW_VER_COMPONENTS 0 PYARROW_SO_VER) + # The soname for Arrow libraries is constructed using the major version plus "00". Note that, + # although it may seem like it due to Arrow almost exclusively releasing new major versions (i.e. + # `${MINOR_VERSION}${PATCH_VERSION}` is almost always equivalent to "00"), + # the soname is not generated by concatenating the major, minor, and patch versions into a single + # version number soname, just `${MAJOR_VERSION}00` + set(PYARROW_LIB "libarrow.so.${PYARROW_SO_VER}00") find_package(Python REQUIRED) execute_process( diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 2ac7ef652e2..57de5577987 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -10,7 +10,7 @@ requires = [ "cmake>=3.23.1,!=3.25.0", "ninja", "numpy", - "pyarrow==10", + "pyarrow==10.0.1", "protoc-wheel", "versioneer", ] diff --git a/python/cudf/setup.py b/python/cudf/setup.py index df7c627c3ce..eb09b62cf17 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -19,7 +19,7 @@ "pandas>=1.0,<1.6.0dev0", "protobuf==4.21", "typing_extensions", - "pyarrow==10", + "pyarrow==10.0.1", f"rmm{cuda_suffix}", f"ptxcompiler{cuda_suffix}", f"cubinlinker{cuda_suffix}",