diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 4b13c668f618..39adc3c96f37 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -79,5 +79,6 @@ # Runtime /runtime/src/iree/ @benvanik /runtime/src/iree/hal/cts/ @ScottTodd +/runtime/src/iree/hal/drivers/cuda2/ @antiagainst /runtime/src/iree/hal/drivers/metal/ @antiagainst /runtime/src/iree/hal/drivers/vulkan/ @antiagainst @ScottTodd diff --git a/CMakeLists.txt b/CMakeLists.txt index fdcf47507dcc..a0c70f32e6bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -239,8 +239,10 @@ option(IREE_HAL_DRIVER_DEFAULTS "Sets the default value for all runtime HAL driv # not cross compiling. Note: a CUDA-compatible GPU with drivers is still # required to actually run CUDA workloads. set(IREE_HAL_DRIVER_CUDA_DEFAULT ${IREE_HAL_DRIVER_DEFAULTS}) +set(IREE_HAL_DRIVER_CUDA2_DEFAULT ${IREE_HAL_DRIVER_DEFAULTS}) if(NOT IREE_CUDA_AVAILABLE OR CMAKE_CROSSCOMPILING) set(IREE_HAL_DRIVER_CUDA_DEFAULT OFF) + set(IREE_HAL_DRIVER_CUDA2_DEFAULT OFF) endif() # Vulkan support is enabled by default if the platform might support Vulkan. @@ -258,6 +260,7 @@ if(NOT APPLE OR NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm64") endif() option(IREE_HAL_DRIVER_CUDA "Enables the 'cuda' runtime HAL driver" ${IREE_HAL_DRIVER_CUDA_DEFAULT}) +option(IREE_HAL_DRIVER_CUDA2 "Enables the 'cuda2' runtime HAL driver" ${IREE_HAL_DRIVER_CUDA2_DEFAULT}) option(IREE_HAL_DRIVER_LOCAL_SYNC "Enables the 'local-sync' runtime HAL driver" ${IREE_HAL_DRIVER_DEFAULTS}) option(IREE_HAL_DRIVER_LOCAL_TASK "Enables the 'local-task' runtime HAL driver" ${IREE_HAL_DRIVER_DEFAULTS}) option(IREE_HAL_DRIVER_VULKAN "Enables the 'vulkan' runtime HAL driver" ${IREE_HAL_DRIVER_VULKAN_DEFAULT}) @@ -313,6 +316,9 @@ message(STATUS "IREE HAL drivers:") if(IREE_HAL_DRIVER_CUDA) message(STATUS " - cuda") endif() +if(IREE_HAL_DRIVER_CUDA2) + message(STATUS " - cuda2") +endif() if(IREE_HAL_DRIVER_LOCAL_SYNC) message(STATUS " - local-sync") endif() @@ -575,22 +581,6 @@ set(IREE_PACKAGE_ROOT_PREFIX "iree") set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) -#------------------------------------------------------------------------------- -# Experimental next-generation CUDA HAL driver -# Enable with: -DIREE_EXTERNAL_HAL_DRIVERS=cuda2 -#------------------------------------------------------------------------------- - -iree_register_external_hal_driver( - NAME - cuda2 - SOURCE_DIR - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/cuda2" - DRIVER_TARGET - iree::experimental::cuda2::registration - REGISTER_FN - iree_hal_cuda2_driver_module_register -) - #------------------------------------------------------------------------------- # Experimental ROCM HAL driver # Enable with: -DIREE_EXTERNAL_HAL_DRIVERS=rocm diff --git a/experimental/cuda2/registration/CMakeLists.txt b/experimental/cuda2/registration/CMakeLists.txt deleted file mode 100644 index 49b49c7c0477..000000000000 --- a/experimental/cuda2/registration/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2023 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -iree_cc_library( - NAME - registration - HDRS - "driver_module.h" - SRCS - "driver_module.c" - DEPS - iree::base - iree::experimental::cuda2 - iree::hal - DEFINES - "IREE_HAVE_HAL_CUDA2_DRIVER_MODULE=1" - PUBLIC -) diff --git a/experimental/cuda2/tests/CMakeLists.txt b/experimental/cuda2/tests/CMakeLists.txt deleted file mode 100644 index 33551b576974..000000000000 --- a/experimental/cuda2/tests/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright 2023 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -iree_add_all_subdirs() diff --git a/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt b/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt deleted file mode 100644 index 2b1772ef538f..000000000000 --- a/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt +++ /dev/null @@ -1,169 +0,0 @@ -# Copyright 2023 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -iree_check_single_backend_test_suite( - NAME - check_cuda2_graph - SRCS - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/abs.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/add.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/batch_norm_inference.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/bitcast_convert.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/broadcast.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/broadcast_add.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/broadcast_in_dim.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/clamp.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/compare.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/complex.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/concatenate.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/constant.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/convert.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/convolution.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/cosine.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/divide.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dot.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dot_bf16.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dot_general.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dynamic_slice.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dynamic_update_slice.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/exponential.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/exponential_fp16.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/exponential_minus_one.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/fft.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/finite.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/floor.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/gather.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/iota.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/log.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/log_plus_one.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/maximum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/minimum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/multiply.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/negate.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/pad.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/philox.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/pow.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reduce.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reduce_window.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/remainder.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reshape.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reverse.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/rng_normal.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/rng_uniform.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/round.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/rsqrt.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/scatter.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/scatter_dynamic.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/select.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/sine.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/slice.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/sort.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/sqrt.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/subtract.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/tanh.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/three_fry.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/torch_index_select.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/transpose.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/while.mlir" - TARGET_BACKEND - "cuda" - DRIVER - "cuda2" - COMPILER_FLAGS - "--iree-input-type=stablehlo" - # TODO(#13984): We need memset emulation to workaround CUDA graph issues for now. - "--iree-stream-emulate-memset" - "--iree-hal-cuda-enable-legacy-sync=false" - RUNNER_ARGS - "--cuda2_use_streams=false" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-nvidia" -) - -iree_check_single_backend_test_suite( - NAME - check_cuda2_stream - SRCS - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/abs.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/add.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/batch_norm_inference.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/bitcast_convert.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/broadcast.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/broadcast_add.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/broadcast_in_dim.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/clamp.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/compare.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/complex.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/concatenate.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/constant.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/convert.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/convolution.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/cosine.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/divide.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dot.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dot_bf16.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dot_general.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dynamic_slice.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dynamic_update_slice.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/exponential.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/exponential_fp16.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/exponential_minus_one.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/fft.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/finite.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/floor.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/gather.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/iota.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/log.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/log_plus_one.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/maximum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/minimum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/multiply.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/negate.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/pad.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/philox.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/pow.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reduce.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reduce_window.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/remainder.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reshape.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reverse.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/rng_normal.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/rng_uniform.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/round.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/rsqrt.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/scatter.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/scatter_dynamic.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/select.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/sine.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/slice.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/sort.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/sqrt.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/subtract.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/tanh.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/three_fry.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/torch_index_select.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/transpose.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/while.mlir" - TARGET_BACKEND - "cuda" - DRIVER - "cuda2" - COMPILER_FLAGS - "--iree-input-type=stablehlo" - "--iree-hal-cuda-enable-legacy-sync=false" - RUNNER_ARGS - "--cuda2_use_streams=true" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-nvidia" -) diff --git a/experimental/cuda2/tests/tosa_ops/CMakeLists.txt b/experimental/cuda2/tests/tosa_ops/CMakeLists.txt deleted file mode 100644 index e82be08f1365..000000000000 --- a/experimental/cuda2/tests/tosa_ops/CMakeLists.txt +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright 2023 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -iree_check_single_backend_test_suite( - NAME - check_cuda2_graph - SRCS - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/abs.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/add.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/arithmetic_right_shift.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/bitwise_and.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/bitwise_or.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/bitwise_xor.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/ceil.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/clamp.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/clz.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/const.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/equal.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/exp.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/floor.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/fully_connected.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/gather.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/greater.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/greater_equal.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/if.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/log.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/logical_left_shift.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/logical_right_shift.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/logical_right_shift_16.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/matmul.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/max_pool.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/maximum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/minimum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/mul.mlir" - # "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/mul_shift.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/negate.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/pad.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/reciprocal.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/reduce.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/reshape.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/rsqrt.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/select.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/sigmoid.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/sub.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/table.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/tanh.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/transpose.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/while.mlir" - TARGET_BACKEND - "cuda" - DRIVER - "cuda2" - COMPILER_FLAGS - "--iree-input-type=tosa" - # TODO(#13984): We need memset emulation to workaround CUDA graph issues for now. - "--iree-stream-emulate-memset" - "--iree-hal-cuda-enable-legacy-sync=false" - RUNNER_ARGS - "--cuda2_use_streams=false" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-nvidia" -) - -iree_check_single_backend_test_suite( - NAME - check_cuda2_stream - SRCS - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/abs.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/add.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/arithmetic_right_shift.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/bitwise_and.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/bitwise_or.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/bitwise_xor.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/ceil.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/clamp.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/clz.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/const.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/equal.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/exp.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/floor.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/fully_connected.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/gather.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/greater.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/greater_equal.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/if.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/log.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/logical_left_shift.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/logical_right_shift.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/logical_right_shift_16.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/matmul.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/max_pool.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/maximum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/minimum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/mul.mlir" - # "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/mul_shift.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/negate.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/pad.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/reciprocal.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/reduce.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/reshape.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/rsqrt.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/select.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/sigmoid.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/sub.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/table.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/tanh.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/transpose.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/while.mlir" - TARGET_BACKEND - "cuda" - DRIVER - "cuda2" - COMPILER_FLAGS - "--iree-input-type=tosa" - "--iree-hal-cuda-enable-legacy-sync=false" - RUNNER_ARGS - "--cuda2_use_streams=true" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-nvidia" -) diff --git a/runtime/src/iree/hal/drivers/BUILD.bazel b/runtime/src/iree/hal/drivers/BUILD.bazel index f7b05ddf7d83..4381637a8506 100644 --- a/runtime/src/iree/hal/drivers/BUILD.bazel +++ b/runtime/src/iree/hal/drivers/BUILD.bazel @@ -56,7 +56,10 @@ iree_runtime_cc_library( "//runtime/src/iree/base", "//runtime/src/iree/hal", ] + select({ - ":cuda_enabled": ["//runtime/src/iree/hal/drivers/cuda/registration"], + ":cuda_enabled": [ + "//runtime/src/iree/hal/drivers/cuda/registration", + "//runtime/src/iree/hal/drivers/cuda2/registration", + ], "//conditions:default": [], }) + select({ diff --git a/runtime/src/iree/hal/drivers/CMakeLists.txt b/runtime/src/iree/hal/drivers/CMakeLists.txt index 6309da90bd4e..560c93c09486 100644 --- a/runtime/src/iree/hal/drivers/CMakeLists.txt +++ b/runtime/src/iree/hal/drivers/CMakeLists.txt @@ -128,6 +128,10 @@ if(IREE_HAL_DRIVER_CUDA) add_subdirectory(cuda) list(APPEND _INIT_INTERNAL_DEPS iree::hal::drivers::cuda::registration) endif() +if(IREE_HAL_DRIVER_CUDA2) + add_subdirectory(cuda2) + list(APPEND _INIT_INTERNAL_DEPS iree::hal::drivers::cuda2::registration) +endif() if(IREE_HAL_DRIVER_LOCAL_SYNC) add_subdirectory(local_sync) list(APPEND _INIT_INTERNAL_DEPS iree::hal::drivers::local_sync::registration) diff --git a/runtime/src/iree/hal/drivers/cuda2/BUILD.bazel b/runtime/src/iree/hal/drivers/cuda2/BUILD.bazel new file mode 100644 index 000000000000..e583c52cfa94 --- /dev/null +++ b/runtime/src/iree/hal/drivers/cuda2/BUILD.bazel @@ -0,0 +1,116 @@ +# Copyright 2023 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +load("//build_tools/bazel:build_defs.oss.bzl", "iree_runtime_cc_library", "iree_runtime_cc_test") + +package( + default_visibility = ["//visibility:public"], + features = ["layering_check"], + licenses = ["notice"], # Apache 2.0 +) + +iree_runtime_cc_library( + name = "cuda2", + srcs = [ + "api.h", + "cuda_allocator.c", + "cuda_allocator.h", + "cuda_buffer.c", + "cuda_buffer.h", + "cuda_device.c", + "cuda_device.h", + "cuda_driver.c", + "event_pool.c", + "event_pool.h", + "event_semaphore.c", + "event_semaphore.h", + "graph_command_buffer.c", + "graph_command_buffer.h", + "memory_pools.c", + "memory_pools.h", + "native_executable.c", + "native_executable.h", + "nccl_channel.c", + "nccl_channel.h", + "nop_executable_cache.c", + "nop_executable_cache.h", + "pending_queue_actions.c", + "pending_queue_actions.h", + "pipeline_layout.c", + "pipeline_layout.h", + "stream_command_buffer.c", + "stream_command_buffer.h", + "timepoint_pool.c", + "timepoint_pool.h", + "tracing.c", + "tracing.h", + ], + hdrs = [ + "api.h", + ], + deps = [ + ":dynamic_symbols", + "//runtime/src/iree/base", + "//runtime/src/iree/base:core_headers", + "//runtime/src/iree/base/internal", + "//runtime/src/iree/base/internal:arena", + "//runtime/src/iree/base/internal:atomic_slist", + "//runtime/src/iree/base/internal:event_pool", + "//runtime/src/iree/base/internal:synchronization", + "//runtime/src/iree/base/internal:threading", + "//runtime/src/iree/base/internal/flatcc:parsing", + "//runtime/src/iree/hal", + "//runtime/src/iree/hal/utils:collective_batch", + "//runtime/src/iree/hal/utils:deferred_command_buffer", + "//runtime/src/iree/hal/utils:file_transfer", + "//runtime/src/iree/hal/utils:memory_file", + "//runtime/src/iree/hal/utils:resource_set", + "//runtime/src/iree/hal/utils:semaphore_base", + "//runtime/src/iree/schemas:cuda_executable_def_c_fbs", + ], +) + +iree_runtime_cc_library( + name = "dynamic_symbols", + srcs = [ + "cuda_dynamic_symbols.c", + "cuda_headers.h", + "cuda_status_util.c", + "nccl_dynamic_symbols.c", + "nccl_headers.h", + "nccl_status_util.c", + ], + hdrs = [ + "cuda_dynamic_symbols.h", + "cuda_status_util.h", + "nccl_dynamic_symbols.h", + "nccl_status_util.h", + ], + textual_hdrs = [ + "cuda_dynamic_symbol_table.h", + "nccl_dynamic_symbol_table.h", + ], + deps = [ + "//runtime/src/iree/base", + "//runtime/src/iree/base/internal:dynamic_library", + "@iree_cuda//:headers", + "@nccl//:headers", + ], +) + +iree_runtime_cc_test( + name = "dynamic_symbols_test", + srcs = [ + "dynamic_symbols_test.cc", + ], + tags = ["driver=cuda2"], + deps = [ + ":dynamic_symbols", + "//runtime/src/iree/base", + "//runtime/src/iree/testing:gtest", + "//runtime/src/iree/testing:gtest_main", + ], +) diff --git a/experimental/cuda2/CMakeLists.txt b/runtime/src/iree/hal/drivers/cuda2/CMakeLists.txt similarity index 69% rename from experimental/cuda2/CMakeLists.txt rename to runtime/src/iree/hal/drivers/cuda2/CMakeLists.txt index 29e3dab2b094..96831b4b7ed3 100644 --- a/experimental/cuda2/CMakeLists.txt +++ b/runtime/src/iree/hal/drivers/cuda2/CMakeLists.txt @@ -1,12 +1,12 @@ -# Copyright 2023 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -# Set the root for package namespacing to the current directory. -set(IREE_PACKAGE_ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}") -set(IREE_PACKAGE_ROOT_PREFIX "iree/experimental/cuda2") +################################################################################ +# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from # +# runtime/src/iree/hal/drivers/cuda2/BUILD.bazel # +# # +# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary # +# CMake-only content. # +# # +# To disable autogeneration for this file entirely, delete this header. # +################################################################################ iree_add_all_subdirs() @@ -34,10 +34,10 @@ iree_cc_library( "memory_pools.h" "native_executable.c" "native_executable.h" - "nop_executable_cache.c" - "nop_executable_cache.h" "nccl_channel.c" "nccl_channel.h" + "nop_executable_cache.c" + "nop_executable_cache.h" "pending_queue_actions.c" "pending_queue_actions.h" "pipeline_layout.c" @@ -51,10 +51,13 @@ iree_cc_library( DEPS ::dynamic_symbols iree::base + iree::base::core_headers iree::base::internal iree::base::internal::arena + iree::base::internal::atomic_slist iree::base::internal::event_pool iree::base::internal::flatcc::parsing + iree::base::internal::synchronization iree::base::internal::threading iree::hal iree::hal::utils::collective_batch @@ -106,3 +109,5 @@ iree_cc_test( LABELS "driver=cuda2" ) + +### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### diff --git a/experimental/cuda2/README.md b/runtime/src/iree/hal/drivers/cuda2/README.md similarity index 98% rename from experimental/cuda2/README.md rename to runtime/src/iree/hal/drivers/cuda2/README.md index f2a497a7b0a0..b53cfd00689a 100644 --- a/experimental/cuda2/README.md +++ b/runtime/src/iree/hal/drivers/cuda2/README.md @@ -3,10 +3,10 @@ This document lists technical details regarding the CUDA implemenation of IREE's [Hardware Abstraction Layer (HAL)][iree-hal], called a CUDA HAL driver. -Note that there is an existing CUDA HAL driver under the +Note that there is another CUDA HAL driver under the [`iree/hal/drivers/cuda/`][iree-cuda] directory; what this directory holds is a rewrite for it. Once this rewrite is mature enough, it will replace the -existing one. For the rewrite rationale, goals, and plans, please see +other one. For the rewrite rationale, goals, and plans, please see [Issue #13245][iree-cuda-rewrite]. ## Synchronization diff --git a/experimental/cuda2/api.h b/runtime/src/iree/hal/drivers/cuda2/api.h similarity index 97% rename from experimental/cuda2/api.h rename to runtime/src/iree/hal/drivers/cuda2/api.h index 403b3dcf7ee6..f2193c9f5ae7 100644 --- a/experimental/cuda2/api.h +++ b/runtime/src/iree/hal/drivers/cuda2/api.h @@ -6,8 +6,8 @@ // See iree/base/api.h for documentation on the API conventions used. -#ifndef IREE_EXPERIMENTAL_CUDA2_API_H_ -#define IREE_EXPERIMENTAL_CUDA2_API_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_API_H_ +#define IREE_HAL_DRIVERS_CUDA2_API_H_ #include "iree/base/api.h" #include "iree/hal/api.h" @@ -128,4 +128,4 @@ IREE_API_EXPORT iree_status_t iree_hal_cuda2_driver_create( } // extern "C" #endif // __cplusplus -#endif // IREE_EXPERIMENTAL_CUDA2_API_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_API_H_ diff --git a/experimental/cuda2/cts/CMakeLists.txt b/runtime/src/iree/hal/drivers/cuda2/cts/CMakeLists.txt similarity index 82% rename from experimental/cuda2/cts/CMakeLists.txt rename to runtime/src/iree/hal/drivers/cuda2/cts/CMakeLists.txt index e48f470380c1..58e5411e6483 100644 --- a/experimental/cuda2/cts/CMakeLists.txt +++ b/runtime/src/iree/hal/drivers/cuda2/cts/CMakeLists.txt @@ -10,7 +10,7 @@ iree_hal_cts_test_suite( VARIANT_SUFFIX graph DRIVER_REGISTRATION_HDR - "experimental/cuda2/registration/driver_module.h" + "runtime/src/iree/hal/drivers/cuda2/registration/driver_module.h" DRIVER_REGISTRATION_FN "iree_hal_cuda2_driver_module_register" COMPILER_TARGET_BACKEND @@ -20,7 +20,7 @@ iree_hal_cts_test_suite( ARGS "--cuda2_use_streams=false" DEPS - iree::experimental::cuda2::registration + iree::hal::drivers::cuda2::registration EXCLUDED_TESTS # HAL event is unimplemented for now. "event" @@ -35,7 +35,7 @@ iree_hal_cts_test_suite( VARIANT_SUFFIX stream DRIVER_REGISTRATION_HDR - "experimental/cuda2/registration/driver_module.h" + "runtime/src/iree/hal/drivers/cuda2/registration/driver_module.h" DRIVER_REGISTRATION_FN "iree_hal_cuda2_driver_module_register" COMPILER_TARGET_BACKEND @@ -45,7 +45,7 @@ iree_hal_cts_test_suite( ARGS "--cuda2_use_streams=true" DEPS - iree::experimental::cuda2::registration + iree::hal::drivers::cuda2::registration EXCLUDED_TESTS # HAL event is unimplemented for now. "event" diff --git a/experimental/cuda2/cuda_allocator.c b/runtime/src/iree/hal/drivers/cuda2/cuda_allocator.c similarity index 99% rename from experimental/cuda2/cuda_allocator.c rename to runtime/src/iree/hal/drivers/cuda2/cuda_allocator.c index 88550e6090a4..cf3452f286a0 100644 --- a/experimental/cuda2/cuda_allocator.c +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_allocator.c @@ -4,14 +4,14 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/cuda_allocator.h" +#include "iree/hal/drivers/cuda2/cuda_allocator.h" #include -#include "experimental/cuda2/cuda_buffer.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" #include "iree/base/api.h" +#include "iree/hal/drivers/cuda2/cuda_buffer.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" #if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_ALLOCATION_TRACKING static const char* IREE_HAL_CUDA_ALLOCATOR_ID = "CUDA2 unpooled"; diff --git a/experimental/cuda2/cuda_allocator.h b/runtime/src/iree/hal/drivers/cuda2/cuda_allocator.h similarity index 79% rename from experimental/cuda2/cuda_allocator.h rename to runtime/src/iree/hal/drivers/cuda2/cuda_allocator.h index 8fb1bf193ad5..767951072e8d 100644 --- a/experimental/cuda2/cuda_allocator.h +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_allocator.h @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_CUDA_ALLOCATOR_H_ -#define EXPERIMENTAL_CUDA2_CUDA_ALLOCATOR_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_CUDA_ALLOCATOR_H_ +#define IREE_HAL_DRIVERS_CUDA2_CUDA_ALLOCATOR_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/memory_pools.h" #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/memory_pools.h" #ifdef __cplusplus extern "C" { @@ -30,4 +30,4 @@ iree_status_t iree_hal_cuda2_allocator_create( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_CUDA_ALLOCATOR_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_CUDA_ALLOCATOR_H_ diff --git a/experimental/cuda2/cuda_buffer.c b/runtime/src/iree/hal/drivers/cuda2/cuda_buffer.c similarity index 99% rename from experimental/cuda2/cuda_buffer.c rename to runtime/src/iree/hal/drivers/cuda2/cuda_buffer.c index d1d017fed202..578aeb5861a6 100644 --- a/experimental/cuda2/cuda_buffer.c +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_buffer.c @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/cuda_buffer.h" +#include "iree/hal/drivers/cuda2/cuda_buffer.h" #include #include diff --git a/experimental/cuda2/cuda_buffer.h b/runtime/src/iree/hal/drivers/cuda2/cuda_buffer.h similarity index 93% rename from experimental/cuda2/cuda_buffer.h rename to runtime/src/iree/hal/drivers/cuda2/cuda_buffer.h index dfeed297a5a9..075776d2a9e4 100644 --- a/experimental/cuda2/cuda_buffer.h +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_buffer.h @@ -4,12 +4,12 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_CUDA_BUFFER_H_ -#define EXPERIMENTAL_CUDA2_CUDA_BUFFER_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_CUDA_BUFFER_H_ +#define IREE_HAL_DRIVERS_CUDA2_CUDA_BUFFER_H_ -#include "experimental/cuda2/cuda_headers.h" #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" #ifdef __cplusplus extern "C" { @@ -66,4 +66,4 @@ void iree_hal_cuda2_buffer_drop_release_callback(iree_hal_buffer_t* buffer); } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_CUDA_BUFFER_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_CUDA_BUFFER_H_ diff --git a/experimental/cuda2/cuda_device.c b/runtime/src/iree/hal/drivers/cuda2/cuda_device.c similarity index 97% rename from experimental/cuda2/cuda_device.c rename to runtime/src/iree/hal/drivers/cuda2/cuda_device.c index 9bf052fc5285..ac519a2f3104 100644 --- a/experimental/cuda2/cuda_device.c +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_device.c @@ -4,30 +4,30 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/cuda_device.h" +#include "iree/hal/drivers/cuda2/cuda_device.h" #include #include #include -#include "experimental/cuda2/cuda_allocator.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/event_pool.h" -#include "experimental/cuda2/event_semaphore.h" -#include "experimental/cuda2/graph_command_buffer.h" -#include "experimental/cuda2/memory_pools.h" -#include "experimental/cuda2/nccl_channel.h" -#include "experimental/cuda2/nccl_dynamic_symbols.h" -#include "experimental/cuda2/nop_executable_cache.h" -#include "experimental/cuda2/pending_queue_actions.h" -#include "experimental/cuda2/pipeline_layout.h" -#include "experimental/cuda2/stream_command_buffer.h" -#include "experimental/cuda2/timepoint_pool.h" -#include "experimental/cuda2/tracing.h" #include "iree/base/internal/arena.h" #include "iree/base/internal/event_pool.h" #include "iree/base/internal/math.h" +#include "iree/hal/drivers/cuda2/cuda_allocator.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/event_pool.h" +#include "iree/hal/drivers/cuda2/event_semaphore.h" +#include "iree/hal/drivers/cuda2/graph_command_buffer.h" +#include "iree/hal/drivers/cuda2/memory_pools.h" +#include "iree/hal/drivers/cuda2/nccl_channel.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/nop_executable_cache.h" +#include "iree/hal/drivers/cuda2/pending_queue_actions.h" +#include "iree/hal/drivers/cuda2/pipeline_layout.h" +#include "iree/hal/drivers/cuda2/stream_command_buffer.h" +#include "iree/hal/drivers/cuda2/timepoint_pool.h" +#include "iree/hal/drivers/cuda2/tracing.h" #include "iree/hal/utils/deferred_command_buffer.h" #include "iree/hal/utils/file_transfer.h" #include "iree/hal/utils/memory_file.h" diff --git a/experimental/cuda2/cuda_device.h b/runtime/src/iree/hal/drivers/cuda2/cuda_device.h similarity index 87% rename from experimental/cuda2/cuda_device.h rename to runtime/src/iree/hal/drivers/cuda2/cuda_device.h index 39d86e877da6..4c32cc7c7e3a 100644 --- a/experimental/cuda2/cuda_device.h +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_device.h @@ -4,14 +4,14 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_CUDA_DEVICE_H_ -#define EXPERIMENTAL_CUDA2_CUDA_DEVICE_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_CUDA_DEVICE_H_ +#define IREE_HAL_DRIVERS_CUDA2_CUDA_DEVICE_H_ -#include "experimental/cuda2/api.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/nccl_dynamic_symbols.h" #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" #ifdef __cplusplus extern "C" { @@ -55,4 +55,4 @@ const iree_hal_cuda2_dynamic_symbols_t* iree_hal_cuda2_device_dynamic_symbols( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_CUDA_DEVICE_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_CUDA_DEVICE_H_ diff --git a/experimental/cuda2/cuda_driver.c b/runtime/src/iree/hal/drivers/cuda2/cuda_driver.c similarity index 98% rename from experimental/cuda2/cuda_driver.c rename to runtime/src/iree/hal/drivers/cuda2/cuda_driver.c index 4fab5596185f..79632a3c02b1 100644 --- a/experimental/cuda2/cuda_driver.c +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_driver.c @@ -7,14 +7,14 @@ #include #include -#include "experimental/cuda2/api.h" -#include "experimental/cuda2/cuda_device.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/nccl_dynamic_symbols.h" -#include "experimental/cuda2/nccl_status_util.h" #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/api.h" +#include "iree/hal/drivers/cuda2/cuda_device.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/nccl_status_util.h" // Maximum device name length supported by the CUDA HAL driver. #define IREE_HAL_CUDA_MAX_DEVICE_NAME_LENGTH 128 diff --git a/experimental/cuda2/cuda_dynamic_symbol_table.h b/runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbol_table.h similarity index 100% rename from experimental/cuda2/cuda_dynamic_symbol_table.h rename to runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbol_table.h diff --git a/experimental/cuda2/cuda_dynamic_symbols.c b/runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbols.c similarity index 94% rename from experimental/cuda2/cuda_dynamic_symbols.c rename to runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbols.c index fa6580969eed..643fac1a243b 100644 --- a/experimental/cuda2/cuda_dynamic_symbols.c +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbols.c @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" #include -#include "experimental/cuda2/cuda_status_util.h" #include "iree/base/api.h" #include "iree/base/internal/dynamic_library.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" static const char* iree_hal_cuda_dylib_names[] = { #if defined(IREE_PLATFORM_WINDOWS) @@ -43,7 +43,7 @@ static iree_status_t iree_hal_cuda2_dynamic_symbols_resolve_all( CU_GET_PROC_ADDRESS_DEFAULT), \ "when resolving " #cuda_symbol_name " using cuGetProcAddress"); \ } -#include "experimental/cuda2/cuda_dynamic_symbol_table.h" // IWYU pragma: keep +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbol_table.h" // IWYU pragma: keep #undef IREE_CU_PFN_DECL return iree_ok_status(); } diff --git a/experimental/cuda2/cuda_dynamic_symbols.h b/runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbols.h similarity index 84% rename from experimental/cuda2/cuda_dynamic_symbols.h rename to runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbols.h index 5bed39251532..c8e305a23c35 100644 --- a/experimental/cuda2/cuda_dynamic_symbols.h +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbols.h @@ -4,12 +4,12 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_EXPERIMENTAL_CUDA2_CUDA_DYNAMIC_SYMBOLS_H_ -#define IREE_EXPERIMENTAL_CUDA2_CUDA_DYNAMIC_SYMBOLS_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_CUDA_DYNAMIC_SYMBOLS_H_ +#define IREE_HAL_DRIVERS_CUDA2_CUDA_DYNAMIC_SYMBOLS_H_ -#include "experimental/cuda2/cuda_headers.h" #include "iree/base/api.h" #include "iree/base/internal/dynamic_library.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" #ifdef __cplusplus extern "C" { @@ -28,7 +28,7 @@ typedef struct iree_hal_cuda2_dynamic_symbols_t { // Concrete CUDA symbols defined by including the `dynamic_symbol_tables.h`. #define IREE_CU_PFN_DECL(cudaSymbolName, ...) \ CUresult (*cudaSymbolName)(__VA_ARGS__); -#include "experimental/cuda2/cuda_dynamic_symbol_table.h" // IWYU pragma: export +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbol_table.h" // IWYU pragma: export #undef IREE_CU_PFN_DECL } iree_hal_cuda2_dynamic_symbols_t; @@ -49,4 +49,4 @@ void iree_hal_cuda2_dynamic_symbols_deinitialize( } // extern "C" #endif // __cplusplus -#endif // IREE_EXPERIMENTAL_CUDA2_CUDA_DYNAMIC_SYMBOLS_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_CUDA_DYNAMIC_SYMBOLS_H_ diff --git a/experimental/cuda2/cuda_headers.h b/runtime/src/iree/hal/drivers/cuda2/cuda_headers.h similarity index 64% rename from experimental/cuda2/cuda_headers.h rename to runtime/src/iree/hal/drivers/cuda2/cuda_headers.h index 20ed2686595c..7f8dd8ed19f9 100644 --- a/experimental/cuda2/cuda_headers.h +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_headers.h @@ -4,9 +4,9 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_EXPERIMENTAL_CUDA2_CUDA_HEADERS_H_ -#define IREE_EXPERIMENTAL_CUDA2_CUDA_HEADERS_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_CUDA_HEADERS_H_ +#define IREE_HAL_DRIVERS_CUDA2_CUDA_HEADERS_H_ #include "cuda.h" // IWYU pragma: export -#endif // IREE_EXPERIMENTAL_CUDA2_CUDA_HEADERS_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_CUDA_HEADERS_H_ diff --git a/experimental/cuda2/cuda_status_util.c b/runtime/src/iree/hal/drivers/cuda2/cuda_status_util.c similarity index 98% rename from experimental/cuda2/cuda_status_util.c rename to runtime/src/iree/hal/drivers/cuda2/cuda_status_util.c index c8d57b50377b..b6abec1b6a3f 100644 --- a/experimental/cuda2/cuda_status_util.c +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_status_util.c @@ -4,12 +4,11 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" #include -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "iree/base/status.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" // The list of CUDA error strings with their corresponding IREE error state // classification. diff --git a/experimental/cuda2/cuda_status_util.h b/runtime/src/iree/hal/drivers/cuda2/cuda_status_util.h similarity index 91% rename from experimental/cuda2/cuda_status_util.h rename to runtime/src/iree/hal/drivers/cuda2/cuda_status_util.h index 75c6057ad7bf..2c0fe4e08fbb 100644 --- a/experimental/cuda2/cuda_status_util.h +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_status_util.h @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_EXPERIMENTAL_CUDA2_CUDA_STATUS_UTIL_H_ -#define IREE_EXPERIMENTAL_CUDA2_CUDA_STATUS_UTIL_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_CUDA_STATUS_UTIL_H_ +#define IREE_HAL_DRIVERS_CUDA2_CUDA_STATUS_UTIL_H_ #include -#include "experimental/cuda2/cuda_dynamic_symbols.h" #include "iree/base/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" #ifdef __cplusplus extern "C" { @@ -65,4 +65,4 @@ iree_status_t iree_hal_cuda2_result_to_status( } // extern "C" #endif // __cplusplus -#endif // IREE_EXPERIMENTAL_CUDA2_CUDA_STATUS_UTIL_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_CUDA_STATUS_UTIL_H_ diff --git a/experimental/cuda2/dynamic_symbols_test.cc b/runtime/src/iree/hal/drivers/cuda2/dynamic_symbols_test.cc similarity index 95% rename from experimental/cuda2/dynamic_symbols_test.cc rename to runtime/src/iree/hal/drivers/cuda2/dynamic_symbols_test.cc index 9a40e3986227..b1bd4556edce 100644 --- a/experimental/cuda2/dynamic_symbols_test.cc +++ b/runtime/src/iree/hal/drivers/cuda2/dynamic_symbols_test.cc @@ -6,9 +6,9 @@ #include -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/nccl_dynamic_symbols.h" #include "iree/base/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" #include "iree/testing/gtest.h" namespace iree { diff --git a/experimental/cuda2/event_pool.c b/runtime/src/iree/hal/drivers/cuda2/event_pool.c similarity index 98% rename from experimental/cuda2/event_pool.c rename to runtime/src/iree/hal/drivers/cuda2/event_pool.c index 478ecda33909..19ec51a49f94 100644 --- a/experimental/cuda2/event_pool.c +++ b/runtime/src/iree/hal/drivers/cuda2/event_pool.c @@ -4,18 +4,18 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/event_pool.h" +#include "iree/hal/drivers/cuda2/event_pool.h" #include #include #include -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" #include "iree/base/api.h" #include "iree/base/internal/atomics.h" #include "iree/base/internal/synchronization.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" //===----------------------------------------------------------------------===// // iree_hal_cuda2_event_t diff --git a/experimental/cuda2/event_pool.h b/runtime/src/iree/hal/drivers/cuda2/event_pool.h similarity index 93% rename from experimental/cuda2/event_pool.h rename to runtime/src/iree/hal/drivers/cuda2/event_pool.h index 3a81c404aa3f..3ad20739acc1 100644 --- a/experimental/cuda2/event_pool.h +++ b/runtime/src/iree/hal/drivers/cuda2/event_pool.h @@ -4,11 +4,11 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_EVENT_POOL_H_ -#define EXPERIMENTAL_CUDA2_EVENT_POOL_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_EVENT_POOL_H_ +#define IREE_HAL_DRIVERS_CUDA2_EVENT_POOL_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" #include "iree/base/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" #ifdef __cplusplus extern "C" { @@ -77,4 +77,4 @@ iree_status_t iree_hal_cuda2_event_pool_acquire( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_EVENT_POOL_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_EVENT_POOL_H_ diff --git a/experimental/cuda2/event_semaphore.c b/runtime/src/iree/hal/drivers/cuda2/event_semaphore.c similarity index 98% rename from experimental/cuda2/event_semaphore.c rename to runtime/src/iree/hal/drivers/cuda2/event_semaphore.c index a01af41eb9bf..93e80f2cb4d4 100644 --- a/experimental/cuda2/event_semaphore.c +++ b/runtime/src/iree/hal/drivers/cuda2/event_semaphore.c @@ -4,13 +4,12 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/event_semaphore.h" +#include "iree/hal/drivers/cuda2/event_semaphore.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/timepoint_pool.h" #include "iree/base/internal/synchronization.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/timepoint_pool.h" #include "iree/hal/utils/semaphore_base.h" typedef struct iree_hal_cuda2_semaphore_t { diff --git a/experimental/cuda2/event_semaphore.h b/runtime/src/iree/hal/drivers/cuda2/event_semaphore.h similarity index 85% rename from experimental/cuda2/event_semaphore.h rename to runtime/src/iree/hal/drivers/cuda2/event_semaphore.h index ec49704bd96c..2c73e3d5a16e 100644 --- a/experimental/cuda2/event_semaphore.h +++ b/runtime/src/iree/hal/drivers/cuda2/event_semaphore.h @@ -4,16 +4,16 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_EVENT_SEMAPHORE_H_ -#define EXPERIMENTAL_CUDA2_EVENT_SEMAPHORE_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_EVENT_SEMAPHORE_H_ +#define IREE_HAL_DRIVERS_CUDA2_EVENT_SEMAPHORE_H_ #include -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/pending_queue_actions.h" -#include "experimental/cuda2/timepoint_pool.h" #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/pending_queue_actions.h" +#include "iree/hal/drivers/cuda2/timepoint_pool.h" #ifdef __cplusplus extern "C" { @@ -53,4 +53,4 @@ iree_status_t iree_hal_cuda2_event_semaphore_acquire_timepoint_device_wait( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_EVENT_SEMAPHORE_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_EVENT_SEMAPHORE_H_ diff --git a/experimental/cuda2/graph_command_buffer.c b/runtime/src/iree/hal/drivers/cuda2/graph_command_buffer.c similarity index 99% rename from experimental/cuda2/graph_command_buffer.c rename to runtime/src/iree/hal/drivers/cuda2/graph_command_buffer.c index fd4310fc1ce8..19990d8c9e7b 100644 --- a/experimental/cuda2/graph_command_buffer.c +++ b/runtime/src/iree/hal/drivers/cuda2/graph_command_buffer.c @@ -4,17 +4,17 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/graph_command_buffer.h" +#include "iree/hal/drivers/cuda2/graph_command_buffer.h" #include #include -#include "experimental/cuda2/cuda_buffer.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/native_executable.h" -#include "experimental/cuda2/pipeline_layout.h" #include "iree/base/api.h" +#include "iree/hal/drivers/cuda2/cuda_buffer.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/native_executable.h" +#include "iree/hal/drivers/cuda2/pipeline_layout.h" #include "iree/hal/utils/collective_batch.h" #include "iree/hal/utils/resource_set.h" diff --git a/experimental/cuda2/graph_command_buffer.h b/runtime/src/iree/hal/drivers/cuda2/graph_command_buffer.h similarity index 84% rename from experimental/cuda2/graph_command_buffer.h rename to runtime/src/iree/hal/drivers/cuda2/graph_command_buffer.h index 413e46ad70f1..cf10c5e4e070 100644 --- a/experimental/cuda2/graph_command_buffer.h +++ b/runtime/src/iree/hal/drivers/cuda2/graph_command_buffer.h @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_GRAPH_COMMAND_BUFFER_H_ -#define EXPERIMENTAL_CUDA2_GRAPH_COMMAND_BUFFER_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_GRAPH_COMMAND_BUFFER_H_ +#define IREE_HAL_DRIVERS_CUDA2_GRAPH_COMMAND_BUFFER_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" #ifdef __cplusplus extern "C" { @@ -44,4 +44,4 @@ CUgraphExec iree_hal_cuda2_graph_command_buffer_handle( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_GRAPH_COMMAND_BUFFER_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_GRAPH_COMMAND_BUFFER_H_ diff --git a/experimental/cuda2/memory_pools.c b/runtime/src/iree/hal/drivers/cuda2/memory_pools.c similarity index 98% rename from experimental/cuda2/memory_pools.c rename to runtime/src/iree/hal/drivers/cuda2/memory_pools.c index 84b38d01fbd3..5da268da143d 100644 --- a/experimental/cuda2/memory_pools.c +++ b/runtime/src/iree/hal/drivers/cuda2/memory_pools.c @@ -4,11 +4,11 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/memory_pools.h" +#include "iree/hal/drivers/cuda2/memory_pools.h" -#include "experimental/cuda2/cuda_buffer.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/cuda_buffer.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" // NOTE: these are currently global for all devices; we could make // device-specific ones by malloc() and leaking (with LSAN note) unique string diff --git a/experimental/cuda2/memory_pools.h b/runtime/src/iree/hal/drivers/cuda2/memory_pools.h similarity index 95% rename from experimental/cuda2/memory_pools.h rename to runtime/src/iree/hal/drivers/cuda2/memory_pools.h index 9c1e59b0c0d0..7cb0d7a453da 100644 --- a/experimental/cuda2/memory_pools.h +++ b/runtime/src/iree/hal/drivers/cuda2/memory_pools.h @@ -7,12 +7,12 @@ #ifndef IREE_HAL_DRIVERS_CUDA_MEMORY_POOLS_H_ #define IREE_HAL_DRIVERS_CUDA_MEMORY_POOLS_H_ -#include "experimental/cuda2/api.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" #include "iree/base/api.h" #include "iree/base/internal/atomics.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" #ifdef __cplusplus extern "C" { diff --git a/experimental/cuda2/native_executable.c b/runtime/src/iree/hal/drivers/cuda2/native_executable.c similarity index 98% rename from experimental/cuda2/native_executable.c rename to runtime/src/iree/hal/drivers/cuda2/native_executable.c index db74f75a0a33..ce252a3ed858 100644 --- a/experimental/cuda2/native_executable.c +++ b/runtime/src/iree/hal/drivers/cuda2/native_executable.c @@ -4,14 +4,14 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/native_executable.h" +#include "iree/hal/drivers/cuda2/native_executable.h" #include -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/pipeline_layout.h" #include "iree/base/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/pipeline_layout.h" // flatcc schemas: #include "iree/base/internal/flatcc/parsing.h" diff --git a/experimental/cuda2/native_executable.h b/runtime/src/iree/hal/drivers/cuda2/native_executable.h similarity index 84% rename from experimental/cuda2/native_executable.h rename to runtime/src/iree/hal/drivers/cuda2/native_executable.h index 2b633fe529ac..10a942e2a4cc 100644 --- a/experimental/cuda2/native_executable.h +++ b/runtime/src/iree/hal/drivers/cuda2/native_executable.h @@ -4,16 +4,16 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_NATIVE_EXECUTABLE_H_ -#define EXPERIMENTAL_CUDA2_NATIVE_EXECUTABLE_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_NATIVE_EXECUTABLE_H_ +#define IREE_HAL_DRIVERS_CUDA2_NATIVE_EXECUTABLE_H_ #include -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" #include "iree/base/api.h" #include "iree/base/tracing.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" #ifdef __cplusplus extern "C" { @@ -47,4 +47,4 @@ iree_status_t iree_hal_cuda2_native_executable_entry_point_kernel_info( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_NATIVE_EXECUTABLE_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_NATIVE_EXECUTABLE_H_ diff --git a/experimental/cuda2/nccl_channel.c b/runtime/src/iree/hal/drivers/cuda2/nccl_channel.c similarity index 99% rename from experimental/cuda2/nccl_channel.c rename to runtime/src/iree/hal/drivers/cuda2/nccl_channel.c index d8a658455741..9df206aac16d 100644 --- a/experimental/cuda2/nccl_channel.c +++ b/runtime/src/iree/hal/drivers/cuda2/nccl_channel.c @@ -4,17 +4,16 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/nccl_channel.h" +#include "iree/hal/drivers/cuda2/nccl_channel.h" #include #include -#include "experimental/cuda2/cuda_buffer.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/nccl_headers.h" -#include "experimental/cuda2/nccl_status_util.h" #include "iree/base/api.h" #include "iree/base/tracing.h" +#include "iree/hal/drivers/cuda2/cuda_buffer.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/nccl_status_util.h" typedef struct iree_hal_cuda2_nccl_channel_t { iree_hal_resource_t resource; diff --git a/experimental/cuda2/nccl_channel.h b/runtime/src/iree/hal/drivers/cuda2/nccl_channel.h similarity index 84% rename from experimental/cuda2/nccl_channel.h rename to runtime/src/iree/hal/drivers/cuda2/nccl_channel.h index ad8bfef68667..ddfc2b18f18a 100644 --- a/experimental/cuda2/nccl_channel.h +++ b/runtime/src/iree/hal/drivers/cuda2/nccl_channel.h @@ -4,16 +4,15 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_NCCL_CHANNEL_H_ -#define EXPERIMENTAL_CUDA2_NCCL_CHANNEL_H_ - -#include "experimental/cuda2/api.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" -#include "experimental/cuda2/nccl_dynamic_symbols.h" -#include "experimental/cuda2/tracing.h" +#ifndef IREE_HAL_DRIVERS_CUDA2_NCCL_CHANNEL_H_ +#define IREE_HAL_DRIVERS_CUDA2_NCCL_CHANNEL_H_ + #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/tracing.h" #include "iree/hal/utils/collective_batch.h" #ifdef __cplusplus @@ -56,4 +55,4 @@ iree_status_t iree_hal_cuda2_nccl_submit_batch( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_NCCL_CHANNEL_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_NCCL_CHANNEL_H_ diff --git a/experimental/cuda2/nccl_dynamic_symbol_table.h b/runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbol_table.h similarity index 100% rename from experimental/cuda2/nccl_dynamic_symbol_table.h rename to runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbol_table.h diff --git a/experimental/cuda2/nccl_dynamic_symbols.c b/runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbols.c similarity index 96% rename from experimental/cuda2/nccl_dynamic_symbols.c rename to runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbols.c index f51f8fd55dcf..2fc36f56ae83 100644 --- a/experimental/cuda2/nccl_dynamic_symbols.c +++ b/runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbols.c @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/nccl_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" #include -#include "experimental/cuda2/nccl_status_util.h" #include "iree/base/api.h" #include "iree/base/internal/dynamic_library.h" +#include "iree/hal/drivers/cuda2/nccl_status_util.h" static const char* iree_hal_cuda_nccl_dylib_names[] = { #if defined(IREE_PLATFORM_WINDOWS) @@ -36,7 +36,7 @@ static iree_status_t iree_hal_cuda2_nccl_dynamic_symbols_resolve_all( IREE_RETURN_IF_ERROR(iree_dynamic_library_lookup_symbol( \ syms->dylib, name, (void**)&syms->nccl_symbol_name)); \ } -#include "experimental/cuda2/nccl_dynamic_symbol_table.h" // IWYU pragma: keep +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbol_table.h" // IWYU pragma: keep #undef IREE_NCCL_PFN_DECL #undef IREE_NCCL_PFN_DECL_STR_RETURN return iree_ok_status(); diff --git a/experimental/cuda2/nccl_dynamic_symbols.h b/runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbols.h similarity index 84% rename from experimental/cuda2/nccl_dynamic_symbols.h rename to runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbols.h index e15253ffb96b..23a4e2e532d5 100644 --- a/experimental/cuda2/nccl_dynamic_symbols.h +++ b/runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbols.h @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_EXPERIMENTAL_CUDA2_NCCL_DYNAMIC_SYMBOLS_H_ -#define IREE_EXPERIMENTAL_CUDA2_NCCL_DYNAMIC_SYMBOLS_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_NCCL_DYNAMIC_SYMBOLS_H_ +#define IREE_HAL_DRIVERS_CUDA2_NCCL_DYNAMIC_SYMBOLS_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/nccl_headers.h" #include "iree/base/api.h" #include "iree/base/internal/dynamic_library.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/nccl_headers.h" #ifdef __cplusplus extern "C" { @@ -31,7 +31,7 @@ typedef struct iree_hal_cuda2_nccl_dynamic_symbols_t { ncclResult_t (*ncclSymbolName)(__VA_ARGS__); #define IREE_NCCL_PFN_DECL_STR_RETURN(ncclSymbolName, ...) \ const char* (*ncclSymbolName)(__VA_ARGS__); -#include "experimental/cuda2/nccl_dynamic_symbol_table.h" // IWYU pragma: export +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbol_table.h" // IWYU pragma: export #undef IREE_NCCL_PFN_DECL #undef IREE_NCCL_PFN_DECL_STR_RETURN } iree_hal_cuda2_nccl_dynamic_symbols_t; @@ -54,4 +54,4 @@ void iree_hal_cuda2_nccl_dynamic_symbols_deinitialize( } // extern "C" #endif // __cplusplus -#endif // IREE_EXPERIMENTAL_CUDA2_NCCL_DYNAMIC_SYMBOLS_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_NCCL_DYNAMIC_SYMBOLS_H_ diff --git a/experimental/cuda2/nccl_headers.h b/runtime/src/iree/hal/drivers/cuda2/nccl_headers.h similarity index 65% rename from experimental/cuda2/nccl_headers.h rename to runtime/src/iree/hal/drivers/cuda2/nccl_headers.h index 973c6a4f65df..09af6ce2634c 100644 --- a/experimental/cuda2/nccl_headers.h +++ b/runtime/src/iree/hal/drivers/cuda2/nccl_headers.h @@ -4,9 +4,9 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_EXPERIMENTAL_CUDA2_NCCL_HEADERS_H_ -#define IREE_EXPERIMENTAL_CUDA2_NCCL_HEADERS_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_NCCL_HEADERS_H_ +#define IREE_HAL_DRIVERS_CUDA2_NCCL_HEADERS_H_ #include "third_party/nccl/nccl.h" // IWYU pragma: export -#endif // IREE_EXPERIMENTAL_CUDA2_NCCL_HEADERS_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_NCCL_HEADERS_H_ diff --git a/experimental/cuda2/nccl_status_util.c b/runtime/src/iree/hal/drivers/cuda2/nccl_status_util.c similarity index 91% rename from experimental/cuda2/nccl_status_util.c rename to runtime/src/iree/hal/drivers/cuda2/nccl_status_util.c index 81ca09751a7e..5d1779ce91a6 100644 --- a/experimental/cuda2/nccl_status_util.c +++ b/runtime/src/iree/hal/drivers/cuda2/nccl_status_util.c @@ -4,12 +4,11 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/nccl_status_util.h" +#include "iree/hal/drivers/cuda2/nccl_status_util.h" #include -#include "experimental/cuda2/nccl_dynamic_symbols.h" -#include "iree/base/status.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" iree_status_t iree_hal_cuda2_nccl_result_to_status( const iree_hal_cuda2_nccl_dynamic_symbols_t* syms, ncclResult_t result, diff --git a/experimental/cuda2/nccl_status_util.h b/runtime/src/iree/hal/drivers/cuda2/nccl_status_util.h similarity index 92% rename from experimental/cuda2/nccl_status_util.h rename to runtime/src/iree/hal/drivers/cuda2/nccl_status_util.h index 65db9a9c6123..b2d690552625 100644 --- a/experimental/cuda2/nccl_status_util.h +++ b/runtime/src/iree/hal/drivers/cuda2/nccl_status_util.h @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_EXPERIMENTAL_CUDA2_NCCL_STATUS_UTIL_H_ -#define IREE_EXPERIMENTAL_CUDA2_NCCL_STATUS_UTIL_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_NCCL_STATUS_UTIL_H_ +#define IREE_HAL_DRIVERS_CUDA2_NCCL_STATUS_UTIL_H_ #include -#include "experimental/cuda2/nccl_dynamic_symbols.h" #include "iree/base/api.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" #ifdef __cplusplus extern "C" { @@ -66,4 +66,4 @@ iree_status_t iree_hal_cuda2_nccl_result_to_status( } // extern "C" #endif // __cplusplus -#endif // IREE_EXPERIMENTAL_CUDA2_NCCL_STATUS_UTIL_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_NCCL_STATUS_UTIL_H_ diff --git a/experimental/cuda2/nop_executable_cache.c b/runtime/src/iree/hal/drivers/cuda2/nop_executable_cache.c similarity index 97% rename from experimental/cuda2/nop_executable_cache.c rename to runtime/src/iree/hal/drivers/cuda2/nop_executable_cache.c index ce63c1c0e46f..284f9562466c 100644 --- a/experimental/cuda2/nop_executable_cache.c +++ b/runtime/src/iree/hal/drivers/cuda2/nop_executable_cache.c @@ -4,14 +4,14 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/nop_executable_cache.h" +#include "iree/hal/drivers/cuda2/nop_executable_cache.h" #include #include -#include "experimental/cuda2/native_executable.h" #include "iree/base/api.h" #include "iree/base/tracing.h" +#include "iree/hal/drivers/cuda2/native_executable.h" typedef struct iree_hal_cuda2_nop_executable_cache_t { // Abstract resource used for injecting reference counting and vtable; diff --git a/experimental/cuda2/nop_executable_cache.h b/runtime/src/iree/hal/drivers/cuda2/nop_executable_cache.h similarity index 74% rename from experimental/cuda2/nop_executable_cache.h rename to runtime/src/iree/hal/drivers/cuda2/nop_executable_cache.h index 92e66acd669f..a2424b4c9ce9 100644 --- a/experimental/cuda2/nop_executable_cache.h +++ b/runtime/src/iree/hal/drivers/cuda2/nop_executable_cache.h @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_NOP_EXECUTABLE_CACHE_H_ -#define EXPERIMENTAL_CUDA2_NOP_EXECUTABLE_CACHE_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_NOP_EXECUTABLE_CACHE_H_ +#define IREE_HAL_DRIVERS_CUDA2_NOP_EXECUTABLE_CACHE_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" #ifdef __cplusplus extern "C" { @@ -29,4 +29,4 @@ iree_status_t iree_hal_cuda2_nop_executable_cache_create( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_NOP_EXECUTABLE_CACHE_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_NOP_EXECUTABLE_CACHE_H_ diff --git a/experimental/cuda2/pending_queue_actions.c b/runtime/src/iree/hal/drivers/cuda2/pending_queue_actions.c similarity index 99% rename from experimental/cuda2/pending_queue_actions.c rename to runtime/src/iree/hal/drivers/cuda2/pending_queue_actions.c index a0fe823870ee..4886ebc757db 100644 --- a/experimental/cuda2/pending_queue_actions.c +++ b/runtime/src/iree/hal/drivers/cuda2/pending_queue_actions.c @@ -4,22 +4,22 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/pending_queue_actions.h" +#include "iree/hal/drivers/cuda2/pending_queue_actions.h" #include #include -#include "experimental/cuda2/cuda_device.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/event_semaphore.h" -#include "experimental/cuda2/graph_command_buffer.h" #include "iree/base/api.h" #include "iree/base/internal/arena.h" #include "iree/base/internal/atomic_slist.h" #include "iree/base/internal/synchronization.h" #include "iree/base/internal/threading.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_device.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/event_semaphore.h" +#include "iree/hal/drivers/cuda2/graph_command_buffer.h" #include "iree/hal/utils/deferred_command_buffer.h" #include "iree/hal/utils/resource_set.h" diff --git a/experimental/cuda2/pending_queue_actions.h b/runtime/src/iree/hal/drivers/cuda2/pending_queue_actions.h similarity index 91% rename from experimental/cuda2/pending_queue_actions.h rename to runtime/src/iree/hal/drivers/cuda2/pending_queue_actions.h index 036c063030ee..1484c2bda8ff 100644 --- a/experimental/cuda2/pending_queue_actions.h +++ b/runtime/src/iree/hal/drivers/cuda2/pending_queue_actions.h @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_PENDING_QUEUE_ACTIONS_H_ -#define EXPERIMENTAL_CUDA2_PENDING_QUEUE_ACTIONS_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_PENDING_QUEUE_ACTIONS_H_ +#define IREE_HAL_DRIVERS_CUDA2_PENDING_QUEUE_ACTIONS_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" #include "iree/base/api.h" #include "iree/base/internal/arena.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" #ifdef __cplusplus extern "C" { @@ -63,4 +63,4 @@ iree_status_t iree_hal_cuda2_pending_queue_actions_issue( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_PENDING_QUEUE_ACTIONS_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_PENDING_QUEUE_ACTIONS_H_ diff --git a/experimental/cuda2/pipeline_layout.c b/runtime/src/iree/hal/drivers/cuda2/pipeline_layout.c similarity index 99% rename from experimental/cuda2/pipeline_layout.c rename to runtime/src/iree/hal/drivers/cuda2/pipeline_layout.c index bd64d037d903..7383d78f0f1c 100644 --- a/experimental/cuda2/pipeline_layout.c +++ b/runtime/src/iree/hal/drivers/cuda2/pipeline_layout.c @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/pipeline_layout.h" +#include "iree/hal/drivers/cuda2/pipeline_layout.h" #include diff --git a/experimental/cuda2/pipeline_layout.h b/runtime/src/iree/hal/drivers/cuda2/pipeline_layout.h similarity index 96% rename from experimental/cuda2/pipeline_layout.h rename to runtime/src/iree/hal/drivers/cuda2/pipeline_layout.h index f2d48c80dc9b..b7b3cf7d5a12 100644 --- a/experimental/cuda2/pipeline_layout.h +++ b/runtime/src/iree/hal/drivers/cuda2/pipeline_layout.h @@ -4,8 +4,8 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_PIPELINE_LAYOUT_H_ -#define EXPERIMENTAL_CUDA2_PIPELINE_LAYOUT_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_PIPELINE_LAYOUT_H_ +#define IREE_HAL_DRIVERS_CUDA2_PIPELINE_LAYOUT_H_ #include "iree/base/api.h" #include "iree/hal/api.h" @@ -107,4 +107,4 @@ iree_host_size_t iree_hal_cuda2_pipeline_layout_push_constant_count( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_PIPELINE_LAYOUT_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_PIPELINE_LAYOUT_H_ diff --git a/runtime/src/iree/hal/drivers/cuda2/registration/BUILD.bazel b/runtime/src/iree/hal/drivers/cuda2/registration/BUILD.bazel new file mode 100644 index 000000000000..78ee071af0df --- /dev/null +++ b/runtime/src/iree/hal/drivers/cuda2/registration/BUILD.bazel @@ -0,0 +1,32 @@ +# Copyright 2022 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +load("//build_tools/bazel:build_defs.oss.bzl", "iree_runtime_cc_library") + +package( + default_visibility = ["//visibility:public"], + features = ["layering_check"], + licenses = ["notice"], # Apache 2.0 +) + +iree_runtime_cc_library( + name = "registration", + srcs = [ + "driver_module.c", + ], + hdrs = [ + "driver_module.h", + ], + defines = [ + "IREE_HAVE_HAL_CUDA2_DRIVER_MODULE=1", + ], + deps = [ + "//runtime/src/iree/base", + "//runtime/src/iree/base/internal:flags", + "//runtime/src/iree/hal", + "//runtime/src/iree/hal/drivers/cuda2", + ], +) diff --git a/runtime/src/iree/hal/drivers/cuda2/registration/CMakeLists.txt b/runtime/src/iree/hal/drivers/cuda2/registration/CMakeLists.txt new file mode 100644 index 000000000000..b27cde34fcea --- /dev/null +++ b/runtime/src/iree/hal/drivers/cuda2/registration/CMakeLists.txt @@ -0,0 +1,30 @@ +################################################################################ +# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from # +# runtime/src/iree/hal/drivers/cuda2/registration/BUILD.bazel # +# # +# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary # +# CMake-only content. # +# # +# To disable autogeneration for this file entirely, delete this header. # +################################################################################ + +iree_add_all_subdirs() + +iree_cc_library( + NAME + registration + HDRS + "driver_module.h" + SRCS + "driver_module.c" + DEPS + iree::base + iree::base::internal::flags + iree::hal + iree::hal::drivers::cuda2 + DEFINES + "IREE_HAVE_HAL_CUDA2_DRIVER_MODULE=1" + PUBLIC +) + +### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### diff --git a/experimental/cuda2/registration/driver_module.c b/runtime/src/iree/hal/drivers/cuda2/registration/driver_module.c similarity index 97% rename from experimental/cuda2/registration/driver_module.c rename to runtime/src/iree/hal/drivers/cuda2/registration/driver_module.c index 166286ffce56..da3fb55e7778 100644 --- a/experimental/cuda2/registration/driver_module.c +++ b/runtime/src/iree/hal/drivers/cuda2/registration/driver_module.c @@ -4,14 +4,14 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/registration/driver_module.h" +#include "iree/hal/drivers/cuda2/registration/driver_module.h" #include #include -#include "experimental/cuda2/api.h" #include "iree/base/api.h" #include "iree/base/internal/flags.h" +#include "iree/hal/drivers/cuda2/api.h" IREE_FLAG( bool, cuda2_use_streams, false, diff --git a/experimental/cuda2/registration/driver_module.h b/runtime/src/iree/hal/drivers/cuda2/registration/driver_module.h similarity index 74% rename from experimental/cuda2/registration/driver_module.h rename to runtime/src/iree/hal/drivers/cuda2/registration/driver_module.h index c92643da78cd..996c9b83ebfb 100644 --- a/experimental/cuda2/registration/driver_module.h +++ b/runtime/src/iree/hal/drivers/cuda2/registration/driver_module.h @@ -4,8 +4,8 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_EXPERIMENTAL_CUDA2_REGISTRATION_DRIVER_MODULE_H_ -#define IREE_EXPERIMENTAL_CUDA2_REGISTRATION_DRIVER_MODULE_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_REGISTRATION_DRIVER_MODULE_H_ +#define IREE_HAL_DRIVERS_CUDA2_REGISTRATION_DRIVER_MODULE_H_ #include "iree/base/api.h" #include "iree/hal/api.h" @@ -22,4 +22,4 @@ iree_hal_cuda2_driver_module_register(iree_hal_driver_registry_t* registry); } // extern "C" #endif // __cplusplus -#endif // IREE_EXPERIMENTAL_CUDA2_REGISTRATION_DRIVER_MODULE_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_REGISTRATION_DRIVER_MODULE_H_ diff --git a/experimental/cuda2/stream_command_buffer.c b/runtime/src/iree/hal/drivers/cuda2/stream_command_buffer.c similarity index 98% rename from experimental/cuda2/stream_command_buffer.c rename to runtime/src/iree/hal/drivers/cuda2/stream_command_buffer.c index f43f6f0470d9..0f21a6771124 100644 --- a/experimental/cuda2/stream_command_buffer.c +++ b/runtime/src/iree/hal/drivers/cuda2/stream_command_buffer.c @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/stream_command_buffer.h" +#include "iree/hal/drivers/cuda2/stream_command_buffer.h" -#include "experimental/cuda2/cuda_buffer.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/native_executable.h" -#include "experimental/cuda2/nccl_channel.h" -#include "experimental/cuda2/pipeline_layout.h" +#include "iree/hal/drivers/cuda2/cuda_buffer.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/native_executable.h" +#include "iree/hal/drivers/cuda2/nccl_channel.h" +#include "iree/hal/drivers/cuda2/pipeline_layout.h" #include "iree/hal/utils/collective_batch.h" #include "iree/hal/utils/resource_set.h" diff --git a/experimental/cuda2/stream_command_buffer.h b/runtime/src/iree/hal/drivers/cuda2/stream_command_buffer.h similarity index 82% rename from experimental/cuda2/stream_command_buffer.h rename to runtime/src/iree/hal/drivers/cuda2/stream_command_buffer.h index 65448563ca7b..ae4cd0d643cc 100644 --- a/experimental/cuda2/stream_command_buffer.h +++ b/runtime/src/iree/hal/drivers/cuda2/stream_command_buffer.h @@ -4,15 +4,15 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_STREAM_COMMAND_BUFFER_H_ -#define EXPERIMENTAL_CUDA2_STREAM_COMMAND_BUFFER_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_STREAM_COMMAND_BUFFER_H_ +#define IREE_HAL_DRIVERS_CUDA2_STREAM_COMMAND_BUFFER_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" -#include "experimental/cuda2/nccl_dynamic_symbols.h" -#include "experimental/cuda2/tracing.h" #include "iree/base/internal/arena.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/tracing.h" #ifdef __cplusplus extern "C" { @@ -48,4 +48,4 @@ bool iree_hal_cuda2_stream_command_buffer_isa( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_STREAM_COMMAND_BUFFER_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_STREAM_COMMAND_BUFFER_H_ diff --git a/experimental/cuda2/timepoint_pool.c b/runtime/src/iree/hal/drivers/cuda2/timepoint_pool.c similarity index 98% rename from experimental/cuda2/timepoint_pool.c rename to runtime/src/iree/hal/drivers/cuda2/timepoint_pool.c index 535406f46a5c..a3225531f337 100644 --- a/experimental/cuda2/timepoint_pool.c +++ b/runtime/src/iree/hal/drivers/cuda2/timepoint_pool.c @@ -4,20 +4,20 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/timepoint_pool.h" +#include "iree/hal/drivers/cuda2/timepoint_pool.h" #include #include #include -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/event_pool.h" #include "iree/base/api.h" #include "iree/base/internal/atomics.h" #include "iree/base/internal/event_pool.h" #include "iree/base/internal/synchronization.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/event_pool.h" #include "iree/hal/utils/semaphore_base.h" //===----------------------------------------------------------------------===// diff --git a/experimental/cuda2/timepoint_pool.h b/runtime/src/iree/hal/drivers/cuda2/timepoint_pool.h similarity index 95% rename from experimental/cuda2/timepoint_pool.h rename to runtime/src/iree/hal/drivers/cuda2/timepoint_pool.h index b0d71f5a1fbb..12ad5d9fea19 100644 --- a/experimental/cuda2/timepoint_pool.h +++ b/runtime/src/iree/hal/drivers/cuda2/timepoint_pool.h @@ -4,12 +4,12 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_TIMEPOINT_POOL_H_ -#define EXPERIMENTAL_CUDA2_TIMEPOINT_POOL_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_TIMEPOINT_POOL_H_ +#define IREE_HAL_DRIVERS_CUDA2_TIMEPOINT_POOL_H_ -#include "experimental/cuda2/event_pool.h" #include "iree/base/api.h" #include "iree/base/internal/event_pool.h" +#include "iree/hal/drivers/cuda2/event_pool.h" #include "iree/hal/utils/semaphore_base.h" #ifdef __cplusplus @@ -116,4 +116,4 @@ void iree_hal_cuda2_timepoint_pool_release( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_TIMEPOINT_POOL_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_TIMEPOINT_POOL_H_ diff --git a/experimental/cuda2/tracing.c b/runtime/src/iree/hal/drivers/cuda2/tracing.c similarity index 98% rename from experimental/cuda2/tracing.c rename to runtime/src/iree/hal/drivers/cuda2/tracing.c index e680c467a828..76254e3216e9 100644 --- a/experimental/cuda2/tracing.c +++ b/runtime/src/iree/hal/drivers/cuda2/tracing.c @@ -4,12 +4,12 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/tracing.h" +#include "iree/hal/drivers/cuda2/tracing.h" #if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" // Total number of events per tracing context. This translates to the maximum // number of outstanding timestamp queries before collection is required. diff --git a/experimental/cuda2/tracing.h b/runtime/src/iree/hal/drivers/cuda2/tracing.h similarity index 95% rename from experimental/cuda2/tracing.h rename to runtime/src/iree/hal/drivers/cuda2/tracing.h index 57b6786dc469..68a606b64266 100644 --- a/experimental/cuda2/tracing.h +++ b/runtime/src/iree/hal/drivers/cuda2/tracing.h @@ -4,15 +4,15 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_TRACING_H_ -#define EXPERIMENTAL_CUDA2_TRACING_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_TRACING_H_ +#define IREE_HAL_DRIVERS_CUDA2_TRACING_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" #include "iree/base/api.h" #include "iree/base/internal/arena.h" #include "iree/base/tracing.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" #ifdef __cplusplus extern "C" { @@ -119,4 +119,4 @@ void iree_hal_cuda2_tracing_zone_end_impl( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_TRACING_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_TRACING_H_ diff --git a/runtime/src/iree/hal/drivers/init.c b/runtime/src/iree/hal/drivers/init.c index e2d1f659f4b6..226764892a74 100644 --- a/runtime/src/iree/hal/drivers/init.c +++ b/runtime/src/iree/hal/drivers/init.c @@ -10,6 +10,10 @@ #include "iree/hal/drivers/cuda/registration/driver_module.h" #endif // IREE_HAVE_HAL_CUDA_DRIVER_MODULE +#if defined(IREE_HAVE_HAL_CUDA2_DRIVER_MODULE) +#include "iree/hal/drivers/cuda2/registration/driver_module.h" +#endif // IREE_HAVE_HAL_CUDA2_DRIVER_MODULE + #if defined(IREE_HAVE_HAL_LOCAL_SYNC_DRIVER_MODULE) #include "iree/hal/drivers/local_sync/registration/driver_module.h" #endif // IREE_HAVE_HAL_LOCAL_SYNC_DRIVER_MODULE @@ -46,6 +50,11 @@ iree_hal_register_all_available_drivers(iree_hal_driver_registry_t* registry) { z0, iree_hal_cuda_driver_module_register(registry)); #endif // IREE_HAVE_HAL_CUDA_DRIVER_MODULE +#if defined(IREE_HAVE_HAL_CUDA2_DRIVER_MODULE) + IREE_RETURN_AND_END_ZONE_IF_ERROR( + z0, iree_hal_cuda2_driver_module_register(registry)); +#endif // IREE_HAVE_HAL_CUDA2_DRIVER_MODULE + #if defined(IREE_HAVE_HAL_LOCAL_SYNC_DRIVER_MODULE) IREE_RETURN_AND_END_ZONE_IF_ERROR( z0, iree_hal_local_sync_driver_module_register(registry)); diff --git a/tests/e2e/stablehlo_ops/BUILD.bazel b/tests/e2e/stablehlo_ops/BUILD.bazel index 14852a7a56c3..5b769cb62d27 100644 --- a/tests/e2e/stablehlo_ops/BUILD.bazel +++ b/tests/e2e/stablehlo_ops/BUILD.bazel @@ -506,3 +506,112 @@ iree_check_single_backend_test_suite( ], target_backend = "cuda", ) + +CUDA_SRCS = enforce_glob( + [ + "abs.mlir", + "add.mlir", + "batch_norm_inference.mlir", + "bitcast_convert.mlir", + "broadcast.mlir", + "broadcast_add.mlir", + "broadcast_in_dim.mlir", + "clamp.mlir", + "compare.mlir", + "complex.mlir", + "concatenate.mlir", + "constant.mlir", + "convert.mlir", + "convolution.mlir", + "cosine.mlir", + "divide.mlir", + "dot.mlir", + "dot_bf16.mlir", + "dot_general.mlir", + "dynamic_slice.mlir", + "dynamic_update_slice.mlir", + "exponential.mlir", + "exponential_fp16.mlir", + "exponential_minus_one.mlir", + "fft.mlir", + "finite.mlir", + "floor.mlir", + "gather.mlir", + "iota.mlir", + "log.mlir", + "log_plus_one.mlir", + "maximum.mlir", + "minimum.mlir", + "multiply.mlir", + "negate.mlir", + "pad.mlir", + "philox.mlir", + "pow.mlir", + "reduce.mlir", + "reduce_window.mlir", + "remainder.mlir", + "reshape.mlir", + "reverse.mlir", + "rng_normal.mlir", + "rng_uniform.mlir", + "round.mlir", + "rsqrt.mlir", + "scatter.mlir", + "scatter_dynamic.mlir", + "select.mlir", + "sine.mlir", + "slice.mlir", + "sort.mlir", + "sqrt.mlir", + "subtract.mlir", + "tanh.mlir", + "three_fry.mlir", + "torch_index_select.mlir", + "transpose.mlir", + "while.mlir", + ], + include = ["*.mlir"], + exclude = [], +) + +iree_check_single_backend_test_suite( + name = "check_cuda2_graph", + srcs = CUDA_SRCS, + compiler_flags = [ + # TODO(#13984): memset emulation required for graphs. + "--iree-stream-emulate-memset", + "--iree-hal-cuda-enable-legacy-sync=false", + ], + driver = "cuda2", + input_type = "stablehlo", + runner_args = ["--cuda2_use_streams=false"], + tags = [ + # CUDA cuInit fails with sanitizer on. + "noasan", + "nomsan", + "notsan", + "noubsan", + "requires-gpu-nvidia", + ], + target_backend = "cuda", +) + +iree_check_single_backend_test_suite( + name = "check_cuda2_stream", + srcs = CUDA_SRCS, + compiler_flags = [ + "--iree-hal-cuda-enable-legacy-sync=false", + ], + driver = "cuda2", + input_type = "stablehlo", + runner_args = ["--cuda2_use_streams=true"], + tags = [ + # CUDA cuInit fails with sanitizer on. + "noasan", + "nomsan", + "notsan", + "noubsan", + "requires-gpu-nvidia", + ], + target_backend = "cuda", +) diff --git a/tests/e2e/stablehlo_ops/CMakeLists.txt b/tests/e2e/stablehlo_ops/CMakeLists.txt index 3f3974f623c9..714e2a8c1312 100644 --- a/tests/e2e/stablehlo_ops/CMakeLists.txt +++ b/tests/e2e/stablehlo_ops/CMakeLists.txt @@ -460,6 +460,171 @@ iree_check_single_backend_test_suite( "requires-gpu-nvidia" ) +iree_check_single_backend_test_suite( + NAME + check_cuda2_graph + SRCS + "abs.mlir" + "add.mlir" + "batch_norm_inference.mlir" + "bitcast_convert.mlir" + "broadcast.mlir" + "broadcast_add.mlir" + "broadcast_in_dim.mlir" + "clamp.mlir" + "compare.mlir" + "complex.mlir" + "concatenate.mlir" + "constant.mlir" + "convert.mlir" + "convolution.mlir" + "cosine.mlir" + "divide.mlir" + "dot.mlir" + "dot_bf16.mlir" + "dot_general.mlir" + "dynamic_slice.mlir" + "dynamic_update_slice.mlir" + "exponential.mlir" + "exponential_fp16.mlir" + "exponential_minus_one.mlir" + "fft.mlir" + "finite.mlir" + "floor.mlir" + "gather.mlir" + "iota.mlir" + "log.mlir" + "log_plus_one.mlir" + "maximum.mlir" + "minimum.mlir" + "multiply.mlir" + "negate.mlir" + "pad.mlir" + "philox.mlir" + "pow.mlir" + "reduce.mlir" + "reduce_window.mlir" + "remainder.mlir" + "reshape.mlir" + "reverse.mlir" + "rng_normal.mlir" + "rng_uniform.mlir" + "round.mlir" + "rsqrt.mlir" + "scatter.mlir" + "scatter_dynamic.mlir" + "select.mlir" + "sine.mlir" + "slice.mlir" + "sort.mlir" + "sqrt.mlir" + "subtract.mlir" + "tanh.mlir" + "three_fry.mlir" + "torch_index_select.mlir" + "transpose.mlir" + "while.mlir" + TARGET_BACKEND + "cuda" + DRIVER + "cuda2" + COMPILER_FLAGS + "--iree-stream-emulate-memset" + "--iree-hal-cuda-enable-legacy-sync=false" + INPUT_TYPE + "stablehlo" + RUNNER_ARGS + "--cuda2_use_streams=false" + LABELS + "noasan" + "nomsan" + "notsan" + "noubsan" + "requires-gpu-nvidia" +) + +iree_check_single_backend_test_suite( + NAME + check_cuda2_stream + SRCS + "abs.mlir" + "add.mlir" + "batch_norm_inference.mlir" + "bitcast_convert.mlir" + "broadcast.mlir" + "broadcast_add.mlir" + "broadcast_in_dim.mlir" + "clamp.mlir" + "compare.mlir" + "complex.mlir" + "concatenate.mlir" + "constant.mlir" + "convert.mlir" + "convolution.mlir" + "cosine.mlir" + "divide.mlir" + "dot.mlir" + "dot_bf16.mlir" + "dot_general.mlir" + "dynamic_slice.mlir" + "dynamic_update_slice.mlir" + "exponential.mlir" + "exponential_fp16.mlir" + "exponential_minus_one.mlir" + "fft.mlir" + "finite.mlir" + "floor.mlir" + "gather.mlir" + "iota.mlir" + "log.mlir" + "log_plus_one.mlir" + "maximum.mlir" + "minimum.mlir" + "multiply.mlir" + "negate.mlir" + "pad.mlir" + "philox.mlir" + "pow.mlir" + "reduce.mlir" + "reduce_window.mlir" + "remainder.mlir" + "reshape.mlir" + "reverse.mlir" + "rng_normal.mlir" + "rng_uniform.mlir" + "round.mlir" + "rsqrt.mlir" + "scatter.mlir" + "scatter_dynamic.mlir" + "select.mlir" + "sine.mlir" + "slice.mlir" + "sort.mlir" + "sqrt.mlir" + "subtract.mlir" + "tanh.mlir" + "three_fry.mlir" + "torch_index_select.mlir" + "transpose.mlir" + "while.mlir" + TARGET_BACKEND + "cuda" + DRIVER + "cuda2" + COMPILER_FLAGS + "--iree-hal-cuda-enable-legacy-sync=false" + INPUT_TYPE + "stablehlo" + RUNNER_ARGS + "--cuda2_use_streams=true" + LABELS + "noasan" + "nomsan" + "notsan" + "noubsan" + "requires-gpu-nvidia" +) + ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### iree_check_single_backend_test_suite( diff --git a/tests/e2e/tosa_ops/BUILD.bazel b/tests/e2e/tosa_ops/BUILD.bazel index a554c080acb8..8bc25934a8d0 100644 --- a/tests/e2e/tosa_ops/BUILD.bazel +++ b/tests/e2e/tosa_ops/BUILD.bazel @@ -246,9 +246,102 @@ iree_check_single_backend_test_suite( target_backend = "vulkan-spirv", ) +CUDA_SRCS = enforce_glob( + [ + "abs.mlir", + "add.mlir", + "arithmetic_right_shift.mlir", + "bitwise_and.mlir", + "bitwise_or.mlir", + "bitwise_xor.mlir", + "ceil.mlir", + "clamp.mlir", + "clz.mlir", + "const.mlir", + "equal.mlir", + "exp.mlir", + "floor.mlir", + "fully_connected.mlir", + "gather.mlir", + "greater.mlir", + "greater_equal.mlir", + "if.mlir", + "log.mlir", + "logical_left_shift.mlir", + "logical_right_shift.mlir", + "logical_right_shift_16.mlir", + "matmul.mlir", + "max_pool.mlir", + "maximum.mlir", + "minimum.mlir", + "mul.mlir", + "negate.mlir", + "pad.mlir", + "reciprocal.mlir", + "reduce.mlir", + "reshape.mlir", + "rsqrt.mlir", + "select.mlir", + "sigmoid.mlir", + "sub.mlir", + "table.mlir", + "tanh.mlir", + "transpose.mlir", + "while.mlir", + ], + include = ["*.mlir"], + exclude = [ + "mul_shift.mlir", + ], +) + +iree_check_single_backend_test_suite( + name = "check_cuda2_graph", + srcs = CUDA_SRCS, + compiler_flags = [ + # TODO(#13984): memset emulation required for graphs. + "--iree-stream-emulate-memset", + "--iree-hal-cuda-enable-legacy-sync=false", + ], + driver = "cuda2", + input_type = "tosa", + runner_args = ["--cuda2_use_streams=false"], + tags = [ + # CUDA cuInit fails with sanitizer on. + "noasan", + "nomsan", + "notsan", + "noubsan", + "requires-gpu-nvidia", + ], + target_backend = "cuda", +) + +iree_check_single_backend_test_suite( + name = "check_cuda2_stream", + srcs = CUDA_SRCS, + compiler_flags = [ + "--iree-hal-cuda-enable-legacy-sync=false", + ], + driver = "cuda2", + input_type = "tosa", + runner_args = ["--cuda2_use_streams=true"], + tags = [ + # CUDA cuInit fails with sanitizer on. + "noasan", + "nomsan", + "notsan", + "noubsan", + "requires-gpu-nvidia", + ], + target_backend = "cuda", +) + test_suite( name = "check", tests = [ + ":check_cuda2_graph", + ":check_cuda2_stream", ":check_llvm-cpu_local-task", ":check_vmvx_local-task", ":check_vulkan-spirv_vulkan", diff --git a/tests/e2e/tosa_ops/CMakeLists.txt b/tests/e2e/tosa_ops/CMakeLists.txt index f3f86ffeda6a..c04e6cd42173 100644 --- a/tests/e2e/tosa_ops/CMakeLists.txt +++ b/tests/e2e/tosa_ops/CMakeLists.txt @@ -222,6 +222,131 @@ iree_check_single_backend_test_suite( "tosa" ) +iree_check_single_backend_test_suite( + NAME + check_cuda2_graph + SRCS + "abs.mlir" + "add.mlir" + "arithmetic_right_shift.mlir" + "bitwise_and.mlir" + "bitwise_or.mlir" + "bitwise_xor.mlir" + "ceil.mlir" + "clamp.mlir" + "clz.mlir" + "const.mlir" + "equal.mlir" + "exp.mlir" + "floor.mlir" + "fully_connected.mlir" + "gather.mlir" + "greater.mlir" + "greater_equal.mlir" + "if.mlir" + "log.mlir" + "logical_left_shift.mlir" + "logical_right_shift.mlir" + "logical_right_shift_16.mlir" + "matmul.mlir" + "max_pool.mlir" + "maximum.mlir" + "minimum.mlir" + "mul.mlir" + "negate.mlir" + "pad.mlir" + "reciprocal.mlir" + "reduce.mlir" + "reshape.mlir" + "rsqrt.mlir" + "select.mlir" + "sigmoid.mlir" + "sub.mlir" + "table.mlir" + "tanh.mlir" + "transpose.mlir" + "while.mlir" + TARGET_BACKEND + "cuda" + DRIVER + "cuda2" + COMPILER_FLAGS + "--iree-stream-emulate-memset" + "--iree-hal-cuda-enable-legacy-sync=false" + INPUT_TYPE + "tosa" + RUNNER_ARGS + "--cuda2_use_streams=false" + LABELS + "noasan" + "nomsan" + "notsan" + "noubsan" + "requires-gpu-nvidia" +) + +iree_check_single_backend_test_suite( + NAME + check_cuda2_stream + SRCS + "abs.mlir" + "add.mlir" + "arithmetic_right_shift.mlir" + "bitwise_and.mlir" + "bitwise_or.mlir" + "bitwise_xor.mlir" + "ceil.mlir" + "clamp.mlir" + "clz.mlir" + "const.mlir" + "equal.mlir" + "exp.mlir" + "floor.mlir" + "fully_connected.mlir" + "gather.mlir" + "greater.mlir" + "greater_equal.mlir" + "if.mlir" + "log.mlir" + "logical_left_shift.mlir" + "logical_right_shift.mlir" + "logical_right_shift_16.mlir" + "matmul.mlir" + "max_pool.mlir" + "maximum.mlir" + "minimum.mlir" + "mul.mlir" + "negate.mlir" + "pad.mlir" + "reciprocal.mlir" + "reduce.mlir" + "reshape.mlir" + "rsqrt.mlir" + "select.mlir" + "sigmoid.mlir" + "sub.mlir" + "table.mlir" + "tanh.mlir" + "transpose.mlir" + "while.mlir" + TARGET_BACKEND + "cuda" + DRIVER + "cuda2" + COMPILER_FLAGS + "--iree-hal-cuda-enable-legacy-sync=false" + INPUT_TYPE + "tosa" + RUNNER_ARGS + "--cuda2_use_streams=true" + LABELS + "noasan" + "nomsan" + "notsan" + "noubsan" + "requires-gpu-nvidia" +) + ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### iree_check_single_backend_test_suite(