From a5c70a02f2f2190e750d2592f173a53d9afc11eb Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Thu, 24 Aug 2023 21:28:01 -0700 Subject: [PATCH] [cuda] Move to hal/drivers and wire up BUILD files This commit moves the CUDA HAL driver rewrite to the `hal/drivers` directory now that it is functional and ready for normal usage. This lets us start running tests in CI to make sure it does not regress. Further improvements can happen directly in this directory. This provides an easy route for trying out the rewrite before it eventually replaces the existing HAL driver. Along the way, this also wires up the BUILD configurations. --- .github/CODEOWNERS | 1 + CMakeLists.txt | 16 +- .../cuda2/registration/CMakeLists.txt | 21 --- experimental/cuda2/tests/CMakeLists.txt | 7 - .../cuda2/tests/stablehlo_ops/CMakeLists.txt | 167 ------------------ .../cuda2/tests/tosa_ops/CMakeLists.txt | 129 -------------- runtime/src/iree/hal/drivers/BUILD.bazel | 5 +- runtime/src/iree/hal/drivers/CMakeLists.txt | 4 + .../src/iree/hal/drivers/cuda2/BUILD.bazel | 115 ++++++++++++ .../iree/hal/drivers}/cuda2/CMakeLists.txt | 26 +-- .../src/iree/hal/drivers}/cuda2/README.md | 4 +- .../src/iree/hal/drivers}/cuda2/api.h | 6 +- .../hal/drivers}/cuda2/cts/CMakeLists.txt | 8 +- .../iree/hal/drivers}/cuda2/cuda_allocator.c | 8 +- .../iree/hal/drivers}/cuda2/cuda_allocator.h | 10 +- .../src/iree/hal/drivers}/cuda2/cuda_buffer.c | 2 +- .../src/iree/hal/drivers}/cuda2/cuda_buffer.h | 8 +- .../src/iree/hal/drivers}/cuda2/cuda_device.c | 33 ++-- .../src/iree/hal/drivers}/cuda2/cuda_device.h | 12 +- .../src/iree/hal/drivers}/cuda2/cuda_driver.c | 12 +- .../cuda2/cuda_dynamic_symbol_table.h | 0 .../hal/drivers}/cuda2/cuda_dynamic_symbols.c | 6 +- .../hal/drivers}/cuda2/cuda_dynamic_symbols.h | 10 +- .../iree/hal/drivers}/cuda2/cuda_headers.h | 6 +- .../hal/drivers}/cuda2/cuda_status_util.c | 5 +- .../hal/drivers}/cuda2/cuda_status_util.h | 8 +- .../drivers}/cuda2/dynamic_symbols_test.cc | 4 +- 
.../src/iree/hal/drivers}/cuda2/event_pool.c | 6 +- .../src/iree/hal/drivers}/cuda2/event_pool.h | 8 +- .../iree/hal/drivers}/cuda2/event_semaphore.c | 9 +- .../iree/hal/drivers}/cuda2/event_semaphore.h | 12 +- .../hal/drivers}/cuda2/graph_command_buffer.c | 12 +- .../hal/drivers}/cuda2/graph_command_buffer.h | 10 +- .../iree/hal/drivers}/cuda2/memory_pools.c | 8 +- .../iree/hal/drivers}/cuda2/memory_pools.h | 6 +- .../hal/drivers}/cuda2/native_executable.c | 8 +- .../hal/drivers}/cuda2/native_executable.h | 10 +- .../iree/hal/drivers}/cuda2/nccl_channel.c | 9 +- .../iree/hal/drivers}/cuda2/nccl_channel.h | 17 +- .../cuda2/nccl_dynamic_symbol_table.h | 0 .../hal/drivers}/cuda2/nccl_dynamic_symbols.c | 6 +- .../hal/drivers}/cuda2/nccl_dynamic_symbols.h | 12 +- .../iree/hal/drivers}/cuda2/nccl_headers.h | 6 +- .../hal/drivers}/cuda2/nccl_status_util.c | 5 +- .../hal/drivers}/cuda2/nccl_status_util.h | 8 +- .../hal/drivers}/cuda2/nop_executable_cache.c | 4 +- .../hal/drivers}/cuda2/nop_executable_cache.h | 10 +- .../drivers}/cuda2/pending_queue_actions.c | 12 +- .../drivers}/cuda2/pending_queue_actions.h | 8 +- .../iree/hal/drivers}/cuda2/pipeline_layout.c | 2 +- .../iree/hal/drivers}/cuda2/pipeline_layout.h | 6 +- .../drivers/cuda2/registration/BUILD.bazel | 32 ++++ .../drivers/cuda2/registration/CMakeLists.txt | 30 ++++ .../cuda2/registration/driver_module.c | 4 +- .../cuda2/registration/driver_module.h | 6 +- .../drivers}/cuda2/stream_command_buffer.c | 12 +- .../drivers}/cuda2/stream_command_buffer.h | 14 +- .../iree/hal/drivers}/cuda2/timepoint_pool.c | 8 +- .../iree/hal/drivers}/cuda2/timepoint_pool.h | 8 +- .../src/iree/hal/drivers}/cuda2/tracing.c | 6 +- .../src/iree/hal/drivers}/cuda2/tracing.h | 10 +- runtime/src/iree/hal/drivers/init.c | 9 + tests/e2e/stablehlo_ops/BUILD.bazel | 105 +++++++++++ tests/e2e/stablehlo_ops/CMakeLists.txt | 162 +++++++++++++++++ tests/e2e/tosa_ops/BUILD.bazel | 89 ++++++++++ tests/e2e/tosa_ops/CMakeLists.txt | 122 +++++++++++++ 
66 files changed, 894 insertions(+), 550 deletions(-) delete mode 100644 experimental/cuda2/registration/CMakeLists.txt delete mode 100644 experimental/cuda2/tests/CMakeLists.txt delete mode 100644 experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt delete mode 100644 experimental/cuda2/tests/tosa_ops/CMakeLists.txt create mode 100644 runtime/src/iree/hal/drivers/cuda2/BUILD.bazel rename {experimental => runtime/src/iree/hal/drivers}/cuda2/CMakeLists.txt (70%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/README.md (98%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/api.h (97%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/cts/CMakeLists.txt (82%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/cuda_allocator.c (99%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/cuda_allocator.h (79%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/cuda_buffer.c (99%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/cuda_buffer.h (93%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/cuda_device.c (97%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/cuda_device.h (87%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/cuda_driver.c (98%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/cuda_dynamic_symbol_table.h (100%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/cuda_dynamic_symbols.c (94%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/cuda_dynamic_symbols.h (84%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/cuda_headers.h (64%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/cuda_status_util.c (98%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/cuda_status_util.h (91%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/dynamic_symbols_test.cc (95%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/event_pool.c (98%) rename {experimental => 
runtime/src/iree/hal/drivers}/cuda2/event_pool.h (94%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/event_semaphore.c (98%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/event_semaphore.h (85%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/graph_command_buffer.c (98%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/graph_command_buffer.h (84%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/memory_pools.c (98%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/memory_pools.h (95%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/native_executable.c (98%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/native_executable.h (84%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/nccl_channel.c (99%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/nccl_channel.h (84%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/nccl_dynamic_symbol_table.h (100%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/nccl_dynamic_symbols.c (96%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/nccl_dynamic_symbols.h (84%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/nccl_headers.h (65%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/nccl_status_util.c (91%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/nccl_status_util.h (92%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/nop_executable_cache.c (97%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/nop_executable_cache.h (74%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/pending_queue_actions.c (98%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/pending_queue_actions.h (91%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/pipeline_layout.c (99%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/pipeline_layout.h (96%) create mode 100644 
runtime/src/iree/hal/drivers/cuda2/registration/BUILD.bazel create mode 100644 runtime/src/iree/hal/drivers/cuda2/registration/CMakeLists.txt rename {experimental => runtime/src/iree/hal/drivers}/cuda2/registration/driver_module.c (97%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/registration/driver_module.h (74%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/stream_command_buffer.c (98%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/stream_command_buffer.h (82%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/timepoint_pool.c (98%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/timepoint_pool.h (95%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/tracing.c (98%) rename {experimental => runtime/src/iree/hal/drivers}/cuda2/tracing.h (95%) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index bd825f42c62bd..e1c6266c52d24 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -77,5 +77,6 @@ # Runtime /runtime/src/iree/ @benvanik /runtime/src/iree/hal/cts/ @ScottTodd +/runtime/src/iree/hal/drivers/cuda2/ @antiagainst /runtime/src/iree/hal/drivers/metal/ @antiagainst /runtime/src/iree/hal/drivers/vulkan/ @antiagainst @ScottTodd diff --git a/CMakeLists.txt b/CMakeLists.txt index 415f321559903..7d631849cec41 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -232,8 +232,10 @@ option(IREE_HAL_DRIVER_DEFAULTS "Sets the default value for all runtime HAL driv # not cross compiling. Note: a CUDA-compatible GPU with drivers is still # required to actually run CUDA workloads. set(IREE_HAL_DRIVER_CUDA_DEFAULT ${IREE_HAL_DRIVER_DEFAULTS}) +set(IREE_HAL_DRIVER_CUDA2_DEFAULT ${IREE_HAL_DRIVER_DEFAULTS}) if(NOT IREE_CUDA_AVAILABLE OR CMAKE_CROSSCOMPILING) set(IREE_HAL_DRIVER_CUDA_DEFAULT OFF) + set(IREE_HAL_DRIVER_CUDA2_DEFAULT OFF) endif() # Vulkan support is enabled by default if the platform might support Vulkan. 
@@ -251,6 +253,7 @@ if(NOT APPLE OR NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm64") endif() option(IREE_HAL_DRIVER_CUDA "Enables the 'cuda' runtime HAL driver" ${IREE_HAL_DRIVER_CUDA_DEFAULT}) +option(IREE_HAL_DRIVER_CUDA2 "Enables the 'cuda2' runtime HAL driver" ${IREE_HAL_DRIVER_CUDA2_DEFAULT}) option(IREE_HAL_DRIVER_LOCAL_SYNC "Enables the 'local-sync' runtime HAL driver" ${IREE_HAL_DRIVER_DEFAULTS}) option(IREE_HAL_DRIVER_LOCAL_TASK "Enables the 'local-task' runtime HAL driver" ${IREE_HAL_DRIVER_DEFAULTS}) option(IREE_HAL_DRIVER_VULKAN "Enables the 'vulkan' runtime HAL driver" ${IREE_HAL_DRIVER_VULKAN_DEFAULT}) @@ -306,6 +309,9 @@ message(STATUS "IREE HAL drivers:") if(IREE_HAL_DRIVER_CUDA) message(STATUS " - cuda") endif() +if(IREE_HAL_DRIVER_CUDA2) + message(STATUS " - cuda2") +endif() if(IREE_HAL_DRIVER_LOCAL_SYNC) message(STATUS " - local-sync") endif() @@ -341,16 +347,6 @@ if(IREE_HAL_EXECUTABLE_PLUGIN_SYSTEM_LIBRARY) message(STATUS " - system-library") endif() -#------------------------------------------------------------------------------- -# Experimental next-generation CUDA HAL driver -# Enable with: -DIREE_EXTERNAL_HAL_DRIVERS=cuda2 -#------------------------------------------------------------------------------- - -set(IREE_EXTERNAL_CUDA2_HAL_DRIVER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/experimental/cuda2") -set(IREE_EXTERNAL_CUDA2_HAL_DRIVER_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/experimental/cuda2") -set(IREE_EXTERNAL_CUDA2_HAL_DRIVER_TARGET "iree::experimental::cuda2::registration") -set(IREE_EXTERNAL_CUDA2_HAL_DRIVER_REGISTER "iree_hal_cuda2_driver_module_register") - #------------------------------------------------------------------------------- # Experimental ROCM HAL driver # Enable with: -DIREE_EXTERNAL_HAL_DRIVERS=rocm diff --git a/experimental/cuda2/registration/CMakeLists.txt b/experimental/cuda2/registration/CMakeLists.txt deleted file mode 100644 index 49b49c7c0477f..0000000000000 --- a/experimental/cuda2/registration/CMakeLists.txt 
+++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2023 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -iree_cc_library( - NAME - registration - HDRS - "driver_module.h" - SRCS - "driver_module.c" - DEPS - iree::base - iree::experimental::cuda2 - iree::hal - DEFINES - "IREE_HAVE_HAL_CUDA2_DRIVER_MODULE=1" - PUBLIC -) diff --git a/experimental/cuda2/tests/CMakeLists.txt b/experimental/cuda2/tests/CMakeLists.txt deleted file mode 100644 index 33551b5769745..0000000000000 --- a/experimental/cuda2/tests/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright 2023 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -iree_add_all_subdirs() diff --git a/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt b/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt deleted file mode 100644 index 482f15453a437..0000000000000 --- a/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright 2023 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. 
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -iree_check_single_backend_test_suite( - NAME - check_cuda2_graph - SRCS - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/abs.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/add.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/batch_norm_inference.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/bitcast_convert.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/broadcast.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/broadcast_add.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/broadcast_in_dim.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/clamp.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/compare.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/complex.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/concatenate.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/constant.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/convert.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/convolution.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/cosine.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/divide.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dot.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dot_bf16.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dot_general.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dynamic_slice.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dynamic_update_slice.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/exponential.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/exponential_fp16.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/exponential_minus_one.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/fft.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/finite.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/floor.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/gather.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/iota.mlir" - 
"${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/log.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/log_plus_one.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/maximum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/minimum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/multiply.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/negate.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/pad.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/philox.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/pow.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reduce.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reduce_window.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/remainder.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reshape.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reverse.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/rng_normal.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/rng_uniform.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/round.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/rsqrt.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/scatter.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/scatter_dynamic.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/select.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/sine.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/slice.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/sort.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/sqrt.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/subtract.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/tanh.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/three_fry.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/torch_index_select.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/transpose.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/while.mlir" - TARGET_BACKEND - "cuda" - DRIVER - "cuda2" - COMPILER_FLAGS - "--iree-input-type=stablehlo" - # TODO(#13984): We need memset 
emulation to workaround CUDA graph issues for now. - "--iree-stream-emulate-memset" - RUNNER_ARGS - "--cuda2_use_streams=false" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-nvidia" -) - -iree_check_single_backend_test_suite( - NAME - check_cuda2_stream - SRCS - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/abs.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/add.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/batch_norm_inference.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/bitcast_convert.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/broadcast.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/broadcast_add.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/broadcast_in_dim.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/clamp.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/compare.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/complex.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/concatenate.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/constant.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/convert.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/convolution.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/cosine.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/divide.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dot.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dot_bf16.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dot_general.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dynamic_slice.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dynamic_update_slice.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/exponential.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/exponential_fp16.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/exponential_minus_one.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/fft.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/finite.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/floor.mlir" - 
"${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/gather.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/iota.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/log.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/log_plus_one.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/maximum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/minimum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/multiply.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/negate.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/pad.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/philox.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/pow.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reduce.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reduce_window.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/remainder.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reshape.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reverse.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/rng_normal.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/rng_uniform.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/round.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/rsqrt.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/scatter.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/scatter_dynamic.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/select.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/sine.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/slice.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/sort.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/sqrt.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/subtract.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/tanh.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/three_fry.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/torch_index_select.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/transpose.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/while.mlir" - TARGET_BACKEND 
- "cuda" - DRIVER - "cuda2" - COMPILER_FLAGS - "--iree-input-type=stablehlo" - RUNNER_ARGS - "--cuda2_use_streams=true" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-nvidia" -) diff --git a/experimental/cuda2/tests/tosa_ops/CMakeLists.txt b/experimental/cuda2/tests/tosa_ops/CMakeLists.txt deleted file mode 100644 index 88752fe04624c..0000000000000 --- a/experimental/cuda2/tests/tosa_ops/CMakeLists.txt +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright 2023 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -iree_check_single_backend_test_suite( - NAME - check_cuda2_graph - SRCS - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/abs.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/add.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/arithmetic_right_shift.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/bitwise_and.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/bitwise_or.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/bitwise_xor.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/ceil.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/clamp.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/clz.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/const.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/equal.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/exp.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/floor.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/fully_connected.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/gather.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/greater.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/greater_equal.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/if.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/log.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/logical_left_shift.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/logical_right_shift.mlir" - 
"${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/logical_right_shift_16.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/matmul.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/max_pool.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/maximum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/minimum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/mul.mlir" - # "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/mul_shift.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/negate.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/pad.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/reciprocal.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/reduce.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/reshape.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/rsqrt.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/select.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/sigmoid.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/sub.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/table.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/tanh.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/transpose.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/while.mlir" - TARGET_BACKEND - "cuda" - DRIVER - "cuda2" - COMPILER_FLAGS - "--iree-input-type=tosa" - # TODO(#13984): We need memset emulation to workaround CUDA graph issues for now. 
- "--iree-stream-emulate-memset" - RUNNER_ARGS - "--cuda2_use_streams=false" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-nvidia" -) - -iree_check_single_backend_test_suite( - NAME - check_cuda2_stream - SRCS - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/abs.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/add.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/arithmetic_right_shift.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/bitwise_and.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/bitwise_or.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/bitwise_xor.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/ceil.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/clamp.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/clz.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/const.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/equal.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/exp.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/floor.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/fully_connected.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/gather.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/greater.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/greater_equal.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/if.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/log.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/logical_left_shift.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/logical_right_shift.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/logical_right_shift_16.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/matmul.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/max_pool.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/maximum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/minimum.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/mul.mlir" - # "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/mul_shift.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/negate.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/pad.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/reciprocal.mlir" - 
"${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/reduce.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/reshape.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/rsqrt.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/select.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/sigmoid.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/sub.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/table.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/tanh.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/transpose.mlir" - "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/while.mlir" - TARGET_BACKEND - "cuda" - DRIVER - "cuda2" - COMPILER_FLAGS - "--iree-input-type=tosa" - RUNNER_ARGS - "--cuda2_use_streams=true" - LABELS - "noasan" - "nomsan" - "notsan" - "noubsan" - "requires-gpu-nvidia" -) diff --git a/runtime/src/iree/hal/drivers/BUILD.bazel b/runtime/src/iree/hal/drivers/BUILD.bazel index f7b05ddf7d835..4381637a85062 100644 --- a/runtime/src/iree/hal/drivers/BUILD.bazel +++ b/runtime/src/iree/hal/drivers/BUILD.bazel @@ -56,7 +56,10 @@ iree_runtime_cc_library( "//runtime/src/iree/base", "//runtime/src/iree/hal", ] + select({ - ":cuda_enabled": ["//runtime/src/iree/hal/drivers/cuda/registration"], + ":cuda_enabled": [ + "//runtime/src/iree/hal/drivers/cuda/registration", + "//runtime/src/iree/hal/drivers/cuda2/registration", + ], "//conditions:default": [], }) + select({ diff --git a/runtime/src/iree/hal/drivers/CMakeLists.txt b/runtime/src/iree/hal/drivers/CMakeLists.txt index ac9906f8ea5d2..a483308cfaa95 100644 --- a/runtime/src/iree/hal/drivers/CMakeLists.txt +++ b/runtime/src/iree/hal/drivers/CMakeLists.txt @@ -114,6 +114,10 @@ if(IREE_HAL_DRIVER_CUDA) add_subdirectory(cuda) list(APPEND _INIT_INTERNAL_DEPS iree::hal::drivers::cuda::registration) endif() +if(IREE_HAL_DRIVER_CUDA2) + add_subdirectory(cuda2) + list(APPEND _INIT_INTERNAL_DEPS iree::hal::drivers::cuda2::registration) +endif() if(IREE_HAL_DRIVER_LOCAL_SYNC) add_subdirectory(local_sync) list(APPEND _INIT_INTERNAL_DEPS 
iree::hal::drivers::local_sync::registration) diff --git a/runtime/src/iree/hal/drivers/cuda2/BUILD.bazel b/runtime/src/iree/hal/drivers/cuda2/BUILD.bazel new file mode 100644 index 0000000000000..71449887dcc1b --- /dev/null +++ b/runtime/src/iree/hal/drivers/cuda2/BUILD.bazel @@ -0,0 +1,115 @@ +# Copyright 2023 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +load("//build_tools/bazel:build_defs.oss.bzl", "iree_runtime_cc_library", "iree_runtime_cc_test") + +package( + default_visibility = ["//visibility:public"], + features = ["layering_check"], + licenses = ["notice"], # Apache 2.0 +) + +iree_runtime_cc_library( + name = "cuda2", + srcs = [ + "api.h", + "cuda_allocator.c", + "cuda_allocator.h", + "cuda_buffer.c", + "cuda_buffer.h", + "cuda_device.c", + "cuda_device.h", + "cuda_driver.c", + "event_pool.c", + "event_pool.h", + "event_semaphore.c", + "event_semaphore.h", + "graph_command_buffer.c", + "graph_command_buffer.h", + "memory_pools.c", + "memory_pools.h", + "native_executable.c", + "native_executable.h", + "nccl_channel.c", + "nccl_channel.h", + "nop_executable_cache.c", + "nop_executable_cache.h", + "pending_queue_actions.c", + "pending_queue_actions.h", + "pipeline_layout.c", + "pipeline_layout.h", + "stream_command_buffer.c", + "stream_command_buffer.h", + "timepoint_pool.c", + "timepoint_pool.h", + "tracing.c", + "tracing.h", + ], + hdrs = [ + "api.h", + ], + deps = [ + ":dynamic_symbols", + "//runtime/src/iree/base", + "//runtime/src/iree/base:core_headers", + "//runtime/src/iree/base/internal", + "//runtime/src/iree/base/internal:arena", + "//runtime/src/iree/base/internal:event_pool", + "//runtime/src/iree/base/internal:synchronization", + "//runtime/src/iree/base/internal/flatcc:parsing", + "//runtime/src/iree/hal", + "//runtime/src/iree/hal/utils:buffer_transfer", + 
"//runtime/src/iree/hal/utils:collective_batch", + "//runtime/src/iree/hal/utils:deferred_command_buffer", + "//runtime/src/iree/hal/utils:file_transfer", + "//runtime/src/iree/hal/utils:memory_file", + "//runtime/src/iree/hal/utils:resource_set", + "//runtime/src/iree/hal/utils:semaphore_base", + "//runtime/src/iree/schemas:cuda_executable_def_c_fbs", + ], +) + +iree_runtime_cc_library( + name = "dynamic_symbols", + srcs = [ + "cuda_dynamic_symbols.c", + "cuda_headers.h", + "cuda_status_util.c", + "nccl_dynamic_symbols.c", + "nccl_headers.h", + "nccl_status_util.c", + ], + hdrs = [ + "cuda_dynamic_symbols.h", + "cuda_status_util.h", + "nccl_dynamic_symbols.h", + "nccl_status_util.h", + ], + textual_hdrs = [ + "cuda_dynamic_symbol_table.h", + "nccl_dynamic_symbol_table.h", + ], + deps = [ + "//runtime/src/iree/base", + "//runtime/src/iree/base/internal:dynamic_library", + "@iree_cuda//:headers", + "@nccl//:headers", + ], +) + +iree_runtime_cc_test( + name = "dynamic_symbols_test", + srcs = [ + "dynamic_symbols_test.cc", + ], + tags = ["driver=cuda2"], + deps = [ + ":dynamic_symbols", + "//runtime/src/iree/base", + "//runtime/src/iree/testing:gtest", + "//runtime/src/iree/testing:gtest_main", + ], +) diff --git a/experimental/cuda2/CMakeLists.txt b/runtime/src/iree/hal/drivers/cuda2/CMakeLists.txt similarity index 70% rename from experimental/cuda2/CMakeLists.txt rename to runtime/src/iree/hal/drivers/cuda2/CMakeLists.txt index d3033f0d3b525..c3ab754781c97 100644 --- a/experimental/cuda2/CMakeLists.txt +++ b/runtime/src/iree/hal/drivers/cuda2/CMakeLists.txt @@ -1,12 +1,12 @@ -# Copyright 2023 The IREE Authors -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -# Set the root for package namespacing to the current directory. 
-set(IREE_PACKAGE_ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}") -set(IREE_PACKAGE_ROOT_PREFIX "iree/experimental/cuda2") +################################################################################ +# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from # +# runtime/src/iree/hal/drivers/cuda2/BUILD.bazel # +# # +# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary # +# CMake-only content. # +# # +# To disable autogeneration for this file entirely, delete this header. # +################################################################################ iree_add_all_subdirs() @@ -34,10 +34,10 @@ iree_cc_library( "memory_pools.h" "native_executable.c" "native_executable.h" - "nop_executable_cache.c" - "nop_executable_cache.h" "nccl_channel.c" "nccl_channel.h" + "nop_executable_cache.c" + "nop_executable_cache.h" "pending_queue_actions.c" "pending_queue_actions.h" "pipeline_layout.c" @@ -51,10 +51,12 @@ iree_cc_library( DEPS ::dynamic_symbols iree::base + iree::base::core_headers iree::base::internal iree::base::internal::arena iree::base::internal::event_pool iree::base::internal::flatcc::parsing + iree::base::internal::synchronization iree::hal iree::hal::utils::buffer_transfer iree::hal::utils::collective_batch @@ -106,3 +108,5 @@ iree_cc_test( LABELS "driver=cuda2" ) + +### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### diff --git a/experimental/cuda2/README.md b/runtime/src/iree/hal/drivers/cuda2/README.md similarity index 98% rename from experimental/cuda2/README.md rename to runtime/src/iree/hal/drivers/cuda2/README.md index 1f2e8fbb0c894..58662cc6d1c20 100644 --- a/experimental/cuda2/README.md +++ b/runtime/src/iree/hal/drivers/cuda2/README.md @@ -3,10 +3,10 @@ This document lists technical details regarding the CUDA implemenation of IREE's [Hardware Abstraction Layer (HAL)][iree-hal], called a CUDA HAL driver. 
-Note that there is an existing CUDA HAL driver under the +Note that there is another CUDA HAL driver under the [`iree/hal/drivers/cuda/`][iree-cuda] directory; what this directory holds is a rewrite for it. Once this rewrite is mature enough, it will replace the -existing one. For the rewrite rationale, goals, and plans, please see +other one. For the rewrite rationale, goals, and plans, please see [Issue #13245][iree-cuda-rewrite]. ## Synchronization diff --git a/experimental/cuda2/api.h b/runtime/src/iree/hal/drivers/cuda2/api.h similarity index 97% rename from experimental/cuda2/api.h rename to runtime/src/iree/hal/drivers/cuda2/api.h index 403b3dcf7ee61..f2193c9f5ae76 100644 --- a/experimental/cuda2/api.h +++ b/runtime/src/iree/hal/drivers/cuda2/api.h @@ -6,8 +6,8 @@ // See iree/base/api.h for documentation on the API conventions used. -#ifndef IREE_EXPERIMENTAL_CUDA2_API_H_ -#define IREE_EXPERIMENTAL_CUDA2_API_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_API_H_ +#define IREE_HAL_DRIVERS_CUDA2_API_H_ #include "iree/base/api.h" #include "iree/hal/api.h" @@ -128,4 +128,4 @@ IREE_API_EXPORT iree_status_t iree_hal_cuda2_driver_create( } // extern "C" #endif // __cplusplus -#endif // IREE_EXPERIMENTAL_CUDA2_API_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_API_H_ diff --git a/experimental/cuda2/cts/CMakeLists.txt b/runtime/src/iree/hal/drivers/cuda2/cts/CMakeLists.txt similarity index 82% rename from experimental/cuda2/cts/CMakeLists.txt rename to runtime/src/iree/hal/drivers/cuda2/cts/CMakeLists.txt index e48f470380c11..58e5411e6483b 100644 --- a/experimental/cuda2/cts/CMakeLists.txt +++ b/runtime/src/iree/hal/drivers/cuda2/cts/CMakeLists.txt @@ -10,7 +10,7 @@ iree_hal_cts_test_suite( VARIANT_SUFFIX graph DRIVER_REGISTRATION_HDR - "experimental/cuda2/registration/driver_module.h" + "runtime/src/iree/hal/drivers/cuda2/registration/driver_module.h" DRIVER_REGISTRATION_FN "iree_hal_cuda2_driver_module_register" COMPILER_TARGET_BACKEND @@ -20,7 +20,7 @@ iree_hal_cts_test_suite( ARGS 
"--cuda2_use_streams=false" DEPS - iree::experimental::cuda2::registration + iree::hal::drivers::cuda2::registration EXCLUDED_TESTS # HAL event is unimplemented for now. "event" @@ -35,7 +35,7 @@ iree_hal_cts_test_suite( VARIANT_SUFFIX stream DRIVER_REGISTRATION_HDR - "experimental/cuda2/registration/driver_module.h" + "runtime/src/iree/hal/drivers/cuda2/registration/driver_module.h" DRIVER_REGISTRATION_FN "iree_hal_cuda2_driver_module_register" COMPILER_TARGET_BACKEND @@ -45,7 +45,7 @@ iree_hal_cts_test_suite( ARGS "--cuda2_use_streams=true" DEPS - iree::experimental::cuda2::registration + iree::hal::drivers::cuda2::registration EXCLUDED_TESTS # HAL event is unimplemented for now. "event" diff --git a/experimental/cuda2/cuda_allocator.c b/runtime/src/iree/hal/drivers/cuda2/cuda_allocator.c similarity index 99% rename from experimental/cuda2/cuda_allocator.c rename to runtime/src/iree/hal/drivers/cuda2/cuda_allocator.c index 88550e6090a44..cf3452f286a07 100644 --- a/experimental/cuda2/cuda_allocator.c +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_allocator.c @@ -4,14 +4,14 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/cuda_allocator.h" +#include "iree/hal/drivers/cuda2/cuda_allocator.h" #include -#include "experimental/cuda2/cuda_buffer.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" #include "iree/base/api.h" +#include "iree/hal/drivers/cuda2/cuda_buffer.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" #if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_ALLOCATION_TRACKING static const char* IREE_HAL_CUDA_ALLOCATOR_ID = "CUDA2 unpooled"; diff --git a/experimental/cuda2/cuda_allocator.h b/runtime/src/iree/hal/drivers/cuda2/cuda_allocator.h similarity index 79% rename from experimental/cuda2/cuda_allocator.h rename to runtime/src/iree/hal/drivers/cuda2/cuda_allocator.h index 8fb1bf193ad5e..767951072e8d8 100644 --- a/experimental/cuda2/cuda_allocator.h +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_allocator.h @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_CUDA_ALLOCATOR_H_ -#define EXPERIMENTAL_CUDA2_CUDA_ALLOCATOR_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_CUDA_ALLOCATOR_H_ +#define IREE_HAL_DRIVERS_CUDA2_CUDA_ALLOCATOR_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/memory_pools.h" #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/memory_pools.h" #ifdef __cplusplus extern "C" { @@ -30,4 +30,4 @@ iree_status_t iree_hal_cuda2_allocator_create( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_CUDA_ALLOCATOR_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_CUDA_ALLOCATOR_H_ diff --git a/experimental/cuda2/cuda_buffer.c b/runtime/src/iree/hal/drivers/cuda2/cuda_buffer.c similarity index 99% rename from experimental/cuda2/cuda_buffer.c rename to runtime/src/iree/hal/drivers/cuda2/cuda_buffer.c index d1d017fed2024..578aeb5861a6c 100644 --- a/experimental/cuda2/cuda_buffer.c +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_buffer.c @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/cuda_buffer.h" +#include "iree/hal/drivers/cuda2/cuda_buffer.h" #include #include diff --git a/experimental/cuda2/cuda_buffer.h b/runtime/src/iree/hal/drivers/cuda2/cuda_buffer.h similarity index 93% rename from experimental/cuda2/cuda_buffer.h rename to runtime/src/iree/hal/drivers/cuda2/cuda_buffer.h index dfeed297a5a9a..075776d2a9e4b 100644 --- a/experimental/cuda2/cuda_buffer.h +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_buffer.h @@ -4,12 +4,12 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_CUDA_BUFFER_H_ -#define EXPERIMENTAL_CUDA2_CUDA_BUFFER_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_CUDA_BUFFER_H_ +#define IREE_HAL_DRIVERS_CUDA2_CUDA_BUFFER_H_ -#include "experimental/cuda2/cuda_headers.h" #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" #ifdef __cplusplus extern "C" { @@ -66,4 +66,4 @@ void iree_hal_cuda2_buffer_drop_release_callback(iree_hal_buffer_t* buffer); } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_CUDA_BUFFER_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_CUDA_BUFFER_H_ diff --git a/experimental/cuda2/cuda_device.c b/runtime/src/iree/hal/drivers/cuda2/cuda_device.c similarity index 97% rename from experimental/cuda2/cuda_device.c rename to runtime/src/iree/hal/drivers/cuda2/cuda_device.c index 407b91a37b807..5c315be320196 100644 --- a/experimental/cuda2/cuda_device.c +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_device.c @@ -4,30 +4,31 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/cuda_device.h" +#include "iree/hal/drivers/cuda2/cuda_device.h" #include #include #include -#include "experimental/cuda2/cuda_allocator.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/event_pool.h" -#include "experimental/cuda2/event_semaphore.h" -#include "experimental/cuda2/graph_command_buffer.h" -#include "experimental/cuda2/memory_pools.h" -#include "experimental/cuda2/nccl_channel.h" -#include "experimental/cuda2/nccl_dynamic_symbols.h" -#include "experimental/cuda2/nop_executable_cache.h" -#include "experimental/cuda2/pending_queue_actions.h" -#include "experimental/cuda2/pipeline_layout.h" -#include "experimental/cuda2/stream_command_buffer.h" -#include "experimental/cuda2/timepoint_pool.h" -#include "experimental/cuda2/tracing.h" #include "iree/base/internal/arena.h" #include "iree/base/internal/event_pool.h" #include "iree/base/internal/math.h" +#include "iree/hal/drivers/cuda2/cuda_allocator.h" +#include "iree/hal/drivers/cuda2/cuda_buffer.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/event_pool.h" +#include "iree/hal/drivers/cuda2/event_semaphore.h" +#include "iree/hal/drivers/cuda2/graph_command_buffer.h" +#include "iree/hal/drivers/cuda2/memory_pools.h" +#include "iree/hal/drivers/cuda2/nccl_channel.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/nop_executable_cache.h" +#include "iree/hal/drivers/cuda2/pending_queue_actions.h" +#include "iree/hal/drivers/cuda2/pipeline_layout.h" +#include "iree/hal/drivers/cuda2/stream_command_buffer.h" +#include "iree/hal/drivers/cuda2/timepoint_pool.h" +#include "iree/hal/drivers/cuda2/tracing.h" #include "iree/hal/utils/buffer_transfer.h" #include "iree/hal/utils/deferred_command_buffer.h" 
#include "iree/hal/utils/file_transfer.h" diff --git a/experimental/cuda2/cuda_device.h b/runtime/src/iree/hal/drivers/cuda2/cuda_device.h similarity index 87% rename from experimental/cuda2/cuda_device.h rename to runtime/src/iree/hal/drivers/cuda2/cuda_device.h index 39d86e877da6a..4c32cc7c7e3a2 100644 --- a/experimental/cuda2/cuda_device.h +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_device.h @@ -4,14 +4,14 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_CUDA_DEVICE_H_ -#define EXPERIMENTAL_CUDA2_CUDA_DEVICE_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_CUDA_DEVICE_H_ +#define IREE_HAL_DRIVERS_CUDA2_CUDA_DEVICE_H_ -#include "experimental/cuda2/api.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/nccl_dynamic_symbols.h" #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" #ifdef __cplusplus extern "C" { @@ -55,4 +55,4 @@ const iree_hal_cuda2_dynamic_symbols_t* iree_hal_cuda2_device_dynamic_symbols( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_CUDA_DEVICE_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_CUDA_DEVICE_H_ diff --git a/experimental/cuda2/cuda_driver.c b/runtime/src/iree/hal/drivers/cuda2/cuda_driver.c similarity index 98% rename from experimental/cuda2/cuda_driver.c rename to runtime/src/iree/hal/drivers/cuda2/cuda_driver.c index 4fab5596185fa..79632a3c02b1a 100644 --- a/experimental/cuda2/cuda_driver.c +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_driver.c @@ -7,14 +7,14 @@ #include #include -#include "experimental/cuda2/api.h" -#include "experimental/cuda2/cuda_device.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/nccl_dynamic_symbols.h" -#include 
"experimental/cuda2/nccl_status_util.h" #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/api.h" +#include "iree/hal/drivers/cuda2/cuda_device.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/nccl_status_util.h" // Maximum device name length supported by the CUDA HAL driver. #define IREE_HAL_CUDA_MAX_DEVICE_NAME_LENGTH 128 diff --git a/experimental/cuda2/cuda_dynamic_symbol_table.h b/runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbol_table.h similarity index 100% rename from experimental/cuda2/cuda_dynamic_symbol_table.h rename to runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbol_table.h diff --git a/experimental/cuda2/cuda_dynamic_symbols.c b/runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbols.c similarity index 94% rename from experimental/cuda2/cuda_dynamic_symbols.c rename to runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbols.c index fa6580969eedf..643fac1a243b4 100644 --- a/experimental/cuda2/cuda_dynamic_symbols.c +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbols.c @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" #include -#include "experimental/cuda2/cuda_status_util.h" #include "iree/base/api.h" #include "iree/base/internal/dynamic_library.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" static const char* iree_hal_cuda_dylib_names[] = { #if defined(IREE_PLATFORM_WINDOWS) @@ -43,7 +43,7 @@ static iree_status_t iree_hal_cuda2_dynamic_symbols_resolve_all( CU_GET_PROC_ADDRESS_DEFAULT), \ "when resolving " #cuda_symbol_name " using cuGetProcAddress"); \ } -#include "experimental/cuda2/cuda_dynamic_symbol_table.h" // IWYU pragma: keep +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbol_table.h" // IWYU pragma: keep #undef IREE_CU_PFN_DECL return iree_ok_status(); } diff --git a/experimental/cuda2/cuda_dynamic_symbols.h b/runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbols.h similarity index 84% rename from experimental/cuda2/cuda_dynamic_symbols.h rename to runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbols.h index 5bed39251532f..c8e305a23c357 100644 --- a/experimental/cuda2/cuda_dynamic_symbols.h +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_dynamic_symbols.h @@ -4,12 +4,12 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_EXPERIMENTAL_CUDA2_CUDA_DYNAMIC_SYMBOLS_H_ -#define IREE_EXPERIMENTAL_CUDA2_CUDA_DYNAMIC_SYMBOLS_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_CUDA_DYNAMIC_SYMBOLS_H_ +#define IREE_HAL_DRIVERS_CUDA2_CUDA_DYNAMIC_SYMBOLS_H_ -#include "experimental/cuda2/cuda_headers.h" #include "iree/base/api.h" #include "iree/base/internal/dynamic_library.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" #ifdef __cplusplus extern "C" { @@ -28,7 +28,7 @@ typedef struct iree_hal_cuda2_dynamic_symbols_t { // Concrete CUDA symbols defined by including the `dynamic_symbol_tables.h`. 
#define IREE_CU_PFN_DECL(cudaSymbolName, ...) \ CUresult (*cudaSymbolName)(__VA_ARGS__); -#include "experimental/cuda2/cuda_dynamic_symbol_table.h" // IWYU pragma: export +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbol_table.h" // IWYU pragma: export #undef IREE_CU_PFN_DECL } iree_hal_cuda2_dynamic_symbols_t; @@ -49,4 +49,4 @@ void iree_hal_cuda2_dynamic_symbols_deinitialize( } // extern "C" #endif // __cplusplus -#endif // IREE_EXPERIMENTAL_CUDA2_CUDA_DYNAMIC_SYMBOLS_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_CUDA_DYNAMIC_SYMBOLS_H_ diff --git a/experimental/cuda2/cuda_headers.h b/runtime/src/iree/hal/drivers/cuda2/cuda_headers.h similarity index 64% rename from experimental/cuda2/cuda_headers.h rename to runtime/src/iree/hal/drivers/cuda2/cuda_headers.h index 20ed2686595c5..7f8dd8ed19f98 100644 --- a/experimental/cuda2/cuda_headers.h +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_headers.h @@ -4,9 +4,9 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_EXPERIMENTAL_CUDA2_CUDA_HEADERS_H_ -#define IREE_EXPERIMENTAL_CUDA2_CUDA_HEADERS_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_CUDA_HEADERS_H_ +#define IREE_HAL_DRIVERS_CUDA2_CUDA_HEADERS_H_ #include "cuda.h" // IWYU pragma: export -#endif // IREE_EXPERIMENTAL_CUDA2_CUDA_HEADERS_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_CUDA_HEADERS_H_ diff --git a/experimental/cuda2/cuda_status_util.c b/runtime/src/iree/hal/drivers/cuda2/cuda_status_util.c similarity index 98% rename from experimental/cuda2/cuda_status_util.c rename to runtime/src/iree/hal/drivers/cuda2/cuda_status_util.c index c8d57b50377b0..b6abec1b6a3f7 100644 --- a/experimental/cuda2/cuda_status_util.c +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_status_util.c @@ -4,12 +4,11 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" #include -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "iree/base/status.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" // The list of CUDA error strings with their corresponding IREE error state // classification. diff --git a/experimental/cuda2/cuda_status_util.h b/runtime/src/iree/hal/drivers/cuda2/cuda_status_util.h similarity index 91% rename from experimental/cuda2/cuda_status_util.h rename to runtime/src/iree/hal/drivers/cuda2/cuda_status_util.h index 75c6057ad7bf9..2c0fe4e08fbb4 100644 --- a/experimental/cuda2/cuda_status_util.h +++ b/runtime/src/iree/hal/drivers/cuda2/cuda_status_util.h @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_EXPERIMENTAL_CUDA2_CUDA_STATUS_UTIL_H_ -#define IREE_EXPERIMENTAL_CUDA2_CUDA_STATUS_UTIL_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_CUDA_STATUS_UTIL_H_ +#define IREE_HAL_DRIVERS_CUDA2_CUDA_STATUS_UTIL_H_ #include -#include "experimental/cuda2/cuda_dynamic_symbols.h" #include "iree/base/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" #ifdef __cplusplus extern "C" { @@ -65,4 +65,4 @@ iree_status_t iree_hal_cuda2_result_to_status( } // extern "C" #endif // __cplusplus -#endif // IREE_EXPERIMENTAL_CUDA2_CUDA_STATUS_UTIL_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_CUDA_STATUS_UTIL_H_ diff --git a/experimental/cuda2/dynamic_symbols_test.cc b/runtime/src/iree/hal/drivers/cuda2/dynamic_symbols_test.cc similarity index 95% rename from experimental/cuda2/dynamic_symbols_test.cc rename to runtime/src/iree/hal/drivers/cuda2/dynamic_symbols_test.cc index 9a40e39862272..b1bd4556edce4 100644 --- a/experimental/cuda2/dynamic_symbols_test.cc +++ b/runtime/src/iree/hal/drivers/cuda2/dynamic_symbols_test.cc @@ -6,9 +6,9 @@ #include -#include 
"experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/nccl_dynamic_symbols.h" #include "iree/base/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" #include "iree/testing/gtest.h" namespace iree { diff --git a/experimental/cuda2/event_pool.c b/runtime/src/iree/hal/drivers/cuda2/event_pool.c similarity index 98% rename from experimental/cuda2/event_pool.c rename to runtime/src/iree/hal/drivers/cuda2/event_pool.c index 5ca26ef25b3ca..dd1d45798b5a2 100644 --- a/experimental/cuda2/event_pool.c +++ b/runtime/src/iree/hal/drivers/cuda2/event_pool.c @@ -4,18 +4,18 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/event_pool.h" +#include "iree/hal/drivers/cuda2/event_pool.h" #include #include #include -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" #include "iree/base/api.h" #include "iree/base/internal/atomics.h" #include "iree/base/internal/synchronization.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" //===----------------------------------------------------------------------===// // iree_hal_cuda2_event_t diff --git a/experimental/cuda2/event_pool.h b/runtime/src/iree/hal/drivers/cuda2/event_pool.h similarity index 94% rename from experimental/cuda2/event_pool.h rename to runtime/src/iree/hal/drivers/cuda2/event_pool.h index f1e49744bfb22..b9a5e3bca7bbe 100644 --- a/experimental/cuda2/event_pool.h +++ b/runtime/src/iree/hal/drivers/cuda2/event_pool.h @@ -4,12 +4,12 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_EVENT_POOL_H_ -#define EXPERIMENTAL_CUDA2_EVENT_POOL_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_EVENT_POOL_H_ +#define IREE_HAL_DRIVERS_CUDA2_EVENT_POOL_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" #ifdef __cplusplus extern "C" { @@ -79,4 +79,4 @@ iree_status_t iree_hal_cuda2_event_pool_acquire( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_EVENT_POOL_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_EVENT_POOL_H_ diff --git a/experimental/cuda2/event_semaphore.c b/runtime/src/iree/hal/drivers/cuda2/event_semaphore.c similarity index 98% rename from experimental/cuda2/event_semaphore.c rename to runtime/src/iree/hal/drivers/cuda2/event_semaphore.c index 75942bed9e727..2f120ff5fdc97 100644 --- a/experimental/cuda2/event_semaphore.c +++ b/runtime/src/iree/hal/drivers/cuda2/event_semaphore.c @@ -4,13 +4,12 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/event_semaphore.h" +#include "iree/hal/drivers/cuda2/event_semaphore.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/timepoint_pool.h" #include "iree/base/internal/synchronization.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/timepoint_pool.h" #include "iree/hal/utils/semaphore_base.h" typedef struct iree_hal_cuda2_semaphore_t { diff --git a/experimental/cuda2/event_semaphore.h b/runtime/src/iree/hal/drivers/cuda2/event_semaphore.h similarity index 85% rename from experimental/cuda2/event_semaphore.h rename to runtime/src/iree/hal/drivers/cuda2/event_semaphore.h index ec49704bd96cd..2c73e3d5a16e2 100644 --- a/experimental/cuda2/event_semaphore.h +++ b/runtime/src/iree/hal/drivers/cuda2/event_semaphore.h @@ -4,16 +4,16 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_EVENT_SEMAPHORE_H_ -#define EXPERIMENTAL_CUDA2_EVENT_SEMAPHORE_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_EVENT_SEMAPHORE_H_ +#define IREE_HAL_DRIVERS_CUDA2_EVENT_SEMAPHORE_H_ #include -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/pending_queue_actions.h" -#include "experimental/cuda2/timepoint_pool.h" #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/pending_queue_actions.h" +#include "iree/hal/drivers/cuda2/timepoint_pool.h" #ifdef __cplusplus extern "C" { @@ -53,4 +53,4 @@ iree_status_t iree_hal_cuda2_event_semaphore_acquire_timepoint_device_wait( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_EVENT_SEMAPHORE_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_EVENT_SEMAPHORE_H_ diff --git a/experimental/cuda2/graph_command_buffer.c b/runtime/src/iree/hal/drivers/cuda2/graph_command_buffer.c similarity index 98% rename from experimental/cuda2/graph_command_buffer.c rename to runtime/src/iree/hal/drivers/cuda2/graph_command_buffer.c index fd58217e2e44a..2921487f6044e 100644 --- a/experimental/cuda2/graph_command_buffer.c +++ b/runtime/src/iree/hal/drivers/cuda2/graph_command_buffer.c @@ -4,17 +4,17 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/graph_command_buffer.h" +#include "iree/hal/drivers/cuda2/graph_command_buffer.h" #include #include -#include "experimental/cuda2/cuda_buffer.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/native_executable.h" -#include "experimental/cuda2/pipeline_layout.h" #include "iree/base/api.h" +#include "iree/hal/drivers/cuda2/cuda_buffer.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/native_executable.h" +#include "iree/hal/drivers/cuda2/pipeline_layout.h" #include "iree/hal/utils/collective_batch.h" #include "iree/hal/utils/resource_set.h" diff --git a/experimental/cuda2/graph_command_buffer.h b/runtime/src/iree/hal/drivers/cuda2/graph_command_buffer.h similarity index 84% rename from experimental/cuda2/graph_command_buffer.h rename to runtime/src/iree/hal/drivers/cuda2/graph_command_buffer.h index 413e46ad70f1b..cf10c5e4e070b 100644 --- a/experimental/cuda2/graph_command_buffer.h +++ b/runtime/src/iree/hal/drivers/cuda2/graph_command_buffer.h @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_GRAPH_COMMAND_BUFFER_H_ -#define EXPERIMENTAL_CUDA2_GRAPH_COMMAND_BUFFER_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_GRAPH_COMMAND_BUFFER_H_ +#define IREE_HAL_DRIVERS_CUDA2_GRAPH_COMMAND_BUFFER_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" #ifdef __cplusplus extern "C" { @@ -44,4 +44,4 @@ CUgraphExec iree_hal_cuda2_graph_command_buffer_handle( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_GRAPH_COMMAND_BUFFER_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_GRAPH_COMMAND_BUFFER_H_ diff --git a/experimental/cuda2/memory_pools.c b/runtime/src/iree/hal/drivers/cuda2/memory_pools.c similarity index 98% rename from experimental/cuda2/memory_pools.c rename to runtime/src/iree/hal/drivers/cuda2/memory_pools.c index 84b38d01fbd3e..5da268da143df 100644 --- a/experimental/cuda2/memory_pools.c +++ b/runtime/src/iree/hal/drivers/cuda2/memory_pools.c @@ -4,11 +4,11 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/memory_pools.h" +#include "iree/hal/drivers/cuda2/memory_pools.h" -#include "experimental/cuda2/cuda_buffer.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/cuda_buffer.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" // NOTE: these are currently global for all devices; we could make // device-specific ones by malloc() and leaking (with LSAN note) unique string diff --git a/experimental/cuda2/memory_pools.h b/runtime/src/iree/hal/drivers/cuda2/memory_pools.h similarity index 95% rename from experimental/cuda2/memory_pools.h rename to runtime/src/iree/hal/drivers/cuda2/memory_pools.h index 9c1e59b0c0d0e..7cb0d7a453da0 100644 --- a/experimental/cuda2/memory_pools.h +++ b/runtime/src/iree/hal/drivers/cuda2/memory_pools.h @@ -7,12 +7,12 @@ #ifndef IREE_HAL_DRIVERS_CUDA_MEMORY_POOLS_H_ #define IREE_HAL_DRIVERS_CUDA_MEMORY_POOLS_H_ -#include "experimental/cuda2/api.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" #include "iree/base/api.h" #include "iree/base/internal/atomics.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" #ifdef __cplusplus extern "C" { diff --git a/experimental/cuda2/native_executable.c b/runtime/src/iree/hal/drivers/cuda2/native_executable.c similarity index 98% rename from experimental/cuda2/native_executable.c rename to runtime/src/iree/hal/drivers/cuda2/native_executable.c index db74f75a0a333..ce252a3ed858f 100644 --- a/experimental/cuda2/native_executable.c +++ b/runtime/src/iree/hal/drivers/cuda2/native_executable.c @@ -4,14 +4,14 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/native_executable.h" +#include "iree/hal/drivers/cuda2/native_executable.h" #include -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/pipeline_layout.h" #include "iree/base/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/pipeline_layout.h" // flatcc schemas: #include "iree/base/internal/flatcc/parsing.h" diff --git a/experimental/cuda2/native_executable.h b/runtime/src/iree/hal/drivers/cuda2/native_executable.h similarity index 84% rename from experimental/cuda2/native_executable.h rename to runtime/src/iree/hal/drivers/cuda2/native_executable.h index 2b633fe529acb..10a942e2a4ccc 100644 --- a/experimental/cuda2/native_executable.h +++ b/runtime/src/iree/hal/drivers/cuda2/native_executable.h @@ -4,16 +4,16 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_NATIVE_EXECUTABLE_H_ -#define EXPERIMENTAL_CUDA2_NATIVE_EXECUTABLE_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_NATIVE_EXECUTABLE_H_ +#define IREE_HAL_DRIVERS_CUDA2_NATIVE_EXECUTABLE_H_ #include -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" #include "iree/base/api.h" #include "iree/base/tracing.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" #ifdef __cplusplus extern "C" { @@ -47,4 +47,4 @@ iree_status_t iree_hal_cuda2_native_executable_entry_point_kernel_info( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_NATIVE_EXECUTABLE_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_NATIVE_EXECUTABLE_H_ diff --git a/experimental/cuda2/nccl_channel.c b/runtime/src/iree/hal/drivers/cuda2/nccl_channel.c similarity index 99% rename from experimental/cuda2/nccl_channel.c rename to runtime/src/iree/hal/drivers/cuda2/nccl_channel.c index d8a6584557416..9df206aac16de 100644 --- a/experimental/cuda2/nccl_channel.c +++ b/runtime/src/iree/hal/drivers/cuda2/nccl_channel.c @@ -4,17 +4,16 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/nccl_channel.h" +#include "iree/hal/drivers/cuda2/nccl_channel.h" #include #include -#include "experimental/cuda2/cuda_buffer.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/nccl_headers.h" -#include "experimental/cuda2/nccl_status_util.h" #include "iree/base/api.h" #include "iree/base/tracing.h" +#include "iree/hal/drivers/cuda2/cuda_buffer.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/nccl_status_util.h" typedef struct iree_hal_cuda2_nccl_channel_t { iree_hal_resource_t resource; diff --git a/experimental/cuda2/nccl_channel.h b/runtime/src/iree/hal/drivers/cuda2/nccl_channel.h similarity index 84% rename from experimental/cuda2/nccl_channel.h rename to runtime/src/iree/hal/drivers/cuda2/nccl_channel.h index ad8bfef68667e..ddfc2b18f18a2 100644 --- a/experimental/cuda2/nccl_channel.h +++ b/runtime/src/iree/hal/drivers/cuda2/nccl_channel.h @@ -4,16 +4,15 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_NCCL_CHANNEL_H_ -#define EXPERIMENTAL_CUDA2_NCCL_CHANNEL_H_ - -#include "experimental/cuda2/api.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" -#include "experimental/cuda2/nccl_dynamic_symbols.h" -#include "experimental/cuda2/tracing.h" +#ifndef IREE_HAL_DRIVERS_CUDA2_NCCL_CHANNEL_H_ +#define IREE_HAL_DRIVERS_CUDA2_NCCL_CHANNEL_H_ + #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/tracing.h" #include "iree/hal/utils/collective_batch.h" #ifdef __cplusplus @@ -56,4 +55,4 @@ iree_status_t iree_hal_cuda2_nccl_submit_batch( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_NCCL_CHANNEL_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_NCCL_CHANNEL_H_ diff --git a/experimental/cuda2/nccl_dynamic_symbol_table.h b/runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbol_table.h similarity index 100% rename from experimental/cuda2/nccl_dynamic_symbol_table.h rename to runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbol_table.h diff --git a/experimental/cuda2/nccl_dynamic_symbols.c b/runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbols.c similarity index 96% rename from experimental/cuda2/nccl_dynamic_symbols.c rename to runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbols.c index 256edbf8d70bc..607a189fa14ba 100644 --- a/experimental/cuda2/nccl_dynamic_symbols.c +++ b/runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbols.c @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/nccl_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" #include -#include "experimental/cuda2/nccl_status_util.h" #include "iree/base/api.h" #include "iree/base/internal/dynamic_library.h" +#include "iree/hal/drivers/cuda2/nccl_status_util.h" static const char* iree_hal_cuda_nccl_dylib_names[] = { #if defined(IREE_PLATFORM_WINDOWS) @@ -36,7 +36,7 @@ static iree_status_t iree_hal_cuda2_nccl_dynamic_symbols_resolve_all( IREE_RETURN_IF_ERROR(iree_dynamic_library_lookup_symbol( \ syms->dylib, name, (void**)&syms->nccl_symbol_name)); \ } -#include "experimental/cuda2/nccl_dynamic_symbol_table.h" // IWYU pragma: keep +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbol_table.h" // IWYU pragma: keep #undef IREE_NCCL_PFN_DECL #undef IREE_NCCL_PFN_DECL_STR_RETURN return iree_ok_status(); diff --git a/experimental/cuda2/nccl_dynamic_symbols.h b/runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbols.h similarity index 84% rename from experimental/cuda2/nccl_dynamic_symbols.h rename to runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbols.h index e15253ffb96be..23a4e2e532d57 100644 --- a/experimental/cuda2/nccl_dynamic_symbols.h +++ b/runtime/src/iree/hal/drivers/cuda2/nccl_dynamic_symbols.h @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_EXPERIMENTAL_CUDA2_NCCL_DYNAMIC_SYMBOLS_H_ -#define IREE_EXPERIMENTAL_CUDA2_NCCL_DYNAMIC_SYMBOLS_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_NCCL_DYNAMIC_SYMBOLS_H_ +#define IREE_HAL_DRIVERS_CUDA2_NCCL_DYNAMIC_SYMBOLS_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/nccl_headers.h" #include "iree/base/api.h" #include "iree/base/internal/dynamic_library.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/nccl_headers.h" #ifdef __cplusplus extern "C" { @@ -31,7 +31,7 @@ typedef struct iree_hal_cuda2_nccl_dynamic_symbols_t { ncclResult_t (*ncclSymbolName)(__VA_ARGS__); #define IREE_NCCL_PFN_DECL_STR_RETURN(ncclSymbolName, ...) \ const char* (*ncclSymbolName)(__VA_ARGS__); -#include "experimental/cuda2/nccl_dynamic_symbol_table.h" // IWYU pragma: export +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbol_table.h" // IWYU pragma: export #undef IREE_NCCL_PFN_DECL #undef IREE_NCCL_PFN_DECL_STR_RETURN } iree_hal_cuda2_nccl_dynamic_symbols_t; @@ -54,4 +54,4 @@ void iree_hal_cuda2_nccl_dynamic_symbols_deinitialize( } // extern "C" #endif // __cplusplus -#endif // IREE_EXPERIMENTAL_CUDA2_NCCL_DYNAMIC_SYMBOLS_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_NCCL_DYNAMIC_SYMBOLS_H_ diff --git a/experimental/cuda2/nccl_headers.h b/runtime/src/iree/hal/drivers/cuda2/nccl_headers.h similarity index 65% rename from experimental/cuda2/nccl_headers.h rename to runtime/src/iree/hal/drivers/cuda2/nccl_headers.h index 973c6a4f65df0..09af6ce2634c8 100644 --- a/experimental/cuda2/nccl_headers.h +++ b/runtime/src/iree/hal/drivers/cuda2/nccl_headers.h @@ -4,9 +4,9 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_EXPERIMENTAL_CUDA2_NCCL_HEADERS_H_ -#define IREE_EXPERIMENTAL_CUDA2_NCCL_HEADERS_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_NCCL_HEADERS_H_ +#define IREE_HAL_DRIVERS_CUDA2_NCCL_HEADERS_H_ #include "third_party/nccl/nccl.h" // IWYU pragma: export -#endif // IREE_EXPERIMENTAL_CUDA2_NCCL_HEADERS_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_NCCL_HEADERS_H_ diff --git a/experimental/cuda2/nccl_status_util.c b/runtime/src/iree/hal/drivers/cuda2/nccl_status_util.c similarity index 91% rename from experimental/cuda2/nccl_status_util.c rename to runtime/src/iree/hal/drivers/cuda2/nccl_status_util.c index 81ca09751a7e6..5d1779ce91a66 100644 --- a/experimental/cuda2/nccl_status_util.c +++ b/runtime/src/iree/hal/drivers/cuda2/nccl_status_util.c @@ -4,12 +4,11 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/nccl_status_util.h" +#include "iree/hal/drivers/cuda2/nccl_status_util.h" #include -#include "experimental/cuda2/nccl_dynamic_symbols.h" -#include "iree/base/status.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" iree_status_t iree_hal_cuda2_nccl_result_to_status( const iree_hal_cuda2_nccl_dynamic_symbols_t* syms, ncclResult_t result, diff --git a/experimental/cuda2/nccl_status_util.h b/runtime/src/iree/hal/drivers/cuda2/nccl_status_util.h similarity index 92% rename from experimental/cuda2/nccl_status_util.h rename to runtime/src/iree/hal/drivers/cuda2/nccl_status_util.h index 65db9a9c6123a..b2d6905526252 100644 --- a/experimental/cuda2/nccl_status_util.h +++ b/runtime/src/iree/hal/drivers/cuda2/nccl_status_util.h @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_EXPERIMENTAL_CUDA2_NCCL_STATUS_UTIL_H_ -#define IREE_EXPERIMENTAL_CUDA2_NCCL_STATUS_UTIL_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_NCCL_STATUS_UTIL_H_ +#define IREE_HAL_DRIVERS_CUDA2_NCCL_STATUS_UTIL_H_ #include -#include "experimental/cuda2/nccl_dynamic_symbols.h" #include "iree/base/api.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" #ifdef __cplusplus extern "C" { @@ -66,4 +66,4 @@ iree_status_t iree_hal_cuda2_nccl_result_to_status( } // extern "C" #endif // __cplusplus -#endif // IREE_EXPERIMENTAL_CUDA2_NCCL_STATUS_UTIL_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_NCCL_STATUS_UTIL_H_ diff --git a/experimental/cuda2/nop_executable_cache.c b/runtime/src/iree/hal/drivers/cuda2/nop_executable_cache.c similarity index 97% rename from experimental/cuda2/nop_executable_cache.c rename to runtime/src/iree/hal/drivers/cuda2/nop_executable_cache.c index ce63c1c0e46f1..284f9562466c9 100644 --- a/experimental/cuda2/nop_executable_cache.c +++ b/runtime/src/iree/hal/drivers/cuda2/nop_executable_cache.c @@ -4,14 +4,14 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/nop_executable_cache.h" +#include "iree/hal/drivers/cuda2/nop_executable_cache.h" #include #include -#include "experimental/cuda2/native_executable.h" #include "iree/base/api.h" #include "iree/base/tracing.h" +#include "iree/hal/drivers/cuda2/native_executable.h" typedef struct iree_hal_cuda2_nop_executable_cache_t { // Abstract resource used for injecting reference counting and vtable; diff --git a/experimental/cuda2/nop_executable_cache.h b/runtime/src/iree/hal/drivers/cuda2/nop_executable_cache.h similarity index 74% rename from experimental/cuda2/nop_executable_cache.h rename to runtime/src/iree/hal/drivers/cuda2/nop_executable_cache.h index 92e66acd669fa..a2424b4c9ce9a 100644 --- a/experimental/cuda2/nop_executable_cache.h +++ b/runtime/src/iree/hal/drivers/cuda2/nop_executable_cache.h @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_NOP_EXECUTABLE_CACHE_H_ -#define EXPERIMENTAL_CUDA2_NOP_EXECUTABLE_CACHE_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_NOP_EXECUTABLE_CACHE_H_ +#define IREE_HAL_DRIVERS_CUDA2_NOP_EXECUTABLE_CACHE_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" #include "iree/base/api.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" #ifdef __cplusplus extern "C" { @@ -29,4 +29,4 @@ iree_status_t iree_hal_cuda2_nop_executable_cache_create( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_NOP_EXECUTABLE_CACHE_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_NOP_EXECUTABLE_CACHE_H_ diff --git a/experimental/cuda2/pending_queue_actions.c b/runtime/src/iree/hal/drivers/cuda2/pending_queue_actions.c similarity index 98% rename from experimental/cuda2/pending_queue_actions.c rename to 
runtime/src/iree/hal/drivers/cuda2/pending_queue_actions.c index bb80d6f13fb5f..9806b83d0b03b 100644 --- a/experimental/cuda2/pending_queue_actions.c +++ b/runtime/src/iree/hal/drivers/cuda2/pending_queue_actions.c @@ -4,19 +4,19 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/pending_queue_actions.h" +#include "iree/hal/drivers/cuda2/pending_queue_actions.h" #include -#include "experimental/cuda2/cuda_device.h" -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/event_semaphore.h" -#include "experimental/cuda2/graph_command_buffer.h" #include "iree/base/api.h" #include "iree/base/internal/arena.h" #include "iree/base/internal/synchronization.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_device.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/event_semaphore.h" +#include "iree/hal/drivers/cuda2/graph_command_buffer.h" #include "iree/hal/utils/deferred_command_buffer.h" #include "iree/hal/utils/resource_set.h" diff --git a/experimental/cuda2/pending_queue_actions.h b/runtime/src/iree/hal/drivers/cuda2/pending_queue_actions.h similarity index 91% rename from experimental/cuda2/pending_queue_actions.h rename to runtime/src/iree/hal/drivers/cuda2/pending_queue_actions.h index 036c063030eec..1484c2bda8ffe 100644 --- a/experimental/cuda2/pending_queue_actions.h +++ b/runtime/src/iree/hal/drivers/cuda2/pending_queue_actions.h @@ -4,13 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_PENDING_QUEUE_ACTIONS_H_ -#define EXPERIMENTAL_CUDA2_PENDING_QUEUE_ACTIONS_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_PENDING_QUEUE_ACTIONS_H_ +#define IREE_HAL_DRIVERS_CUDA2_PENDING_QUEUE_ACTIONS_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" #include "iree/base/api.h" #include "iree/base/internal/arena.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" #ifdef __cplusplus extern "C" { @@ -63,4 +63,4 @@ iree_status_t iree_hal_cuda2_pending_queue_actions_issue( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_PENDING_QUEUE_ACTIONS_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_PENDING_QUEUE_ACTIONS_H_ diff --git a/experimental/cuda2/pipeline_layout.c b/runtime/src/iree/hal/drivers/cuda2/pipeline_layout.c similarity index 99% rename from experimental/cuda2/pipeline_layout.c rename to runtime/src/iree/hal/drivers/cuda2/pipeline_layout.c index bd64d037d903e..7383d78f0f1cb 100644 --- a/experimental/cuda2/pipeline_layout.c +++ b/runtime/src/iree/hal/drivers/cuda2/pipeline_layout.c @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/pipeline_layout.h" +#include "iree/hal/drivers/cuda2/pipeline_layout.h" #include diff --git a/experimental/cuda2/pipeline_layout.h b/runtime/src/iree/hal/drivers/cuda2/pipeline_layout.h similarity index 96% rename from experimental/cuda2/pipeline_layout.h rename to runtime/src/iree/hal/drivers/cuda2/pipeline_layout.h index f2d48c80dc9b8..b7b3cf7d5a121 100644 --- a/experimental/cuda2/pipeline_layout.h +++ b/runtime/src/iree/hal/drivers/cuda2/pipeline_layout.h @@ -4,8 +4,8 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_PIPELINE_LAYOUT_H_ -#define EXPERIMENTAL_CUDA2_PIPELINE_LAYOUT_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_PIPELINE_LAYOUT_H_ +#define IREE_HAL_DRIVERS_CUDA2_PIPELINE_LAYOUT_H_ #include "iree/base/api.h" #include "iree/hal/api.h" @@ -107,4 +107,4 @@ iree_host_size_t iree_hal_cuda2_pipeline_layout_push_constant_count( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_PIPELINE_LAYOUT_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_PIPELINE_LAYOUT_H_ diff --git a/runtime/src/iree/hal/drivers/cuda2/registration/BUILD.bazel b/runtime/src/iree/hal/drivers/cuda2/registration/BUILD.bazel new file mode 100644 index 0000000000000..78ee071af0dfc --- /dev/null +++ b/runtime/src/iree/hal/drivers/cuda2/registration/BUILD.bazel @@ -0,0 +1,32 @@ +# Copyright 2022 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +load("//build_tools/bazel:build_defs.oss.bzl", "iree_runtime_cc_library") + +package( + default_visibility = ["//visibility:public"], + features = ["layering_check"], + licenses = ["notice"], # Apache 2.0 +) + +iree_runtime_cc_library( + name = "registration", + srcs = [ + "driver_module.c", + ], + hdrs = [ + "driver_module.h", + ], + defines = [ + "IREE_HAVE_HAL_CUDA2_DRIVER_MODULE=1", + ], + deps = [ + "//runtime/src/iree/base", + "//runtime/src/iree/base/internal:flags", + "//runtime/src/iree/hal", + "//runtime/src/iree/hal/drivers/cuda2", + ], +) diff --git a/runtime/src/iree/hal/drivers/cuda2/registration/CMakeLists.txt b/runtime/src/iree/hal/drivers/cuda2/registration/CMakeLists.txt new file mode 100644 index 0000000000000..b27cde34fcea1 --- /dev/null +++ b/runtime/src/iree/hal/drivers/cuda2/registration/CMakeLists.txt @@ -0,0 +1,30 @@ +################################################################################ +# 
Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from # +# runtime/src/iree/hal/drivers/cuda2/registration/BUILD.bazel # +# # +# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary # +# CMake-only content. # +# # +# To disable autogeneration for this file entirely, delete this header. # +################################################################################ + +iree_add_all_subdirs() + +iree_cc_library( + NAME + registration + HDRS + "driver_module.h" + SRCS + "driver_module.c" + DEPS + iree::base + iree::base::internal::flags + iree::hal + iree::hal::drivers::cuda2 + DEFINES + "IREE_HAVE_HAL_CUDA2_DRIVER_MODULE=1" + PUBLIC +) + +### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### diff --git a/experimental/cuda2/registration/driver_module.c b/runtime/src/iree/hal/drivers/cuda2/registration/driver_module.c similarity index 97% rename from experimental/cuda2/registration/driver_module.c rename to runtime/src/iree/hal/drivers/cuda2/registration/driver_module.c index 166286ffce569..da3fb55e77789 100644 --- a/experimental/cuda2/registration/driver_module.c +++ b/runtime/src/iree/hal/drivers/cuda2/registration/driver_module.c @@ -4,14 +4,14 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/registration/driver_module.h" +#include "iree/hal/drivers/cuda2/registration/driver_module.h" #include #include -#include "experimental/cuda2/api.h" #include "iree/base/api.h" #include "iree/base/internal/flags.h" +#include "iree/hal/drivers/cuda2/api.h" IREE_FLAG( bool, cuda2_use_streams, false, diff --git a/experimental/cuda2/registration/driver_module.h b/runtime/src/iree/hal/drivers/cuda2/registration/driver_module.h similarity index 74% rename from experimental/cuda2/registration/driver_module.h rename to runtime/src/iree/hal/drivers/cuda2/registration/driver_module.h index c92643da78cd9..996c9b83ebfbd 100644 --- a/experimental/cuda2/registration/driver_module.h +++ b/runtime/src/iree/hal/drivers/cuda2/registration/driver_module.h @@ -4,8 +4,8 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_EXPERIMENTAL_CUDA2_REGISTRATION_DRIVER_MODULE_H_ -#define IREE_EXPERIMENTAL_CUDA2_REGISTRATION_DRIVER_MODULE_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_REGISTRATION_DRIVER_MODULE_H_ +#define IREE_HAL_DRIVERS_CUDA2_REGISTRATION_DRIVER_MODULE_H_ #include "iree/base/api.h" #include "iree/hal/api.h" @@ -22,4 +22,4 @@ iree_hal_cuda2_driver_module_register(iree_hal_driver_registry_t* registry); } // extern "C" #endif // __cplusplus -#endif // IREE_EXPERIMENTAL_CUDA2_REGISTRATION_DRIVER_MODULE_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_REGISTRATION_DRIVER_MODULE_H_ diff --git a/experimental/cuda2/stream_command_buffer.c b/runtime/src/iree/hal/drivers/cuda2/stream_command_buffer.c similarity index 98% rename from experimental/cuda2/stream_command_buffer.c rename to runtime/src/iree/hal/drivers/cuda2/stream_command_buffer.c index 1dec100f8457e..c90b798e5eea3 100644 --- a/experimental/cuda2/stream_command_buffer.c +++ b/runtime/src/iree/hal/drivers/cuda2/stream_command_buffer.c @@ -4,13 +4,13 @@ // See 
https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/stream_command_buffer.h" +#include "iree/hal/drivers/cuda2/stream_command_buffer.h" -#include "experimental/cuda2/cuda_buffer.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/native_executable.h" -#include "experimental/cuda2/nccl_channel.h" -#include "experimental/cuda2/pipeline_layout.h" +#include "iree/hal/drivers/cuda2/cuda_buffer.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/native_executable.h" +#include "iree/hal/drivers/cuda2/nccl_channel.h" +#include "iree/hal/drivers/cuda2/pipeline_layout.h" #include "iree/hal/utils/collective_batch.h" #include "iree/hal/utils/resource_set.h" diff --git a/experimental/cuda2/stream_command_buffer.h b/runtime/src/iree/hal/drivers/cuda2/stream_command_buffer.h similarity index 82% rename from experimental/cuda2/stream_command_buffer.h rename to runtime/src/iree/hal/drivers/cuda2/stream_command_buffer.h index 65448563ca7b1..ae4cd0d643cc4 100644 --- a/experimental/cuda2/stream_command_buffer.h +++ b/runtime/src/iree/hal/drivers/cuda2/stream_command_buffer.h @@ -4,15 +4,15 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_STREAM_COMMAND_BUFFER_H_ -#define EXPERIMENTAL_CUDA2_STREAM_COMMAND_BUFFER_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_STREAM_COMMAND_BUFFER_H_ +#define IREE_HAL_DRIVERS_CUDA2_STREAM_COMMAND_BUFFER_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" -#include "experimental/cuda2/nccl_dynamic_symbols.h" -#include "experimental/cuda2/tracing.h" #include "iree/base/internal/arena.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" +#include "iree/hal/drivers/cuda2/nccl_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/tracing.h" #ifdef __cplusplus extern "C" { @@ -48,4 +48,4 @@ bool iree_hal_cuda2_stream_command_buffer_isa( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_STREAM_COMMAND_BUFFER_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_STREAM_COMMAND_BUFFER_H_ diff --git a/experimental/cuda2/timepoint_pool.c b/runtime/src/iree/hal/drivers/cuda2/timepoint_pool.c similarity index 98% rename from experimental/cuda2/timepoint_pool.c rename to runtime/src/iree/hal/drivers/cuda2/timepoint_pool.c index 535406f46a5c9..a3225531f337f 100644 --- a/experimental/cuda2/timepoint_pool.c +++ b/runtime/src/iree/hal/drivers/cuda2/timepoint_pool.c @@ -4,20 +4,20 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/timepoint_pool.h" +#include "iree/hal/drivers/cuda2/timepoint_pool.h" #include #include #include -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" -#include "experimental/cuda2/event_pool.h" #include "iree/base/api.h" #include "iree/base/internal/atomics.h" #include "iree/base/internal/event_pool.h" #include "iree/base/internal/synchronization.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/event_pool.h" #include "iree/hal/utils/semaphore_base.h" //===----------------------------------------------------------------------===// diff --git a/experimental/cuda2/timepoint_pool.h b/runtime/src/iree/hal/drivers/cuda2/timepoint_pool.h similarity index 95% rename from experimental/cuda2/timepoint_pool.h rename to runtime/src/iree/hal/drivers/cuda2/timepoint_pool.h index b0d71f5a1fbbb..12ad5d9fea196 100644 --- a/experimental/cuda2/timepoint_pool.h +++ b/runtime/src/iree/hal/drivers/cuda2/timepoint_pool.h @@ -4,12 +4,12 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_TIMEPOINT_POOL_H_ -#define EXPERIMENTAL_CUDA2_TIMEPOINT_POOL_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_TIMEPOINT_POOL_H_ +#define IREE_HAL_DRIVERS_CUDA2_TIMEPOINT_POOL_H_ -#include "experimental/cuda2/event_pool.h" #include "iree/base/api.h" #include "iree/base/internal/event_pool.h" +#include "iree/hal/drivers/cuda2/event_pool.h" #include "iree/hal/utils/semaphore_base.h" #ifdef __cplusplus @@ -116,4 +116,4 @@ void iree_hal_cuda2_timepoint_pool_release( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_TIMEPOINT_POOL_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_TIMEPOINT_POOL_H_ diff --git a/experimental/cuda2/tracing.c b/runtime/src/iree/hal/drivers/cuda2/tracing.c similarity index 98% rename from experimental/cuda2/tracing.c rename to runtime/src/iree/hal/drivers/cuda2/tracing.c index e680c467a8287..76254e3216e90 100644 --- a/experimental/cuda2/tracing.c +++ b/runtime/src/iree/hal/drivers/cuda2/tracing.c @@ -4,12 +4,12 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "experimental/cuda2/tracing.h" +#include "iree/hal/drivers/cuda2/tracing.h" #if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_status_util.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_status_util.h" // Total number of events per tracing context. This translates to the maximum // number of outstanding timestamp queries before collection is required. 
diff --git a/experimental/cuda2/tracing.h b/runtime/src/iree/hal/drivers/cuda2/tracing.h similarity index 95% rename from experimental/cuda2/tracing.h rename to runtime/src/iree/hal/drivers/cuda2/tracing.h index 57b6786dc4697..68a606b64266e 100644 --- a/experimental/cuda2/tracing.h +++ b/runtime/src/iree/hal/drivers/cuda2/tracing.h @@ -4,15 +4,15 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef EXPERIMENTAL_CUDA2_TRACING_H_ -#define EXPERIMENTAL_CUDA2_TRACING_H_ +#ifndef IREE_HAL_DRIVERS_CUDA2_TRACING_H_ +#define IREE_HAL_DRIVERS_CUDA2_TRACING_H_ -#include "experimental/cuda2/cuda_dynamic_symbols.h" -#include "experimental/cuda2/cuda_headers.h" #include "iree/base/api.h" #include "iree/base/internal/arena.h" #include "iree/base/tracing.h" #include "iree/hal/api.h" +#include "iree/hal/drivers/cuda2/cuda_dynamic_symbols.h" +#include "iree/hal/drivers/cuda2/cuda_headers.h" #ifdef __cplusplus extern "C" { @@ -119,4 +119,4 @@ void iree_hal_cuda2_tracing_zone_end_impl( } // extern "C" #endif // __cplusplus -#endif // EXPERIMENTAL_CUDA2_TRACING_H_ +#endif // IREE_HAL_DRIVERS_CUDA2_TRACING_H_ diff --git a/runtime/src/iree/hal/drivers/init.c b/runtime/src/iree/hal/drivers/init.c index e2d1f659f4b62..226764892a745 100644 --- a/runtime/src/iree/hal/drivers/init.c +++ b/runtime/src/iree/hal/drivers/init.c @@ -10,6 +10,10 @@ #include "iree/hal/drivers/cuda/registration/driver_module.h" #endif // IREE_HAVE_HAL_CUDA_DRIVER_MODULE +#if defined(IREE_HAVE_HAL_CUDA2_DRIVER_MODULE) +#include "iree/hal/drivers/cuda2/registration/driver_module.h" +#endif // IREE_HAVE_HAL_CUDA2_DRIVER_MODULE + #if defined(IREE_HAVE_HAL_LOCAL_SYNC_DRIVER_MODULE) #include "iree/hal/drivers/local_sync/registration/driver_module.h" #endif // IREE_HAVE_HAL_LOCAL_SYNC_DRIVER_MODULE @@ -46,6 +50,11 @@ iree_hal_register_all_available_drivers(iree_hal_driver_registry_t* registry) { z0, 
iree_hal_cuda_driver_module_register(registry)); #endif // IREE_HAVE_HAL_CUDA_DRIVER_MODULE +#if defined(IREE_HAVE_HAL_CUDA2_DRIVER_MODULE) + IREE_RETURN_AND_END_ZONE_IF_ERROR( + z0, iree_hal_cuda2_driver_module_register(registry)); +#endif // IREE_HAVE_HAL_CUDA2_DRIVER_MODULE + #if defined(IREE_HAVE_HAL_LOCAL_SYNC_DRIVER_MODULE) IREE_RETURN_AND_END_ZONE_IF_ERROR( z0, iree_hal_local_sync_driver_module_register(registry)); diff --git a/tests/e2e/stablehlo_ops/BUILD.bazel b/tests/e2e/stablehlo_ops/BUILD.bazel index 14852a7a56c3e..1a07babbdb523 100644 --- a/tests/e2e/stablehlo_ops/BUILD.bazel +++ b/tests/e2e/stablehlo_ops/BUILD.bazel @@ -506,3 +506,108 @@ iree_check_single_backend_test_suite( ], target_backend = "cuda", ) + +CUDA_SRCS = enforce_glob( + [ + "abs.mlir", + "add.mlir", + "batch_norm_inference.mlir", + "bitcast_convert.mlir", + "broadcast.mlir", + "broadcast_add.mlir", + "broadcast_in_dim.mlir", + "clamp.mlir", + "compare.mlir", + "complex.mlir", + "concatenate.mlir", + "constant.mlir", + "convert.mlir", + "convolution.mlir", + "cosine.mlir", + "divide.mlir", + "dot.mlir", + "dot_bf16.mlir", + "dot_general.mlir", + "dynamic_slice.mlir", + "dynamic_update_slice.mlir", + "exponential.mlir", + "exponential_fp16.mlir", + "exponential_minus_one.mlir", + "fft.mlir", + "finite.mlir", + "floor.mlir", + "gather.mlir", + "iota.mlir", + "log.mlir", + "log_plus_one.mlir", + "maximum.mlir", + "minimum.mlir", + "multiply.mlir", + "negate.mlir", + "pad.mlir", + "philox.mlir", + "pow.mlir", + "reduce.mlir", + "reduce_window.mlir", + "remainder.mlir", + "reshape.mlir", + "reverse.mlir", + "rng_normal.mlir", + "rng_uniform.mlir", + "round.mlir", + "rsqrt.mlir", + "scatter.mlir", + "scatter_dynamic.mlir", + "select.mlir", + "sine.mlir", + "slice.mlir", + "sort.mlir", + "sqrt.mlir", + "subtract.mlir", + "tanh.mlir", + "three_fry.mlir", + "torch_index_select.mlir", + "transpose.mlir", + "while.mlir", + ], + include = ["*.mlir"], + exclude = [], +) + 
+iree_check_single_backend_test_suite( + name = "check_cuda2_graph", + srcs = CUDA_SRCS, + compiler_flags = [ + # TODO(#13984): memset emulation required for graphs. + "--iree-stream-emulate-memset", + ], + driver = "cuda2", + input_type = "stablehlo", + runner_args = ["--cuda2_use_streams=false"], + tags = [ + # CUDA cuInit fails with sanitizer on. + "noasan", + "nomsan", + "notsan", + "noubsan", + "requires-gpu-nvidia", + ], + target_backend = "cuda", +) + +iree_check_single_backend_test_suite( + name = "check_cuda2_stream", + srcs = CUDA_SRCS, + driver = "cuda2", + input_type = "stablehlo", + runner_args = ["--cuda2_use_streams=true"], + tags = [ + # CUDA cuInit fails with sanitizer on. + "noasan", + "nomsan", + "notsan", + "noubsan", + "requires-gpu-nvidia", + ], + target_backend = "cuda", +) diff --git a/tests/e2e/stablehlo_ops/CMakeLists.txt b/tests/e2e/stablehlo_ops/CMakeLists.txt index f2d10dcd73bc1..fa62dc160fd65 100644 --- a/tests/e2e/stablehlo_ops/CMakeLists.txt +++ b/tests/e2e/stablehlo_ops/CMakeLists.txt @@ -460,6 +460,168 @@ iree_check_single_backend_test_suite( "requires-gpu-nvidia" ) +iree_check_single_backend_test_suite( + NAME + check_cuda2_graph + SRCS + "abs.mlir" + "add.mlir" + "batch_norm_inference.mlir" + "bitcast_convert.mlir" + "broadcast.mlir" + "broadcast_add.mlir" + "broadcast_in_dim.mlir" + "clamp.mlir" + "compare.mlir" + "complex.mlir" + "concatenate.mlir" + "constant.mlir" + "convert.mlir" + "convolution.mlir" + "cosine.mlir" + "divide.mlir" + "dot.mlir" + "dot_bf16.mlir" + "dot_general.mlir" + "dynamic_slice.mlir" + "dynamic_update_slice.mlir" + "exponential.mlir" + "exponential_fp16.mlir" + "exponential_minus_one.mlir" + "fft.mlir" + "finite.mlir" + "floor.mlir" + "gather.mlir" + "iota.mlir" + "log.mlir" + "log_plus_one.mlir" + "maximum.mlir" + "minimum.mlir" + "multiply.mlir" + "negate.mlir" + "pad.mlir" + "philox.mlir" + "pow.mlir" + "reduce.mlir" + "reduce_window.mlir" + "remainder.mlir" + "reshape.mlir" + "reverse.mlir" + 
"rng_normal.mlir" + "rng_uniform.mlir" + "round.mlir" + "rsqrt.mlir" + "scatter.mlir" + "scatter_dynamic.mlir" + "select.mlir" + "sine.mlir" + "slice.mlir" + "sort.mlir" + "sqrt.mlir" + "subtract.mlir" + "tanh.mlir" + "three_fry.mlir" + "torch_index_select.mlir" + "transpose.mlir" + "while.mlir" + TARGET_BACKEND + "cuda" + DRIVER + "cuda2" + COMPILER_FLAGS + "--iree-stream-emulate-memset" + INPUT_TYPE + "stablehlo" + RUNNER_ARGS + "--cuda2_use_streams=false" + LABELS + "noasan" + "nomsan" + "notsan" + "noubsan" + "requires-gpu-nvidia" +) + +iree_check_single_backend_test_suite( + NAME + check_cuda2_stream + SRCS + "abs.mlir" + "add.mlir" + "batch_norm_inference.mlir" + "bitcast_convert.mlir" + "broadcast.mlir" + "broadcast_add.mlir" + "broadcast_in_dim.mlir" + "clamp.mlir" + "compare.mlir" + "complex.mlir" + "concatenate.mlir" + "constant.mlir" + "convert.mlir" + "convolution.mlir" + "cosine.mlir" + "divide.mlir" + "dot.mlir" + "dot_bf16.mlir" + "dot_general.mlir" + "dynamic_slice.mlir" + "dynamic_update_slice.mlir" + "exponential.mlir" + "exponential_fp16.mlir" + "exponential_minus_one.mlir" + "fft.mlir" + "finite.mlir" + "floor.mlir" + "gather.mlir" + "iota.mlir" + "log.mlir" + "log_plus_one.mlir" + "maximum.mlir" + "minimum.mlir" + "multiply.mlir" + "negate.mlir" + "pad.mlir" + "philox.mlir" + "pow.mlir" + "reduce.mlir" + "reduce_window.mlir" + "remainder.mlir" + "reshape.mlir" + "reverse.mlir" + "rng_normal.mlir" + "rng_uniform.mlir" + "round.mlir" + "rsqrt.mlir" + "scatter.mlir" + "scatter_dynamic.mlir" + "select.mlir" + "sine.mlir" + "slice.mlir" + "sort.mlir" + "sqrt.mlir" + "subtract.mlir" + "tanh.mlir" + "three_fry.mlir" + "torch_index_select.mlir" + "transpose.mlir" + "while.mlir" + TARGET_BACKEND + "cuda" + DRIVER + "cuda2" + INPUT_TYPE + "stablehlo" + RUNNER_ARGS + "--cuda2_use_streams=true" + LABELS + "noasan" + "nomsan" + "notsan" + "noubsan" + "requires-gpu-nvidia" +) + ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### 
iree_check_single_backend_test_suite( diff --git a/tests/e2e/tosa_ops/BUILD.bazel b/tests/e2e/tosa_ops/BUILD.bazel index a554c080acb84..c55ffaa9076a5 100644 --- a/tests/e2e/tosa_ops/BUILD.bazel +++ b/tests/e2e/tosa_ops/BUILD.bazel @@ -246,9 +246,98 @@ iree_check_single_backend_test_suite( target_backend = "vulkan-spirv", ) +CUDA_SRCS = enforce_glob( + [ + "abs.mlir", + "add.mlir", + "arithmetic_right_shift.mlir", + "bitwise_and.mlir", + "bitwise_or.mlir", + "bitwise_xor.mlir", + "ceil.mlir", + "clamp.mlir", + "clz.mlir", + "const.mlir", + "equal.mlir", + "exp.mlir", + "floor.mlir", + "fully_connected.mlir", + "gather.mlir", + "greater.mlir", + "greater_equal.mlir", + "if.mlir", + "log.mlir", + "logical_left_shift.mlir", + "logical_right_shift.mlir", + "logical_right_shift_16.mlir", + "matmul.mlir", + "max_pool.mlir", + "maximum.mlir", + "minimum.mlir", + "mul.mlir", + "negate.mlir", + "pad.mlir", + "reciprocal.mlir", + "reduce.mlir", + "reshape.mlir", + "rsqrt.mlir", + "select.mlir", + "sigmoid.mlir", + "sub.mlir", + "table.mlir", + "tanh.mlir", + "transpose.mlir", + "while.mlir", + ], + include = ["*.mlir"], + exclude = [ + "mul_shift.mlir", + ], +) + +iree_check_single_backend_test_suite( + name = "check_cuda2_graph", + srcs = CUDA_SRCS, + compiler_flags = [ + # TODO(#13984): memset emulation required for graphs. + "--iree-stream-emulate-memset", + ], + driver = "cuda2", + input_type = "tosa", + runner_args = ["--cuda2_use_streams=false"], + tags = [ + # CUDA cuInit fails with sanitizer on. + "noasan", + "nomsan", + "notsan", + "noubsan", + "requires-gpu-nvidia", + ], + target_backend = "cuda", +) + +iree_check_single_backend_test_suite( + name = "check_cuda2_stream", + srcs = CUDA_SRCS, + driver = "cuda2", + input_type = "tosa", + runner_args = ["--cuda2_use_streams=true"], + tags = [ + # CUDA cuInit fails with sanitizer on. 
+ "noasan", + "nomsan", + "notsan", + "noubsan", + "requires-gpu-nvidia", + ], + target_backend = "cuda", +) + test_suite( name = "check", tests = [ + ":check_cuda2_graph", + ":check_cuda2_stream", ":check_llvm-cpu_local-task", ":check_vmvx_local-task", ":check_vulkan-spirv_vulkan", diff --git a/tests/e2e/tosa_ops/CMakeLists.txt b/tests/e2e/tosa_ops/CMakeLists.txt index 97aebe2e58e8b..7655420dcb1cc 100644 --- a/tests/e2e/tosa_ops/CMakeLists.txt +++ b/tests/e2e/tosa_ops/CMakeLists.txt @@ -222,6 +222,128 @@ iree_check_single_backend_test_suite( "tosa" ) +iree_check_single_backend_test_suite( + NAME + check_cuda2_graph + SRCS + "abs.mlir" + "add.mlir" + "arithmetic_right_shift.mlir" + "bitwise_and.mlir" + "bitwise_or.mlir" + "bitwise_xor.mlir" + "ceil.mlir" + "clamp.mlir" + "clz.mlir" + "const.mlir" + "equal.mlir" + "exp.mlir" + "floor.mlir" + "fully_connected.mlir" + "gather.mlir" + "greater.mlir" + "greater_equal.mlir" + "if.mlir" + "log.mlir" + "logical_left_shift.mlir" + "logical_right_shift.mlir" + "logical_right_shift_16.mlir" + "matmul.mlir" + "max_pool.mlir" + "maximum.mlir" + "minimum.mlir" + "mul.mlir" + "negate.mlir" + "pad.mlir" + "reciprocal.mlir" + "reduce.mlir" + "reshape.mlir" + "rsqrt.mlir" + "select.mlir" + "sigmoid.mlir" + "sub.mlir" + "table.mlir" + "tanh.mlir" + "transpose.mlir" + "while.mlir" + TARGET_BACKEND + "cuda" + DRIVER + "cuda2" + COMPILER_FLAGS + "--iree-stream-emulate-memset" + INPUT_TYPE + "tosa" + RUNNER_ARGS + "--cuda2_use_streams=false" + LABELS + "noasan" + "nomsan" + "notsan" + "noubsan" + "requires-gpu-nvidia" +) + +iree_check_single_backend_test_suite( + NAME + check_cuda2_stream + SRCS + "abs.mlir" + "add.mlir" + "arithmetic_right_shift.mlir" + "bitwise_and.mlir" + "bitwise_or.mlir" + "bitwise_xor.mlir" + "ceil.mlir" + "clamp.mlir" + "clz.mlir" + "const.mlir" + "equal.mlir" + "exp.mlir" + "floor.mlir" + "fully_connected.mlir" + "gather.mlir" + "greater.mlir" + "greater_equal.mlir" + "if.mlir" + "log.mlir" + 
"logical_left_shift.mlir" + "logical_right_shift.mlir" + "logical_right_shift_16.mlir" + "matmul.mlir" + "max_pool.mlir" + "maximum.mlir" + "minimum.mlir" + "mul.mlir" + "negate.mlir" + "pad.mlir" + "reciprocal.mlir" + "reduce.mlir" + "reshape.mlir" + "rsqrt.mlir" + "select.mlir" + "sigmoid.mlir" + "sub.mlir" + "table.mlir" + "tanh.mlir" + "transpose.mlir" + "while.mlir" + TARGET_BACKEND + "cuda" + DRIVER + "cuda2" + INPUT_TYPE + "tosa" + RUNNER_ARGS + "--cuda2_use_streams=true" + LABELS + "noasan" + "nomsan" + "notsan" + "noubsan" + "requires-gpu-nvidia" +) + ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### iree_check_single_backend_test_suite(