diff --git a/compiler/plugins/target/CUDA/CUDATarget.cpp b/compiler/plugins/target/CUDA/CUDATarget.cpp index 2e976bbdfa72..59e581c2baff 100644 --- a/compiler/plugins/target/CUDA/CUDATarget.cpp +++ b/compiler/plugins/target/CUDA/CUDATarget.cpp @@ -62,6 +62,7 @@ struct CUDAOptions { bool clUsePtxas = false; std::string clUsePtxasFrom; std::string clUsePtxasParams; + bool enableLegacySync = true; void bindOptions(OptionsBinder &binder) { static llvm::cl::OptionCategory category("CUDA HAL Target"); @@ -104,6 +105,12 @@ struct CUDAOptions { "iree-hal-cuda-use-ptxas-params", clUsePtxasParams, llvm::cl::cat(category), llvm::cl::desc("Passes the given additional parameters to ptxas.")); + + binder.opt( + "iree-hal-cuda-enable-legacy-sync", enableLegacySync, + llvm::cl::cat(category), + llvm::cl::desc( + "Enable legacy sync mode that handles semaphores synchronously.")); } }; } // namespace @@ -390,7 +397,9 @@ class CUDATargetBackend final : public TargetBackend { // Indicates that the runtime HAL driver operates only in the legacy // synchronous mode. - configItems.emplace_back(b.getStringAttr("legacy_sync"), b.getUnitAttr()); + if (options.enableLegacySync) { + configItems.emplace_back(b.getStringAttr("legacy_sync"), b.getUnitAttr()); + } configItems.emplace_back(b.getStringAttr("executable_targets"), getExecutableTargets(context)); diff --git a/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt b/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt index 482f15453a43..2b1772ef538f 100644 --- a/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt +++ b/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt @@ -76,6 +76,7 @@ iree_check_single_backend_test_suite( "--iree-input-type=stablehlo" # TODO(#13984): We need memset emulation to workaround CUDA graph issues for now. "--iree-stream-emulate-memset" + "--iree-hal-cuda-enable-legacy-sync=false" RUNNER_ARGS "--cuda2_use_streams=false" LABELS @@ -156,6 +157,7 @@ iree_check_single_backend_test_suite( "cuda2" COMPILER_FLAGS "--iree-input-type=stablehlo" + "--iree-hal-cuda-enable-legacy-sync=false" RUNNER_ARGS "--cuda2_use_streams=true" LABELS diff --git a/experimental/cuda2/tests/tosa_ops/CMakeLists.txt b/experimental/cuda2/tests/tosa_ops/CMakeLists.txt index 88752fe04624..e82be08f1365 100644 --- a/experimental/cuda2/tests/tosa_ops/CMakeLists.txt +++ b/experimental/cuda2/tests/tosa_ops/CMakeLists.txt @@ -57,6 +57,7 @@ iree_check_single_backend_test_suite( "--iree-input-type=tosa" # TODO(#13984): We need memset emulation to workaround CUDA graph issues for now. "--iree-stream-emulate-memset" + "--iree-hal-cuda-enable-legacy-sync=false" RUNNER_ARGS "--cuda2_use_streams=false" LABELS @@ -118,6 +119,7 @@ iree_check_single_backend_test_suite( "cuda2" COMPILER_FLAGS "--iree-input-type=tosa" + "--iree-hal-cuda-enable-legacy-sync=false" RUNNER_ARGS "--cuda2_use_streams=true" LABELS