diff --git a/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx90a.json b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx90a.json
index 4e27eaaeebb8..0ac678de2d74 100644
--- a/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx90a.json
+++ b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx90a.json
@@ -20,9 +20,6 @@
     "pytorch/models/resnet50",
     "pytorch/models/sdxl-vae-decode-tank",
-    // TODO(#17874): error: a handle passed as operand #0 and consumed by this operation points to a payload entity more than once
-    "sharktank/llama/open-llama-3b-v2-f16",
-
     // TODO: Add I8 MFMA layout support for CDNA2. Currently only CDNA3 specific I8 layout is implemented.
     "sharktank/punet/int8"
   ],
diff --git a/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942.json b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942.json
index 5c864807c301..de01f7ef038b 100644
--- a/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942.json
+++ b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942.json
@@ -20,8 +20,6 @@
     "pytorch/models/resnet50",
     "pytorch/models/sdxl-vae-decode-tank",
-    // TODO(#17874): error: a handle passed as operand #0 and consumed by this operation points to a payload entity more than once
-    "sharktank/llama/open-llama-3b-v2-f16",
   ],
   "expected_run_failures": []
 }
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
index 72091ea9ceeb..a919749405ac 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
@@ -65,7 +65,7 @@ llvm::cl::opt<bool> clGPUEnableVectorDistribution(
 llvm::cl::opt<bool> clGPUEnableTransformDialectJit(
     "iree-codegen-llvmgpu-enable-transform-dialect-jit",
     llvm::cl::desc("enable the usage of the transform dialect JIT"),
-    llvm::cl::init(true));
+    llvm::cl::init(false));
 
 /// Flag to force using WMMA tensorcore operations.
 llvm::cl::opt<bool>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir
index 39aec0fd1f06..975f73ddc947 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/nvvm_pipeline_test.mlir
@@ -811,7 +811,7 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
 // CHECK: llvm.store %{{.*}}, %{{.*}} : f32, !llvm.ptr<3>
 // CHECK: nvvm.barrier0
 // CHECK: llvm.load {{.*}} : !llvm.ptr<3> -> f32
-// CHECK-COUNT-3: nvvm.shfl.sync bfly
+// CHECK-COUNT-2: nvvm.shfl.sync bfly
 
 // -----
@@ -873,7 +873,7 @@ hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
 // CHECK: llvm.store %{{.*}}, %{{.*}} : f32, !llvm.ptr<3>
 // CHECK: nvvm.barrier0
 // CHECK: llvm.load {{.*}} : !llvm.ptr<3> -> f32
-// CHECK-COUNT-3: nvvm.shfl.sync bfly
+// CHECK-COUNT-2: nvvm.shfl.sync bfly
 // CHECK: llvm.fdiv %{{.*}}, %{{.*}}
 // CHECK: llvm.store %{{.*}}, %{{.*}} {alignment = 4 : i64} : vector<4xf32>, !llvm.ptr<1>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir
index 860513c98d65..bfc69ed52d4e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/reduction_pipeline_cuda.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-decompose-softmax), iree-llvmgpu-select-lowering-strategy, iree-codegen-lower-executable-using-transform-dialect, func.func(iree-llvmgpu-lower-executable-target)))))" %s | FileCheck %s
+// RUN: iree-opt --split-input-file --iree-gpu-test-target=sm_60 --iree-codegen-llvmgpu-enable-transform-dialect-jit=true --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-decompose-softmax), iree-llvmgpu-select-lowering-strategy, iree-codegen-lower-executable-using-transform-dialect, func.func(iree-llvmgpu-lower-executable-target)))))" %s | FileCheck %s
 
 #pipeline_layout = #hal.pipeline.layout