From 1ef5acf3398ce6c673fcd74137f4ca667e215e1b Mon Sep 17 00:00:00 2001
From: Okwan Kwon
Date: Thu, 10 Aug 2023 13:09:47 -0700
Subject: [PATCH] Integrate llvm-project@679c076ae446 (#14573)

* bump llvm to 679c076ae446
* use getter for offsets, sizes, strides
* implement getArgOperandsMutable() for Flow, Stream, and VM call ops

---------

Co-authored-by: Mahesh Ravishankar
Co-authored-by: Groverkss
---
 .../Codegen/Common/FlattenMemRefSubspanPass.cpp    |  2 +-
 .../compiler/Dialect/Flow/IR/FlowOpFolders.cpp     |  6 +++---
 .../src/iree/compiler/Dialect/Flow/IR/FlowOps.td   |  6 ++++++
 .../iree/compiler/Dialect/Stream/IR/StreamOps.td   | 12 ++++++++++++
 compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td  |  6 ++++++
 .../lib/Dialect/LinalgExt/Transforms/Utils.cpp     |  6 +++---
 tests/transform_dialect/cuda/softmax.mlir          | 14 +++++---------
 .../cuda/softmax_codegen_spec.mlir                 |  4 ++++
 tests/transform_dialect/cuda/softmax_partial.mlir  |  4 +++-
 .../cuda/softmax_partial_codegen_spec.mlir         | 11 +++++------
 third_party/llvm-project                           |  2 +-
 11 files changed, 49 insertions(+), 24 deletions(-)

diff --git a/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp b/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
index f5c6856b6956..744b29304d89 100644
--- a/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
@@ -431,7 +431,7 @@ struct FlattenSubView final : public OpConversionPattern<memref::SubViewOp> {
         getTypeConverter()->convertType(op.getResult().getType());
     if (!neededResultType || !isRankZeroOrOneMemRef(neededResultType))
       return failure();
-    Value size = createTotalElementCountValue(op.getType(), op.sizes(),
+    Value size = createTotalElementCountValue(op.getType(), op.getSizes(),
                                               op.getLoc(), rewriter);
     SmallVector<Value> offsets = mlir::getValueOrCreateConstantIndexOp(
         rewriter, op.getLoc(), op.getMixedOffsets());
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
index 0abd2fa81219..0f13fc23c493 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
@@ -661,9 +661,9 @@ struct FoldCastOpIntoDispatchStoreOp
     rewriter.replaceOpWithNewOp<DispatchTensorStoreOp>(
         storeOp, parentOp.getSource(), storeOp.getTarget(),
-        storeOp.getTargetDims(), storeOp.offsets(), storeOp.sizes(),
-        storeOp.strides(), storeOp.static_offsets(), storeOp.static_sizes(),
-        storeOp.static_strides());
+        storeOp.getTargetDims(), storeOp.getOffsets(), storeOp.getSizes(),
+        storeOp.getStrides(), storeOp.getStaticOffsets(),
+        storeOp.getStaticSizes(), storeOp.getStaticStrides());
     return success();
   }
 };
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td
index e2a233552386..b639b924cb3b 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td
@@ -1011,6 +1011,12 @@ def FLOW_CallOp : FLOW_Op<"call", [
     ValueRange getResultDynamicDims(unsigned idx) {
       return IREE::Util::findVariadicDynamicDims(idx, getResults(), getResultDims());
     }
+
+    /// Get the argument operands to the called function as a mutable range; this is
+    /// required by the call interface.
+    MutableOperandRange getArgOperandsMutable() {
+      return getArgumentsMutable();
+    }
   }];
 
   let hasVerifier = 1;
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
index c0418849519b..53f41c19a2d3 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
@@ -2120,6 +2120,12 @@ def Stream_AsyncCallOp : Stream_Op<"async.call", [
     Value getResultSize(unsigned idx) {
       return findValueSizeInList(idx, getResults(), getResultSizes());
     }
+
+    /// Get the argument operands to the called function as a mutable range; this is
+    /// required by the call interface.
+    MutableOperandRange getArgOperandsMutable() {
+      return getResourceOperandsMutable();
+    }
   }];
 
   let hasVerifier = 1;
@@ -2792,6 +2798,12 @@ def Stream_CmdCallOp : Stream_Op<"cmd.call", [
       return findValueSizeInList(idx, getOperands(), getResourceOperandSizes());
     }
     Value getResultSize(unsigned idx) { return {}; }
+
+    /// Get the argument operands to the called function as a mutable range; this is
+    /// required by the call interface.
+    MutableOperandRange getArgOperandsMutable() {
+      return getResourceOperandsMutable();
+    }
   }];
 
   let hasVerifier = 1;
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td b/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td
index db4763da8ad6..dd40683105ee 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td
@@ -4022,6 +4022,12 @@ class VM_CallBaseOp<string mnemonic, list<Trait> traits = []> :
     void setCalleeFromCallable(CallInterfaceCallable callee) {
       (*this)->setAttr("callee", callee.get());
     }
+
+    /// Get the argument operands to the called function as a mutable range; this is
+    /// required by the call interface.
+    MutableOperandRange getArgOperandsMutable() {
+      return getOperandsMutable();
+    }
   }];
 }
diff --git a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Utils.cpp b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Utils.cpp
index 4248ed2b2afd..0a866589d7eb 100644
--- a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Utils.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Utils.cpp
@@ -82,9 +82,9 @@ Value mlir::iree_compiler::IREE::LinalgExt::createMatchingSubsetInsertOp(
     Value source, Value dest) {
   return b.create<tensor::InsertSliceOp>(
       loc, subsetExtractOp.getSource().getType(), source, dest,
-      subsetExtractOp.offsets(), subsetExtractOp.sizes(),
-      subsetExtractOp.strides(), subsetExtractOp.static_offsets(),
-      subsetExtractOp.static_sizes(), subsetExtractOp.static_strides());
+      subsetExtractOp.getOffsets(), subsetExtractOp.getSizes(),
+      subsetExtractOp.getStrides(), subsetExtractOp.getStaticOffsets(),
+      subsetExtractOp.getStaticSizes(), subsetExtractOp.getStaticStrides());
 }
 
 void mlir::iree_compiler::IREE::LinalgExt::createMatchingParallelSubsetInsertOp(
diff --git a/tests/transform_dialect/cuda/softmax.mlir b/tests/transform_dialect/cuda/softmax.mlir
index d98b10da88df..d35de99f1753 100644
--- a/tests/transform_dialect/cuda/softmax.mlir
+++ b/tests/transform_dialect/cuda/softmax.mlir
@@ -2,23 +2,19 @@
 // RUN: iree-opt %s --iree-hal-target-backends=cuda \
 // RUN:   --iree-abi-transformation-pipeline \
 // RUN:   --iree-flow-transformation-pipeline \
-/// This must be used with the custom dispatch region formation
-/// because IREE's does not fuse the 6 ops softmax version even with
-/// --iree-flow-fuse-multi-use.
 // RUN:   --iree-flow-dispatch-use-transform-dialect=%p/softmax_dispatch_spec.mlir \
 // RUN:   --iree-stream-transformation-pipeline \
 // RUN:   --iree-hal-configuration-pipeline | \
 // RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-llvmgpu-lower-executable-target)))' \
-// RUN:   --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_codegen_spec.mlir | \
+// RUN:   --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_codegen_spec.mlir \
+// RUN:   --iree-codegen-llvmgpu-enable-transform-dialect-jit=false | \
 // RUN: FileCheck %s --check-prefix=CHECK-SHUFFLE
 
-// RUN: iree-compile %s --iree-hal-target-backends=cuda \
-// RUN:   --iree-opt-const-expr-hoisting=false --iree-opt-const-eval=false \
 /// Constant JIT'ing must be disabled because the transform-dialect debug
 /// flags leak to the JIT session, which doesn't know what to do with them.
-/// This must be used with the custom dispatch region formation
-/// because IREE's does not fuse the 6 ops softmax version even with
-/// --iree-flow-fuse-multi-use.
+// RUN: iree-compile %s --iree-hal-target-backends=cuda \
+// RUN:   --iree-opt-const-expr-hoisting=false --iree-opt-const-eval=false \
+// RUN:   --iree-codegen-llvmgpu-enable-transform-dialect-jit=false \
 // RUN:   --iree-flow-dispatch-use-transform-dialect=%p/softmax_dispatch_spec.mlir \
 // RUN:   --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_codegen_spec.mlir | \
 // RUN: iree-run-module --module=- --function=softmax --device=cuda | \
diff --git a/tests/transform_dialect/cuda/softmax_codegen_spec.mlir b/tests/transform_dialect/cuda/softmax_codegen_spec.mlir
index 5d2f7003aab6..830d24a0b212 100644
--- a/tests/transform_dialect/cuda/softmax_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/softmax_codegen_spec.mlir
@@ -35,6 +35,10 @@ transform.sequence failures(propagate) {
   %forall_with_type = transform.cast %forall : !transform.any_op to !transform.op<"scf.forall">
   transform.iree.share_forall_operands %forall_with_type
     : (!transform.op<"scf.forall">) -> !transform.op<"scf.forall">
+  transform.apply_patterns to %variant_op {
+    transform.apply_patterns.canonicalization
+  } : !transform.any_op
+  transform.iree.apply_cse %variant_op : !transform.any_op
 
   // Step 2. Second level of tiling + fusion parallelizes to threads.
   // ================================================================
diff --git a/tests/transform_dialect/cuda/softmax_partial.mlir b/tests/transform_dialect/cuda/softmax_partial.mlir
index 12c4bef52c6c..db6f2f1cde1a 100644
--- a/tests/transform_dialect/cuda/softmax_partial.mlir
+++ b/tests/transform_dialect/cuda/softmax_partial.mlir
@@ -5,13 +5,15 @@
 // RUN:   --iree-stream-transformation-pipeline \
 // RUN:   --iree-hal-configuration-pipeline | \
 // RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-llvmgpu-lower-executable-target)))' \
-// RUN:   --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_partial_codegen_spec.mlir | \
+// RUN:   --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_partial_codegen_spec.mlir \
+// RUN:   --iree-codegen-llvmgpu-enable-transform-dialect-jit=false | \
 // RUN: FileCheck %s --check-prefix=CHECK-SHUFFLE
 
 // RUN: iree-compile %s --iree-hal-target-backends=cuda \
 // RUN:   --iree-opt-const-expr-hoisting=false --iree-opt-const-eval=false \
 /// Constant JIT'ing must be disabled because the transform-dialect debug
 /// flags leak to the JIT session, which doesn't know what to do with them.
+// RUN:   --iree-codegen-llvmgpu-enable-transform-dialect-jit=false \
 // RUN:   --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_partial_codegen_spec.mlir | \
 // RUN: iree-run-module --module=- --function=softmax_partial --device=cuda | \
 // RUN: FileCheck %s
diff --git a/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir b/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir
index 61b546332a2a..760353876e63 100644
--- a/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir
@@ -67,16 +67,15 @@ transform.sequence failures(propagate) {
 
   // Step 4. Bufferize and drop HAL descriptor from memref ops.
   // =========================================================
-  %variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> !transform.any_op
-  %variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op_2 : (!transform.any_op) -> !transform.any_op
+  transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> ()
+  %variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op : (!transform.any_op) -> !transform.any_op
   %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
 
   // Step 5. Post-bufferization mapping to blocks and threads.
   // =========================================================
   %func_2 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
-  %func_3 = transform.iree.forall_to_workgroup %func_2 : (!transform.any_op) -> !transform.any_op
-  transform.iree.map_nested_forall_to_gpu_threads %func_3
-    { workgroup_dims = [32, 4, 1] }
+  transform.iree.forall_to_workgroup %func_2 : (!transform.any_op) -> ()
+  transform.iree.map_nested_forall_to_gpu_threads %func_2 workgroup_dims = [32, 4, 1] : (!transform.any_op) -> ()
 
   // Step 6. Post-bufferization vector distribution with rank-reduction.
   // ===================================================================
@@ -90,5 +89,5 @@ transform.sequence failures(propagate) {
   %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
   %warp = transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
     : (!transform.any_op) -> !transform.any_op
-  transform.iree.vector.warp_distribute %end_func
+  transform.iree.vector.warp_distribute %end_func : (!transform.any_op) -> ()
 }
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 82ca6e95476b..fc40ccd77cf0 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 82ca6e95476b13f6c2572fb7d9397e5db8eeb510
+Subproject commit fc40ccd77cf04fca3922d465354a167428c92d11
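
Note on the interface change above (a sketch for context, not part of the patch):
the bump to llvm-project@679c076ae446 picks up MLIR's CallOpInterface update that
adds a required getArgOperandsMutable() method, which is why the Flow, Stream,
and VM call ops each forward it to the ODS-generated *Mutable() accessor of the
operand group holding their call arguments (getArgumentsMutable(),
getResourceOperandsMutable(), and getOperandsMutable(), respectively). The same
bump also finishes the move to prefixed accessors, hence renames such as
op.sizes() -> op.getSizes() and storeOp.static_offsets() ->
storeOp.getStaticOffsets() in the C++ changes. A minimal illustration of what
the mutable range enables, assuming only an op that implements
mlir::CallOpInterface; appendCallArgument is a hypothetical helper, not code
from this patch:

  #include "mlir/IR/Value.h"
  #include "mlir/Interfaces/CallInterfaces.h"

  // Appends one extra argument to any call-like op, in place. The read-only
  // OperandRange returned by getArgOperands() cannot express this edit; the
  // MutableOperandRange returned by getArgOperandsMutable() can, because it
  // writes through to the operation's underlying operand storage.
  static void appendCallArgument(mlir::CallOpInterface callOp,
                                 mlir::Value newArg) {
    callOp.getArgOperandsMutable().append(newArg);
  }

Each implementation stays a one-line forward because, for these ops, the chosen
operand group is exactly the list of call arguments the interface expects.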