Integrate llvm-project@679c076ae446 (#14573)
* bump llvm to 679c076ae446
* use getter for offsets, sizes, strides
* implement getArgOperandsMutable() for Flow, Stream, and VM call ops

---------

Co-authored-by: Mahesh Ravishankar <ravishankarm@google.com>
Co-authored-by: Groverkss <groverkss@gmail.com>
3 people authored Aug 10, 2023
1 parent af3d2a1 commit 1ef5acf
Showing 11 changed files with 49 additions and 24 deletions.
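Background for the third commit-message bullet: as of this LLVM revision, MLIR's CallOpInterface expects call-like ops to provide getArgOperandsMutable(), returning their argument operands as a MutableOperandRange. The sketch below is illustrative and not taken from this commit; it assumes an ODS-defined call op whose call arguments live in a variadic `arguments` operand segment (so ODS generates getArgumentsMutable()), and the dropFirstCallArgument helper is hypothetical.

// Illustrative sketch, not from this commit. In the op's TableGen
// extraClassDeclaration, the interface hook simply forwards to the
// ODS-generated mutable accessor for the operand segment holding the call
// arguments:
//
//   MutableOperandRange getArgOperandsMutable() {
//     return getArgumentsMutable();
//   }
//
// Interface users can then rewrite call arguments in place without knowing the
// op's operand layout.
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/CallInterfaces.h"

// Hypothetical helper: erase the first call argument through the interface.
static void dropFirstCallArgument(mlir::CallOpInterface call) {
  mlir::MutableOperandRange args = call.getArgOperandsMutable();
  if (args.size() != 0)
    args.erase(/*subStart=*/0, /*subLen=*/1);
}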
@@ -431,7 +431,7 @@ struct FlattenSubView final : public OpConversionPattern<memref::SubViewOp> {
getTypeConverter()->convertType(op.getResult().getType());
if (!neededResultType || !isRankZeroOrOneMemRef(neededResultType))
return failure();
Value size = createTotalElementCountValue(op.getType(), op.sizes(),
Value size = createTotalElementCountValue(op.getType(), op.getSizes(),
op.getLoc(), rewriter);
SmallVector<Value> offsets = mlir::getValueOrCreateConstantIndexOp(
rewriter, op.getLoc(), op.getMixedOffsets());
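For context on the accessor rename above: the prefixed getters (getOffsets(), getSizes(), getStrides()) return only the dynamic SSA operands, the getStatic*() variants return the attribute-encoded constants, and getMixedOffsets()/getMixedSizes() fold the two into OpFoldResults. A minimal sketch using only upstream MLIR APIs; the getStaticElementCount helper is illustrative and not part of this commit.

// Illustrative helper, not from this commit: compute the element count of a
// memref.subview when every size is a compile-time constant.
#include <cstdint>
#include <optional>

#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"

static std::optional<int64_t> getStaticElementCount(mlir::memref::SubViewOp op) {
  int64_t count = 1;
  // getMixedSizes() merges op.getSizes() (dynamic SSA values) with
  // op.getStaticSizes() (attribute constants) into OpFoldResults.
  for (mlir::OpFoldResult size : op.getMixedSizes()) {
    std::optional<int64_t> maybeConst = mlir::getConstantIntValue(size);
    if (!maybeConst)
      return std::nullopt; // Dynamic size, i.e. it came from op.getSizes().
    count *= *maybeConst;
  }
  return count;
}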
6 changes: 3 additions & 3 deletions compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
@@ -661,9 +661,9 @@ struct FoldCastOpIntoDispatchStoreOp

rewriter.replaceOpWithNewOp<DispatchTensorStoreOp>(
storeOp, parentOp.getSource(), storeOp.getTarget(),
storeOp.getTargetDims(), storeOp.offsets(), storeOp.sizes(),
storeOp.strides(), storeOp.static_offsets(), storeOp.static_sizes(),
storeOp.static_strides());
storeOp.getTargetDims(), storeOp.getOffsets(), storeOp.getSizes(),
storeOp.getStrides(), storeOp.getStaticOffsets(),
storeOp.getStaticSizes(), storeOp.getStaticStrides());
return success();
}
};
6 changes: 6 additions & 0 deletions compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td
@@ -1011,6 +1011,12 @@ def FLOW_CallOp : FLOW_Op<"call", [
ValueRange getResultDynamicDims(unsigned idx) {
return IREE::Util::findVariadicDynamicDims(idx, getResults(), getResultDims());
}

/// Get the argument operands to the called function as a mutable range; this is
/// required by the call interface.
MutableOperandRange getArgOperandsMutable() {
return getArgumentsMutable();
}
}];

let hasVerifier = 1;
12 changes: 12 additions & 0 deletions compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
@@ -2120,6 +2120,12 @@ def Stream_AsyncCallOp : Stream_Op<"async.call", [
Value getResultSize(unsigned idx) {
return findValueSizeInList(idx, getResults(), getResultSizes());
}

/// Get the argument operands to the called function as a mutable range; this is
/// required by the call interface.
MutableOperandRange getArgOperandsMutable() {
return getResourceOperandsMutable();
}
}];

let hasVerifier = 1;
@@ -2792,6 +2798,12 @@ def Stream_CmdCallOp : Stream_Op<"cmd.call", [
return findValueSizeInList(idx, getOperands(), getResourceOperandSizes());
}
Value getResultSize(unsigned idx) { return {}; }

/// Get the argument operands to the called function as a mutable range; this is
/// required by the call interface.
MutableOperandRange getArgOperandsMutable() {
return getResourceOperandsMutable();
}
}];

let hasVerifier = 1;
6 changes: 6 additions & 0 deletions compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td
@@ -4022,6 +4022,12 @@ class VM_CallBaseOp<string mnemonic, list<Trait> traits = []> :
void setCalleeFromCallable(CallInterfaceCallable callee) {
(*this)->setAttr("callee", callee.get<SymbolRefAttr>());
}

/// Get the argument operands to the called function as a mutable range; this is
/// required by the call interface.
MutableOperandRange getArgOperandsMutable() {
return getOperandsMutable();
}
}];
}

@@ -82,9 +82,9 @@ Value mlir::iree_compiler::IREE::LinalgExt::createMatchingSubsetInsertOp(
Value source, Value dest) {
return b.create<tensor::InsertSliceOp>(
loc, subsetExtractOp.getSource().getType(), source, dest,
subsetExtractOp.offsets(), subsetExtractOp.sizes(),
subsetExtractOp.strides(), subsetExtractOp.static_offsets(),
subsetExtractOp.static_sizes(), subsetExtractOp.static_strides());
subsetExtractOp.getOffsets(), subsetExtractOp.getSizes(),
subsetExtractOp.getStrides(), subsetExtractOp.getStaticOffsets(),
subsetExtractOp.getStaticSizes(), subsetExtractOp.getStaticStrides());
}

void mlir::iree_compiler::IREE::LinalgExt::createMatchingParallelSubsetInsertOp(
14 changes: 5 additions & 9 deletions tests/transform_dialect/cuda/softmax.mlir
@@ -2,23 +2,19 @@
// RUN: iree-opt %s --iree-hal-target-backends=cuda \
// RUN: --iree-abi-transformation-pipeline \
// RUN: --iree-flow-transformation-pipeline \
/// This must be used with the custom dispatch region formation
/// because IREE's does not fuse the 6 ops softmax version even with
/// --iree-flow-fuse-multi-use.
// RUN: --iree-flow-dispatch-use-transform-dialect=%p/softmax_dispatch_spec.mlir \
// RUN: --iree-stream-transformation-pipeline \
// RUN: --iree-hal-configuration-pipeline | \
// RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-llvmgpu-lower-executable-target)))' \
// RUN: --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_codegen_spec.mlir | \
// RUN: --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_codegen_spec.mlir \
// RUN: --iree-codegen-llvmgpu-enable-transform-dialect-jit=false | \
// RUN: FileCheck %s --check-prefix=CHECK-SHUFFLE

// RUN: iree-compile %s --iree-hal-target-backends=cuda \
// RUN: --iree-opt-const-expr-hoisting=false --iree-opt-const-eval=false \
/// Constant JIT'ing must be disabled because the transform-dialect debug
/// flags leak to the JIT session, which doesn't know what to do with them.
/// This must be used with the custom dispatch region formation
/// because IREE's does not fuse the 6 ops softmax version even with
/// --iree-flow-fuse-multi-use.
// RUN: iree-compile %s --iree-hal-target-backends=cuda \
// RUN: --iree-opt-const-expr-hoisting=false --iree-opt-const-eval=false \
// RUN: --iree-codegen-llvmgpu-enable-transform-dialect-jit=false \
// RUN: --iree-flow-dispatch-use-transform-dialect=%p/softmax_dispatch_spec.mlir \
// RUN: --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_codegen_spec.mlir | \
// RUN: iree-run-module --module=- --function=softmax --device=cuda | \
4 changes: 4 additions & 0 deletions tests/transform_dialect/cuda/softmax_codegen_spec.mlir
@@ -35,6 +35,10 @@ transform.sequence failures(propagate) {
%forall_with_type = transform.cast %forall : !transform.any_op to !transform.op<"scf.forall">
transform.iree.share_forall_operands %forall_with_type
: (!transform.op<"scf.forall">) -> !transform.op<"scf.forall">
transform.apply_patterns to %variant_op {
transform.apply_patterns.canonicalization
} : !transform.any_op
transform.iree.apply_cse %variant_op : !transform.any_op

// Step 2. Second level of tiling + fusion parallelizes to threads.
// ================================================================
4 changes: 3 additions & 1 deletion tests/transform_dialect/cuda/softmax_partial.mlir
@@ -5,13 +5,15 @@
// RUN: --iree-stream-transformation-pipeline \
// RUN: --iree-hal-configuration-pipeline | \
// RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-llvmgpu-lower-executable-target)))' \
// RUN: --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_partial_codegen_spec.mlir | \
// RUN: --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_partial_codegen_spec.mlir \
// RUN: --iree-codegen-llvmgpu-enable-transform-dialect-jit=false | \
// RUN: FileCheck %s --check-prefix=CHECK-SHUFFLE

// RUN: iree-compile %s --iree-hal-target-backends=cuda \
// RUN: --iree-opt-const-expr-hoisting=false --iree-opt-const-eval=false \
/// Constant JIT'ing must be disabled because the transform-dialect debug
/// flags leak to the JIT session, which doesn't know what to do with them.
// RUN: --iree-codegen-llvmgpu-enable-transform-dialect-jit=false \
// RUN: --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_partial_codegen_spec.mlir | \
// RUN: iree-run-module --module=- --function=softmax_partial --device=cuda | \
// RUN: FileCheck %s
11 changes: 5 additions & 6 deletions tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir
@@ -67,16 +67,15 @@ transform.sequence failures(propagate) {

// Step 4. Bufferize and drop HAL descriptor from memref ops.
// =========================================================
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> !transform.any_op
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op_2 : (!transform.any_op) -> !transform.any_op
transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> ()
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op : (!transform.any_op) -> !transform.any_op
%memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op

// Step 5. Post-bufferization mapping to blocks and threads.
// =========================================================
%func_2 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
%func_3 = transform.iree.forall_to_workgroup %func_2 : (!transform.any_op) -> !transform.any_op
transform.iree.map_nested_forall_to_gpu_threads %func_3
{ workgroup_dims = [32, 4, 1] }
transform.iree.forall_to_workgroup %func_2 : (!transform.any_op) -> ()
transform.iree.map_nested_forall_to_gpu_threads %func_2 workgroup_dims = [32, 4, 1] : (!transform.any_op) -> ()

// Step 6. Post-bufferization vector distribution with rank-reduction.
// ===================================================================
@@ -90,5 +89,5 @@ transform.sequence failures(propagate) {
%if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
%warp = transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
: (!transform.any_op) -> !transform.any_op
transform.iree.vector.warp_distribute %end_func
transform.iree.vector.warp_distribute %end_func : (!transform.any_op) -> ()
}
2 changes: 1 addition & 1 deletion third_party/llvm-project
