From 1ef5acf3398ce6c673fcd74137f4ca667e215e1b Mon Sep 17 00:00:00 2001
From: Okwan Kwon
Date: Thu, 10 Aug 2023 13:09:47 -0700
Subject: [PATCH] Integrate llvm-project@679c076ae446 (#14573)

* bump llvm to 679c076ae446
* use getter for offsets, sizes, strides
* implement getArgOperandsMutable() for Flow, Stream, and VM call ops

---------

Co-authored-by: Mahesh Ravishankar
Co-authored-by: Groverkss
---
 .../Codegen/Common/FlattenMemRefSubspanPass.cpp    |  2 +-
 .../compiler/Dialect/Flow/IR/FlowOpFolders.cpp     |  6 +++---
 .../src/iree/compiler/Dialect/Flow/IR/FlowOps.td   |  6 ++++++
 .../iree/compiler/Dialect/Stream/IR/StreamOps.td   | 12 ++++++++++++
 compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td  |  6 ++++++
 .../lib/Dialect/LinalgExt/Transforms/Utils.cpp     |  6 +++---
 tests/transform_dialect/cuda/softmax.mlir          | 14 +++++---------
 .../cuda/softmax_codegen_spec.mlir                 |  4 ++++
 tests/transform_dialect/cuda/softmax_partial.mlir  |  4 +++-
 .../cuda/softmax_partial_codegen_spec.mlir         | 11 +++++------
 third_party/llvm-project                           |  2 +-
 11 files changed, 49 insertions(+), 24 deletions(-)

diff --git a/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp b/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
index f5c6856b6956..744b29304d89 100644
--- a/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
@@ -431,7 +431,7 @@ struct FlattenSubView final : public OpConversionPattern<memref::SubViewOp> {
         getTypeConverter()->convertType(op.getResult().getType());
     if (!neededResultType || !isRankZeroOrOneMemRef(neededResultType))
       return failure();
-    Value size = createTotalElementCountValue(op.getType(), op.sizes(),
+    Value size = createTotalElementCountValue(op.getType(), op.getSizes(),
                                               op.getLoc(), rewriter);
     SmallVector<Value> offsets = mlir::getValueOrCreateConstantIndexOp(
         rewriter, op.getLoc(), op.getMixedOffsets());
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
index 0abd2fa81219..0f13fc23c493 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
@@ -661,9 +661,9 @@ struct FoldCastOpIntoDispatchStoreOp
     rewriter.replaceOpWithNewOp<DispatchTensorStoreOp>(
         storeOp, parentOp.getSource(), storeOp.getTarget(),
-        storeOp.getTargetDims(), storeOp.offsets(), storeOp.sizes(),
-        storeOp.strides(), storeOp.static_offsets(), storeOp.static_sizes(),
-        storeOp.static_strides());
+        storeOp.getTargetDims(), storeOp.getOffsets(), storeOp.getSizes(),
+        storeOp.getStrides(), storeOp.getStaticOffsets(),
+        storeOp.getStaticSizes(), storeOp.getStaticStrides());
     return success();
   }
 };
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td
index e2a233552386..b639b924cb3b 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td
@@ -1011,6 +1011,12 @@ def FLOW_CallOp : FLOW_Op<"call", [
     ValueRange getResultDynamicDims(unsigned idx) {
       return IREE::Util::findVariadicDynamicDims(idx, getResults(), getResultDims());
     }
+
+    /// Get the argument operands to the called function as a mutable range; this is
+    /// required by the call interface.
+    MutableOperandRange getArgOperandsMutable() {
+      return getArgumentsMutable();
+    }
   }];
 
   let hasVerifier = 1;
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
index c0418849519b..53f41c19a2d3 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
@@ -2120,6 +2120,12 @@ def Stream_AsyncCallOp : Stream_Op<"async.call", [
     Value getResultSize(unsigned idx) {
       return findValueSizeInList(idx, getResults(), getResultSizes());
     }
+
+    /// Get the argument operands to the called function as a mutable range; this is
+    /// required by the call interface.
+    MutableOperandRange getArgOperandsMutable() {
+      return getResourceOperandsMutable();
+    }
   }];
 
   let hasVerifier = 1;
@@ -2792,6 +2798,12 @@ def Stream_CmdCallOp : Stream_Op<"cmd.call", [
       return findValueSizeInList(idx, getOperands(), getResourceOperandSizes());
     }
     Value getResultSize(unsigned idx) { return {}; }
+
+    /// Get the argument operands to the called function as a mutable range; this is
+    /// required by the call interface.
+    MutableOperandRange getArgOperandsMutable() {
+      return getResourceOperandsMutable();
+    }
   }];
 
   let hasVerifier = 1;
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td b/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td
index db4763da8ad6..dd40683105ee 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td
@@ -4022,6 +4022,12 @@ class VM_CallBaseOp<string mnemonic, list<Trait> traits = []> :
     void setCalleeFromCallable(CallInterfaceCallable callee) {
       (*this)->setAttr("callee", callee.get());
     }
+
+    /// Get the argument operands to the called function as a mutable range; this is
+    /// required by the call interface.
+    MutableOperandRange getArgOperandsMutable() {
+      return getOperandsMutable();
+    }
   }];
 }
diff --git a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Utils.cpp b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Utils.cpp
index 4248ed2b2afd..0a866589d7eb 100644
--- a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Utils.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgExt/Transforms/Utils.cpp
@@ -82,9 +82,9 @@ Value mlir::iree_compiler::IREE::LinalgExt::createMatchingSubsetInsertOp(
     Value source, Value dest) {
   return b.create<tensor::InsertSliceOp>(
       loc, subsetExtractOp.getSource().getType(), source, dest,
-      subsetExtractOp.offsets(), subsetExtractOp.sizes(),
-      subsetExtractOp.strides(), subsetExtractOp.static_offsets(),
-      subsetExtractOp.static_sizes(), subsetExtractOp.static_strides());
+      subsetExtractOp.getOffsets(), subsetExtractOp.getSizes(),
+      subsetExtractOp.getStrides(), subsetExtractOp.getStaticOffsets(),
+      subsetExtractOp.getStaticSizes(), subsetExtractOp.getStaticStrides());
 }
 
 void mlir::iree_compiler::IREE::LinalgExt::createMatchingParallelSubsetInsertOp(
diff --git a/tests/transform_dialect/cuda/softmax.mlir b/tests/transform_dialect/cuda/softmax.mlir
index d98b10da88df..d35de99f1753 100644
--- a/tests/transform_dialect/cuda/softmax.mlir
+++ b/tests/transform_dialect/cuda/softmax.mlir
@@ -2,23 +2,19 @@
 // RUN: iree-opt %s --iree-hal-target-backends=cuda \
 // RUN:   --iree-abi-transformation-pipeline \
 // RUN:   --iree-flow-transformation-pipeline \
-/// This must be used with the custom dispatch region formation
-/// because IREE's does not fuse the 6 ops softmax version even with
-/// --iree-flow-fuse-multi-use.
 // RUN:   --iree-flow-dispatch-use-transform-dialect=%p/softmax_dispatch_spec.mlir \
 // RUN:   --iree-stream-transformation-pipeline \
 // RUN:   --iree-hal-configuration-pipeline | \
 // RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-llvmgpu-lower-executable-target)))' \
-// RUN:   --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_codegen_spec.mlir | \
+// RUN:   --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_codegen_spec.mlir \
+// RUN:   --iree-codegen-llvmgpu-enable-transform-dialect-jit=false | \
 // RUN: FileCheck %s --check-prefix=CHECK-SHUFFLE
 
-// RUN: iree-compile %s --iree-hal-target-backends=cuda \
-// RUN:   --iree-opt-const-expr-hoisting=false --iree-opt-const-eval=false \
 /// Constant JIT'ing must be disabled because the transform-dialect debug
 /// flags leak to the JIT session, which doesn't know what to do with them.
-/// This must be used with the custom dispatch region formation
-/// because IREE's does not fuse the 6 ops softmax version even with
-/// --iree-flow-fuse-multi-use.
+// RUN: iree-compile %s --iree-hal-target-backends=cuda \
+// RUN:   --iree-opt-const-expr-hoisting=false --iree-opt-const-eval=false \
+// RUN:   --iree-codegen-llvmgpu-enable-transform-dialect-jit=false \
 // RUN:   --iree-flow-dispatch-use-transform-dialect=%p/softmax_dispatch_spec.mlir \
 // RUN:   --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_codegen_spec.mlir | \
 // RUN: iree-run-module --module=- --function=softmax --device=cuda | \
diff --git a/tests/transform_dialect/cuda/softmax_codegen_spec.mlir b/tests/transform_dialect/cuda/softmax_codegen_spec.mlir
index 5d2f7003aab6..830d24a0b212 100644
--- a/tests/transform_dialect/cuda/softmax_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/softmax_codegen_spec.mlir
@@ -35,6 +35,10 @@ transform.sequence failures(propagate) {
   %forall_with_type = transform.cast %forall : !transform.any_op to !transform.op<"scf.forall">
   transform.iree.share_forall_operands %forall_with_type
     : (!transform.op<"scf.forall">) -> !transform.op<"scf.forall">
+  transform.apply_patterns to %variant_op {
+    transform.apply_patterns.canonicalization
+  } : !transform.any_op
+  transform.iree.apply_cse %variant_op : !transform.any_op
 
   // Step 2. Second level of tiling + fusion parallelizes to threads.
   // ================================================================
diff --git a/tests/transform_dialect/cuda/softmax_partial.mlir b/tests/transform_dialect/cuda/softmax_partial.mlir
index 12c4bef52c6c..db6f2f1cde1a 100644
--- a/tests/transform_dialect/cuda/softmax_partial.mlir
+++ b/tests/transform_dialect/cuda/softmax_partial.mlir
@@ -5,13 +5,15 @@
 // RUN:   --iree-stream-transformation-pipeline \
 // RUN:   --iree-hal-configuration-pipeline | \
 // RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-llvmgpu-lower-executable-target)))' \
-// RUN:   --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_partial_codegen_spec.mlir | \
+// RUN:   --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_partial_codegen_spec.mlir \
+// RUN:   --iree-codegen-llvmgpu-enable-transform-dialect-jit=false | \
 // RUN: FileCheck %s --check-prefix=CHECK-SHUFFLE
 
 // RUN: iree-compile %s --iree-hal-target-backends=cuda \
 // RUN:   --iree-opt-const-expr-hoisting=false --iree-opt-const-eval=false \
 /// Constant JIT'ing must be disabled because the transform-dialect debug
 /// flags leak to the JIT session, which doesn't know what to do with them.
+// RUN:   --iree-codegen-llvmgpu-enable-transform-dialect-jit=false \
 // RUN:   --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_partial_codegen_spec.mlir | \
 // RUN: iree-run-module --module=- --function=softmax_partial --device=cuda | \
 // RUN: FileCheck %s
diff --git a/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir b/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir
index 61b546332a2a..760353876e63 100644
--- a/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir
@@ -67,16 +67,15 @@ transform.sequence failures(propagate) {
 
   // Step 4. Bufferize and drop HAL descriptor from memref ops.
   // =========================================================
-  %variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> !transform.any_op
-  %variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op_2 : (!transform.any_op) -> !transform.any_op
+  transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> ()
+  %variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op : (!transform.any_op) -> !transform.any_op
   %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
 
   // Step 5. Post-bufferization mapping to blocks and threads.
   // =========================================================
   %func_2 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
-  %func_3 = transform.iree.forall_to_workgroup %func_2 : (!transform.any_op) -> !transform.any_op
-  transform.iree.map_nested_forall_to_gpu_threads %func_3
-    { workgroup_dims = [32, 4, 1] }
+  transform.iree.forall_to_workgroup %func_2 : (!transform.any_op) -> ()
+  transform.iree.map_nested_forall_to_gpu_threads %func_2 workgroup_dims = [32, 4, 1] : (!transform.any_op) -> ()
 
   // Step 6. Post-bufferization vector distribution with rank-reduction.
   // ===================================================================
@@ -90,5 +89,5 @@ transform.sequence failures(propagate) {
   %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
   %warp = transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
     : (!transform.any_op) -> !transform.any_op
-  transform.iree.vector.warp_distribute %end_func
+  transform.iree.vector.warp_distribute %end_func : (!transform.any_op) -> ()
 }
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 82ca6e95476b..fc40ccd77cf0 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 82ca6e95476b13f6c2572fb7d9397e5db8eeb510
+Subproject commit fc40ccd77cf04fca3922d465354a167428c92d11
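
Note on the interface change above (a sketch for context, not part of the patch):
the bump to llvm-project@679c076ae446 picks up MLIR's CallOpInterface update that
adds a required getArgOperandsMutable() method, which is why the Flow, Stream,
and VM call ops each forward it to the ODS-generated *Mutable() accessor of the
operand group holding their call arguments (getArgumentsMutable(),
getResourceOperandsMutable(), and getOperandsMutable(), respectively). The same
bump also finishes the move to prefixed accessors, hence renames such as
op.sizes() -> op.getSizes() and storeOp.static_offsets() ->
storeOp.getStaticOffsets() in the C++ changes. A minimal illustration of what
the mutable range enables, assuming only an op that implements
mlir::CallOpInterface; appendCallArgument is a hypothetical helper, not code
from this patch:

  #include "mlir/IR/Value.h"
  #include "mlir/Interfaces/CallInterfaces.h"

  // Appends one extra argument to any call-like op, in place. The read-only
  // OperandRange returned by getArgOperands() cannot express this edit; the
  // MutableOperandRange returned by getArgOperandsMutable() can, because it
  // writes through to the operation's underlying operand storage.
  static void appendCallArgument(mlir::CallOpInterface callOp,
                                 mlir::Value newArg) {
    callOp.getArgOperandsMutable().append(newArg);
  }

Each implementation stays a one-line forward because, for these ops, the chosen
operand group is exactly the list of call arguments the interface expects.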