Integrate llvm-project@679c076ae446 (#14573)
* bump llvm to 679c076ae446
* use getter for offsets, sizes, strides
* implement getArgOperandsMutable() for Flow, Stream, and VM call ops

---------

Co-authored-by: Mahesh Ravishankar <ravishankarm@google.com>
Co-authored-by: Groverkss <groverkss@gmail.com>
3 people authored Aug 10, 2023
1 parent af3d2a1 commit 1ef5acf
Showing 11 changed files with 49 additions and 24 deletions.
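Background for the third commit-message bullet: as of this LLVM revision, MLIR's CallOpInterface expects call-like ops to provide getArgOperandsMutable(), returning their argument operands as a MutableOperandRange. The sketch below is illustrative and not taken from this commit; it assumes an ODS-defined call op whose call arguments live in a variadic `arguments` operand segment (so ODS generates getArgumentsMutable()), and the dropFirstCallArgument helper is hypothetical.

// Illustrative sketch, not from this commit. In the op's TableGen
// extraClassDeclaration, the interface hook simply forwards to the
// ODS-generated mutable accessor for the operand segment holding the call
// arguments:
//
//   MutableOperandRange getArgOperandsMutable() {
//     return getArgumentsMutable();
//   }
//
// Interface users can then rewrite call arguments in place without knowing the
// op's operand layout.
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/CallInterfaces.h"

// Hypothetical helper: erase the first call argument through the interface.
static void dropFirstCallArgument(mlir::CallOpInterface call) {
  mlir::MutableOperandRange args = call.getArgOperandsMutable();
  if (args.size() != 0)
    args.erase(/*subStart=*/0, /*subLen=*/1);
}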
@@ -431,7 +431,7 @@ struct FlattenSubView final : public OpConversionPattern<memref::SubViewOp> {
getTypeConverter()->convertType(op.getResult().getType());
if (!neededResultType || !isRankZeroOrOneMemRef(neededResultType))
return failure();
Value size = createTotalElementCountValue(op.getType(), op.sizes(),
Value size = createTotalElementCountValue(op.getType(), op.getSizes(),
op.getLoc(), rewriter);
SmallVector<Value> offsets = mlir::getValueOrCreateConstantIndexOp(
rewriter, op.getLoc(), op.getMixedOffsets());
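For context on the accessor rename above: the prefixed getters (getOffsets(), getSizes(), getStrides()) return only the dynamic SSA operands, the getStatic*() variants return the attribute-encoded constants, and getMixedOffsets()/getMixedSizes() fold the two into OpFoldResults. A minimal sketch using only upstream MLIR APIs; the getStaticElementCount helper is illustrative and not part of this commit.

// Illustrative helper, not from this commit: compute the element count of a
// memref.subview when every size is a compile-time constant.
#include <cstdint>
#include <optional>

#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"

static std::optional<int64_t> getStaticElementCount(mlir::memref::SubViewOp op) {
  int64_t count = 1;
  // getMixedSizes() merges op.getSizes() (dynamic SSA values) with
  // op.getStaticSizes() (attribute constants) into OpFoldResults.
  for (mlir::OpFoldResult size : op.getMixedSizes()) {
    std::optional<int64_t> maybeConst = mlir::getConstantIntValue(size);
    if (!maybeConst)
      return std::nullopt; // Dynamic size, i.e. it came from op.getSizes().
    count *= *maybeConst;
  }
  return count;
}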
6 changes: 3 additions & 3 deletions compiler/src/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
@@ -661,9 +661,9 @@ struct FoldCastOpIntoDispatchStoreOp

rewriter.replaceOpWithNewOp<DispatchTensorStoreOp>(
storeOp, parentOp.getSource(), storeOp.getTarget(),
storeOp.getTargetDims(), storeOp.offsets(), storeOp.sizes(),
storeOp.strides(), storeOp.static_offsets(), storeOp.static_sizes(),
storeOp.static_strides());
storeOp.getTargetDims(), storeOp.getOffsets(), storeOp.getSizes(),
storeOp.getStrides(), storeOp.getStaticOffsets(),
storeOp.getStaticSizes(), storeOp.getStaticStrides());
return success();
}
};
6 changes: 6 additions & 0 deletions compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td
@@ -1011,6 +1011,12 @@ def FLOW_CallOp : FLOW_Op<"call", [
ValueRange getResultDynamicDims(unsigned idx) {
return IREE::Util::findVariadicDynamicDims(idx, getResults(), getResultDims());
}

/// Get the argument operands to the called function as a mutable range; this is
/// required by the call interface.
MutableOperandRange getArgOperandsMutable() {
return getArgumentsMutable();
}
}];

let hasVerifier = 1;
12 changes: 12 additions & 0 deletions compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
@@ -2120,6 +2120,12 @@ def Stream_AsyncCallOp : Stream_Op<"async.call", [
Value getResultSize(unsigned idx) {
return findValueSizeInList(idx, getResults(), getResultSizes());
}

/// Get the argument operands to the called function as a mutable range; this is
/// required by the call interface.
MutableOperandRange getArgOperandsMutable() {
return getResourceOperandsMutable();
}
}];

let hasVerifier = 1;
@@ -2792,6 +2798,12 @@ def Stream_CmdCallOp : Stream_Op<"cmd.call", [
return findValueSizeInList(idx, getOperands(), getResourceOperandSizes());
}
Value getResultSize(unsigned idx) { return {}; }

/// Get the argument operands to the called function as a mutable range; this is
/// required by the call interface.
MutableOperandRange getArgOperandsMutable() {
return getResourceOperandsMutable();
}
}];

let hasVerifier = 1;
6 changes: 6 additions & 0 deletions compiler/src/iree/compiler/Dialect/VM/IR/VMOps.td
@@ -4022,6 +4022,12 @@ class VM_CallBaseOp<string mnemonic, list<Trait> traits = []> :
void setCalleeFromCallable(CallInterfaceCallable callee) {
(*this)->setAttr("callee", callee.get<SymbolRefAttr>());
}

/// Get the argument operands to the called function as a mutable range; this is
/// required by the call interface.
MutableOperandRange getArgOperandsMutable() {
return getOperandsMutable();
}
}];
}

@@ -82,9 +82,9 @@ Value mlir::iree_compiler::IREE::LinalgExt::createMatchingSubsetInsertOp(
Value source, Value dest) {
return b.create<tensor::InsertSliceOp>(
loc, subsetExtractOp.getSource().getType(), source, dest,
subsetExtractOp.offsets(), subsetExtractOp.sizes(),
subsetExtractOp.strides(), subsetExtractOp.static_offsets(),
subsetExtractOp.static_sizes(), subsetExtractOp.static_strides());
subsetExtractOp.getOffsets(), subsetExtractOp.getSizes(),
subsetExtractOp.getStrides(), subsetExtractOp.getStaticOffsets(),
subsetExtractOp.getStaticSizes(), subsetExtractOp.getStaticStrides());
}

void mlir::iree_compiler::IREE::LinalgExt::createMatchingParallelSubsetInsertOp(
14 changes: 5 additions & 9 deletions tests/transform_dialect/cuda/softmax.mlir
@@ -2,23 +2,19 @@
// RUN: iree-opt %s --iree-hal-target-backends=cuda \
// RUN: --iree-abi-transformation-pipeline \
// RUN: --iree-flow-transformation-pipeline \
/// This must be used with the custom dispatch region formation
/// because IREE's does not fuse the 6 ops softmax version even with
/// --iree-flow-fuse-multi-use.
// RUN: --iree-flow-dispatch-use-transform-dialect=%p/softmax_dispatch_spec.mlir \
// RUN: --iree-stream-transformation-pipeline \
// RUN: --iree-hal-configuration-pipeline | \
// RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-llvmgpu-lower-executable-target)))' \
// RUN: --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_codegen_spec.mlir | \
// RUN: --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_codegen_spec.mlir \
// RUN: --iree-codegen-llvmgpu-enable-transform-dialect-jit=false | \
// RUN: FileCheck %s --check-prefix=CHECK-SHUFFLE

// RUN: iree-compile %s --iree-hal-target-backends=cuda \
// RUN: --iree-opt-const-expr-hoisting=false --iree-opt-const-eval=false \
/// Constant JIT'ing must be disabled because the transform-dialect debug
/// flags leak to the JIT session, which doesn't know what to do with them.
/// This must be used with the custom dispatch region formation
/// because IREE's does not fuse the 6 ops softmax version even with
/// --iree-flow-fuse-multi-use.
// RUN: iree-compile %s --iree-hal-target-backends=cuda \
// RUN: --iree-opt-const-expr-hoisting=false --iree-opt-const-eval=false \
// RUN: --iree-codegen-llvmgpu-enable-transform-dialect-jit=false \
// RUN: --iree-flow-dispatch-use-transform-dialect=%p/softmax_dispatch_spec.mlir \
// RUN: --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_codegen_spec.mlir | \
// RUN: iree-run-module --module=- --function=softmax --device=cuda | \
4 changes: 4 additions & 0 deletions tests/transform_dialect/cuda/softmax_codegen_spec.mlir
@@ -35,6 +35,10 @@ transform.sequence failures(propagate) {
%forall_with_type = transform.cast %forall : !transform.any_op to !transform.op<"scf.forall">
transform.iree.share_forall_operands %forall_with_type
: (!transform.op<"scf.forall">) -> !transform.op<"scf.forall">
transform.apply_patterns to %variant_op {
transform.apply_patterns.canonicalization
} : !transform.any_op
transform.iree.apply_cse %variant_op : !transform.any_op

// Step 2. Second level of tiling + fusion parallelizes to threads.
// ================================================================
4 changes: 3 additions & 1 deletion tests/transform_dialect/cuda/softmax_partial.mlir
@@ -5,13 +5,15 @@
// RUN: --iree-stream-transformation-pipeline \
// RUN: --iree-hal-configuration-pipeline | \
// RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-llvmgpu-lower-executable-target)))' \
// RUN: --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_partial_codegen_spec.mlir | \
// RUN: --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_partial_codegen_spec.mlir \
// RUN: --iree-codegen-llvmgpu-enable-transform-dialect-jit=false | \
// RUN: FileCheck %s --check-prefix=CHECK-SHUFFLE

// RUN: iree-compile %s --iree-hal-target-backends=cuda \
// RUN: --iree-opt-const-expr-hoisting=false --iree-opt-const-eval=false \
/// Constant JIT'ing must be disabled because the transform-dialect debug
/// flags leak to the JIT session, which doesn't know what to do with them.
// RUN: --iree-codegen-llvmgpu-enable-transform-dialect-jit=false \
// RUN: --iree-codegen-llvmgpu-use-transform-dialect=%p/softmax_partial_codegen_spec.mlir | \
// RUN: iree-run-module --module=- --function=softmax_partial --device=cuda | \
// RUN: FileCheck %s
11 changes: 5 additions & 6 deletions tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir
@@ -67,16 +67,15 @@ transform.sequence failures(propagate) {

// Step 4. Bufferize and drop HAL descriptor from memref ops.
// =========================================================
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> !transform.any_op
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op_2 : (!transform.any_op) -> !transform.any_op
transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> ()
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op : (!transform.any_op) -> !transform.any_op
%memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op

// Step 5. Post-bufferization mapping to blocks and threads.
// =========================================================
%func_2 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
%func_3 = transform.iree.forall_to_workgroup %func_2 : (!transform.any_op) -> !transform.any_op
transform.iree.map_nested_forall_to_gpu_threads %func_3
{ workgroup_dims = [32, 4, 1] }
transform.iree.forall_to_workgroup %func_2 : (!transform.any_op) -> ()
transform.iree.map_nested_forall_to_gpu_threads %func_2 workgroup_dims = [32, 4, 1] : (!transform.any_op) -> ()

// Step 6. Post-bufferization vector distribution with rank-reduction.
// ===================================================================
@@ -90,5 +89,5 @@ transform.sequence failures(propagate) {
%if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
%warp = transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
: (!transform.any_op) -> !transform.any_op
transform.iree.vector.warp_distribute %end_func
transform.iree.vector.warp_distribute %end_func : (!transform.any_op) -> ()
}
2 changes: 1 addition & 1 deletion third_party/llvm-project
