diff --git a/build_tools/benchmarks/comparisons/setup_mobile.sh b/build_tools/benchmarks/comparisons/setup_mobile.sh index bda6291139d0..f83959509279 100644 --- a/build_tools/benchmarks/comparisons/setup_mobile.sh +++ b/build_tools/benchmarks/comparisons/setup_mobile.sh @@ -121,7 +121,7 @@ for i in $(ls ${ROOT_DIR}/models/tflite/); do --iree-input-type=tosa \ --iree-hal-target-backends=llvm-cpu \ --iree-llvmcpu-target-triple=aarch64-none-linux-android29 \ - --iree-flow-enable-data-tiling \ + --iree-opt-data-tiling \ --iree-llvmcpu-target-cpu-features=+dotprod \ "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" \ "--iree-llvmcpu-enable-pad-consumer-fusion" \ @@ -136,7 +136,7 @@ for i in $(ls ${ROOT_DIR}/models/tflite/); do --iree-input-type=tosa \ --iree-hal-target-backends=llvm-cpu \ --iree-llvmcpu-target-triple=aarch64-none-linux-android29 \ - --iree-flow-enable-data-tiling \ + --iree-opt-data-tiling \ --iree-llvmcpu-target-cpu-features=+dotprod \ "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" \ "--iree-llvmcpu-enable-pad-consumer-fusion" \ diff --git a/build_tools/python/benchmark_suites/iree/armv8_a_benchmarks.py b/build_tools/python/benchmark_suites/iree/armv8_a_benchmarks.py index 32026d4db8c3..1fa699b9f83e 100644 --- a/build_tools/python/benchmark_suites/iree/armv8_a_benchmarks.py +++ b/build_tools/python/benchmark_suites/iree/armv8_a_benchmarks.py @@ -43,7 +43,7 @@ class Android_ARMv8_A_Benchmarks(object): tags=["experimental-flags", "mmt4d"], compile_targets=[ARMV8_A_CPU_TARGET], extra_flags=[ - "--iree-flow-enable-data-tiling", + "--iree-opt-data-tiling", "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops", "--iree-llvmcpu-enable-pad-consumer-fusion", ], @@ -53,7 +53,7 @@ class Android_ARMv8_A_Benchmarks(object): tags=["experimental-flags", "mmt4d", "dotprod"], compile_targets=[ARMV8_A_CPU_TARGET], extra_flags=[ - "--iree-flow-enable-data-tiling", + "--iree-opt-data-tiling", "--iree-llvmcpu-target-cpu-features=+dotprod", 
"--iree-flow-enable-fuse-padding-into-linalg-consumer-ops", "--iree-llvmcpu-enable-pad-consumer-fusion", diff --git a/build_tools/python/benchmark_suites/iree/x86_64_benchmarks.py b/build_tools/python/benchmark_suites/iree/x86_64_benchmarks.py index 1bc2454f32c0..3190cc027397 100644 --- a/build_tools/python/benchmark_suites/iree/x86_64_benchmarks.py +++ b/build_tools/python/benchmark_suites/iree/x86_64_benchmarks.py @@ -33,7 +33,7 @@ class Linux_x86_64_Benchmarks(object): tags=["experimental-flags", "data-tiling", "ukernel"], compile_targets=[CASCADELAKE_CPU_TARGET], extra_flags=[ - "--iree-flow-enable-data-tiling", + "--iree-opt-data-tiling", "--iree-llvmcpu-enable-microkernels", ], ) diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodingPass.cpp b/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodingPass.cpp index 782fff7d0db0..539a96d2e1d5 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodingPass.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodingPass.cpp @@ -144,20 +144,33 @@ static MatmulTileParams chooseMatmulTileParams(EncodingUser user, struct CPUMaterializeEncodingPass : public CPUMaterializeEncodingBase { + CPUMaterializeEncodingPass() : targetAttr(nullptr) {} + explicit CPUMaterializeEncodingPass(IREE::HAL::ExecutableTargetAttr attr) + : targetAttr(attr) {} void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); } void runOnOperation() override; + +private: + IREE::HAL::ExecutableTargetAttr targetAttr; }; struct CPUMaterializeUpperBoundTileSizePass : public CPUMaterializeUpperBoundTileSizeBase< CPUMaterializeUpperBoundTileSizePass> { + CPUMaterializeUpperBoundTileSizePass() = default; + explicit CPUMaterializeUpperBoundTileSizePass( + ArrayRef attrs) + : targetAttrs(attrs) {} void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); } void runOnOperation() override; + +private: + SmallVector 
targetAttrs; }; FailureOr @@ -249,7 +262,8 @@ void CPUMaterializeEncodingPass::runOnOperation() { MLIRContext *context = &getContext(); auto operation = getOperation(); RewritePatternSet materializeEncodingPattern(context); - auto targetAttr = ExecutableTargetAttr::lookup(operation); + if (!targetAttr) + targetAttr = ExecutableTargetAttr::lookup(operation); auto materializeEncodingFn = getMaterializeEncodingFn(targetAttr); if (!materializeEncodingFn) { return signalPassFailure(); @@ -283,8 +297,10 @@ void CPUMaterializeEncodingPass::runOnOperation() { void CPUMaterializeUpperBoundTileSizePass::runOnOperation() { MLIRContext *context = &getContext(); auto operation = getOperation(); - auto targetAttrs = - IREE::HAL::DeviceTargetAttr::lookupExecutableTargets(operation); + if (targetAttrs.empty()) { + targetAttrs = + IREE::HAL::DeviceTargetAttr::lookupExecutableTargets(operation); + } RewritePatternSet patterns(context); MaterializeEncodingFn materializeEncodingFn = getUpperBoundMaterializeEncodingFn(targetAttrs); @@ -301,13 +317,14 @@ void CPUMaterializeUpperBoundTileSizePass::runOnOperation() { } std::unique_ptr> -createCPUMaterializeEncodingPass() { - return std::make_unique(); +createCPUMaterializeEncodingPass(IREE::HAL::ExecutableTargetAttr targetAttr) { + return std::make_unique(targetAttr); } std::unique_ptr> -createCPUMaterializeUpperBoundTileSizePass() { - return std::make_unique(); +createCPUMaterializeUpperBoundTileSizePass( + ArrayRef targetAttrs) { + return std::make_unique(targetAttrs); } } // namespace iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.h b/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.h index 0df9d0351769..38bfd22a7e90 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.h +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.h @@ -12,6 +12,7 @@ #ifndef IREE_COMPILER_CODEGEN_COMMON_CPU_PASSES_H_ #define IREE_COMPILER_CODEGEN_COMMON_CPU_PASSES_H_ +#include 
"iree/compiler/Dialect/HAL/IR/HALTypes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Pass/Pass.h" @@ -22,7 +23,8 @@ namespace iree_compiler { /// linalg_ext.set_encoding -> tensor.pack /// linalg_ext.unset_encoding -> tensor.unpack /// linalg.matmul -> linalg.mmt4d -std::unique_ptr> createCPUMaterializeEncodingPass(); +std::unique_ptr> createCPUMaterializeEncodingPass( + IREE::HAL::ExecutableTargetAttr targetAttr = nullptr); /// Like createLLVMCPUMaterializeEncodingPass, but specifically for /// linalg_ext.upper_bound_tile_size, converting it to constants. @@ -40,7 +42,8 @@ std::unique_ptr> createCPUMaterializeEncodingPass(); /// that is the largest tile size that we can use in VMVX, and can be adjusted // as needed. std::unique_ptr> -createCPUMaterializeUpperBoundTileSizePass(); +createCPUMaterializeUpperBoundTileSizePass( + ArrayRef targetAttrs = {}); void registerCodegenCommonCPUPasses(); diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormScalarDispatches.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormScalarDispatches.cpp index 6182b2357864..a686e06336f2 100644 --- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormScalarDispatches.cpp +++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FormScalarDispatches.cpp @@ -62,7 +62,7 @@ static bool isComputeOperation(Operation *op) { } if (op->getDialect() == context->getLoadedDialect()) { return !isa(op); + tensor::ExpandShapeOp, tensor::PackOp, tensor::UnPackOp>(op); } return false; } diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.cpp index 40395b1b8cd1..744a048315c8 100644 --- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.cpp +++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.cpp @@ -79,11 +79,6 @@ static llvm::cl::opt clDispatchGenerateWorkloadRegion( "iree-flow-dispatch-generate-workload-region", llvm::cl::desc("Generate the workload region."), 
llvm::cl::init(true)); -static llvm::cl::opt - clEnableDataTiling("iree-flow-enable-data-tiling", - llvm::cl::desc("Enable data tiling path."), - llvm::cl::init(false)); - static llvm::cl::opt clNormalizeInputIndexingMap( "iree-flow-normalize-input-indexing-map", llvm::cl::desc("Enable normalizing input indexing map to identity."), @@ -154,8 +149,6 @@ void buildFlowTransformPassPipeline(OpPassManager &passManager, // transpose. .addPredicatedPass(clNormalizeInputIndexingMap, createInterchangeTransposeGenericOpsPass) - // Enable data tiling after all linalg level transformations. - .addPredicatedPass(clEnableDataTiling, createSetEncodingPass) //////////////////////////////////////////////////////////////////////// // Dispatch region formation. .addPredicatedPass(!clDispatchTransformFileName.empty(), diff --git a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp index 2abc28762396..abc6758062a4 100644 --- a/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp +++ b/compiler/src/iree/compiler/Dialect/HAL/Transforms/Passes.cpp @@ -227,8 +227,9 @@ void buildHALTransformPassPipeline(OpPassManager &passManager, // Executable translation //---------------------------------------------------------------------------- - FunctionLikeNest(passManager) - .addPass(createCPUMaterializeUpperBoundTileSizePass); + FunctionLikeNest(passManager).addPass([]() { + return createCPUMaterializeUpperBoundTileSizePass(); + }); // Preprocess executables using an external tool. The tool may mutate one or // more variants and even insert or remove variants. 
diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/BUILD.bazel b/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/BUILD.bazel index 47bcaa17ab19..ff2b78327e45 100644 --- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/BUILD.bazel +++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/BUILD.bazel @@ -25,6 +25,7 @@ iree_compiler_cc_library( deps = [ "//compiler/src/iree/compiler/Dialect/Util/Analysis", "//compiler/src/iree/compiler/Dialect/Util/IR", + "//llvm-external-projects/iree-dialects:IREELinalgExtDialect", "@llvm-project//llvm:Support", "@llvm-project//mlir:ArithDialect", "@llvm-project//mlir:FuncDialect", diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/CMakeLists.txt index 9fe654f320ef..7da6315a1dfc 100644 --- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/CMakeLists.txt +++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/CMakeLists.txt @@ -20,6 +20,7 @@ iree_cc_library( "ConstExpr.cpp" "OpOracle.cpp" DEPS + IREELinalgExtDialect LLVMSupport MLIRArithDialect MLIRFuncDialect diff --git a/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/OpOracle.cpp b/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/OpOracle.cpp index bf37adebc7b2..5a382cca4d5d 100644 --- a/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/OpOracle.cpp +++ b/compiler/src/iree/compiler/Dialect/Util/Analysis/Constant/OpOracle.cpp @@ -6,6 +6,7 @@ #include "iree/compiler/Dialect/Util/Analysis/Constant/OpOracle.h" +#include "iree-dialects/Dialect/LinalgExt/IR/LinalgExtOps.h" #include "iree/compiler/Dialect/Util/IR/UtilDialect.h" #include "iree/compiler/Dialect/Util/IR/UtilOps.h" #include "llvm/ADT/SmallPtrSet.h" @@ -100,6 +101,13 @@ ConstExprOpInfo ConstExprOpInfo::getForOp(Operation *op) { return {}; } + // Target-dependent ops are not const-expr. + // TODO(#14887): Use trait/interface instead. 
+ if (isa(op)) { + return {}; + } + // By default, ops without results are not const-expr. if (op->getNumResults() == 0) { return {}; diff --git a/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel b/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel index 53b57954f38e..e751347ce2b6 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel +++ b/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel @@ -4,7 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -load("//build_tools/bazel:build_defs.oss.bzl", "iree_compiler_cc_library") +load("//build_tools/bazel:build_defs.oss.bzl", "iree_compiler_cc_library", "iree_gentbl_cc_library") package( default_visibility = ["//visibility:public"], @@ -12,16 +12,51 @@ package( licenses = ["notice"], # Apache 2.0 ) +iree_gentbl_cc_library( + name = "PassesIncGen", + tbl_outs = [ + ( + ["--gen-pass-decls"], + "Passes.h.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "Passes.td", + deps = [ + "@llvm-project//mlir:PassBaseTdFiles", + ], +) + +iree_compiler_cc_library( + name = "PassHeaders", + hdrs = [ + "PassDetail.h", + "Passes.h", + "Passes.h.inc", + ], + deps = [ + ":PassesIncGen", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Transforms", + ], +) + iree_compiler_cc_library( name = "GlobalOptimization", srcs = [ + "MaterializeHomogeneousEncodings.cpp", "Passes.cpp", ], hdrs = [ "Passes.h", ], deps = [ + ":PassHeaders", + ":PassesIncGen", + "//compiler/src/iree/compiler/Codegen/Common/CPU:CommonCPUPasses", "//compiler/src/iree/compiler/Dialect/Flow/Transforms", + "//compiler/src/iree/compiler/Dialect/HAL/IR", + "//compiler/src/iree/compiler/Dialect/HAL/IR:HALDialect", "//compiler/src/iree/compiler/Dialect/Util/Transforms", "//compiler/src/iree/compiler/Pipelines:Options", "//compiler/src/iree/compiler/Utils", diff --git a/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt 
b/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt index 1679b24443dc..7921d385d613 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt +++ b/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt @@ -10,14 +10,40 @@ iree_add_all_subdirs() +iree_tablegen_library( + NAME + PassesIncGen + TD_FILE + "Passes.td" + OUTS + --gen-pass-decls Passes.h.inc +) + +iree_cc_library( + NAME + PassHeaders + HDRS + "PassDetail.h" + "Passes.h" + "Passes.h.inc" + DEPS + ::PassesIncGen + MLIRPass + MLIRTransforms + PUBLIC +) + iree_cc_library( NAME GlobalOptimization HDRS "Passes.h" SRCS + "MaterializeHomogeneousEncodings.cpp" "Passes.cpp" DEPS + ::PassHeaders + ::PassesIncGen LLVMSupport MLIRFuncDialect MLIRIR @@ -25,7 +51,10 @@ iree_cc_library( MLIRMemRefTransforms MLIRPass MLIRTransforms + iree::compiler::Codegen::Common::CPU::CommonCPUPasses iree::compiler::Dialect::Flow::Transforms + iree::compiler::Dialect::HAL::IR + iree::compiler::Dialect::HAL::IR::HALDialect iree::compiler::Dialect::Util::Transforms iree::compiler::Pipelines::Options iree::compiler::Utils diff --git a/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp b/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp new file mode 100644 index 000000000000..1ed56ef86b2c --- /dev/null +++ b/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp @@ -0,0 +1,75 @@ +// Copyright 2023 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "iree/compiler/Codegen/Common/CPU/Passes.h" +#include "iree/compiler/Dialect/HAL/IR/HALDialect.h" +#include "iree/compiler/Dialect/HAL/IR/HALOps.h" +#include "iree/compiler/GlobalOptimization/PassDetail.h" +#include "iree/compiler/GlobalOptimization/Passes.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/Pass/Pass.h" + +namespace mlir { +namespace iree_compiler { +namespace GlobalOptimization { + +class MaterializeHomogeneousEncodingsPass + : public MaterializeHomogeneousEncodingsBase< + MaterializeHomogeneousEncodingsPass> { +public: + MaterializeHomogeneousEncodingsPass() = default; + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } + + void runOnOperation() override { + auto moduleOp = getOperation(); + auto targetsAttr = moduleOp->getAttrOfType("hal.device.targets"); + if (!targetsAttr || targetsAttr.size() != 1) { + return; + } + auto deviceTarget = cast(targetsAttr[0]); + SmallVector executableTargets = + deviceTarget.getExecutableTargets(); + if (executableTargets.size() != 1) { + return; + } + // TODO(hanchung): Move *CPUMaterialize* methods to Codegen/Common. They + // could be generalized to other backends (by looking into something like + // ExecutableTarget things). Only llvm-cpu backends handle encodings for + // now. 
+ auto executableTarget = executableTargets[0]; + if (executableTarget.getBackend() != "llvm-cpu") { + return; + } + + OpPassManager passManager(moduleOp.getOperationName()); + passManager.addNestedPass( + createCPUMaterializeUpperBoundTileSizePass(executableTargets)); + passManager.addNestedPass( + createCPUMaterializeEncodingPass(executableTarget)); + + if (failed(runPipeline(passManager, moduleOp))) { + return signalPassFailure(); + } + } +}; + +std::unique_ptr> +createMaterializeHomogeneousEncodingsPass() { + return std::make_unique(); +} + +} // namespace GlobalOptimization +} // namespace iree_compiler +} // namespace mlir diff --git a/compiler/src/iree/compiler/GlobalOptimization/PassDetail.h b/compiler/src/iree/compiler/GlobalOptimization/PassDetail.h new file mode 100644 index 000000000000..d6f91d290004 --- /dev/null +++ b/compiler/src/iree/compiler/GlobalOptimization/PassDetail.h @@ -0,0 +1,24 @@ +// Copyright 2023 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef IREE_COMPILER_GLOBALOPTIMIZATION_PASSDETAIL_H_ +#define IREE_COMPILER_GLOBALOPTIMIZATION_PASSDETAIL_H_ + +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Pass/Pass.h" + +namespace mlir { +namespace iree_compiler { +namespace GlobalOptimization { + +#define GEN_PASS_CLASSES +#include "iree/compiler/GlobalOptimization/Passes.h.inc" + +} // namespace GlobalOptimization +} // namespace iree_compiler +} // namespace mlir + +#endif // IREE_COMPILER_GLOBALOPTIMIZATION_PASSDETAIL_H_ diff --git a/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp b/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp index 8ba124bb6773..e85a3141588c 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp +++ b/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp @@ -64,8 +64,12 @@ void buildGlobalOptimizationPassPipeline( .addPass(IREE::Flow::createGeneralizeLinalgNamedOpsPass) .addPass(IREE::Flow::createFuseDequantizationMatmulPass) .addPass(IREE::Flow::createFoldUnitExtentDimsPass) + // Enable data tiling after they are in a canonical form. + .addPredicatedPass(transformOptions.options.dataTiling, + IREE::Flow::createSetEncodingPass) .addPass(mlir::createCanonicalizerPass) .addPass(mlir::createCSEPass); + mainPassManager.addPass(createMaterializeHomogeneousEncodingsPass()); OpPassManager pipeline(ModuleOp::getOperationName()); FunctionLikeNest(pipeline) @@ -73,8 +77,8 @@ void buildGlobalOptimizationPassPipeline( // region formation as redundant store-loads are removed. .addPass(IREE::Util::createSimplifyGlobalAccessesPass); - // Module level cleanup and canonicalization of util.global (and other util - // ops). + // Module level cleanup and canonicalization of util.global (and other + // util ops). 
pipeline.addPass(IREE::Util::createApplyPatternsPass()); pipeline.addPass(IREE::Util::createFoldGlobalsPass()); pipeline.addPass(IREE::Util::createIPOPass()); @@ -109,7 +113,14 @@ void buildGlobalOptimizationPassPipeline( } } +namespace { +#define GEN_PASS_REGISTRATION +#include "iree/compiler/GlobalOptimization/Passes.h.inc" // IWYU pragma: export +} // namespace + void registerGlobalOptimizationPipeline() { + registerPasses(); + PassPipelineRegistration globalOptimizationTransformPassPipeline( "iree-global-optimization-transformation-pipeline", diff --git a/compiler/src/iree/compiler/GlobalOptimization/Passes.h b/compiler/src/iree/compiler/GlobalOptimization/Passes.h index b9256f668627..65552c50120b 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/Passes.h +++ b/compiler/src/iree/compiler/GlobalOptimization/Passes.h @@ -10,6 +10,7 @@ #include #include "iree/compiler/Pipelines/Options.h" +#include "mlir/IR/BuiltinOps.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" @@ -36,6 +37,11 @@ struct TransformOptions : public PassPipelineOptions { void buildGlobalOptimizationPassPipeline( OpPassManager &mainPassManager, const TransformOptions &transformOptions); +// Materializes logical encodings to physical encodings if there is a single +// device target. +std::unique_ptr> +createMaterializeHomogeneousEncodingsPass(); + void registerGlobalOptimizationPipeline(); } // namespace GlobalOptimization diff --git a/compiler/src/iree/compiler/GlobalOptimization/Passes.td b/compiler/src/iree/compiler/GlobalOptimization/Passes.td new file mode 100644 index 000000000000..e07e4e83de73 --- /dev/null +++ b/compiler/src/iree/compiler/GlobalOptimization/Passes.td @@ -0,0 +1,19 @@ +// Copyright 2023 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef IREE_COMPILER_GLOBALOPTIMIZATION_PASSES +#define IREE_COMPILER_GLOBALOPTIMIZATION_PASSES + +include "mlir/Pass/PassBase.td" + +def MaterializeHomogeneousEncodings : + Pass<"iree-global-opt-materialize-homogeneous-encodings", "mlir::ModuleOp"> { + let summary = "Materializes logical encodings to physical encodings if there is a single device target."; + let constructor = + "mlir::iree_compiler::GlobalOptimization::createMaterializeHomogeneousEncodingsPass()"; +} + +#endif // IREE_COMPILER_GLOBALOPTIMIZATION_PASSES diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/BUILD.bazel b/compiler/src/iree/compiler/GlobalOptimization/test/BUILD.bazel index 01a1d9034700..6c042726b245 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/test/BUILD.bazel +++ b/compiler/src/iree/compiler/GlobalOptimization/test/BUILD.bazel @@ -17,6 +17,7 @@ iree_lit_test_suite( srcs = enforce_glob( [ "transformation_pipeline.mlir", + "materialize_homogeneous_encodings.mlir", ], include = ["*.mlir"], ), diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/CMakeLists.txt b/compiler/src/iree/compiler/GlobalOptimization/test/CMakeLists.txt index 415559577a7c..1924cd4a4513 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/test/CMakeLists.txt +++ b/compiler/src/iree/compiler/GlobalOptimization/test/CMakeLists.txt @@ -14,6 +14,7 @@ iree_lit_test_suite( NAME lit SRCS + "materialize_homogeneous_encodings.mlir" "transformation_pipeline.mlir" TOOLS FileCheck diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/materialize_homogeneous_encodings.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/materialize_homogeneous_encodings.mlir new file mode 100644 index 000000000000..de11381fc692 --- /dev/null +++ b/compiler/src/iree/compiler/GlobalOptimization/test/materialize_homogeneous_encodings.mlir @@ -0,0 +1,57 @@ +// RUN: iree-opt --split-input-file 
--iree-global-opt-materialize-homogeneous-encodings %s | FileCheck %s + +#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f"}> +#map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)> +#device_target_llvm_cpu = #hal.device.target<"llvm-cpu", {executable_targets = [#executable_target_embedded_elf_x86_64_]}> +module attributes {hal.device.targets = [#device_target_llvm_cpu]} { + func.func @lhs_encoding(%arg0: tensor) -> tensor { + %cst = arith.constant 0.000000e+00 : f32 + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %dim = tensor.dim %arg0, %c0 : tensor + %dim_0 = tensor.dim %arg0, %c1 : tensor + %0:2 = iree_linalg_ext.upper_bound_tile_size tensor> -> index, index + %1 = affine.apply #map()[%0#0, %dim] + %2 = affine.apply #map()[%0#1, %dim_0] + %padded = tensor.pad %arg0 low[0, 0] high[%1, %2] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %cst : f32 + } : tensor to tensor + %3 = iree_linalg_ext.set_encoding %padded : tensor -> tensor> + %4 = iree_linalg_ext.unset_encoding %3 : tensor> -> tensor + return %4 : tensor + } +} +// CHECK-LABEL: func.func @lhs_encoding +// CHECK: tensor.pack +// CHECK: tensor.unpack + +// ----- + +#executable_target_vulkan_spirv_fb = #hal.executable.target<"vulkan", "vulkan-spirv-fb"> +#map = affine_map<()[s0, s1] -> (-s1 + (s1 ceildiv s0) * s0)> +#device_target_vulkan = #hal.device.target<"vulkan", {executable_targets = [#executable_target_vulkan_spirv_fb], legacy_sync}> +module attributes {hal.device.targets = [#device_target_vulkan]} { + func.func @lhs_encoding(%arg0: tensor) -> tensor { + %cst = arith.constant 0.000000e+00 : f32 + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %dim = tensor.dim %arg0, %c0 : tensor + %dim_0 = tensor.dim %arg0, %c1 : tensor + %0:2 = iree_linalg_ext.upper_bound_tile_size tensor> -> index, index + %1 = affine.apply #map()[%0#0, %dim] + %2 = affine.apply #map()[%0#1, %dim_0] + 
%padded = tensor.pad %arg0 low[0, 0] high[%1, %2] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %cst : f32 + } : tensor to tensor + %3 = iree_linalg_ext.set_encoding %padded : tensor -> tensor> + %4 = iree_linalg_ext.unset_encoding %3 : tensor> -> tensor + return %4 : tensor + } +} + +// vulkan does not implement buildMaterializeEncodingsPassPipeline method. +// CHECK-LABEL: func.func @lhs_encoding +// CHECK: iree_linalg_ext.upper_bound_tile_size +// CHECK: iree_linalg_ext.set_encoding diff --git a/compiler/src/iree/compiler/Pipelines/Options.cpp b/compiler/src/iree/compiler/Pipelines/Options.cpp index 0dc7f0b3c8e8..af416c669f49 100644 --- a/compiler/src/iree/compiler/Pipelines/Options.cpp +++ b/compiler/src/iree/compiler/Pipelines/Options.cpp @@ -128,6 +128,10 @@ void GlobalOptimizationOptions::bindOptions(OptionsBinder &binder) { "unconditionally before main global optimizations."), llvm::cl::cat(category)); + binder.opt("iree-opt-data-tiling", dataTiling, + llvm::cl::desc("Enables data tiling path."), + llvm::cl::cat(category)); + binder.opt( "iree-opt-const-eval", constEval, llvm::cl::desc("Enables eager evaluation of constants using the full " diff --git a/compiler/src/iree/compiler/Pipelines/Options.h b/compiler/src/iree/compiler/Pipelines/Options.h index e2149d57c161..2d27e2f8db5c 100644 --- a/compiler/src/iree/compiler/Pipelines/Options.h +++ b/compiler/src/iree/compiler/Pipelines/Options.h @@ -83,6 +83,9 @@ struct GlobalOptimizationOptions { bool promoteBF16ToF32 = false; bool demoteI64ToI32 = false; + // Enables data tiling. + bool dataTiling = false; + // Enables const-expr hoisting into globals. 
bool constExprHoisting = true; diff --git a/tests/e2e/matmul/BUILD.bazel b/tests/e2e/matmul/BUILD.bazel index 184e76ea1526..a5df91b6a8ac 100644 --- a/tests/e2e/matmul/BUILD.bazel +++ b/tests/e2e/matmul/BUILD.bazel @@ -38,7 +38,7 @@ py_binary( [iree_generated_trace_runner_test( name = "e2e_matmul_mmt4d_%s_small" % lhs_rhs_type, compiler_flags = [ - "--iree-flow-enable-data-tiling", + "--iree-opt-data-tiling", ], generator = ":generate_e2e_matmul_tests", generator_args = [ @@ -62,7 +62,7 @@ py_binary( [iree_generated_trace_runner_test( name = "e2e_matmul_mmt4d_%s_large" % lhs_rhs_type, compiler_flags = [ - "--iree-flow-enable-data-tiling", + "--iree-opt-data-tiling", ], generator = ":generate_e2e_matmul_tests", generator_args = [ @@ -96,7 +96,7 @@ py_binary( name = "e2e_matmul_mmt4d_%s_intrinsics_%s" % (lhs_rhs_type, size), compiler_flags = [ "--iree-codegen-mmt4d-use-intrinsics", - "--iree-flow-enable-data-tiling", + "--iree-opt-data-tiling", ], generator = ":generate_e2e_matmul_tests", generator_args = [ @@ -145,7 +145,7 @@ py_binary( name = "e2e_matmul_mmt4d_%s_small_vmvx_ukernel" % lhs_rhs_type, compiler_flags = [ "--iree-vmvx-enable-microkernels", - "--iree-flow-enable-data-tiling", + "--iree-opt-data-tiling", ], generator = ":generate_e2e_matmul_tests", generator_args = [ @@ -184,7 +184,7 @@ X86_64_AVX512_VNNI = X86_64_AVX512_BASE + [ name = "e2e_matmul_mmt4d_%s_%s_ukernel" % (lhs_rhs_type, size), compiler_flags = [ "--iree-llvmcpu-enable-microkernels", - "--iree-flow-enable-data-tiling", + "--iree-opt-data-tiling", ], generator = ":generate_e2e_matmul_tests", generator_args = [ diff --git a/tests/e2e/matmul/CMakeLists.txt b/tests/e2e/matmul/CMakeLists.txt index c8636dc5a463..6f23016de3e4 100644 --- a/tests/e2e/matmul/CMakeLists.txt +++ b/tests/e2e/matmul/CMakeLists.txt @@ -57,7 +57,7 @@ iree_generated_trace_runner_test( DRIVERS "local-task" COMPILER_FLAGS - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" TARGET_CPU_FEATURES_VARIANTS "default" 
"arm_64:dotprod:+dotprod" @@ -79,7 +79,7 @@ iree_generated_trace_runner_test( DRIVERS "local-task" COMPILER_FLAGS - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" TARGET_CPU_FEATURES_VARIANTS "default" ) @@ -99,7 +99,7 @@ iree_generated_trace_runner_test( DRIVERS "local-task" COMPILER_FLAGS - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" LABELS "noriscv" "noasan" @@ -125,7 +125,7 @@ iree_generated_trace_runner_test( DRIVERS "local-task" COMPILER_FLAGS - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" LABELS "noriscv" "noasan" @@ -150,7 +150,7 @@ iree_generated_trace_runner_test( "local-task" COMPILER_FLAGS "--iree-codegen-mmt4d-use-intrinsics" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" TARGET_CPU_FEATURES_VARIANTS "default" "arm_64:dotprod:+dotprod" @@ -173,7 +173,7 @@ iree_generated_trace_runner_test( "local-task" COMPILER_FLAGS "--iree-codegen-mmt4d-use-intrinsics" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" TARGET_CPU_FEATURES_VARIANTS "default" ) @@ -230,7 +230,7 @@ iree_generated_trace_runner_test( "local-task" COMPILER_FLAGS "--iree-vmvx-enable-microkernels" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" ) iree_generated_trace_runner_test( @@ -249,7 +249,7 @@ iree_generated_trace_runner_test( "local-task" COMPILER_FLAGS "--iree-vmvx-enable-microkernels" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" ) iree_generated_trace_runner_test( @@ -268,7 +268,7 @@ iree_generated_trace_runner_test( "local-task" COMPILER_FLAGS "--iree-llvmcpu-enable-microkernels" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" LABELS TARGET_CPU_FEATURES_VARIANTS @@ -296,7 +296,7 @@ iree_generated_trace_runner_test( "local-task" COMPILER_FLAGS "--iree-llvmcpu-enable-microkernels" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" LABELS "noriscv" "noasan" @@ -326,7 +326,7 @@ iree_generated_trace_runner_test( "local-task" COMPILER_FLAGS 
"--iree-llvmcpu-enable-microkernels" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" LABELS TARGET_CPU_FEATURES_VARIANTS @@ -351,7 +351,7 @@ iree_generated_trace_runner_test( "local-task" COMPILER_FLAGS "--iree-llvmcpu-enable-microkernels" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" LABELS "noriscv" "noasan" diff --git a/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake b/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake index 66aee96f2a90..97cee6a51c1a 100644 --- a/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake +++ b/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake @@ -313,7 +313,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" FRIENDLY_NAME "PersonDetect_int8(tflite) [x86_64-cascadelake-linux_gnu-llvm_cpu][experimental-flags,data-tiling,ukernel]" PUBLIC @@ -328,7 +328,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" FRIENDLY_NAME "MobileNetV3Small_fp32(tflite) [x86_64-cascadelake-linux_gnu-llvm_cpu][experimental-flags,data-tiling,ukernel]" PUBLIC @@ -343,7 +343,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" FRIENDLY_NAME "DeepLabV3_fp32(tflite) [x86_64-cascadelake-linux_gnu-llvm_cpu][experimental-flags,data-tiling,ukernel]" PUBLIC @@ -358,7 +358,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" 
"--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" FRIENDLY_NAME "EfficientNet_int8(tflite) [x86_64-cascadelake-linux_gnu-llvm_cpu][experimental-flags,data-tiling,ukernel]" PUBLIC @@ -373,7 +373,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" FRIENDLY_NAME "MobileNetV2_fp32(tflite) [x86_64-cascadelake-linux_gnu-llvm_cpu][experimental-flags,data-tiling,ukernel]" PUBLIC @@ -388,7 +388,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" FRIENDLY_NAME "MobileNetV2_int8(tflite) [x86_64-cascadelake-linux_gnu-llvm_cpu][experimental-flags,data-tiling,ukernel]" PUBLIC @@ -403,7 +403,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" FRIENDLY_NAME "MobileSSD_fp32(tflite) [x86_64-cascadelake-linux_gnu-llvm_cpu][experimental-flags,data-tiling,ukernel]" PUBLIC @@ -418,7 +418,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" FRIENDLY_NAME "PoseNet_fp32(tflite) [x86_64-cascadelake-linux_gnu-llvm_cpu][experimental-flags,data-tiling,ukernel]" PUBLIC @@ -433,7 +433,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - 
"--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" FRIENDLY_NAME "MobileBertSquad_fp32(tflite) [x86_64-cascadelake-linux_gnu-llvm_cpu][experimental-flags,data-tiling,ukernel]" PUBLIC @@ -448,7 +448,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" FRIENDLY_NAME "MobileBertSquad_int8(tflite) [x86_64-cascadelake-linux_gnu-llvm_cpu][experimental-flags,data-tiling,ukernel]" PUBLIC @@ -463,7 +463,7 @@ iree_bytecode_module( "--iree-input-type=stablehlo" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" FRIENDLY_NAME "EfficientNetV2STF(stablehlo) [x86_64-cascadelake-linux_gnu-llvm_cpu][experimental-flags,data-tiling,ukernel]" PUBLIC @@ -478,7 +478,7 @@ iree_bytecode_module( "--iree-input-type=stablehlo" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" FRIENDLY_NAME "MiniLML12H384Uncased(stablehlo) [x86_64-cascadelake-linux_gnu-llvm_cpu][experimental-flags,data-tiling,ukernel]" PUBLIC @@ -493,7 +493,7 @@ iree_bytecode_module( "--iree-input-type=none" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" FRIENDLY_NAME "EfficientNetV2SPT(linalg) [x86_64-cascadelake-linux_gnu-llvm_cpu][experimental-flags,data-tiling,ukernel]" PUBLIC @@ -508,7 +508,7 @@ iree_bytecode_module( "--iree-input-type=stablehlo" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - 
"--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" FRIENDLY_NAME "BertLargeTF(stablehlo) [x86_64-cascadelake-linux_gnu-llvm_cpu][experimental-flags,data-tiling,ukernel]" PUBLIC @@ -523,7 +523,7 @@ iree_bytecode_module( "--iree-input-type=none" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" FRIENDLY_NAME "EfficientNetB7PT(linalg) [x86_64-cascadelake-linux_gnu-llvm_cpu][experimental-flags,data-tiling,ukernel]" PUBLIC @@ -1752,7 +1752,7 @@ iree_bytecode_module( "--iree-hal-target-backends=llvm-cpu" "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=aarch64-none-linux-android29" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" "--iree-llvmcpu-enable-pad-consumer-fusion" FRIENDLY_NAME "DeepLabV3_fp32(tflite) [armv8.2-a-generic-linux_android29-llvm_cpu][experimental-flags,mmt4d]" @@ -1767,7 +1767,7 @@ iree_bytecode_module( "--iree-hal-target-backends=llvm-cpu" "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=aarch64-none-linux-android29" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" "--iree-llvmcpu-enable-pad-consumer-fusion" FRIENDLY_NAME "MobileSSD_fp32(tflite) [armv8.2-a-generic-linux_android29-llvm_cpu][experimental-flags,mmt4d]" @@ -1782,7 +1782,7 @@ iree_bytecode_module( "--iree-hal-target-backends=llvm-cpu" "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=aarch64-none-linux-android29" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" "--iree-llvmcpu-enable-pad-consumer-fusion" FRIENDLY_NAME "PoseNet_fp32(tflite) [armv8.2-a-generic-linux_android29-llvm_cpu][experimental-flags,mmt4d]" @@ -1797,7 +1797,7 @@ iree_bytecode_module( 
"--iree-hal-target-backends=llvm-cpu" "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=aarch64-none-linux-android29" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" "--iree-llvmcpu-enable-pad-consumer-fusion" FRIENDLY_NAME "MobileBertSquad_fp32(tflite) [armv8.2-a-generic-linux_android29-llvm_cpu][experimental-flags,mmt4d]" @@ -1812,7 +1812,7 @@ iree_bytecode_module( "--iree-hal-target-backends=llvm-cpu" "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=aarch64-none-linux-android29" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" "--iree-llvmcpu-enable-pad-consumer-fusion" FRIENDLY_NAME "MobileNetV2_fp32(tflite) [armv8.2-a-generic-linux_android29-llvm_cpu][experimental-flags,mmt4d]" @@ -1827,7 +1827,7 @@ iree_bytecode_module( "--iree-hal-target-backends=llvm-cpu" "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=aarch64-none-linux-android29" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" "--iree-llvmcpu-enable-pad-consumer-fusion" FRIENDLY_NAME "MobileNetV3Small_fp32(tflite) [armv8.2-a-generic-linux_android29-llvm_cpu][experimental-flags,mmt4d]" @@ -1842,7 +1842,7 @@ iree_bytecode_module( "--iree-hal-target-backends=llvm-cpu" "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=aarch64-none-linux-android29" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-target-cpu-features=+dotprod" "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" "--iree-llvmcpu-enable-pad-consumer-fusion" @@ -2866,7 +2866,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" "--iree-vm-emit-polyglot-zip=true" 
"--iree-llvmcpu-debug-symbols=false" @@ -2885,7 +2885,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" "--iree-vm-emit-polyglot-zip=true" "--iree-llvmcpu-debug-symbols=false" @@ -2904,7 +2904,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" "--iree-vm-emit-polyglot-zip=true" "--iree-llvmcpu-debug-symbols=false" @@ -2923,7 +2923,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" "--iree-vm-emit-polyglot-zip=true" "--iree-llvmcpu-debug-symbols=false" @@ -2942,7 +2942,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" "--iree-vm-emit-polyglot-zip=true" "--iree-llvmcpu-debug-symbols=false" @@ -2961,7 +2961,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" "--iree-vm-emit-polyglot-zip=true" "--iree-llvmcpu-debug-symbols=false" @@ -2980,7 +2980,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" 
"--iree-vm-emit-polyglot-zip=true" "--iree-llvmcpu-debug-symbols=false" @@ -2999,7 +2999,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" "--iree-vm-emit-polyglot-zip=true" "--iree-llvmcpu-debug-symbols=false" @@ -3018,7 +3018,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" "--iree-vm-emit-polyglot-zip=true" "--iree-llvmcpu-debug-symbols=false" @@ -3037,7 +3037,7 @@ iree_bytecode_module( "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" "--iree-vm-emit-polyglot-zip=true" "--iree-llvmcpu-debug-symbols=false" @@ -3056,7 +3056,7 @@ iree_bytecode_module( "--iree-input-type=stablehlo" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" "--iree-vm-emit-polyglot-zip=true" "--iree-llvmcpu-debug-symbols=false" @@ -3075,7 +3075,7 @@ iree_bytecode_module( "--iree-input-type=stablehlo" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" "--iree-vm-emit-polyglot-zip=true" "--iree-llvmcpu-debug-symbols=false" @@ -3094,7 +3094,7 @@ iree_bytecode_module( "--iree-input-type=none" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" 
"--iree-llvmcpu-enable-microkernels" "--iree-vm-emit-polyglot-zip=true" "--iree-llvmcpu-debug-symbols=false" @@ -3113,7 +3113,7 @@ iree_bytecode_module( "--iree-input-type=stablehlo" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" "--iree-vm-emit-polyglot-zip=true" "--iree-llvmcpu-debug-symbols=false" @@ -3132,7 +3132,7 @@ iree_bytecode_module( "--iree-input-type=none" "--iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu" "--iree-llvmcpu-target-cpu=cascadelake" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-enable-microkernels" "--iree-vm-emit-polyglot-zip=true" "--iree-llvmcpu-debug-symbols=false" @@ -4745,7 +4745,7 @@ iree_bytecode_module( "--iree-hal-target-backends=llvm-cpu" "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=aarch64-none-linux-android29" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" "--iree-llvmcpu-enable-pad-consumer-fusion" "--iree-vm-emit-polyglot-zip=true" @@ -4764,7 +4764,7 @@ iree_bytecode_module( "--iree-hal-target-backends=llvm-cpu" "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=aarch64-none-linux-android29" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" "--iree-llvmcpu-enable-pad-consumer-fusion" "--iree-vm-emit-polyglot-zip=true" @@ -4783,7 +4783,7 @@ iree_bytecode_module( "--iree-hal-target-backends=llvm-cpu" "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=aarch64-none-linux-android29" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" "--iree-llvmcpu-enable-pad-consumer-fusion" "--iree-vm-emit-polyglot-zip=true" @@ -4802,7 +4802,7 @@ iree_bytecode_module( "--iree-hal-target-backends=llvm-cpu" "--iree-input-type=tosa" 
"--iree-llvmcpu-target-triple=aarch64-none-linux-android29" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" "--iree-llvmcpu-enable-pad-consumer-fusion" "--iree-vm-emit-polyglot-zip=true" @@ -4821,7 +4821,7 @@ iree_bytecode_module( "--iree-hal-target-backends=llvm-cpu" "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=aarch64-none-linux-android29" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" "--iree-llvmcpu-enable-pad-consumer-fusion" "--iree-vm-emit-polyglot-zip=true" @@ -4840,7 +4840,7 @@ iree_bytecode_module( "--iree-hal-target-backends=llvm-cpu" "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=aarch64-none-linux-android29" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" "--iree-llvmcpu-enable-pad-consumer-fusion" "--iree-vm-emit-polyglot-zip=true" @@ -4859,7 +4859,7 @@ iree_bytecode_module( "--iree-hal-target-backends=llvm-cpu" "--iree-input-type=tosa" "--iree-llvmcpu-target-triple=aarch64-none-linux-android29" - "--iree-flow-enable-data-tiling" + "--iree-opt-data-tiling" "--iree-llvmcpu-target-cpu-features=+dotprod" "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" "--iree-llvmcpu-enable-pad-consumer-fusion"