[CPU] Add contract fast-math-flag to arith operations (#14551)

This patch adds the `contract` fast-math flag (FMF) to some arithmetic operations so that they can be fused into FMA instructions. We do this by default because matmul ops are already lowered to FMAs by default. We will add different floating-point modes later to give more control over floating-point optimizations, depending on the tolerance to floating-point errors.
iree-org · Aug 7, 2023 · b47ed8c · b47ed8c
1 parent d1d03cb
commit b47ed8c
Show file tree

Hide file tree

Showing 10 changed files with 73 additions and 0 deletions.
diff --git a/compiler/src/iree/compiler/Codegen/Common/AddFastMathFlags.cpp b/compiler/src/iree/compiler/Codegen/Common/AddFastMathFlags.cpp
@@ -0,0 +1,44 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/Codegen/Common/PassDetail.h"
+#include "iree/compiler/Codegen/Common/Passes.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+
+#define DEBUG_TYPE "iree-codegen-add-fast-math-flags"
+
+using namespace mlir;
+using namespace mlir::iree_compiler;
+
+/// Adds the `contract` fast-math flag (FMF) to `op` if it is one of the LLVM
+/// dialect ops listed below (`FMulOp`, `FAddOp`, `FSubOp`, `FNegOp`). Any other
+/// operation is left untouched.
+///
+/// The flag is OR-ed into the flags already present on the op instead of
+/// replacing them, so fast-math flags set earlier are not dropped.
+static void addContractFMF(Operation *op) {
+  TypeSwitch<Operation *>(op)
+      .Case<LLVM::FMulOp, LLVM::FAddOp, LLVM::FSubOp, LLVM::FNegOp>(
+          [](auto llvmOp) {
+            llvmOp.setFastmathFlags(llvmOp.getFastmathFlags() |
+                                    LLVM::FastmathFlags::contract);
+          });
+}
+
+namespace {
+
+/// Pass that walks every operation nested under the anchored function op and
+/// hands it to `addContractFMF`, which attaches the `contract` fast-math flag
+/// to the operations it supports.
+// TODO: Only the `contract` flag is applied for now; selecting flags from a
+// floating-point optimization mode is not implemented yet.
+struct AddFastMathFlagsPass
+    : public AddFastMathFlagsBase<AddFastMathFlagsPass> {
+  using AddFastMathFlagsBase::AddFastMathFlagsBase;
+
+  void runOnOperation() override {
+    auto funcOp = getOperation();
+    funcOp->walk([](Operation *nestedOp) { addContractFMF(nestedOp); });
+  }
+};
+
+} // namespace
+
+/// Factory for `AddFastMathFlagsPass`, returned as a pass anchored on
+/// `LLVM::LLVMFuncOp`.
+std::unique_ptr<OperationPass<LLVM::LLVMFuncOp>>
+mlir::iree_compiler::createAddFastMathFlagsPass() {
+  auto fastMathPass = std::make_unique<AddFastMathFlagsPass>();
+  return fastMathPass;
+}
diff --git a/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel
@@ -143,6 +143,7 @@ iree_compiler_cc_library(
 iree_compiler_cc_library(
     name = "Common",
     srcs = [
+        "AddFastMathFlags.cpp",
         "BubbleUpOrdinalOps.cpp",
         "BufferizationAnalysis.cpp",
         "BufferizeCopyOnlyDispatchesPass.cpp",

diff --git a/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt
@@ -118,6 +118,7 @@ iree_cc_library(
     "Transforms.h"
     "UserConfig.h"
   SRCS
+    "AddFastMathFlags.cpp"
     "BubbleUpOrdinalOps.cpp"
     "BufferizationAnalysis.cpp"
     "BufferizeCopyOnlyDispatchesPass.cpp"

diff --git a/compiler/src/iree/compiler/Codegen/Common/PassDetail.h b/compiler/src/iree/compiler/Codegen/Common/PassDetail.h
@@ -10,6 +10,7 @@
 #include "iree/compiler/Dialect/HAL/IR/HALOps.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
 #include "mlir/Pass/Pass.h"

diff --git a/compiler/src/iree/compiler/Codegen/Common/Passes.h b/compiler/src/iree/compiler/Codegen/Common/Passes.h
@@ -14,6 +14,7 @@
 
 #include "iree/compiler/Codegen/Dialect/IREECodegenAttrs.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {
@@ -37,6 +38,8 @@ void addIREEComprehensiveBufferizePasses(
         std::nullopt,
     std::optional<BufferizationOptions::MemCpyFn> memCpyFn = std::nullopt);
 
+/// Creates a pass, anchored on `LLVM::LLVMFuncOp`, that adds fast-math flags
+/// to the operations supporting them.
+std::unique_ptr<OperationPass<LLVM::LLVMFuncOp>> createAddFastMathFlagsPass();
+
 /// Pass to bubble up ordinal operations to allow workgroup count computation
 /// based on slices to correlate back to workload computation.
 std::unique_ptr<Pass> createBubbleUpOrdinalOpsPass();

diff --git a/compiler/src/iree/compiler/Codegen/Common/Passes.td b/compiler/src/iree/compiler/Codegen/Common/Passes.td
@@ -13,6 +13,13 @@ include "mlir/Pass/PassBase.td"
 // Common passes for all backends (keep alphabetical)
 //===---------------------------------------------------------------------===//
 
+// Declares the `iree-codegen-add-fast-math-flags` pass, which runs on
+// `LLVM::LLVMFuncOp` and is built by `createAddFastMathFlagsPass()`.
+// NOTE(review): the summary mentions a floating-point mode, but no pass option
+// is declared in this record -- confirm whether an option is planned.
+def AddFastMathFlags
+    : Pass<"iree-codegen-add-fast-math-flags", "LLVM::LLVMFuncOp"> {
+  let summary = "Add fast math flags to all the operations supporting them, "
+                "given a floating-point mode.";
+  let constructor = "mlir::iree_compiler::createAddFastMathFlagsPass()";
+}
+
 def BubbleUpOrdinalOps : Pass<"iree-codegen-bubble-up-ordinal-ops", ""> {
   let summary = "Bubbles op ordinal ops to allow for workgroup count computation";
   let constructor = "mlir::iree_compiler::createBubbleUpOrdinalOpsPass()";

diff --git a/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel
@@ -18,6 +18,7 @@ iree_lit_test_suite(
     name = "lit",
     srcs = enforce_glob(
         [
+            "add_fmfs.mlir",
             "affinemin_canonicalization.mlir",
             "batch_matmuls.mlir",
             "bubble_up_ordinal_ops.mlir",

diff --git a/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt
@@ -14,6 +14,7 @@ iree_lit_test_suite(
   NAME
     lit
   SRCS
+    "add_fmfs.mlir"
     "affinemin_canonicalization.mlir"
     "batch_matmuls.mlir"
     "bubble_up_ordinal_ops.mlir"

diff --git a/compiler/src/iree/compiler/Codegen/Common/test/add_fmfs.mlir b/compiler/src/iree/compiler/Codegen/Common/test/add_fmfs.mlir
@@ -0,0 +1,13 @@
+// RUN: iree-opt -iree-codegen-add-fast-math-flags --split-input-file %s | FileCheck %s
+
+// Verifies that the pass attaches the `contract` fast-math flag to the
+// `llvm.fmul` and `llvm.fadd` ops in the function below.
+// CHECK-LABEL: llvm.func @fmfs
+llvm.func @fmfs() -> f32 {
+  %c3 = llvm.mlir.constant(3.000000e+00 : f32) : f32
+  %c6 = llvm.mlir.constant(6.000000e+00 : f32) : f32
+  %mul = llvm.fmul %c3, %c3 : f32
+  %add = llvm.fadd %c3, %c6 : f32
+  llvm.return %add : f32
+}
+
+// CHECK: llvm.fmul %{{.*}}, %{{.*}}  {fastmathFlags = #llvm.fastmath<contract>} : f32
+// CHECK: llvm.fadd %{{.*}}, %{{.*}}  {fastmathFlags = #llvm.fastmath<contract>} : f32
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
@@ -752,6 +752,7 @@ static void addLowerToLLVMPasses(OpPassManager &passManager) {
 
   passManager.addPass(createCanonicalizerPass());
   passManager.addPass(createCSEPass());
+  passManager.addNestedPass<LLVM::LLVMFuncOp>(createAddFastMathFlagsPass());
 }
 
 void buildLLVMCPUCodegenPassPipeline(OpPassManager &passManager) {