Skip to content

Commit

Permalink
[llvm][loop-rotate] Allow forcing loop-rotation (llvm#82828)
Browse files Browse the repository at this point in the history
Many profitable optimizations cannot be performed at -Oz, due to
unrotated loops. While this is worse for size (minimally), many of the
optimizations significantly reduce code size, such as memcpy
optimizations and other patterns found by loop idiom recognition.
Related discussion can be found in issue llvm#50308.

This patch adds an experimental, backend-only flag to allow loop header
duplication, regardless of the optimization level. Downstream consumers
can experiment with this flag, and if it is profitable, we can adjust
the compiler's defaults accordingly, and expose any useful frontend
flags to opt into the new behavior.
  • Loading branch information
ilovepi authored Feb 29, 2024
1 parent 91895f5 commit 2fef685
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 4 deletions.
21 changes: 17 additions & 4 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,15 @@ static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
cl::Hidden,
cl::desc("Enable the LoopFlatten Pass"));

// Experimentally allow loop header duplication. This should allow for better
// optimization at Oz, since loop-idiom recognition can then recognize things
// like memcpy. If this ends up being useful for many targets, we should drop
// this flag and make a code generation option that can be controlled
// independent of the opt level and exposed through the frontend.
static cl::opt<bool> EnableLoopHeaderDuplication(
"enable-loop-header-duplication", cl::init(false), cl::Hidden,
cl::desc("Enable loop header duplication at any optimization level"));

static cl::opt<bool>
EnableDFAJumpThreading("enable-dfa-jump-thread",
cl::desc("Enable DFA jump threading"),
Expand Down Expand Up @@ -630,8 +639,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
/*AllowSpeculation=*/false));

// Disable header duplication in loop rotation at -Oz.
LPM1.addPass(
LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
LPM1.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
Level != OptimizationLevel::Oz,
isLTOPreLink(Phase)));
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
Expand Down Expand Up @@ -812,7 +822,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
// Disable header duplication in loop rotation at -Oz.
MPM.addPass(createModuleToFunctionPassAdaptor(
createFunctionToLoopPassAdaptor(
LoopRotatePass(Level != OptimizationLevel::Oz),
LoopRotatePass(EnableLoopHeaderDuplication ||
Level != OptimizationLevel::Oz),
/*UseMemorySSA=*/false,
/*UseBlockFrequencyInfo=*/false),
PTO.EagerlyInvalidateAnalyses));
Expand Down Expand Up @@ -1422,7 +1433,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
LoopPassManager LPM;
// First rotate loops that may have been un-rotated by prior passes.
// Disable header duplication at -Oz.
LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
LPM.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
Level != OptimizationLevel::Oz,
LTOPreLink));
// Some loops may have become dead by now. Try to delete them.
// FIXME: see discussion in https://reviews.llvm.org/D112851,
// this may need to be revisited once we run GVN before loop deletion
Expand Down
3 changes: 3 additions & 0 deletions llvm/test/Transforms/LoopRotate/oz-disable.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
; RUN: opt < %s -S -passes='default<Os>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
; RUN: opt < %s -S -passes='default<Oz>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OZ

;; Make sure -allow-loop-header-duplication overrides the default behavior at Oz
; RUN: opt < %s -S -passes='default<Oz>' -enable-loop-header-duplication -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS

; Loop should be rotated for -Os but not for -Oz.
; OS: rotating Loop at depth 1
; OZ-NOT: rotating Loop at depth 1
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3

;; Check that -enable-loop-header-duplication at Oz enables certain types of
;; optimizations, for example replacing the loop body w/ a call to memset. If
;; loop idiom recognition begins to recognize unrotated loops, this test will
;; need to be updated.

; RUN: opt -passes='default<Oz>' -S < %s | FileCheck %s --check-prefix=NOROTATION
; RUN: opt -passes='default<Oz>' -S -enable-loop-header-duplication < %s | FileCheck %s --check-prefix=ROTATION
; RUN: opt -passes='default<O2>' -S < %s | FileCheck %s --check-prefix=ROTATION

define void @test(i8* noalias nonnull align 1 %start, i8* %end) unnamed_addr {
; NOROTATION-LABEL: define void @test(
; NOROTATION-SAME: ptr noalias nonnull writeonly align 1 [[START:%.*]], ptr readnone [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] {
; NOROTATION-NEXT: entry:
; NOROTATION-NEXT: br label [[LOOP_HEADER:%.*]]
; NOROTATION: loop.header:
; NOROTATION-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; NOROTATION-NEXT: [[_12_I:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
; NOROTATION-NEXT: br i1 [[_12_I]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; NOROTATION: loop.latch:
; NOROTATION-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1
; NOROTATION-NEXT: store i8 1, ptr [[PTR_IV]], align 1
; NOROTATION-NEXT: br label [[LOOP_HEADER]]
; NOROTATION: exit:
; NOROTATION-NEXT: ret void
;
; ROTATION-LABEL: define void @test(
; ROTATION-SAME: ptr noalias nonnull writeonly align 1 [[START:%.*]], ptr readnone [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] {
; ROTATION-NEXT: entry:
; ROTATION-NEXT: [[_12_I1:%.*]] = icmp eq ptr [[START]], [[END]]
; ROTATION-NEXT: br i1 [[_12_I1]], label [[EXIT:%.*]], label [[LOOP_LATCH_PREHEADER:%.*]]
; ROTATION: loop.latch.preheader:
; ROTATION-NEXT: [[END3:%.*]] = ptrtoint ptr [[END]] to i64
; ROTATION-NEXT: [[START4:%.*]] = ptrtoint ptr [[START]] to i64
; ROTATION-NEXT: [[TMP0:%.*]] = sub i64 [[END3]], [[START4]]
; ROTATION-NEXT: tail call void @llvm.memset.p0.i64(ptr nonnull align 1 [[START]], i8 1, i64 [[TMP0]], i1 false)
; ROTATION-NEXT: br label [[EXIT]]
; ROTATION: exit:
; ROTATION-NEXT: ret void
;
entry:
br label %loop.header

loop.header:
%ptr.iv = phi i8* [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
%_12.i = icmp eq i8* %ptr.iv, %end
br i1 %_12.i, label %exit, label %loop.latch

loop.latch:
%ptr.iv.next = getelementptr inbounds i8, i8* %ptr.iv, i64 1
store i8 1, i8* %ptr.iv, align 1
br label %loop.header

exit:
ret void
}

0 comments on commit 2fef685

Please sign in to comment.