Skip to content

Commit cde00c0

Browse files
author
Serguei Katkov
committed
[Loop Peeling] Fix idom detection algorithm.
We'd like to determine the idom of the exit block after peeling one iteration. Let Exit be the exit block. Let ExitingSet be the set of predecessors of the Exit block; they are exiting blocks. Let Latch' and ExitingSet' be the copies of Latch and ExitingSet after peeling. We'd like to find idom'(Exit) - the idom of Exit after peeling. It is evident that idom'(Exit) will be the nearest common dominator of ExitingSet and ExitingSet'. idom(Exit) is the nearest common dominator of ExitingSet. idom(Exit)' is the nearest common dominator of ExitingSet'. Taking into account that we have a single Latch, Latch' will dominate Header and idom(Exit). So idom'(Exit) is the nearest common dominator of idom(Exit)' and Latch'. All these basic blocks are in the same loop, so what we find is (nearest common dominator of idom(Exit) and Latch)'. Reviewers: reames, fhahn Reviewed By: reames Subscribers: hiraditya, zzheng, llvm-commits Differential Revision: https://reviews.llvm.org/D65292 llvm-svn: 367044
1 parent 8b288c7 commit cde00c0

File tree

2 files changed

+66
-1
lines changed

2 files changed

+66
-1
lines changed

llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp

+20-1
Original file line numberDiff line numberDiff line change
@@ -575,11 +575,30 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
575575

576576
DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
577577
if (DT) {
578+
// We'd like to determine the idom of exit block after peeling one
579+
// iteration.
580+
// Let Exit be the exit block.
581+
// Let ExitingSet be the set of predecessors of the Exit block. They are
582+
// exiting blocks.
583+
// Let Latch' and ExitingSet' be their copies after peeling.
584+
// We'd like to find idom'(Exit) - the idom of Exit after peeling.
585+
// It is evident that idom'(Exit) will be the nearest common dominator
586+
// of ExitingSet and ExitingSet'.
587+
// idom(Exit) is the nearest common dominator of ExitingSet.
588+
// idom(Exit)' is the nearest common dominator of ExitingSet'.
589+
// Taking into account that we have a single Latch, Latch' will dominate
590+
// Header and idom(Exit).
591+
// So idom'(Exit) is the nearest common dominator of idom(Exit)' and Latch'.
592+
// All these basic blocks are in the same loop, so what we find is
593+
// (nearest common dominator of idom(Exit) and Latch)'.
594+
// In the loop below we remember the nearest common dominator of idom(Exit)
595+
// and Latch to update the idom of Exit later.
578596
assert(L->hasDedicatedExits() && "No dedicated exits?");
579597
for (auto Edge : ExitEdges) {
580598
if (ExitIDom.count(Edge.second))
581599
continue;
582-
BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock();
600+
BasicBlock *BB = DT->findNearestCommonDominator(
601+
DT->getNode(Edge.second)->getIDom()->getBlock(), Latch);
583602
assert(L->contains(BB) && "IDom is not in a loop");
584603
ExitIDom[Edge.second] = BB;
585604
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
; REQUIRES: asserts
2+
; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-runtime -unroll-peel-multi-deopt-exit 2>&1 | FileCheck %s
3+
; RUN: opt < %s -S -debug-only=loop-unroll -unroll-peel-multi-deopt-exit -passes='require<profile-summary>,function(require<opt-remark-emit>,unroll)' 2>&1 | FileCheck %s
4+
5+
; Regression test for setting the correct idom for exit blocks.
6+
7+
; CHECK: Loop Unroll: F[basic]
8+
; CHECK: PEELING loop %for.body with iteration count 1!
9+
10+
define i32 @basic(i32* %p, i32 %k, i1 %c1, i1 %c2) #0 !prof !3 {
11+
entry:
12+
br label %for.body
13+
14+
for.body:
15+
%i.05 = phi i32 [ 0, %entry ], [ %inc, %latch ]
16+
%p.addr.04 = phi i32* [ %p, %entry ], [ %incdec.ptr, %latch ]
17+
%incdec.ptr = getelementptr inbounds i32, i32* %p.addr.04, i32 1
18+
store i32 %i.05, i32* %p.addr.04, align 4
19+
%inc = add nsw i32 %i.05, 1
20+
%cmp = icmp slt i32 %inc, %k
21+
br i1 %c1, label %left, label %right
22+
23+
left:
24+
br label %latch
25+
26+
right:
27+
br i1 %c1, label %latch, label %side_exit, !prof !2
28+
29+
latch:
30+
br i1 %cmp, label %for.body, label %for.end, !prof !1
31+
32+
for.end:
33+
ret i32 %inc
34+
35+
side_exit:
36+
%rval = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %inc) ]
37+
ret i32 %rval
38+
}
39+
40+
declare i32 @llvm.experimental.deoptimize.i32(...)
41+
42+
attributes #0 = { nounwind }
43+
44+
!1 = !{!"branch_weights", i32 1, i32 1}
45+
!2 = !{!"branch_weights", i32 1, i32 0}
46+
!3 = !{!"function_entry_count", i64 1}

0 commit comments

Comments
 (0)