Skip to content

Commit c8016e7

Browse files
author
Serguei Katkov
committed
[Loop Predication] Teach LP about reverse loops with uge and sge latch conditions
Add support for uge and sge latch conditions to Loop Predication for reverse loops. Reviewers: apilipenko, mkazantsev, sanjoy, anna Reviewed By: anna Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D42837 llvm-svn: 324589
1 parent cfc98c2 commit c8016e7

File tree

2 files changed

+113
-5
lines changed

2 files changed

+113
-5
lines changed

llvm/lib/Transforms/Scalar/LoopPredication.cpp

+8-5
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@
155155
// When S = -1 (i.e. reverse iterating loop), the transformation is supported
156156
// when:
157157
// * The loop has a single latch with the condition of the form:
158-
// B(X) = X <pred> latchLimit, where <pred> is u> or s>.
158+
// B(X) = X <pred> latchLimit, where <pred> is u>, u>=, s>, or s>=.
159159
// * The guard condition is of the form
160160
// G(X) = X - 1 u< guardLimit
161161
//
@@ -171,6 +171,10 @@
171171
// guardStart u< guardLimit && latchLimit u>= 1.
172172
// Similarly for sgt condition the widened condition is:
173173
// guardStart u< guardLimit && latchLimit s>= 1.
174+
// For uge condition the widened condition is:
175+
// guardStart u< guardLimit && latchLimit u> 1.
176+
// For sge condition the widened condition is:
177+
// guardStart u< guardLimit && latchLimit s> 1.
174178
//===----------------------------------------------------------------------===//
175179

176180
#include "llvm/Transforms/Scalar/LoopPredication.h"
@@ -485,9 +489,7 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
485489
// latchLimit <pred> 1.
486490
// See the header comment for reasoning of the checks.
487491
Instruction *InsertAt = Preheader->getTerminator();
488-
auto LimitCheckPred = ICmpInst::isSigned(LatchCheck.Pred)
489-
? ICmpInst::ICMP_SGE
490-
: ICmpInst::ICMP_UGE;
492+
auto LimitCheckPred = getLatchPredicateForGuard(LatchCheck.Pred);
491493
auto *FirstIterationCheck = expandCheck(Expander, Builder, ICmpInst::ICMP_ULT,
492494
GuardStart, GuardLimit, InsertAt);
493495
auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred, LatchLimit,
@@ -671,7 +673,8 @@ Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
671673
Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE;
672674
} else {
673675
assert(Step->isAllOnesValue() && "Step should be -1!");
674-
return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT;
676+
return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT &&
677+
Pred != ICmpInst::ICMP_UGE && Pred != ICmpInst::ICMP_SGE;
675678
}
676679
};
677680

llvm/test/Transforms/LoopPredication/reverse.ll

+105
Original file line numberDiff line numberDiff line change
@@ -138,3 +138,108 @@ exit:
138138
%result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
139139
ret i32 %result
140140
}
141+
142+
define i32 @signed_reverse_loop_n_to_lower_limit_equal(i32* %array, i32 %length, i32 %n, i32 %lowerlimit) {
143+
; CHECK-LABEL: @signed_reverse_loop_n_to_lower_limit_equal(
144+
entry:
145+
%tmp5 = icmp eq i32 %n, 0
146+
br i1 %tmp5, label %exit, label %loop.preheader
147+
148+
; CHECK: loop.preheader:
149+
; CHECK-NEXT: [[range_start:%.*]] = add i32 %n, -1
150+
; CHECK-NEXT: [[first_iteration_check:%.*]] = icmp ult i32 [[range_start]], %length
151+
; CHECK-NEXT: [[no_wrap_check:%.*]] = icmp sgt i32 %lowerlimit, 1
152+
; CHECK-NEXT: [[wide_cond:%.*]] = and i1 [[first_iteration_check]], [[no_wrap_check]]
153+
loop.preheader:
154+
br label %loop
155+
156+
; CHECK: loop:
157+
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
158+
loop:
159+
%loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
160+
%i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ]
161+
%i.next = add nsw i32 %i, -1
162+
%within.bounds = icmp ult i32 %i.next, %length
163+
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
164+
%i.i64 = zext i32 %i.next to i64
165+
%array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
166+
%array.i = load i32, i32* %array.i.ptr, align 4
167+
%loop.acc.next = add i32 %loop.acc, %array.i
168+
%continue = icmp sge i32 %i, %lowerlimit
169+
br i1 %continue, label %loop, label %exit
170+
171+
exit:
172+
%result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
173+
ret i32 %result
174+
}
175+
176+
define i32 @unsigned_reverse_loop_n_to_lower_limit_equal(i32* %array, i32 %length, i32 %n, i32 %lowerlimit) {
177+
; CHECK-LABEL: @unsigned_reverse_loop_n_to_lower_limit_equal(
178+
entry:
179+
%tmp5 = icmp eq i32 %n, 0
180+
br i1 %tmp5, label %exit, label %loop.preheader
181+
182+
; CHECK: loop.preheader:
183+
; CHECK-NEXT: [[range_start:%.*]] = add i32 %n, -1
184+
; CHECK-NEXT: [[first_iteration_check:%.*]] = icmp ult i32 [[range_start]], %length
185+
; CHECK-NEXT: [[no_wrap_check:%.*]] = icmp ugt i32 %lowerlimit, 1
186+
; CHECK-NEXT: [[wide_cond:%.*]] = and i1 [[first_iteration_check]], [[no_wrap_check]]
187+
loop.preheader:
188+
br label %loop
189+
190+
; CHECK: loop:
191+
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
192+
loop:
193+
%loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
194+
%i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ]
195+
%i.next = add nsw i32 %i, -1
196+
%within.bounds = icmp ult i32 %i.next, %length
197+
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
198+
%i.i64 = zext i32 %i.next to i64
199+
%array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
200+
%array.i = load i32, i32* %array.i.ptr, align 4
201+
%loop.acc.next = add i32 %loop.acc, %array.i
202+
%continue = icmp uge i32 %i, %lowerlimit
203+
br i1 %continue, label %loop, label %exit
204+
205+
exit:
206+
%result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
207+
ret i32 %result
208+
}
209+
210+
211+
; The latch limit here is the constant 1, so the limit check (1 u> 1) folds to
212+
; false: once the loop is predicated, the guard always fails and we deoptimize early on.
213+
define i32 @unsigned_reverse_loop_n_to_1(i32* %array, i32 %length, i32 %n, i32 %lowerlimit) {
214+
; CHECK-LABEL: @unsigned_reverse_loop_n_to_1(
215+
entry:
216+
%tmp5 = icmp eq i32 %n, 0
217+
br i1 %tmp5, label %exit, label %loop.preheader
218+
219+
; CHECK: loop.preheader:
220+
; CHECK-NEXT: [[range_start:%.*]] = add i32 %n, -1
221+
; CHECK-NEXT: [[first_iteration_check:%.*]] = icmp ult i32 [[range_start]], %length
222+
; CHECK-NEXT: [[wide_cond:%.*]] = and i1 [[first_iteration_check]], false
223+
loop.preheader:
224+
br label %loop
225+
226+
; CHECK: loop:
227+
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
228+
loop:
229+
%loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
230+
%i = phi i32 [ %i.next, %loop ], [ %n, %loop.preheader ]
231+
%i.next = add nsw i32 %i, -1
232+
%within.bounds = icmp ult i32 %i.next, %length
233+
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
234+
%i.i64 = zext i32 %i.next to i64
235+
%array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
236+
%array.i = load i32, i32* %array.i.ptr, align 4
237+
%loop.acc.next = add i32 %loop.acc, %array.i
238+
%continue = icmp uge i32 %i, 1
239+
br i1 %continue, label %loop, label %exit
240+
241+
exit:
242+
%result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
243+
ret i32 %result
244+
}
245+

0 commit comments

Comments
 (0)