Skip to content

Commit ad4d820

Browse files
committed
[SCEV] Rewrite more SCEVAddExpr when applying guards.
When re-writing SCEVAddExprs to apply information from guards, check if we have information for the expression itself. If so, apply it. When we have an expression of the form (Const + A), check if we have guard info for (Const + 1 + A) and use it. This is needed to avoid regressions in a few cases, where we have BTCs with a subtracted constant. Rewriting expressions could cause regressions, e.g. when comparing 2 SCEV expressions where we are only able to rewrite one side, but I could not find any cases where this happens more with this patch in practice. Depends on llvm#160012 (included in PR). Proofs for some of the test changes: https://alive2.llvm.org/ce/z/RPX6t_
1 parent 57db6b6 commit ad4d820

File tree

8 files changed

+68
-35
lines changed

8 files changed

+68
-35
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16080,16 +16080,32 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
1608016080
}
1608116081

1608216082
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
16083-
// Trip count expressions sometimes consist of adding 3 operands, i.e.
16084-
// (Const + A + B). There may be guard info for A + B, and if so, apply
16085-
// it.
16086-
// TODO: Could more generally apply guards to Add sub-expressions.
16087-
if (isa<SCEVConstant>(Expr->getOperand(0)) &&
16088-
Expr->getNumOperands() == 3) {
16089-
if (const SCEV *S = Map.lookup(
16090-
SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
16091-
return SE.getAddExpr(Expr->getOperand(0), S);
16083+
if (const SCEV *S = Map.lookup(Expr))
16084+
return S;
16085+
if (isa<SCEVConstant>(Expr->getOperand(0))) {
16086+
// Trip count expressions sometimes consist of adding 3 operands, i.e.
16087+
// (Const + A + B). There may be guard info for A + B, and if so, apply
16088+
// it.
16089+
// TODO: Could more generally apply guards to Add sub-expressions.
16090+
if (Expr->getNumOperands() == 3) {
16091+
if (const SCEV *S = Map.lookup(
16092+
SE.getAddExpr(Expr->getOperand(1), Expr->getOperand(2))))
16093+
return SE.getAddExpr(Expr->getOperand(0), S);
16094+
}
16095+
16096+
// For expressions of the form (Const + A), check if we have guard info
16097+
// for (Const + 1 + A), and rewrite to ((Const + 1 + A) - 1). This makes
16098+
// sure we don't lose information when rewriting expressions based on
16099+
// back-edge taken counts in some cases.
16100+
if (Expr->getNumOperands() == 2) {
16101+
auto *NewC =
16102+
SE.getAddExpr(Expr->getOperand(0), SE.getOne(Expr->getType()));
16103+
if (const SCEV *S =
16104+
Map.lookup(SE.getAddExpr(NewC, Expr->getOperand(1))))
16105+
return SE.getMinusSCEV(S, SE.getOne(Expr->getType()));
16106+
}
1609216107
}
16108+
1609316109
SmallVector<const SCEV *, 2> Operands;
1609416110
bool Changed = false;
1609516111
for (const auto *Op : Expr->operands()) {

llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ declare void @clobber()
3333
define void @test_add_sub_1_guard(ptr %src, i32 %n) {
3434
; CHECK-LABEL: 'test_add_sub_1_guard'
3535
; CHECK-NEXT: Determining loop execution counts for: @test_add_sub_1_guard
36-
; CHECK-NEXT: Loop %loop: backedge-taken count is (zext i32 (-1 + (%n /u 2))<nsw> to i64)
37-
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4294967295
38-
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (zext i32 (-1 + (%n /u 2))<nsw> to i64)
36+
; CHECK-NEXT: Loop %loop: backedge-taken count is i64 0
37+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 0
38+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is i64 0
3939
; CHECK-NEXT: Loop %loop: Trip multiple is 1
4040
;
4141
entry:

llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,12 @@ define void @umax(i32 noundef %a, i32 noundef %b) {
102102
; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1
103103
; CHECK-NEXT: --> ((2 * %a) umax (4 * %b)) U: [0,-1) S: [-2147483648,2147483647)
104104
; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
105-
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a) umax (4 * %b))) LoopDispositions: { %for.body: Computable }
105+
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a) umax (4 * %b))) LoopDispositions: { %for.body: Computable }
106106
; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1
107-
; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,-1) S: [1,-1) Exits: ((2 * %a) umax (4 * %b)) LoopDispositions: { %for.body: Computable }
107+
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a) umax (4 * %b)) LoopDispositions: { %for.body: Computable }
108108
; CHECK-NEXT: Determining loop execution counts for: @umax
109109
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umax (4 * %b)))
110-
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 -3
110+
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
111111
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umax (4 * %b)))
112112
; CHECK-NEXT: Loop %for.body: Trip multiple is 2
113113
;
@@ -197,12 +197,12 @@ define void @smax(i32 noundef %a, i32 noundef %b) {
197197
; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1
198198
; CHECK-NEXT: --> ((2 * %a)<nsw> smax (4 * %b)<nsw>) U: [0,-1) S: [-2147483648,2147483647)
199199
; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
200-
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>)) LoopDispositions: { %for.body: Computable }
200+
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>)) LoopDispositions: { %for.body: Computable }
201201
; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1
202-
; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,-1) S: [1,-1) Exits: ((2 * %a)<nsw> smax (4 * %b)<nsw>) LoopDispositions: { %for.body: Computable }
202+
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a)<nsw> smax (4 * %b)<nsw>) LoopDispositions: { %for.body: Computable }
203203
; CHECK-NEXT: Determining loop execution counts for: @smax
204204
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>))
205-
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 -3
205+
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
206206
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a)<nsw> smax (4 * %b)<nsw>))
207207
; CHECK-NEXT: Loop %for.body: Trip multiple is 2
208208
;

llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -343,14 +343,13 @@ define void @slt_no_smax_needed(i64 %n, ptr %dst) {
343343
; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
344344
; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
345345
; CHECK: loop.preheader:
346-
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SHR]], i32 1)
347346
; CHECK-NEXT: br label [[LOOP:%.*]]
348347
; CHECK: loop:
349348
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
350349
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
351350
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
352351
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
353-
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]]
352+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SHR]]
354353
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
355354
; CHECK: exit.loopexit:
356355
; CHECK-NEXT: br label [[EXIT]]
@@ -385,14 +384,13 @@ define void @ult_no_umax_needed(i64 %n, ptr %dst) {
385384
; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
386385
; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
387386
; CHECK: loop.preheader:
388-
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SHR]], i32 1)
389387
; CHECK-NEXT: br label [[LOOP:%.*]]
390388
; CHECK: loop:
391389
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
392390
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
393391
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
394392
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
395-
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[UMAX]]
393+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SHR]]
396394
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
397395
; CHECK: exit.loopexit:
398396
; CHECK-NEXT: br label [[EXIT]]

llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ define void @test_memset_size_can_use_info_from_guards(i32 %x, ptr %dst) {
6161
; CHECK: [[LOOP1_BACKEDGE]]:
6262
; CHECK-NEXT: br label %[[LOOP1]]
6363
; CHECK: [[LOOP2_PREHEADER]]:
64-
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SUB]] to i64
65-
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
66-
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
64+
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[SHR]], -1
65+
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
66+
; CHECK-NEXT: [[UMAX:%.*]] = add nuw nsw i64 [[TMP1]], 1
6767
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[DST]], i8 0, i64 [[UMAX]], i1 false)
6868
; CHECK-NEXT: br label %[[LOOP2:.*]]
6969
; CHECK: [[LOOP2]]:

llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,16 @@ define dso_local void @assumeDivisibleTC(ptr noalias nocapture %a, ptr noalias n
1919
; CHECK-NEXT: [[CMP110:%.*]] = icmp sgt i32 [[N]], 0
2020
; CHECK-NEXT: br i1 [[CMP110]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT]]
2121
; CHECK: for.body.preheader:
22+
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1
23+
; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[N]], 1
24+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 1
25+
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
26+
; CHECK: for.body.preheader.new:
27+
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[N]], [[XTRAITER]]
2228
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
2329
; CHECK: for.body:
24-
; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
30+
; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
31+
; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[FOR_BODY]] ]
2532
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i32 [[I_011]]
2633
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
2734
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP0]], 3
@@ -34,8 +41,25 @@ define dso_local void @assumeDivisibleTC(ptr noalias nocapture %a, ptr noalias n
3441
; CHECK-NEXT: [[ARRAYIDX4_1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INC]]
3542
; CHECK-NEXT: store i8 [[ADD_1]], ptr [[ARRAYIDX4_1]], align 1
3643
; CHECK-NEXT: [[INC_1]] = add nuw nsw i32 [[I_011]], 2
37-
; CHECK-NEXT: [[CMP1_1:%.*]] = icmp slt i32 [[INC_1]], [[N]]
38-
; CHECK-NEXT: br i1 [[CMP1_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
44+
; CHECK-NEXT: [[NITER_NEXT_1]] = add i32 [[NITER]], 2
45+
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp ne i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
46+
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
47+
; CHECK: exit.loopexit.unr-lcssa:
48+
; CHECK-NEXT: [[I_011_UNR:%.*]] = phi i32 [ [[INC_1]], [[FOR_BODY]] ]
49+
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
50+
; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER]], label [[EXIT_LOOPEXIT:%.*]]
51+
; CHECK: for.body.epil.preheader:
52+
; CHECK-NEXT: [[I_011_EPIL_INIT:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[I_011_UNR]], [[EXIT_LOOPEXIT_UNR_LCSSA]] ]
53+
; CHECK-NEXT: [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
54+
; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]])
55+
; CHECK-NEXT: br label [[FOR_BODY_EPIL:%.*]]
56+
; CHECK: for.body.epil:
57+
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[I_011_EPIL_INIT]]
58+
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX_EPIL]], align 1
59+
; CHECK-NEXT: [[ADD_EPIL:%.*]] = add i8 [[TMP4]], 3
60+
; CHECK-NEXT: [[ARRAYIDX4_EPIL:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[I_011_EPIL_INIT]]
61+
; CHECK-NEXT: store i8 [[ADD_EPIL]], ptr [[ARRAYIDX4_EPIL]], align 1
62+
; CHECK-NEXT: br label [[EXIT_LOOPEXIT]]
3963
; CHECK: exit.loopexit:
4064
; CHECK-NEXT: br label [[EXIT]]
4165
; CHECK: exit:

llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
193193
; CHECK-NEXT: store i32 13, ptr [[TMP12]], align 1
194194
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
195195
; CHECK: pred.store.continue6:
196-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
196+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
197197
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
198198
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
199199
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]

llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -465,12 +465,7 @@ define void @remove_diff_checks_via_guards(i32 %x, i32 %y, ptr %A) {
465465
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[SMAX]], 4294967295
466466
; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
467467
; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP9]], [[TMP14]]
468-
; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
469-
; CHECK: [[VECTOR_MEMCHECK]]:
470-
; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[OFFSET]] to i64
471-
; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i64 [[TMP16]], 2
472-
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP17]], 16
473-
; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH1:label %.*]]
468+
; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]]
474469
;
475470
entry:
476471
%offset = sub i32 %x, %y

0 commit comments

Comments
 (0)