-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[SCEV] Fold (C * A /u C) -> A, if A is a multiple of C and C a pow-of-2. #156730
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-powerpc Author: Florian Hahn (fhahn) ChangesAlive2 Proof: https://alive2.llvm.org/ce/z/JoHJE9 Patch is 83.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156730.diff 9 Files Affected:
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 09b126d35bde0..bd57d1192eb94 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3215,6 +3215,15 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
return getZeroExtendExpr(Res, Ops[1]->getType(), Depth + 1);
};
}
+
+ // Try to fold (C * D /u C) -> D, if C is a power-of-2 and D is a multiple
+ // of C.
+ const SCEV *D;
+ if (match(Ops[1], m_scev_UDiv(m_SCEV(D), m_scev_Specific(LHSC))) &&
+ LHSC->getAPInt().isPowerOf2() &&
+ LHSC->getAPInt().logBase2() <= getMinTrailingZeros(D)) {
+ return D;
+ }
}
}
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
index 8c77d704eac6a..4e5033b7a2f7f 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
@@ -12,9 +12,9 @@ define void @rewrite_zext(i32 %n) {
; CHECK-NEXT: %n.vec = and i64 %ext, -8
; CHECK-NEXT: --> (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw> U: [0,4294967289) S: [0,4294967289)
; CHECK-NEXT: %index = phi i64 [ 0, %check ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,8}<nuw><nsw><%loop> U: [0,17) S: [0,17) Exits: (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,8}<nuw><nsw><%loop> U: [0,17) S: [0,17) Exits: (-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw nsw i64 %index, 8
-; CHECK-NEXT: --> {8,+,8}<nuw><nsw><%loop> U: [8,25) S: [8,25) Exits: (8 + (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {8,+,8}<nuw><nsw><%loop> U: [8,25) S: [8,25) Exits: (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2
@@ -52,11 +52,11 @@ define i32 @rewrite_zext_min_max(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %ext, 28
; CHECK-NEXT: --> (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw> U: [0,17) S: [0,17)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_min_max
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -98,11 +98,11 @@ define i32 @rewrite_min_max_zext(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %umin, 28
; CHECK-NEXT: --> (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw> U: [0,17) S: [0,17)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_min_max_zext
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -144,11 +144,11 @@ define i32 @rewrite_sext_min_max(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %ext, 28
; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw> U: [0,29) S: [0,29)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nsw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_sext_min_max
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -190,11 +190,11 @@ define i32 @rewrite_min_max_sext(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %smin, 28
; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw> U: [0,29) S: [0,29)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nsw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_min_max_sext
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -288,9 +288,9 @@ define i32 @rewrite_zext_no_icmp_ne(i32 %N) {
; CHECK-NEXT: %n.vec = and i64 %n.rnd.up, 8589934588
; CHECK-NEXT: --> (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32))<nsw> to i64))<nuw><nsw> /u 4))<nuw><nsw> U: [4,4294967297) S: [4,4294967297)
; CHECK-NEXT: %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,4294967293) S: [0,4294967293) Exits: (4 * ((-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32))<nsw> to i64))<nuw><nsw> /u 4))<nuw><nsw>)<nsw> /u 4))<nuw><nsw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,4294967293) S: [0,4294967293) Exits: (-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32))<nsw> to i64))<nuw><nsw> /u 4))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add i64 %iv, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,4294967297) S: [4,4294967297) Exits: (4 + (4 * ((-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32))<nsw> to i64))<nuw><nsw> /u 4))<nuw><nsw>)<nsw> /u 4))<nuw><nsw>)<nuw><nsw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,4294967297) S: [4,4294967297) Exits: (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32))<nsw> to i64))<nuw><nsw> /u 4))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_no_icmp_ne
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32))<nsw> to i64))<nuw><nsw> /u 4))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 1073741823
@@ -328,9 +328,9 @@ define void @rewrite_zext_and_base_1(i32 %n) {
; CHECK-NEXT: %n.vec = and i64 %ext, -8
; CHECK-NEXT: --> (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw> U: [0,4294967289) S: [0,4294967289)
; CHECK-NEXT: %index = phi i64 [ 0, %check ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,8}<nuw><nsw><%loop> U: [0,25) S: [0,25) Exits: (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,8}<nuw><nsw><%loop> U: [0,25) S: [0,25) Exits: (-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw nsw i64 %index, 8
-; CHECK-NEXT: --> {8,+,8}<nuw><nsw><%loop> U: [8,33) S: [8,33) Exits: (8 + (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {8,+,8}<nuw><nsw><%loop> U: [8,33) S: [8,33) Exits: (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_and_base_1
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -371,9 +371,9 @@ define void @rewrite_zext_and_base_2(i32 %n) {
; CHECK-NEXT: %n.vec = and i64 %ext, -8
; CHECK-NEXT: --> (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw> U: [0,4294967289) S: [0,4294967289)
; CHECK-NEXT: %index = phi i64 [ 0, %check ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,8}<nuw><nsw><%loop> U: [0,25) S: [0,25) Exits: (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,8}<nuw><nsw><%loop> U: [0,25) S: [0,25) Exits: (-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw nsw i64 %index, 8
-; CHECK-NEXT: --> {8,+,8}<nuw><nsw><%loop> U: [8,33) S: [8,33) Exits: (8 + (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {8,+,8}<nuw><nsw><%loop> U: [8,33) S: [8,33) Exits: (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_and_base_2
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -412,9 +412,9 @@ define void @guard_pessimizes_analysis_step2(i1 %c, i32 %N) {
; CHECK-NEXT: %init = phi i64 [ 2, %entry ], [ 4, %bb1 ]
; CHECK-NEXT: --> %init U: [2,5) S: [2,5)
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ %init, %loop.ph ]
-; CHECK-NEXT: --> {%init,+,2}<nuw><nsw><%loop> U: [2,17) S: [2,17) Exits: ((2 * ((14 + (-1 * %init)<nsw>)<nsw> /u 2))<nuw><nsw> + %init) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%init,+,2}<nuw><nsw><%loop> U: [2,17) S: [2,17) Exits: 14 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add i64 %iv, 2
-; CHECK-NEXT: --> {(2 + %init)<nuw><nsw>,+,2}<nuw><nsw><%loop> U: [4,19) S: [4,19) Exits: (2 + (2 * ((14 + (-1 * %init)<nsw>)<nsw> /u 2))<nuw><nsw> + %init) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(2 + %init)<nuw><nsw>,+,2}<nuw><nsw><%loop> U: [4,19) S: [4,19) Exits: 16 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @guard_pessimizes_analysis_step2
; CHECK-NEXT: Loop %loop: backedge-taken count is ((14 + (-1 * %init)<nsw>)<nsw> /u 2)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 6
@@ -456,11 +456,11 @@ define i32 @rewrite_sext_slt_narrow_check(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %ext, 28
; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))<nuw><nsw> U: [0,29) S: [0,29)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_sext_slt_narrow_check
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -500,11 +500,11 @@ define i32 @rewrite_zext_ult_narrow_check(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %ext, 28
; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw> U: [0,29) S: [0,29)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_ult_narrow_check
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -544,11 +544,11 @@ define i32 @rewrite_zext_ule_narrow_check(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %ext, 28
; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw> U: [0,29) S: [0,29)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 ...
[truncated]
|
|
@llvm/pr-subscribers-llvm-analysis Author: Florian Hahn (fhahn) ChangesAlive2 Proof: https://alive2.llvm.org/ce/z/JoHJE9 Patch is 83.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156730.diff 9 Files Affected:
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 09b126d35bde0..bd57d1192eb94 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3215,6 +3215,15 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
return getZeroExtendExpr(Res, Ops[1]->getType(), Depth + 1);
};
}
+
+ // Try to fold (C * D /u C) -> D, if C is a power-of-2 and D is a multiple
+ // of C.
+ const SCEV *D;
+ if (match(Ops[1], m_scev_UDiv(m_SCEV(D), m_scev_Specific(LHSC))) &&
+ LHSC->getAPInt().isPowerOf2() &&
+ LHSC->getAPInt().logBase2() <= getMinTrailingZeros(D)) {
+ return D;
+ }
}
}
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
index 8c77d704eac6a..4e5033b7a2f7f 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
@@ -12,9 +12,9 @@ define void @rewrite_zext(i32 %n) {
; CHECK-NEXT: %n.vec = and i64 %ext, -8
; CHECK-NEXT: --> (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw> U: [0,4294967289) S: [0,4294967289)
; CHECK-NEXT: %index = phi i64 [ 0, %check ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,8}<nuw><nsw><%loop> U: [0,17) S: [0,17) Exits: (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,8}<nuw><nsw><%loop> U: [0,17) S: [0,17) Exits: (-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw nsw i64 %index, 8
-; CHECK-NEXT: --> {8,+,8}<nuw><nsw><%loop> U: [8,25) S: [8,25) Exits: (8 + (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {8,+,8}<nuw><nsw><%loop> U: [8,25) S: [8,25) Exits: (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2
@@ -52,11 +52,11 @@ define i32 @rewrite_zext_min_max(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %ext, 28
; CHECK-NEXT: --> (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw> U: [0,17) S: [0,17)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_min_max
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -98,11 +98,11 @@ define i32 @rewrite_min_max_zext(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %umin, 28
; CHECK-NEXT: --> (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw> U: [0,17) S: [0,17)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_min_max_zext
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -144,11 +144,11 @@ define i32 @rewrite_sext_min_max(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %ext, 28
; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw> U: [0,29) S: [0,29)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nsw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_sext_min_max
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -190,11 +190,11 @@ define i32 @rewrite_min_max_sext(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %smin, 28
; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw> U: [0,29) S: [0,29)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nsw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_min_max_sext
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -288,9 +288,9 @@ define i32 @rewrite_zext_no_icmp_ne(i32 %N) {
; CHECK-NEXT: %n.vec = and i64 %n.rnd.up, 8589934588
; CHECK-NEXT: --> (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32))<nsw> to i64))<nuw><nsw> /u 4))<nuw><nsw> U: [4,4294967297) S: [4,4294967297)
; CHECK-NEXT: %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,4294967293) S: [0,4294967293) Exits: (4 * ((-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32))<nsw> to i64))<nuw><nsw> /u 4))<nuw><nsw>)<nsw> /u 4))<nuw><nsw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,4294967293) S: [0,4294967293) Exits: (-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32))<nsw> to i64))<nuw><nsw> /u 4))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add i64 %iv, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,4294967297) S: [4,4294967297) Exits: (4 + (4 * ((-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32))<nsw> to i64))<nuw><nsw> /u 4))<nuw><nsw>)<nsw> /u 4))<nuw><nsw>)<nuw><nsw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,4294967297) S: [4,4294967297) Exits: (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32))<nsw> to i64))<nuw><nsw> /u 4))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_no_icmp_ne
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32))<nsw> to i64))<nuw><nsw> /u 4))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 1073741823
@@ -328,9 +328,9 @@ define void @rewrite_zext_and_base_1(i32 %n) {
; CHECK-NEXT: %n.vec = and i64 %ext, -8
; CHECK-NEXT: --> (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw> U: [0,4294967289) S: [0,4294967289)
; CHECK-NEXT: %index = phi i64 [ 0, %check ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,8}<nuw><nsw><%loop> U: [0,25) S: [0,25) Exits: (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,8}<nuw><nsw><%loop> U: [0,25) S: [0,25) Exits: (-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw nsw i64 %index, 8
-; CHECK-NEXT: --> {8,+,8}<nuw><nsw><%loop> U: [8,33) S: [8,33) Exits: (8 + (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {8,+,8}<nuw><nsw><%loop> U: [8,33) S: [8,33) Exits: (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_and_base_1
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -371,9 +371,9 @@ define void @rewrite_zext_and_base_2(i32 %n) {
; CHECK-NEXT: %n.vec = and i64 %ext, -8
; CHECK-NEXT: --> (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw> U: [0,4294967289) S: [0,4294967289)
; CHECK-NEXT: %index = phi i64 [ 0, %check ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,8}<nuw><nsw><%loop> U: [0,25) S: [0,25) Exits: (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,8}<nuw><nsw><%loop> U: [0,25) S: [0,25) Exits: (-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw nsw i64 %index, 8
-; CHECK-NEXT: --> {8,+,8}<nuw><nsw><%loop> U: [8,33) S: [8,33) Exits: (8 + (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {8,+,8}<nuw><nsw><%loop> U: [8,33) S: [8,33) Exits: (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_and_base_2
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8 + (8 * ((zext i32 %n to i64) /u 8))<nuw><nsw>)<nsw> /u 8)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -412,9 +412,9 @@ define void @guard_pessimizes_analysis_step2(i1 %c, i32 %N) {
; CHECK-NEXT: %init = phi i64 [ 2, %entry ], [ 4, %bb1 ]
; CHECK-NEXT: --> %init U: [2,5) S: [2,5)
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ %init, %loop.ph ]
-; CHECK-NEXT: --> {%init,+,2}<nuw><nsw><%loop> U: [2,17) S: [2,17) Exits: ((2 * ((14 + (-1 * %init)<nsw>)<nsw> /u 2))<nuw><nsw> + %init) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%init,+,2}<nuw><nsw><%loop> U: [2,17) S: [2,17) Exits: 14 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add i64 %iv, 2
-; CHECK-NEXT: --> {(2 + %init)<nuw><nsw>,+,2}<nuw><nsw><%loop> U: [4,19) S: [4,19) Exits: (2 + (2 * ((14 + (-1 * %init)<nsw>)<nsw> /u 2))<nuw><nsw> + %init) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(2 + %init)<nuw><nsw>,+,2}<nuw><nsw><%loop> U: [4,19) S: [4,19) Exits: 16 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @guard_pessimizes_analysis_step2
; CHECK-NEXT: Loop %loop: backedge-taken count is ((14 + (-1 * %init)<nsw>)<nsw> /u 2)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 6
@@ -456,11 +456,11 @@ define i32 @rewrite_sext_slt_narrow_check(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %ext, 28
; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))<nuw><nsw> U: [0,29) S: [0,29)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_sext_slt_narrow_check
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -500,11 +500,11 @@ define i32 @rewrite_zext_ult_narrow_check(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %ext, 28
; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw> U: [0,29) S: [0,29)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_ult_narrow_check
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3
@@ -544,11 +544,11 @@ define i32 @rewrite_zext_ule_narrow_check(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %ext, 28
; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw> U: [0,29) S: [0,29)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 ...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm going to put this in my own words to see if I follow. Given C is a power of two, both the divide and the multiply are shifts (right and left respectively). Since we know that D has more trailing zero bits than we shift out, we can shift the value back without changing it. Right?
If so, doesn't the same logic work for sdiv (i.e. ashr)?
If so, can't we generalize this for different power of two constants, with the result being a D /u C3 where C3 is the net shift? (Given we didn't discard any bits.)
If so, should we consider (C1 * D) /u C2 as well? That is, reverse the order of the multiply and the divide?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm going to put this in my own words to see if I follow. Given C is a power of two, both the divide and the multiply are shifts (right and left respectively). Since we know that D has more trailing zero bits than we shift out, we can shift the value back without changing it. Right?
Yep exactly!
If so, doesn't the same logic work for sdiv (i.e. ashr)?
Yes, https://alive2.llvm.org/ce/z/ekSvPh contains the same proof for UDiv and SDiv. As for SCEV, I don't think there's anything to do for those though, as they either will be UDiv (if both operands are known-positive), or SCEVUnknown otherwise unless I missed something.
If so, can't we generalize this for different power of two constants, with the result being a D /u C3 where C3 is the net shift? (Given we didn't discard any bits.)
Yes, that should also be possibe. We also don't need to limit ourselfs to constants, at least for the case where we multiply with the same values as we are dividing.
Would you like me to include either generalization here or OK as followup?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would you like me to include either generalization here or OK as followup?
Followups are totally fine.
a577da9 to
e712719
Compare
Update Clang's __builtin_assume_dereferenceable to support non-constant lengths. The corresponding assume bundle has been updated to support non-constant sizes in cad62df. The current docs for the builtin don't mention the constant requirement for the size argument, so don't need to be updated: https://clang.llvm.org/docs/LanguageExtensions.html#builtin-assume-dereferenceable A number of patches landed recently to make the optimizer make better use of the dereferenceable assumptions, and once llvm#156730 lands, it can be used to vectorize some early-exit loops, for example std::find with std::vector::iterator: https://godbolt.org/z/qo58PKG37 #include <algorithm> #include <cstddef> #include <vector> auto find(std::vector<short>::iterator first, short s, unsigned size) { auto Addr = __builtin_assume_aligned(std::to_address(first), 2); __builtin_assume_dereferenceable(std::to_address(first), size * sizeof(short)); return std::find(first, first + size, s); }
preames
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
… C a pow-of-2. (#156730) Alive2 Proof: https://alive2.llvm.org/ce/z/JoHJE9 PR: llvm/llvm-project#156730
|
It looks like this causes some large compile-time regressions during the clang build: https://llvm-compile-time-tracker.com/compare_clang.php?from=22fb21a64e1ca783fbde7d7d99735c2962f6fe34&to=74ec38fad0a1289f936e5388fa8bbe74653c55d9&stat=instructions%3Au&sortBy=interestingness This seems to be related to code size increases: https://llvm-compile-time-tracker.com/compare_clang.php?from=22fb21a64e1ca783fbde7d7d99735c2962f6fe34&to=74ec38fad0a1289f936e5388fa8bbe74653c55d9&stat=size-file&sortBy=relative-difference Can you please check what is going on there? |
|
It looks like some unfortunate interaction with inlining. With this patch, we now remove a single instruction from And this just reduces the inlining cost for the function below the threshold, so it will get inlined in many places: To confirm this is the issue, I checked that add Reverting the PR on top of that commit doesn't impact size/#instructions: http://llvm-compile-time-tracker.com/compare.php?from=3b7b312476da2a2b1fd44815532edf9987da337e&to=4b5fc76e0e568192b971772152a0569df2f607a2&stat=instructions:u |
|
@fhahn Thanks for checking! Do you think it would make sense to upstream that noinline annotation? It looks like clang already uses LLVM_ATTRIBUTE_NOINLINE in various places to avoid excessive inlining. Though this one isn't super terrible ("only" 0.4% of the binary size). |
#156929) Update Clang's __builtin_assume_dereferenceable to support non-constant lengths. The corresponding assume bundle has been updated to support non-constant sizes in cad62df. The current docs for the builtin don't mention the constant requirement for the size argument, so don't need to be updated: https://clang.llvm.org/docs/LanguageExtensions.html#builtin-assume-dereferenceable A number of patches landed recently to make the optimizer make better use of the dereferenceable assumptions, and once #156730 lands, it can be used to vectorize some early-exit loops, for example std::find with std::vector::iterator: https://godbolt.org/z/qo58PKG37 ``` #include <algorithm> #include <cstddef> #include <vector> auto find(std::vector<short>::iterator first, short s, unsigned size) { auto Addr = __builtin_assume_aligned(std::to_address(first), 2); __builtin_assume_dereferenceable(std::to_address(first), size * sizeof(short)); return std::find(first, first + size, s); } ``` PR: #156929
…ferenceable. (#156929) Update Clang's __builtin_assume_dereferenceable to support non-constant lengths. The corresponding assume bundle has been updated to support non-constant sizes in cad62df. The current docs for the builtin don't mention the constant requirement for the size argument, so don't need to be updated: https://clang.llvm.org/docs/LanguageExtensions.html#builtin-assume-dereferenceable A number of patches landed recently to make the optimizer make better use of the dereferenceable assumptions, and once llvm/llvm-project#156730 lands, it can be used to vectorize some early-exit loops, for example std::find with std::vector::iterator: https://godbolt.org/z/qo58PKG37 ``` #include <algorithm> #include <cstddef> #include <vector> auto find(std::vector<short>::iterator first, short s, unsigned size) { auto Addr = __builtin_assume_aligned(std::to_address(first), 2); __builtin_assume_dereferenceable(std::to_address(first), size * sizeof(short)); return std::find(first, first + size, s); } ``` PR: llvm/llvm-project#156929
Add extra test coverage for follow-up to #156730.
Add extra test coverage for follow-up to llvm/llvm-project#156730.
After llvm#156730, getLastArg(NoClaim) are now just below the inlining threshold and inlining them causes +0.44% code size increase for Clang and a corresponding compile-time increase without helping compile-time. Mark them as noinline to recover the original codesize. http://llvm-compile-time-tracker.com/compare.php?from=a271d07488a85ce677674bbe8101b10efff58c95&to=3b7b312476da2a2b1fd44815532edf9987da337e&stat=instructions:u
|
Sure, seems like this code-size increase is unnecessary: #157163 |
… A. (#157159) Generalize fold added in 74ec38f (#156730) to support multiplying and dividing by different constants, given they are both powers-of-2 and C1 is a multiple of C2, checked via logBase2. https://alive2.llvm.org/ce/z/eqJ2xj PR: #157159
… -> C1/C2 * A. (#157159) Generalize fold added in 74ec38f (llvm/llvm-project#156730) to support multiplying and dividing by different constants, given they are both powers-of-2 and C1 is a multiple of C2, checked via logBase2. https://alive2.llvm.org/ce/z/eqJ2xj PR: llvm/llvm-project#157159
After #156730, getLastArg(NoClaim) are now just below the inlining threshold and inlining them causes +0.44% code size increase for Clang and a corresponding compile-time increase without helping compile-time. Mark them as noinline to recover the original codesize. http://llvm-compile-time-tracker.com/compare.php?from=a271d07488a85ce677674bbe8101b10efff58c95&to=3b7b312476da2a2b1fd44815532edf9987da337e&stat=instructions:u PR: #157163
After llvm/llvm-project#156730, getLastArg(NoClaim) are now just below the inlining threshold and inlining them causes +0.44% code size increase for Clang and a corresponding compile-time increase without helping compile-time. Mark them as noinline to recover the original codesize. http://llvm-compile-time-tracker.com/compare.php?from=a271d07488a85ce677674bbe8101b10efff58c95&to=3b7b312476da2a2b1fd44815532edf9987da337e&stat=instructions:u PR: llvm/llvm-project#157163
…-2. (llvm#156730) Alive2 Proof: https://alive2.llvm.org/ce/z/JoHJE9 PR: llvm#156730 (cherry picked from commit 74ec38f)
Add extra test coverage for follow-up to llvm#156730. (cherry picked from commit 45a2214)
… A. (llvm#157159) Generalize fold added in 74ec38f (llvm#156730) to support multiplying and dividing by different constants, given they are both powers-of-2 and C1 is a multiple of C2, checked via logBase2. https://alive2.llvm.org/ce/z/eqJ2xj PR: llvm#157159 (cherry picked from commit a1afe66)
llvm#156929) Update Clang's __builtin_assume_dereferenceable to support non-constant lengths. The corresponding assume bundle has been updated to support non-constant sizes in cad62df. The current docs for the builtin don't mention the constant requirement for the size argument, so don't need to be updated: https://clang.llvm.org/docs/LanguageExtensions.html#builtin-assume-dereferenceable A number of patches landed recently to make the optimizer make better use of the dereferenceable assumptions, and once llvm#156730 lands, it can be used to vectorize some early-exit loops, for example std::find with std::vector::iterator: https://godbolt.org/z/qo58PKG37 ``` #include <algorithm> #include <cstddef> #include <vector> auto find(std::vector<short>::iterator first, short s, unsigned size) { auto Addr = __builtin_assume_aligned(std::to_address(first), 2); __builtin_assume_dereferenceable(std::to_address(first), size * sizeof(short)); return std::find(first, first + size, s); } ``` PR: llvm#156929 (cherry picked from commit c8d065b)
llvm#156929) Update Clang's __builtin_assume_dereferenceable to support non-constant lengths. The corresponding assume bundle has been updated to support non-constant sizes in cad62df. The current docs for the builtin don't mention the constant requirement for the size argument, so don't need to be updated: https://clang.llvm.org/docs/LanguageExtensions.html#builtin-assume-dereferenceable A number of patches landed recently to make the optimizer make better use of the dereferenceable assumptions, and once llvm#156730 lands, it can be used to vectorize some early-exit loops, for example std::find with std::vector::iterator: https://godbolt.org/z/qo58PKG37 ``` #include <algorithm> #include <cstddef> #include <vector> auto find(std::vector<short>::iterator first, short s, unsigned size) { auto Addr = __builtin_assume_aligned(std::to_address(first), 2); __builtin_assume_dereferenceable(std::to_address(first), size * sizeof(short)); return std::find(first, first + size, s); } ``` PR: llvm#156929 (cherry picked from commit c8d065b)
Alive2 Proof: https://alive2.llvm.org/ce/z/JoHJE9