Skip to content

Commit 4d06b34

Browse files
authored
Merge pull request #11744 from fhahn/pick-scev-laa-loads-changes-for-early-exit
Pick changes for std::find vectorization rdar://160925334 rdar://158592232 rdar://159859974
2 parents 9959c2e + dec6e12 commit 4d06b34

File tree

82 files changed

+6264
-956
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

82 files changed

+6264
-956
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,8 @@ Non-comprehensive list of changes in this release
389389
this build without optimizations (i.e. use `-O0` or use the `optnone` function
390390
attribute) or use the `fno-sanitize-merge=` flag in optimized builds.
391391

392+
- ``__builtin_assume_dereferenceable`` now accepts non-constant size operands.
393+
392394
New Compiler Flags
393395
------------------
394396
- New option ``-fno-sanitize-debug-trap-reasons`` added to disable emitting trap reasons into the debug info when compiling with trapping UBSan (e.g. ``-fsanitize-trap=undefined``).

clang/include/clang/Basic/Builtins.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -854,7 +854,7 @@ def BuiltinAssumeAligned : Builtin {
854854
def BuiltinAssumeDereferenceable : Builtin {
855855
let Spellings = ["__builtin_assume_dereferenceable"];
856856
let Attributes = [NoThrow, Const];
857-
let Prototype = "void(void const*, _Constant size_t)";
857+
let Prototype = "void(void const*, size_t)";
858858
}
859859

860860
def BuiltinFree : Builtin {

clang/test/CodeGen/builtin-assume-dereferenceable.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,62 @@ int test2(int *a) {
3232
__builtin_assume_dereferenceable(a, 32ull);
3333
return a[0];
3434
}
35+
36+
// CHECK-LABEL: @test3(
37+
// CHECK-NEXT: entry:
38+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
39+
// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
40+
// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8
41+
// CHECK-NEXT: store i32 [[N:%.*]], ptr [[N_ADDR]], align 4
42+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
43+
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
44+
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP1]] to i64
45+
// CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[TMP0]], i64 [[CONV]]) ]
46+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
47+
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0
48+
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
49+
// CHECK-NEXT: ret i32 [[TMP3]]
50+
//
51+
int test3(int *a, int n) {
52+
__builtin_assume_dereferenceable(a, n);
53+
return a[0];
54+
}
55+
56+
// CHECK-LABEL: @test4(
57+
// CHECK-NEXT: entry:
58+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
59+
// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
60+
// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8
61+
// CHECK-NEXT: store i64 [[N:%.*]], ptr [[N_ADDR]], align 8
62+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
63+
// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[N_ADDR]], align 8
64+
// CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[TMP0]], i64 [[TMP1]]) ]
65+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
66+
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0
67+
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
68+
// CHECK-NEXT: ret i32 [[TMP3]]
69+
//
70+
int test4(int *a, unsigned long long n) {
71+
__builtin_assume_dereferenceable(a, n);
72+
return a[0];
73+
}
74+
75+
// CHECK-LABEL: @test5(
76+
// CHECK-NEXT: entry:
77+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
78+
// CHECK-NEXT: [[N_ADDR:%.*]] = alloca float, align 4
79+
// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8
80+
// CHECK-NEXT: store float [[N:%.*]], ptr [[N_ADDR]], align 4
81+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
82+
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[N_ADDR]], align 4
83+
// CHECK-NEXT: [[CONV:%.*]] = fptoui float [[TMP1]] to i64
84+
// CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[TMP0]], i64 [[CONV]]) ]
85+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
86+
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0
87+
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
88+
// CHECK-NEXT: ret i32 [[TMP3]]
89+
//
90+
int test5(int *a, float n) {
91+
__builtin_assume_dereferenceable(a, n);
92+
return a[0];
93+
}

clang/test/SemaCXX/builtin-assume-dereferenceable.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@ int test3(int *a) {
1818
}
1919

2020
int test4(int *a, unsigned size) {
21-
a = __builtin_assume_dereferenceable(a, size); // expected-error {{argument to '__builtin_assume_dereferenceable' must be a constant integer}}
21+
__builtin_assume_dereferenceable(a, size);
2222
return a[0];
2323
}
2424

2525
int test5(int *a, unsigned long long size) {
26-
a = __builtin_assume_dereferenceable(a, size); // expected-error {{argument to '__builtin_assume_dereferenceable' must be a constant integer}}
26+
__builtin_assume_dereferenceable(a, size);
2727
return a[0];
2828
}
2929

@@ -53,3 +53,8 @@ constexpr void *l = __builtin_assume_dereferenceable(p, 4); // expected-error {{
5353
void *foo() {
5454
return l;
5555
}
56+
57+
int test10(int *a) {
58+
__builtin_assume_dereferenceable(a, a); // expected-error {{cannot initialize a parameter of type 'unsigned long' with an lvalue of type 'int *'}}
59+
return a[0];
60+
}

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 43 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -180,11 +180,15 @@ class MemoryDepChecker {
180180
const SmallVectorImpl<Instruction *> &Instrs) const;
181181
};
182182

183-
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L,
183+
MemoryDepChecker(PredicatedScalarEvolution &PSE, AssumptionCache *AC,
184+
DominatorTree *DT, const Loop *L,
184185
const DenseMap<Value *, const SCEV *> &SymbolicStrides,
185-
unsigned MaxTargetVectorWidthInBits)
186-
: PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides),
187-
MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {}
186+
unsigned MaxTargetVectorWidthInBits,
187+
std::optional<ScalarEvolution::LoopGuards> &LoopGuards)
188+
: PSE(PSE), AC(AC), DT(DT), InnermostLoop(L),
189+
SymbolicStrides(SymbolicStrides),
190+
MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits),
191+
LoopGuards(LoopGuards) {}
188192

189193
/// Register the location (instructions are given increasing numbers)
190194
/// of a write access.
@@ -236,8 +240,8 @@ class MemoryDepChecker {
236240

237241
/// In some cases when the dependency check fails we can still
238242
/// vectorize the loop with a dynamic array access check.
239-
bool shouldRetryWithRuntimeCheck() const {
240-
return FoundNonConstantDistanceDependence &&
243+
bool shouldRetryWithRuntimeChecks() const {
244+
return ShouldRetryWithRuntimeChecks &&
241245
Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks;
242246
}
243247

@@ -288,6 +292,15 @@ class MemoryDepChecker {
288292
return PointerBounds;
289293
}
290294

295+
DominatorTree *getDT() const {
296+
assert(DT && "requested DT, but it is not available");
297+
return DT;
298+
}
299+
AssumptionCache *getAC() const {
300+
assert(AC && "requested AC, but it is not available");
301+
return AC;
302+
}
303+
291304
private:
292305
/// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
293306
/// applies dynamic knowledge to simplify SCEV expressions and convert them
@@ -296,6 +309,10 @@ class MemoryDepChecker {
296309
/// example we might assume a unit stride for a pointer in order to prove
297310
/// that a memory access is strided and doesn't wrap.
298311
PredicatedScalarEvolution &PSE;
312+
313+
AssumptionCache *AC;
314+
DominatorTree *DT;
315+
299316
const Loop *InnermostLoop;
300317

301318
/// Reference to map of pointer values to
@@ -327,9 +344,9 @@ class MemoryDepChecker {
327344
uint64_t MaxStoreLoadForwardSafeDistanceInBits =
328345
std::numeric_limits<uint64_t>::max();
329346

330-
/// If we see a non-constant dependence distance we can still try to
331-
/// vectorize this loop with runtime checks.
332-
bool FoundNonConstantDistanceDependence = false;
347+
/// Whether we should try to vectorize the loop with runtime checks, if the
348+
/// dependencies are not safe.
349+
bool ShouldRetryWithRuntimeChecks = false;
333350

334351
/// Result of the dependence checks, indicating whether the checked
335352
/// dependences are safe for vectorization, require RT checks or are known to
@@ -358,7 +375,7 @@ class MemoryDepChecker {
358375
PointerBounds;
359376

360377
/// Cache for the loop guards of InnermostLoop.
361-
std::optional<ScalarEvolution::LoopGuards> LoopGuards;
378+
std::optional<ScalarEvolution::LoopGuards> &LoopGuards;
362379

363380
/// Check whether there is a plausible dependence between the two
364381
/// accesses.
@@ -516,8 +533,9 @@ class RuntimePointerChecking {
516533
AliasSetId(AliasSetId), Expr(Expr), NeedsFreeze(NeedsFreeze) {}
517534
};
518535

519-
RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE)
520-
: DC(DC), SE(SE) {}
536+
RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE,
537+
std::optional<ScalarEvolution::LoopGuards> &LoopGuards)
538+
: DC(DC), SE(SE), LoopGuards(LoopGuards) {}
521539

522540
/// Reset the state of the pointer runtime information.
523541
void reset() {
@@ -631,6 +649,9 @@ class RuntimePointerChecking {
631649
/// Holds a pointer to the ScalarEvolution analysis.
632650
ScalarEvolution *SE;
633651

652+
/// Cache for the loop guards of the loop.
653+
std::optional<ScalarEvolution::LoopGuards> &LoopGuards;
654+
634655
/// Set of run-time checks required to establish independence of
635656
/// otherwise may-aliasing pointers in the loop.
636657
SmallVector<RuntimePointerCheck, 4> Checks;
@@ -670,7 +691,7 @@ class LoopAccessInfo {
670691
LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE,
671692
const TargetTransformInfo *TTI,
672693
const TargetLibraryInfo *TLI, AAResults *AA,
673-
DominatorTree *DT, LoopInfo *LI,
694+
DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC,
674695
bool AllowPartial = false);
675696

676697
/// Return true we can analyze the memory accesses in the loop and there are
@@ -806,6 +827,9 @@ class LoopAccessInfo {
806827

807828
Loop *TheLoop;
808829

830+
/// Cache for the loop guards of TheLoop.
831+
std::optional<ScalarEvolution::LoopGuards> LoopGuards;
832+
809833
/// Determines whether we should generate partial runtime checks when not all
810834
/// memory accesses could be analyzed.
811835
bool AllowPartial;
@@ -922,7 +946,9 @@ LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
922946
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
923947
const SCEV *MaxBTC, ScalarEvolution *SE,
924948
DenseMap<std::pair<const SCEV *, Type *>,
925-
std::pair<const SCEV *, const SCEV *>> *PointerBounds);
949+
std::pair<const SCEV *, const SCEV *>> *PointerBounds,
950+
DominatorTree *DT, AssumptionCache *AC,
951+
std::optional<ScalarEvolution::LoopGuards> &LoopGuards);
926952

927953
class LoopAccessInfoManager {
928954
/// The cache.
@@ -935,12 +961,13 @@ class LoopAccessInfoManager {
935961
LoopInfo &LI;
936962
TargetTransformInfo *TTI;
937963
const TargetLibraryInfo *TLI = nullptr;
964+
AssumptionCache *AC;
938965

939966
public:
940967
LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT,
941968
LoopInfo &LI, TargetTransformInfo *TTI,
942-
const TargetLibraryInfo *TLI)
943-
: SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {}
969+
const TargetLibraryInfo *TLI, AssumptionCache *AC)
970+
: SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI), AC(AC) {}
944971

945972
LLVM_ABI const LoopAccessInfo &getInfo(Loop &L, bool AllowPartial = false);
946973

llvm/include/llvm/Analysis/ScalarEvolution.h

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1000,10 +1000,14 @@ class ScalarEvolution {
10001000
/// (at every loop iteration). It is, at the same time, the minimum number
10011001
/// of times S is divisible by 2. For example, given {4,+,8} it returns 2.
10021002
/// If S is guaranteed to be 0, it returns the bitwidth of S.
1003-
LLVM_ABI uint32_t getMinTrailingZeros(const SCEV *S);
1003+
/// If \p CtxI is not nullptr, return a constant multiple valid at \p CtxI.
1004+
LLVM_ABI uint32_t getMinTrailingZeros(const SCEV *S,
1005+
const Instruction *CtxI = nullptr);
10041006

1005-
/// Returns the max constant multiple of S.
1006-
LLVM_ABI APInt getConstantMultiple(const SCEV *S);
1007+
/// Returns the max constant multiple of S. If \p CtxI is not nullptr, return
1008+
/// a constant multiple valid at \p CtxI.
1009+
LLVM_ABI APInt getConstantMultiple(const SCEV *S,
1010+
const Instruction *CtxI = nullptr);
10071011

10081012
// Returns the max constant multiple of S. If S is exactly 0, return 1.
10091013
LLVM_ABI APInt getNonZeroConstantMultiple(const SCEV *S);
@@ -1339,6 +1343,7 @@ class ScalarEvolution {
13391343

13401344
class LoopGuards {
13411345
DenseMap<const SCEV *, const SCEV *> RewriteMap;
1346+
SmallDenseSet<std::pair<const SCEV *, const SCEV *>> NotEqual;
13421347
bool PreserveNUW = false;
13431348
bool PreserveNSW = false;
13441349
ScalarEvolution &SE;
@@ -1525,8 +1530,10 @@ class ScalarEvolution {
15251530
/// Return the Value set from which the SCEV expr is generated.
15261531
ArrayRef<Value *> getSCEVValues(const SCEV *S);
15271532

1528-
/// Private helper method for the getConstantMultiple method.
1529-
APInt getConstantMultipleImpl(const SCEV *S);
1533+
/// Private helper method for the getConstantMultiple method. If \p CtxI is
1534+
/// not nullptr, return a constant multiple valid at \p CtxI.
1535+
APInt getConstantMultipleImpl(const SCEV *S,
1536+
const Instruction *Ctx = nullptr);
15301537

15311538
/// Information about the number of times a particular loop exit may be
15321539
/// reached before exiting the loop.
@@ -2310,10 +2317,6 @@ class ScalarEvolution {
23102317
/// an add rec on said loop.
23112318
void getUsedLoops(const SCEV *S, SmallPtrSetImpl<const Loop *> &LoopsUsed);
23122319

2313-
/// Try to match the pattern generated by getURemExpr(A, B). If successful,
2314-
/// Assign A and B to LHS and RHS, respectively.
2315-
LLVM_ABI bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS);
2316-
23172320
/// Look for a SCEV expression with type `SCEVType` and operands `Ops` in
23182321
/// `UniqueSCEVs`. Return if found, else nullptr.
23192322
SCEV *findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef<const SCEV *> Ops);

0 commit comments

Comments
 (0)