
Commit f1b47ad

Revert "[Analysis]Add getPointersDiff function to improve compile time."
This reverts commit 065a14a to investigate and fix a crash in the SLP vectorizer.
1 parent 2d72b67 commit f1b47ad

File tree

4 files changed, +140 -161 lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 0 additions & 9 deletions
@@ -679,15 +679,6 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
                      const ValueToValueMap &StridesMap = ValueToValueMap(),
                      bool Assume = false, bool ShouldCheckWrap = true);
 
-/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
-/// compatible and it is possible to calculate the distance between them. This
-/// is a simple API that does not depend on the analysis pass.
-/// \param StrictCheck Ensure that the calculated distance matches the
-/// type-based one after all the bitcasts removal in the provided pointers.
-Optional<int> getPointersDiff(Value *PtrA, Value *PtrB, const DataLayout &DL,
-                              ScalarEvolution &SE, bool StrictCheck = false,
-                              bool CheckType = true);
-
 /// Attempt to sort the pointers in \p VL and return the sorted indices
 /// in \p SortedIndices, if reordering is required.
 ///
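
Editor's note: for orientation, here is a minimal sketch of how the now-removed helper was typically consumed before this revert. The wrapper function and its name are hypothetical; only the getPointersDiff signature above and the StrictCheck/"distance of one element" idiom (used by the reverted isConsecutiveAccess below) come from this commit.

// Hypothetical caller of the removed API (pre-revert code only).
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// True when PtrB points exactly one element past PtrA. StrictCheck rejects
// byte distances that are not a whole multiple of the element store size.
static bool areElementsAdjacent(Value *PtrA, Value *PtrB, const DataLayout &DL,
                                ScalarEvolution &SE) {
  Optional<int> Diff =
      getPointersDiff(PtrA, PtrB, DL, SE, /*StrictCheck=*/true);
  return Diff && *Diff == 1;
}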

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 107 additions & 91 deletions
@@ -1124,123 +1124,139 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
   return Stride;
 }
 
-Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
-                                    const DataLayout &DL, ScalarEvolution &SE,
-                                    bool StrictCheck, bool CheckType) {
-  assert(PtrA && PtrB && "Expected non-nullptr pointers.");
-  // Make sure that A and B are different pointers.
-  if (PtrA == PtrB)
-    return 0;
-
-  // Make sure that PtrA and PtrB have the same type if required
-  if (CheckType && PtrA->getType() != PtrB->getType())
-    return None;
-
-  unsigned ASA = PtrA->getType()->getPointerAddressSpace();
-  unsigned ASB = PtrB->getType()->getPointerAddressSpace();
-
-  // Check that the address spaces match.
-  if (ASA != ASB)
-    return None;
-  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
-
-  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
-  Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
-  Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
-
-  int Val;
-  if (PtrA1 == PtrB1) {
-    // Retrieve the address space again as pointer stripping now tracks through
-    // `addrspacecast`.
-    ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
-    ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
-    // Check that the address spaces match and that the pointers are valid.
-    if (ASA != ASB)
-      return None;
-
-    IdxWidth = DL.getIndexSizeInBits(ASA);
-    OffsetA = OffsetA.sextOrTrunc(IdxWidth);
-    OffsetB = OffsetB.sextOrTrunc(IdxWidth);
-
-    OffsetB -= OffsetA;
-    Val = OffsetB.getSExtValue();
-  } else {
-    // Otherwise compute the distance with SCEV between the base pointers.
-    const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
-    const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
-    const auto *Diff =
-        dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));
-    if (!Diff)
-      return None;
-    Val = Diff->getAPInt().getSExtValue();
-  }
-  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
-  int Size = DL.getTypeStoreSize(Ty);
-  int Dist = Val / Size;
-
-  // Ensure that the calculated distance matches the type-based one after all
-  // the bitcasts removal in the provided pointers.
-  if (!StrictCheck || Dist * Size == Val)
-    return Dist;
-  return None;
-}
-
 bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
                            ScalarEvolution &SE,
                            SmallVectorImpl<unsigned> &SortedIndices) {
   assert(llvm::all_of(
              VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
          "Expected list of pointer operands.");
+  SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;
+  OffValPairs.reserve(VL.size());
+
   // Walk over the pointers, and map each of them to an offset relative to
   // first pointer in the array.
   Value *Ptr0 = VL[0];
+  const SCEV *Scev0 = SE.getSCEV(Ptr0);
+  Value *Obj0 = getUnderlyingObject(Ptr0);
+
+  llvm::SmallSet<int64_t, 4> Offsets;
+  for (auto *Ptr : VL) {
+    // TODO: Outline this code as a special, more time consuming, version of
+    // computeConstantDifference() function.
+    if (Ptr->getType()->getPointerAddressSpace() !=
+        Ptr0->getType()->getPointerAddressSpace())
+      return false;
+    // If a pointer refers to a different underlying object, bail - the
+    // pointers are by definition incomparable.
+    Value *CurrObj = getUnderlyingObject(Ptr);
+    if (CurrObj != Obj0)
+      return false;
 
-  using DistOrdPair = std::pair<int64_t, int>;
-  auto Compare = [](const DistOrdPair &L, const DistOrdPair &R) {
-    return L.first < R.first;
-  };
-  std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
-  Offsets.emplace(0, 0);
-  int Cnt = 1;
-  bool IsConsecutive = true;
-  for (auto *Ptr : VL.drop_front()) {
-    Optional<int> Diff = getPointersDiff(Ptr0, Ptr, DL, SE);
+    const SCEV *Scev = SE.getSCEV(Ptr);
+    const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0));
+    // The pointers may not have a constant offset from each other, or SCEV
+    // may just not be smart enough to figure out they do. Regardless,
+    // there's nothing we can do.
     if (!Diff)
       return false;
 
     // Check if the pointer with the same offset is found.
-    int64_t Offset = *Diff;
-    auto Res = Offsets.emplace(Offset, Cnt);
-    if (!Res.second)
+    int64_t Offset = Diff->getAPInt().getSExtValue();
+    if (!Offsets.insert(Offset).second)
       return false;
-    // Consecutive order if the inserted element is the last one.
-    IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
-    ++Cnt;
+    OffValPairs.emplace_back(Offset, Ptr);
   }
   SortedIndices.clear();
-  if (!IsConsecutive) {
-    // Fill SortedIndices array only if it is non-consecutive.
-    SortedIndices.resize(VL.size());
-    Cnt = 0;
-    for (const std::pair<int64_t, int> &Pair : Offsets) {
-      IsConsecutive = IsConsecutive && Cnt == Pair.second;
-      SortedIndices[Cnt] = Pair.second;
-      ++Cnt;
-    }
-  }
+  SortedIndices.resize(VL.size());
+  std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
+
+  // Sort the memory accesses and keep the order of their uses in UseOrder.
+  llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) {
+    return OffValPairs[Left].first < OffValPairs[Right].first;
+  });
+
+  // Check if the order is consecutive already.
+  if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
+        return I == SortedIndices[I];
+      }))
+    SortedIndices.clear();
+
   return true;
 }
 
+/// Take the address space operand from the Load/Store instruction.
+/// Returns -1 if this is not a valid Load/Store instruction.
+static unsigned getAddressSpaceOperand(Value *I) {
+  if (LoadInst *L = dyn_cast<LoadInst>(I))
+    return L->getPointerAddressSpace();
+  if (StoreInst *S = dyn_cast<StoreInst>(I))
+    return S->getPointerAddressSpace();
+  return -1;
+}
+
 /// Returns true if the memory operations \p A and \p B are consecutive.
 bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                                ScalarEvolution &SE, bool CheckType) {
   Value *PtrA = getLoadStorePointerOperand(A);
   Value *PtrB = getLoadStorePointerOperand(B);
-  if (!PtrA || !PtrB)
+  unsigned ASA = getAddressSpaceOperand(A);
+  unsigned ASB = getAddressSpaceOperand(B);
+
+  // Check that the address spaces match and that the pointers are valid.
+  if (!PtrA || !PtrB || (ASA != ASB))
+    return false;
+
+  // Make sure that A and B are different pointers.
+  if (PtrA == PtrB)
+    return false;
+
+  // Make sure that A and B have the same type if required.
+  if (CheckType && PtrA->getType() != PtrB->getType())
     return false;
-  Optional<int> Diff =
-      getPointersDiff(PtrA, PtrB, DL, SE, /*StrictCheck=*/true, CheckType);
-  return Diff && *Diff == 1;
+
+  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
+  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+
+  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
+  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
+  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
+
+  // Retrieve the address space again as pointer stripping now tracks through
+  // `addrspacecast`.
+  ASA = cast<PointerType>(PtrA->getType())->getAddressSpace();
+  ASB = cast<PointerType>(PtrB->getType())->getAddressSpace();
+  // Check that the address spaces match and that the pointers are valid.
+  if (ASA != ASB)
+    return false;
+
+  IdxWidth = DL.getIndexSizeInBits(ASA);
+  OffsetA = OffsetA.sextOrTrunc(IdxWidth);
+  OffsetB = OffsetB.sextOrTrunc(IdxWidth);
+
+  APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
+
+  // OffsetDelta = OffsetB - OffsetA;
+  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
+  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
+  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
+  const APInt &OffsetDelta = cast<SCEVConstant>(OffsetDeltaSCEV)->getAPInt();
+
+  // Check if they are based on the same pointer. That makes the offsets
+  // sufficient.
+  if (PtrA == PtrB)
+    return OffsetDelta == Size;
+
+  // Compute the necessary base pointer delta to have the necessary final delta
+  // equal to the size.
+  // BaseDelta = Size - OffsetDelta;
+  const SCEV *SizeSCEV = SE.getConstant(Size);
+  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
+
+  // Otherwise compute the distance with SCEV between the base pointers.
+  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
+  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
+  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
+  return X == PtrSCEVB;
 }
 
 MemoryDepChecker::VectorizationSafetyStatus
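
Editor's note: to make the restored contract concrete, here is a small, hedged usage sketch. The helper below and the IR in its comment are hypothetical; the call itself mirrors the SLPVectorizer call sites later in this commit. isConsecutiveAccess(A, B, DL, SE) reports true when B's pointer equals A's pointer plus the store size of the accessed type, proven either from the accumulated in-bounds constant offsets or, failing that, via the SCEV identity PtrSCEVB == PtrSCEVA + (Size - OffsetDelta) shown above.

// Hypothetical helper: counts adjacent pairs of consecutive loads in a block.
// For, e.g.,
//   %a = load i32, i32* %p
//   %q = getelementptr inbounds i32, i32* %p, i64 1
//   %b = load i32, i32* %q
// the pair (%a, %b) is counted: both pointers strip to %p, the offset delta
// is 4 bytes, and 4 equals the store size of i32.
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static unsigned countConsecutiveLoadPairs(BasicBlock &BB, const DataLayout &DL,
                                          ScalarEvolution &SE) {
  unsigned NumPairs = 0;
  LoadInst *Prev = nullptr;
  for (Instruction &I : BB) {
    auto *LI = dyn_cast<LoadInst>(&I);
    if (!LI)
      continue;
    if (Prev && isConsecutiveAccess(Prev, LI, DL, SE))
      ++NumPairs;
    Prev = LI;
  }
  return NumPairs;
}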

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 23 additions & 54 deletions
@@ -941,16 +941,10 @@ class BoUpSLP {
                       ScalarEvolution &SE) {
     auto *LI1 = dyn_cast<LoadInst>(V1);
     auto *LI2 = dyn_cast<LoadInst>(V2);
-    if (LI1 && LI2) {
-      if (LI1->getParent() != LI2->getParent())
-        return VLOperands::ScoreFail;
-
-      Optional<int> Dist =
-          getPointersDiff(LI1->getPointerOperand(), LI2->getPointerOperand(),
-                          DL, SE, /*StrictCheck=*/true);
-      return (Dist && *Dist == 1) ? VLOperands::ScoreConsecutiveLoads
-                                  : VLOperands::ScoreFail;
-    }
+    if (LI1 && LI2)
+      return isConsecutiveAccess(LI1, LI2, DL, SE)
+                 ? VLOperands::ScoreConsecutiveLoads
+                 : VLOperands::ScoreFail;
 
     auto *C1 = dyn_cast<Constant>(V1);
     auto *C2 = dyn_cast<Constant>(V2);
@@ -2877,9 +2871,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           Ptr0 = PointerOps[CurrentOrder.front()];
           PtrN = PointerOps[CurrentOrder.back()];
         }
-        Optional<int> Diff = getPointersDiff(Ptr0, PtrN, *DL, *SE);
+        const SCEV *Scev0 = SE->getSCEV(Ptr0);
+        const SCEV *ScevN = SE->getSCEV(PtrN);
+        const auto *Diff =
+            dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));
+        uint64_t Size = DL->getTypeAllocSize(ScalarTy);
         // Check that the sorted loads are consecutive.
-        if (static_cast<unsigned>(*Diff) == VL.size() - 1) {
+        if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) {
           if (CurrentOrder.empty()) {
             // Original loads are consecutive and does not require reordering.
             ++NumOpsWantToKeepOriginalOrder;
@@ -3152,9 +3150,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           Ptr0 = PointerOps[CurrentOrder.front()];
           PtrN = PointerOps[CurrentOrder.back()];
         }
-        Optional<int> Dist = getPointersDiff(Ptr0, PtrN, *DL, *SE);
+        const SCEV *Scev0 = SE->getSCEV(Ptr0);
+        const SCEV *ScevN = SE->getSCEV(PtrN);
+        const auto *Diff =
+            dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));
+        uint64_t Size = DL->getTypeAllocSize(ScalarTy);
         // Check that the sorted pointer operands are consecutive.
-        if (static_cast<unsigned>(*Dist) == VL.size() - 1) {
+        if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) {
           if (CurrentOrder.empty()) {
             // Original stores are consecutive and does not require reordering.
             ++NumOpsWantToKeepOriginalOrder;
@@ -6105,41 +6107,20 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
 
   int E = Stores.size();
   SmallBitVector Tails(E, false);
+  SmallVector<int, 16> ConsecutiveChain(E, E + 1);
   int MaxIter = MaxStoreLookup.getValue();
-  SmallVector<std::pair<int, int>, 16> ConsecutiveChain(
-      E, std::make_pair(E, INT_MAX));
-  SmallVector<SmallBitVector, 4> CheckedPairs(E, SmallBitVector(E, false));
   int IterCnt;
   auto &&FindConsecutiveAccess = [this, &Stores, &Tails, &IterCnt, MaxIter,
-                                  &CheckedPairs,
                                   &ConsecutiveChain](int K, int Idx) {
     if (IterCnt >= MaxIter)
       return true;
-    if (CheckedPairs[Idx].test(K))
-      return ConsecutiveChain[K].second == 1 &&
-             ConsecutiveChain[K].first == Idx;
     ++IterCnt;
-    CheckedPairs[Idx].set(K);
-    CheckedPairs[K].set(Idx);
-    Optional<int> Diff = getPointersDiff(Stores[K]->getPointerOperand(),
-                                         Stores[Idx]->getPointerOperand(), *DL,
-                                         *SE, /*StrictCheck=*/true);
-    if (!Diff || *Diff == 0)
-      return false;
-    int Val = *Diff;
-    if (Val < 0) {
-      if (ConsecutiveChain[Idx].second > -Val) {
-        Tails.set(K);
-        ConsecutiveChain[Idx] = std::make_pair(K, -Val);
-      }
-      return false;
-    }
-    if (ConsecutiveChain[K].second <= Val)
+    if (!isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE))
       return false;
 
     Tails.set(Idx);
-    ConsecutiveChain[K] = std::make_pair(Idx, Val);
-    return Val == 1;
+    ConsecutiveChain[K] = Idx;
+    return true;
   };
   // Do a quadratic search on all of the given stores in reverse order and find
   // all of the pairs of stores that follow each other.
@@ -6159,28 +6140,16 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
   // For stores that start but don't end a link in the chain:
   for (int Cnt = E; Cnt > 0; --Cnt) {
     int I = Cnt - 1;
-    if (ConsecutiveChain[I].first == E || Tails.test(I))
+    if (ConsecutiveChain[I] == E + 1 || Tails.test(I))
       continue;
     // We found a store instr that starts a chain. Now follow the chain and try
     // to vectorize it.
    BoUpSLP::ValueList Operands;
     // Collect the chain into a list.
-    while (I != E && !VectorizedStores.count(Stores[I])) {
+    while (I != E + 1 && !VectorizedStores.count(Stores[I])) {
       Operands.push_back(Stores[I]);
-      Tails.set(I);
-      if (ConsecutiveChain[I].second != 1) {
-        // Mark the new end in the chain and go back, if required. It might be
-        // required if the original stores comes in reversed order, for example.
-        if (ConsecutiveChain[I].first != E &&
-            Tails.test(ConsecutiveChain[I].first)) {
-          Tails.reset(ConsecutiveChain[I].first);
-          if (Cnt < ConsecutiveChain[I].first + 2)
-            Cnt = ConsecutiveChain[I].first + 2;
-        }
-        break;
-      }
       // Move to the next value in the chain.
-      I = ConsecutiveChain[I].first;
+      I = ConsecutiveChain[I];
     }
 
     unsigned MaxVecRegSize = R.getMaxVecRegSize();
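
Editor's note: as a side note on the restored bookkeeping in vectorizeStores, ConsecutiveChain is again a plain index vector where ConsecutiveChain[K] holds the index of the store that directly follows Stores[K] in memory and E + 1 is the "no successor" sentinel; chains are then walked by repeated indexing. A standalone toy model of that walk (made-up indices, std::vector instead of SmallVector):

// Toy model of the chain-following loop restored above; the store indices
// below are invented for illustration.
#include <cstdio>
#include <vector>

int main() {
  const int E = 4;                         // number of stores
  std::vector<int> ConsecutiveChain(E, E + 1);
  // Pretend isConsecutiveAccess found that store 2 is followed by store 0,
  // and store 0 is followed by store 3; store 1 has no known successor.
  ConsecutiveChain[2] = 0;
  ConsecutiveChain[0] = 3;

  // Walk the chain that starts at store 2, as the restored "Collect the
  // chain into a list" loop does (minus the vectorized-store check):
  // prints 2, 0, 3 and stops at the E + 1 sentinel.
  for (int I = 2; I != E + 1; I = ConsecutiveChain[I])
    std::printf("store #%d\n", I);
  return 0;
}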
