Skip to content

Commit 7344f3d

Browse files
[LoopVectorize] Add strict in-order reduction support for fixed-width vectorization
Previously we could only vectorize FP reductions if fast math was enabled, as this allows us to reorder FP operations. However, it may still be beneficial to vectorize the loop by moving the reduction inside the vectorized loop and making sure that the scalar reduction value is an input to the horizontal reduction, e.g.:

  %phi = phi float [ 0.0, %entry ], [ %reduction, %vector_body ]
  %load = load <8 x float>
  %reduction = call float @llvm.vector.reduce.fadd.v8f32(float %phi, <8 x float> %load)

This patch adds a new flag (IsOrdered) to RecurrenceDescriptor and makes use of the changes added by D75069 as much as possible, which already teaches the vectorizer about in-loop reductions. For now, in-order reduction support is off by default and controlled with the `-enable-strict-reductions` flag.

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D98435
1 parent 88c2454 commit 7344f3d

File tree

6 files changed

+368
-10
lines changed

6 files changed

+368
-10
lines changed

llvm/include/llvm/Analysis/IVDescriptors.h

+11-2
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,11 @@ class RecurrenceDescriptor {
6969

7070
RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurKind K,
7171
FastMathFlags FMF, Instruction *ExactFP, Type *RT,
72-
bool Signed, SmallPtrSetImpl<Instruction *> &CI)
72+
bool Signed, bool Ordered,
73+
SmallPtrSetImpl<Instruction *> &CI)
7374
: StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF),
74-
ExactFPMathInst(ExactFP), RecurrenceType(RT), IsSigned(Signed) {
75+
ExactFPMathInst(ExactFP), RecurrenceType(RT), IsSigned(Signed),
76+
IsOrdered(Ordered) {
7577
CastInsts.insert(CI.begin(), CI.end());
7678
}
7779

@@ -228,6 +230,9 @@ class RecurrenceDescriptor {
228230
/// Returns true if all source operands of the recurrence are SExtInsts.
229231
bool isSigned() const { return IsSigned; }
230232

233+
/// Returns true if this recurrence can be treated as an ordered (in-order) FP reduction.
234+
bool isOrdered() const { return IsOrdered; }
235+
231236
/// Attempts to find a chain of operations from Phi to LoopExitInst that can
232237
/// be treated as a set of reductions instructions for in-loop reductions.
233238
SmallVector<Instruction *, 4> getReductionOpChain(PHINode *Phi,
@@ -250,6 +255,10 @@ class RecurrenceDescriptor {
250255
Type *RecurrenceType = nullptr;
251256
// True if all source operands of the recurrence are SExtInsts.
252257
bool IsSigned = false;
258+
// True if this recurrence can be treated as an in-order reduction.
259+
// Currently only a non-reassociative FAdd can be considered in-order,
260+
// if it is also the only FAdd in the PHI's use chain.
261+
bool IsOrdered = false;
253262
// Instructions used for type-promoting the recurrence.
254263
SmallPtrSet<Instruction *, 8> CastInsts;
255264
};

llvm/include/llvm/Transforms/Utils/LoopUtils.h

+5
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,11 @@ Value *createSimpleTargetReduction(IRBuilderBase &B,
389389
Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
390390
RecurrenceDescriptor &Desc, Value *Src);
391391

392+
/// Create an ordered reduction intrinsic using the given recurrence
393+
/// descriptor \p Desc.
394+
Value *createOrderedReduction(IRBuilderBase &B, RecurrenceDescriptor &Desc,
395+
Value *Src, Value *Start);
396+
392397
/// Get the intersection (logical and) of all of the potential IR flags
393398
/// of each scalar operation (VL) that will be converted into a vector (I).
394399
/// If OpValue is non-null, we only consider operations similar to OpValue

llvm/lib/Analysis/IVDescriptors.cpp

+30-1
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,32 @@ static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
189189
}
190190
}
191191

192+
// Check if a given Phi node can be recognized as an ordered reduction for
193+
// vectorizing floating point operations without unsafe math.
194+
static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
195+
Instruction *Exit, PHINode *Phi) {
196+
// Currently only FAdd is supported
197+
if (Kind != RecurKind::FAdd)
198+
return false;
199+
200+
bool IsOrdered =
201+
Exit->getOpcode() == Instruction::FAdd && Exit == ExactFPMathInst;
202+
203+
// The only pattern accepted is the one in which the reduction PHI
204+
// is used as one of the operands of the exit instruction
205+
auto *LHS = Exit->getOperand(0);
206+
auto *RHS = Exit->getOperand(1);
207+
IsOrdered &= ((LHS == Phi) || (RHS == Phi));
208+
209+
if (!IsOrdered)
210+
return false;
211+
212+
LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
213+
<< ", ExitInst: " << *Exit << "\n");
214+
215+
return true;
216+
}
217+
192218
bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
193219
Loop *TheLoop, FastMathFlags FuncFMF,
194220
RecurrenceDescriptor &RedDes,
@@ -416,6 +442,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
416442
if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
417443
return false;
418444

445+
const bool IsOrdered = checkOrderedReduction(
446+
Kind, ReduxDesc.getExactFPMathInst(), ExitInstruction, Phi);
447+
419448
if (Start != Phi) {
420449
// If the starting value is not the same as the phi node, we speculatively
421450
// looked through an 'and' instruction when evaluating a potential
@@ -470,7 +499,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
470499
// Save the description of this reduction variable.
471500
RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF,
472501
ReduxDesc.getExactFPMathInst(), RecurrenceType,
473-
IsSigned, CastInsts);
502+
IsSigned, IsOrdered, CastInsts);
474503
RedDes = RD;
475504

476505
return true;

llvm/lib/Transforms/Utils/LoopUtils.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -1070,6 +1070,17 @@ Value *llvm::createTargetReduction(IRBuilderBase &B,
10701070
return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind());
10711071
}
10721072

1073+
Value *llvm::createOrderedReduction(IRBuilderBase &B,
1074+
RecurrenceDescriptor &Desc, Value *Src,
1075+
Value *Start) {
1076+
auto Kind = Desc.getRecurrenceKind();
1077+
assert(Kind == RecurKind::FAdd && "Unexpected reduction kind");
1078+
assert(Src->getType()->isVectorTy() && "Expected a vector type");
1079+
assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
1080+
1081+
return B.CreateFAddReduce(Start, Src);
1082+
}
1083+
10731084
void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
10741085
auto *VecOp = dyn_cast<Instruction>(I);
10751086
if (!VecOp)

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+31-7
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,11 @@ static cl::opt<bool>
330330
cl::desc("Prefer in-loop vector reductions, "
331331
"overriding the targets preference."));
332332

333+
cl::opt<bool> EnableStrictReductions(
334+
"enable-strict-reductions", cl::init(false), cl::Hidden,
335+
cl::desc("Enable the vectorisation of loops with in-order (strict) "
336+
"FP reductions"));
337+
333338
static cl::opt<bool> PreferPredicatedReductionSelect(
334339
"prefer-predicated-reduction-select", cl::init(false), cl::Hidden,
335340
cl::desc(
@@ -4259,6 +4264,10 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi,
42594264
LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
42604265
}
42614266

4267+
static bool useOrderedReductions(RecurrenceDescriptor &RdxDesc) {
4268+
return EnableStrictReductions && RdxDesc.isOrdered();
4269+
}
4270+
42624271
void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) {
42634272
// Get its reduction variable descriptor.
42644273
assert(Legal->isReductionVariable(Phi) &&
@@ -4288,6 +4297,9 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) {
42884297
for (unsigned Part = 0; Part < UF; ++Part) {
42894298
Value *VecRdxPhi = State.get(State.Plan->getVPValue(Phi), Part);
42904299
Value *Val = State.get(State.Plan->getVPValue(LoopVal), Part);
4300+
if (IsInLoopReductionPhi && useOrderedReductions(RdxDesc) &&
4301+
State.VF.isVector())
4302+
Val = State.get(State.Plan->getVPValue(LoopVal), UF - 1);
42914303
cast<PHINode>(VecRdxPhi)
42924304
->addIncoming(Val, LI->getLoopFor(LoopVectorBody)->getLoopLatch());
42934305
}
@@ -4379,7 +4391,9 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) {
43794391
// terminate on this line. This is the easiest way to ensure we don't
43804392
// accidentally cause an extra step back into the loop while debugging.
43814393
setDebugLocFromInst(Builder, LoopMiddleBlock->getTerminator());
4382-
{
4394+
if (IsInLoopReductionPhi && useOrderedReductions(RdxDesc))
4395+
ReducedPartRdx = State.get(LoopExitInstDef, UF - 1);
4396+
else {
43834397
// Floating-point operations should have some FMF to enable the reduction.
43844398
IRBuilderBase::FastMathFlagGuard FMFG(Builder);
43854399
Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
@@ -6078,7 +6092,7 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() {
60786092
if (!Legal->isReductionVariable(PN))
60796093
continue;
60806094
RecurrenceDescriptor RdxDesc = Legal->getReductionVars()[PN];
6081-
if (PreferInLoopReductions ||
6095+
if (PreferInLoopReductions || useOrderedReductions(RdxDesc) ||
60826096
TTI.preferInLoopReduction(RdxDesc.getOpcode(),
60836097
RdxDesc.getRecurrenceType(),
60846098
TargetTransformInfo::ReductionFlags()))
@@ -7657,7 +7671,7 @@ void LoopVectorizationCostModel::collectInLoopReductions() {
76577671
// If the target would prefer this reduction to happen "in-loop", then we
76587672
// want to record it as such.
76597673
unsigned Opcode = RdxDesc.getOpcode();
7660-
if (!PreferInLoopReductions &&
7674+
if (!PreferInLoopReductions && !useOrderedReductions(RdxDesc) &&
76617675
!TTI.preferInLoopReduction(Opcode, Phi->getType(),
76627676
TargetTransformInfo::ReductionFlags()))
76637677
continue;
@@ -9200,8 +9214,10 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
92009214

92019215
void VPReductionRecipe::execute(VPTransformState &State) {
92029216
assert(!State.Instance && "Reduction being replicated.");
9217+
Value *PrevInChain = State.get(getChainOp(), 0);
92039218
for (unsigned Part = 0; Part < State.UF; ++Part) {
92049219
RecurKind Kind = RdxDesc->getRecurrenceKind();
9220+
bool IsOrdered = useOrderedReductions(*RdxDesc);
92059221
Value *NewVecOp = State.get(getVecOp(), Part);
92069222
if (VPValue *Cond = getCondOp()) {
92079223
Value *NewCond = State.get(Cond, Part);
@@ -9213,15 +9229,23 @@ void VPReductionRecipe::execute(VPTransformState &State) {
92139229
Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, IdenVec);
92149230
NewVecOp = Select;
92159231
}
9216-
Value *NewRed =
9217-
createTargetReduction(State.Builder, TTI, *RdxDesc, NewVecOp);
9218-
Value *PrevInChain = State.get(getChainOp(), Part);
9232+
Value *NewRed;
92199233
Value *NextInChain;
9234+
if (IsOrdered) {
9235+
NewRed = createOrderedReduction(State.Builder, *RdxDesc, NewVecOp,
9236+
PrevInChain);
9237+
PrevInChain = NewRed;
9238+
} else {
9239+
PrevInChain = State.get(getChainOp(), Part);
9240+
NewRed = createTargetReduction(State.Builder, TTI, *RdxDesc, NewVecOp);
9241+
}
92209242
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
92219243
NextInChain =
92229244
createMinMaxOp(State.Builder, RdxDesc->getRecurrenceKind(),
92239245
NewRed, PrevInChain);
9224-
} else {
9246+
} else if (IsOrdered)
9247+
NextInChain = NewRed;
9248+
else {
92259249
NextInChain = State.Builder.CreateBinOp(
92269250
(Instruction::BinaryOps)getUnderlyingInstr()->getOpcode(), NewRed,
92279251
PrevInChain);

0 commit comments

Comments
 (0)