@@ -330,6 +330,11 @@ static cl::opt<bool>
330
330
cl::desc("Prefer in-loop vector reductions, "
331
331
"overriding the targets preference."));
332
332
333
// Hidden command-line flag "enable-strict-reductions"; it defaults to false.
// Per its description string, setting it enables the vectorisation of loops
// that contain in-order (strict) floating-point reductions.
// NOTE(review): unlike the neighbouring options, this one is not declared
// `static` -- presumably so other files can reference it; confirm.
+ cl::opt<bool> EnableStrictReductions(
334
+ "enable-strict-reductions", cl::init(false), cl::Hidden,
335
+ cl::desc("Enable the vectorisation of loops with in-order (strict) "
336
+ "FP reductions"));
337
+
333
338
static cl::opt<bool> PreferPredicatedReductionSelect(
334
339
"prefer-predicated-reduction-select", cl::init(false), cl::Hidden,
335
340
cl::desc(
@@ -4259,6 +4264,10 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi,
4259
4264
LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
4260
4265
}
4261
4266
4267
// Returns true only when the EnableStrictReductions flag is set AND the given
// recurrence descriptor reports itself as ordered through isOrdered().
// When the flag is off this is always false, regardless of the descriptor.
+ static bool useOrderedReductions(RecurrenceDescriptor &RdxDesc) {
4268
+ return EnableStrictReductions && RdxDesc.isOrdered();
4269
+ }
4270
+
4262
4271
void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) {
4263
4272
// Get its reduction variable descriptor.
4264
4273
assert(Legal->isReductionVariable(Phi) &&
@@ -4288,6 +4297,9 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) {
4288
4297
for (unsigned Part = 0; Part < UF; ++Part) {
4289
4298
Value *VecRdxPhi = State.get(State.Plan->getVPValue(Phi), Part);
4290
4299
Value *Val = State.get(State.Plan->getVPValue(LoopVal), Part);
4300
+ if (IsInLoopReductionPhi && useOrderedReductions(RdxDesc) &&
4301
+ State.VF.isVector())
4302
+ Val = State.get(State.Plan->getVPValue(LoopVal), UF - 1);
4291
4303
cast<PHINode>(VecRdxPhi)
4292
4304
->addIncoming(Val, LI->getLoopFor(LoopVectorBody)->getLoopLatch());
4293
4305
}
@@ -4379,7 +4391,9 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) {
4379
4391
// terminate on this line. This is the easiest way to ensure we don't
4380
4392
// accidentally cause an extra step back into the loop while debugging.
4381
4393
setDebugLocFromInst(Builder, LoopMiddleBlock->getTerminator());
4382
- {
4394
+ if (IsInLoopReductionPhi && useOrderedReductions(RdxDesc))
4395
+ ReducedPartRdx = State.get(LoopExitInstDef, UF - 1);
4396
+ else {
4383
4397
// Floating-point operations should have some FMF to enable the reduction.
4384
4398
IRBuilderBase::FastMathFlagGuard FMFG(Builder);
4385
4399
Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
@@ -6078,7 +6092,7 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() {
6078
6092
if (!Legal->isReductionVariable(PN))
6079
6093
continue;
6080
6094
RecurrenceDescriptor RdxDesc = Legal->getReductionVars()[PN];
6081
- if (PreferInLoopReductions ||
6095
+ if (PreferInLoopReductions || useOrderedReductions(RdxDesc) ||
6082
6096
TTI.preferInLoopReduction(RdxDesc.getOpcode(),
6083
6097
RdxDesc.getRecurrenceType(),
6084
6098
TargetTransformInfo::ReductionFlags()))
@@ -7657,7 +7671,7 @@ void LoopVectorizationCostModel::collectInLoopReductions() {
7657
7671
// If the target would prefer this reduction to happen "in-loop", then we
7658
7672
// want to record it as such.
7659
7673
unsigned Opcode = RdxDesc.getOpcode();
7660
- if (!PreferInLoopReductions &&
7674
+ if (!PreferInLoopReductions && !useOrderedReductions(RdxDesc) &&
7661
7675
!TTI.preferInLoopReduction(Opcode, Phi->getType(),
7662
7676
TargetTransformInfo::ReductionFlags()))
7663
7677
continue;
@@ -9200,8 +9214,10 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
9200
9214
9201
9215
void VPReductionRecipe::execute(VPTransformState &State) {
9202
9216
assert(!State.Instance && "Reduction being replicated.");
9217
+ Value *PrevInChain = State.get(getChainOp(), 0);
9203
9218
for (unsigned Part = 0; Part < State.UF; ++Part) {
9204
9219
RecurKind Kind = RdxDesc->getRecurrenceKind();
9220
+ bool IsOrdered = useOrderedReductions(*RdxDesc);
9205
9221
Value *NewVecOp = State.get(getVecOp(), Part);
9206
9222
if (VPValue *Cond = getCondOp()) {
9207
9223
Value *NewCond = State.get(Cond, Part);
@@ -9213,15 +9229,23 @@ void VPReductionRecipe::execute(VPTransformState &State) {
9213
9229
Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, IdenVec);
9214
9230
NewVecOp = Select;
9215
9231
}
9216
- Value *NewRed =
9217
- createTargetReduction(State.Builder, TTI, *RdxDesc, NewVecOp);
9218
- Value *PrevInChain = State.get(getChainOp(), Part);
9232
+ Value *NewRed;
9219
9233
Value *NextInChain;
9234
+ if (IsOrdered) {
9235
+ NewRed = createOrderedReduction(State.Builder, *RdxDesc, NewVecOp,
9236
+ PrevInChain);
9237
+ PrevInChain = NewRed;
9238
+ } else {
9239
+ PrevInChain = State.get(getChainOp(), Part);
9240
+ NewRed = createTargetReduction(State.Builder, TTI, *RdxDesc, NewVecOp);
9241
+ }
9220
9242
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
9221
9243
NextInChain =
9222
9244
createMinMaxOp(State.Builder, RdxDesc->getRecurrenceKind(),
9223
9245
NewRed, PrevInChain);
9224
- } else {
9246
+ } else if (IsOrdered)
9247
+ NextInChain = NewRed;
9248
+ else {
9225
9249
NextInChain = State.Builder.CreateBinOp(
9226
9250
(Instruction::BinaryOps)getUnderlyingInstr()->getOpcode(), NewRed,
9227
9251
PrevInChain);
0 commit comments