@@ -641,10 +641,6 @@ class InnerLoopVectorizer {
641641 // / the block that was created for it.
642642 void sinkScalarOperands (Instruction *PredInst);
643643
644- // / Shrinks vector element sizes to the smallest bitwidth they can be legally
645- // / represented as.
646- void truncateToMinimalBitwidths (VPTransformState &State);
647-
648644 // / Returns (and creates if needed) the trip count of the widened loop.
649645 Value *getOrCreateVectorTripCount (BasicBlock *InsertBlock);
650646
@@ -3429,151 +3425,8 @@ static Type *largestIntegerVectorType(Type *T1, Type *T2) {
34293425 return I1->getBitWidth () > I2->getBitWidth () ? T1 : T2;
34303426}
34313427
3432- void InnerLoopVectorizer::truncateToMinimalBitwidths (VPTransformState &State) {
3433- // For every instruction `I` in MinBWs, truncate the operands, create a
3434- // truncated version of `I` and reextend its result. InstCombine runs
3435- // later and will remove any ext/trunc pairs.
3436- SmallPtrSet<Value *, 4 > Erased;
3437- for (const auto &KV : Cost->getMinimalBitwidths ()) {
3438- // If the value wasn't vectorized, we must maintain the original scalar
3439- // type. The absence of the value from State indicates that it
3440- // wasn't vectorized.
3441- // FIXME: Should not rely on getVPValue at this point.
3442- VPValue *Def = State.Plan ->getVPValue (KV.first , true );
3443- if (!State.hasAnyVectorValue (Def))
3444- continue ;
3445- // If the instruction is defined outside the loop, only update the first
3446- // part; the first part will be re-used for all other parts.
3447- unsigned UFToUse = OrigLoop->contains (KV.first ) ? UF : 1 ;
3448- for (unsigned Part = 0 ; Part < UFToUse; ++Part) {
3449- Value *I = State.get (Def, Part);
3450- if (Erased.count (I) || I->use_empty () || !isa<Instruction>(I))
3451- continue ;
3452- Type *OriginalTy = I->getType ();
3453- Type *ScalarTruncatedTy =
3454- IntegerType::get (OriginalTy->getContext (), KV.second );
3455- auto *TruncatedTy = VectorType::get (
3456- ScalarTruncatedTy, cast<VectorType>(OriginalTy)->getElementCount ());
3457- if (TruncatedTy == OriginalTy)
3458- continue ;
3459-
3460- IRBuilder<> B (cast<Instruction>(I));
3461- auto ShrinkOperand = [&](Value *V) -> Value * {
3462- if (auto *ZI = dyn_cast<ZExtInst>(V))
3463- if (ZI->getSrcTy () == TruncatedTy)
3464- return ZI->getOperand (0 );
3465- return B.CreateZExtOrTrunc (V, TruncatedTy);
3466- };
3467-
3468- // The actual instruction modification depends on the instruction type,
3469- // unfortunately.
3470- Value *NewI = nullptr ;
3471- if (auto *BO = dyn_cast<BinaryOperator>(I)) {
3472- Value *Op0 = ShrinkOperand (BO->getOperand (0 ));
3473- Value *Op1 = ShrinkOperand (BO->getOperand (1 ));
3474- NewI = B.CreateBinOp (BO->getOpcode (), Op0, Op1);
3475-
3476- // Any wrapping introduced by shrinking this operation shouldn't be
3477- // considered undefined behavior. So, we can't unconditionally copy
3478- // arithmetic wrapping flags to NewI.
3479- cast<BinaryOperator>(NewI)->copyIRFlags (I, /* IncludeWrapFlags=*/ false );
3480- } else if (auto *CI = dyn_cast<ICmpInst>(I)) {
3481- Value *Op0 = ShrinkOperand (BO->getOperand (0 ));
3482- Value *Op1 = ShrinkOperand (BO->getOperand (1 ));
3483- NewI = B.CreateICmp (CI->getPredicate (), Op0, Op1);
3484- } else if (auto *SI = dyn_cast<SelectInst>(I)) {
3485- Value *TV = ShrinkOperand (SI->getTrueValue ());
3486- Value *FV = ShrinkOperand (SI->getFalseValue ());
3487- NewI = B.CreateSelect (SI->getCondition (), TV, FV);
3488- } else if (auto *CI = dyn_cast<CastInst>(I)) {
3489- switch (CI->getOpcode ()) {
3490- default :
3491- llvm_unreachable (" Unhandled cast!" );
3492- case Instruction::Trunc:
3493- NewI = ShrinkOperand (CI->getOperand (0 ));
3494- break ;
3495- case Instruction::SExt:
3496- NewI = B.CreateSExtOrTrunc (
3497- CI->getOperand (0 ),
3498- smallestIntegerVectorType (OriginalTy, TruncatedTy));
3499- break ;
3500- case Instruction::ZExt:
3501- NewI = B.CreateZExtOrTrunc (
3502- CI->getOperand (0 ),
3503- smallestIntegerVectorType (OriginalTy, TruncatedTy));
3504- break ;
3505- }
3506- } else if (auto *SI = dyn_cast<ShuffleVectorInst>(I)) {
3507- auto Elements0 =
3508- cast<VectorType>(SI->getOperand (0 )->getType ())->getElementCount ();
3509- auto *O0 = B.CreateZExtOrTrunc (
3510- SI->getOperand (0 ), VectorType::get (ScalarTruncatedTy, Elements0));
3511- auto Elements1 =
3512- cast<VectorType>(SI->getOperand (1 )->getType ())->getElementCount ();
3513- auto *O1 = B.CreateZExtOrTrunc (
3514- SI->getOperand (1 ), VectorType::get (ScalarTruncatedTy, Elements1));
3515-
3516- NewI = B.CreateShuffleVector (O0, O1, SI->getShuffleMask ());
3517- } else if (isa<LoadInst>(I) || isa<PHINode>(I)) {
3518- // Don't do anything with the operands, just extend the result.
3519- continue ;
3520- } else if (auto *IE = dyn_cast<InsertElementInst>(I)) {
3521- auto Elements =
3522- cast<VectorType>(IE->getOperand (0 )->getType ())->getElementCount ();
3523- auto *O0 = B.CreateZExtOrTrunc (
3524- IE->getOperand (0 ), VectorType::get (ScalarTruncatedTy, Elements));
3525- auto *O1 = B.CreateZExtOrTrunc (IE->getOperand (1 ), ScalarTruncatedTy);
3526- NewI = B.CreateInsertElement (O0, O1, IE->getOperand (2 ));
3527- } else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
3528- auto Elements =
3529- cast<VectorType>(EE->getOperand (0 )->getType ())->getElementCount ();
3530- auto *O0 = B.CreateZExtOrTrunc (
3531- EE->getOperand (0 ), VectorType::get (ScalarTruncatedTy, Elements));
3532- NewI = B.CreateExtractElement (O0, EE->getOperand (2 ));
3533- } else {
3534- // If we don't know what to do, be conservative and don't do anything.
3535- continue ;
3536- }
3537-
3538- // Lastly, extend the result.
3539- NewI->takeName (cast<Instruction>(I));
3540- Value *Res = B.CreateZExtOrTrunc (NewI, OriginalTy);
3541- I->replaceAllUsesWith (Res);
3542- cast<Instruction>(I)->eraseFromParent ();
3543- Erased.insert (I);
3544- State.reset (Def, Res, Part);
3545- }
3546- }
3547-
3548- // We'll have created a bunch of ZExts that are now parentless. Clean up.
3549- for (const auto &KV : Cost->getMinimalBitwidths ()) {
3550- // If the value wasn't vectorized, we must maintain the original scalar
3551- // type. The absence of the value from State indicates that it
3552- // wasn't vectorized.
3553- // FIXME: Should not rely on getVPValue at this point.
3554- VPValue *Def = State.Plan ->getVPValue (KV.first , true );
3555- if (!State.hasAnyVectorValue (Def))
3556- continue ;
3557- unsigned UFToUse = OrigLoop->contains (KV.first ) ? UF : 1 ;
3558- for (unsigned Part = 0 ; Part < UFToUse; ++Part) {
3559- Value *I = State.get (Def, Part);
3560- ZExtInst *Inst = dyn_cast<ZExtInst>(I);
3561- if (Inst && Inst->use_empty ()) {
3562- Value *NewI = Inst->getOperand (0 );
3563- Inst->eraseFromParent ();
3564- State.reset (Def, NewI, Part);
3565- }
3566- }
3567- }
3568- }
3569-
35703428void InnerLoopVectorizer::fixVectorizedLoop (VPTransformState &State,
35713429 VPlan &Plan) {
3572- // Insert truncates and extends for any truncated instructions as hints to
3573- // InstCombine.
3574- if (VF.isVector ())
3575- truncateToMinimalBitwidths (State);
3576-
35773430 // Fix widened non-induction PHIs by setting up the PHI operands.
35783431 if (EnableVPlanNativePath)
35793432 fixNonInductionPHIs (Plan, State);
@@ -8741,6 +8594,9 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
87418594 VFRange SubRange = {VF, MaxVFTimes2};
87428595 if (auto Plan = tryToBuildVPlanWithVPRecipes (SubRange)) {
87438596 // Now optimize the initial VPlan.
8597+ if (!Plan->hasVF (ElementCount::getFixed (1 )))
8598+ VPlanTransforms::truncateToMinimalBitwidths (
8599+ *Plan, CM.getMinimalBitwidths (), PSE.getSE ()->getContext ());
87448600 VPlanTransforms::optimize (*Plan, *PSE.getSE ());
87458601 assert (VPlanVerifier::verifyPlanIsValid (*Plan) && " VPlan is invalid" );
87468602 VPlans.push_back (std::move (Plan));
0 commit comments