@@ -3489,6 +3489,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3489
3489
return lowerRotate (MI);
3490
3490
case G_ISNAN:
3491
3491
return lowerIsNaN (MI);
3492
+ GISEL_VECREDUCE_CASES_NONSEQ
3493
+ return lowerVectorReduction (MI);
3492
3494
}
3493
3495
}
3494
3496
@@ -4637,35 +4639,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4637
4639
return Legalized;
4638
4640
}
4639
4641
4640
- LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions (
4641
- MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4642
- unsigned Opc = MI.getOpcode ();
4643
- assert (Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
4644
- Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
4645
- " Sequential reductions not expected" );
4646
-
4647
- if (TypeIdx != 1 )
4648
- return UnableToLegalize;
4649
-
4650
- // The semantics of the normal non-sequential reductions allow us to freely
4651
- // re-associate the operation.
4652
- Register SrcReg = MI.getOperand (1 ).getReg ();
4653
- LLT SrcTy = MRI.getType (SrcReg);
4654
- Register DstReg = MI.getOperand (0 ).getReg ();
4655
- LLT DstTy = MRI.getType (DstReg);
4656
-
4657
- if (SrcTy.getNumElements () % NarrowTy.getNumElements () != 0 )
4658
- return UnableToLegalize;
4659
-
4660
- SmallVector<Register> SplitSrcs;
4661
- const unsigned NumParts = SrcTy.getNumElements () / NarrowTy.getNumElements ();
4662
- extractParts (SrcReg, NarrowTy, NumParts, SplitSrcs);
4663
- SmallVector<Register> PartialReductions;
4664
- for (unsigned Part = 0 ; Part < NumParts; ++Part) {
4665
- PartialReductions.push_back (
4666
- MIRBuilder.buildInstr (Opc, {DstTy}, {SplitSrcs[Part]}).getReg (0 ));
4667
- }
4668
-
4642
+ static unsigned getScalarOpcForReduction (unsigned Opc) {
4669
4643
unsigned ScalarOpc;
4670
4644
switch (Opc) {
4671
4645
case TargetOpcode::G_VECREDUCE_FADD:
@@ -4708,10 +4682,81 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4708
4682
ScalarOpc = TargetOpcode::G_UMIN;
4709
4683
break ;
4710
4684
default :
4711
- LLVM_DEBUG (dbgs () << " Can't legalize: unknown reduction kind.\n " );
4685
+ llvm_unreachable (" Unhandled reduction" );
4686
+ }
4687
+ return ScalarOpc;
4688
+ }
4689
+
4690
+ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions (
4691
+ MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4692
+ unsigned Opc = MI.getOpcode ();
4693
+ assert (Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
4694
+ Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
4695
+ " Sequential reductions not expected" );
4696
+
4697
+ if (TypeIdx != 1 )
4712
4698
return UnableToLegalize;
4699
+
4700
+ // The semantics of the normal non-sequential reductions allow us to freely
4701
+ // re-associate the operation.
4702
+ Register SrcReg = MI.getOperand (1 ).getReg ();
4703
+ LLT SrcTy = MRI.getType (SrcReg);
4704
+ Register DstReg = MI.getOperand (0 ).getReg ();
4705
+ LLT DstTy = MRI.getType (DstReg);
4706
+
4707
+ if (NarrowTy.isVector () &&
4708
+ (SrcTy.getNumElements () % NarrowTy.getNumElements () != 0 ))
4709
+ return UnableToLegalize;
4710
+
4711
+ unsigned ScalarOpc = getScalarOpcForReduction (Opc);
4712
+ SmallVector<Register> SplitSrcs;
4713
+ // If NarrowTy is a scalar then we're being asked to scalarize.
4714
+ const unsigned NumParts =
4715
+ NarrowTy.isVector () ? SrcTy.getNumElements () / NarrowTy.getNumElements ()
4716
+ : SrcTy.getNumElements ();
4717
+
4718
+ extractParts (SrcReg, NarrowTy, NumParts, SplitSrcs);
4719
+ if (NarrowTy.isScalar ()) {
4720
+ if (DstTy != NarrowTy)
4721
+ return UnableToLegalize; // FIXME: handle implicit extensions.
4722
+
4723
+ if (isPowerOf2_32 (NumParts)) {
4724
+ // Generate a tree of scalar operations to reduce the critical path.
4725
+ SmallVector<Register> PartialResults;
4726
+ unsigned NumPartsLeft = NumParts;
4727
+ while (NumPartsLeft > 1 ) {
4728
+ for (unsigned Idx = 0 ; Idx < NumPartsLeft - 1 ; Idx += 2 ) {
4729
+ PartialResults.emplace_back (
4730
+ MIRBuilder
4731
+ .buildInstr (ScalarOpc, {NarrowTy},
4732
+ {SplitSrcs[Idx], SplitSrcs[Idx + 1 ]})
4733
+ .getReg (0 ));
4734
+ }
4735
+ SplitSrcs = PartialResults;
4736
+ PartialResults.clear ();
4737
+ NumPartsLeft = SplitSrcs.size ();
4738
+ }
4739
+ assert (SplitSrcs.size () == 1 );
4740
+ MIRBuilder.buildCopy (DstReg, SplitSrcs[0 ]);
4741
+ MI.eraseFromParent ();
4742
+ return Legalized;
4743
+ }
4744
+ // If we can't generate a tree, then just do sequential operations.
4745
+ Register Acc = SplitSrcs[0 ];
4746
+ for (unsigned Idx = 1 ; Idx < NumParts; ++Idx)
4747
+ Acc = MIRBuilder.buildInstr (ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
4748
+ .getReg (0 );
4749
+ MIRBuilder.buildCopy (DstReg, Acc);
4750
+ MI.eraseFromParent ();
4751
+ return Legalized;
4752
+ }
4753
+ SmallVector<Register> PartialReductions;
4754
+ for (unsigned Part = 0 ; Part < NumParts; ++Part) {
4755
+ PartialReductions.push_back (
4756
+ MIRBuilder.buildInstr (Opc, {DstTy}, {SplitSrcs[Part]}).getReg (0 ));
4713
4757
}
4714
4758
4759
+
4715
4760
// If the types involved are powers of 2, we can generate intermediate vector
4716
4761
// ops, before generating a final reduction operation.
4717
4762
if (isPowerOf2_32 (SrcTy.getNumElements ()) &&
@@ -7389,3 +7434,22 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerIsNaN(MachineInstr &MI) {
7389
7434
MI.eraseFromParent ();
7390
7435
return Legalized;
7391
7436
}
7437
+
7438
+ LegalizerHelper::LegalizeResult
7439
+ LegalizerHelper::lowerVectorReduction (MachineInstr &MI) {
7440
+ Register SrcReg = MI.getOperand (1 ).getReg ();
7441
+ LLT SrcTy = MRI.getType (SrcReg);
7442
+ LLT DstTy = MRI.getType (SrcReg);
7443
+
7444
+ // The source could be a scalar if the IR type was <1 x sN>.
7445
+ if (SrcTy.isScalar ()) {
7446
+ if (DstTy.getSizeInBits () > SrcTy.getSizeInBits ())
7447
+ return UnableToLegalize; // FIXME: handle extension.
7448
+ // This can be just a plain copy.
7449
+ Observer.changingInstr (MI);
7450
+ MI.setDesc (MIRBuilder.getTII ().get (TargetOpcode::COPY));
7451
+ Observer.changedInstr (MI);
7452
+ return Legalized;
7453
+ }
7454
+ return UnableToLegalize;;
7455
+ }
0 commit comments