Skip to content

Commit cc96a82

Browse files
committed
[TableGen][SchedModels] Fix read/write variant substitution
This patch fixes the case where a sched class has write and read variants belonging to different processor models. Differential revision: https://reviews.llvm.org/D89777
1 parent ff2e24a commit cc96a82

File tree

3 files changed

+65
-32
lines changed

3 files changed

+65
-32
lines changed

llvm/lib/Target/ARM/ARMScheduleA57.td

+5-1
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,11 @@ def : ReadAdvance<ReadMUL, 0>;
270270
// from similar μops, allowing a typical sequence of multiply-accumulate μops
271271
// to issue one every 1 cycle (sched advance = 2).
272272
def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
273-
def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
273+
def A57WriteMLAL : SchedWriteVariant<[
274+
SchedVar<IsCPSRDefinedPred, [A57Write_5cyc_1I_1M]>,
275+
SchedVar<NoSchedPred, [A57Write_4cyc_1M]>
276+
]>;
277+
274278
def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;
275279

276280
def : InstRW<[A57WriteMLA],

llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s

+13-13
Original file line numberDiff line numberDiff line change
@@ -1421,9 +1421,9 @@
14211421
# CHECK-NEXT: 1 3 1.00 smladeq r2, r3, r5, r8
14221422
# CHECK-NEXT: 1 3 1.00 smladxhi r2, r3, r5, r8
14231423
# CHECK-NEXT: 2 4 2.00 smlal r2, r3, r5, r8
1424-
# CHECK-NEXT: 2 4 2.00 smlals r2, r3, r5, r8
1424+
# CHECK-NEXT: 4 5 2.00 smlals r2, r3, r5, r8
14251425
# CHECK-NEXT: 2 4 2.00 smlaleq r2, r3, r5, r8
1426-
# CHECK-NEXT: 2 4 2.00 smlalshi r2, r3, r5, r8
1426+
# CHECK-NEXT: 4 5 2.00 smlalshi r2, r3, r5, r8
14271427
# CHECK-NEXT: 2 4 2.00 smlalbb r3, r1, r9, r0
14281428
# CHECK-NEXT: 2 4 2.00 smlalbt r5, r6, r4, r1
14291429
# CHECK-NEXT: 2 4 2.00 smlaltb r4, r2, r3, r2
@@ -1634,12 +1634,12 @@
16341634
# CHECK-NEXT: 2 4 2.00 umaallt r3, r4, r5, r6
16351635
# CHECK-NEXT: 2 4 2.00 umlal r2, r4, r6, r8
16361636
# CHECK-NEXT: 2 4 2.00 umlalgt r6, r1, r2, r6
1637-
# CHECK-NEXT: 2 4 2.00 umlals r2, r9, r2, r3
1638-
# CHECK-NEXT: 2 4 2.00 umlalseq r3, r5, r1, r2
1637+
# CHECK-NEXT: 4 5 2.00 umlals r2, r9, r2, r3
1638+
# CHECK-NEXT: 4 5 2.00 umlalseq r3, r5, r1, r2
16391639
# CHECK-NEXT: 2 4 2.00 umull r2, r4, r6, r8
16401640
# CHECK-NEXT: 2 4 2.00 umullgt r6, r1, r2, r6
1641-
# CHECK-NEXT: 2 4 2.00 umulls r2, r9, r2, r3
1642-
# CHECK-NEXT: 2 4 2.00 umullseq r3, r5, r1, r2
1641+
# CHECK-NEXT: 4 5 2.00 umulls r2, r9, r2, r3
1642+
# CHECK-NEXT: 4 5 2.00 umullseq r3, r5, r1, r2
16431643
# CHECK-NEXT: 1 2 1.00 uqadd16 r1, r2, r3
16441644
# CHECK-NEXT: 1 2 1.00 uqadd16gt r4, r7, r9
16451645
# CHECK-NEXT: 1 2 1.00 uqadd8 r3, r4, r8
@@ -1719,7 +1719,7 @@
17191719

17201720
# CHECK: Resource pressure per iteration:
17211721
# CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6]
1722-
# CHECK-NEXT: 8.00 133.00 133.00 53.00 522.00 12.00 - -
1722+
# CHECK-NEXT: 8.00 139.00 139.00 53.00 522.00 12.00 - -
17231723

17241724
# CHECK: Resource pressure by instruction:
17251725
# CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6] Instructions:
@@ -2285,9 +2285,9 @@
22852285
# CHECK-NEXT: - - - - 1.00 - - - smladeq r2, r3, r5, r8
22862286
# CHECK-NEXT: - - - - 1.00 - - - smladxhi r2, r3, r5, r8
22872287
# CHECK-NEXT: - - - - 2.00 - - - smlal r2, r3, r5, r8
2288-
# CHECK-NEXT: - - - - 2.00 - - - smlals r2, r3, r5, r8
2288+
# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - smlals r2, r3, r5, r8
22892289
# CHECK-NEXT: - - - - 2.00 - - - smlaleq r2, r3, r5, r8
2290-
# CHECK-NEXT: - - - - 2.00 - - - smlalshi r2, r3, r5, r8
2290+
# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - smlalshi r2, r3, r5, r8
22912291
# CHECK-NEXT: - - - - 2.00 - - - smlalbb r3, r1, r9, r0
22922292
# CHECK-NEXT: - - - - 2.00 - - - smlalbt r5, r6, r4, r1
22932293
# CHECK-NEXT: - - - - 2.00 - - - smlaltb r4, r2, r3, r2
@@ -2498,12 +2498,12 @@
24982498
# CHECK-NEXT: - - - - 2.00 - - - umaallt r3, r4, r5, r6
24992499
# CHECK-NEXT: - - - - 2.00 - - - umlal r2, r4, r6, r8
25002500
# CHECK-NEXT: - - - - 2.00 - - - umlalgt r6, r1, r2, r6
2501-
# CHECK-NEXT: - - - - 2.00 - - - umlals r2, r9, r2, r3
2502-
# CHECK-NEXT: - - - - 2.00 - - - umlalseq r3, r5, r1, r2
2501+
# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umlals r2, r9, r2, r3
2502+
# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umlalseq r3, r5, r1, r2
25032503
# CHECK-NEXT: - - - - 2.00 - - - umull r2, r4, r6, r8
25042504
# CHECK-NEXT: - - - - 2.00 - - - umullgt r6, r1, r2, r6
2505-
# CHECK-NEXT: - - - - 2.00 - - - umulls r2, r9, r2, r3
2506-
# CHECK-NEXT: - - - - 2.00 - - - umullseq r3, r5, r1, r2
2505+
# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umulls r2, r9, r2, r3
2506+
# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umullseq r3, r5, r1, r2
25072507
# CHECK-NEXT: - - - - 1.00 - - - uqadd16 r1, r2, r3
25082508
# CHECK-NEXT: - - - - 1.00 - - - uqadd16gt r4, r7, r9
25092509
# CHECK-NEXT: - - - - 1.00 - - - uqadd8 r3, r4, r8

llvm/utils/TableGen/CodeGenSchedule.cpp

+47-18
Original file line numberDiff line numberDiff line change
@@ -1315,6 +1315,16 @@ struct PredTransition {
13151315
SmallVector<SmallVector<unsigned,4>, 16> WriteSequences;
13161316
SmallVector<SmallVector<unsigned,4>, 16> ReadSequences;
13171317
SmallVector<unsigned, 4> ProcIndices;
1318+
1319+
PredTransition() = default;
1320+
PredTransition(ArrayRef<PredCheck> PT) {
1321+
PredTerm.assign(PT.begin(), PT.end());
1322+
ProcIndices.assign(1, 0);
1323+
}
1324+
PredTransition(ArrayRef<PredCheck> PT, ArrayRef<unsigned> PIds) {
1325+
PredTerm.assign(PT.begin(), PT.end());
1326+
ProcIndices.assign(PIds.begin(), PIds.end());
1327+
}
13181328
};
13191329

13201330
// Encapsulate a set of partially constructed transitions.
@@ -1328,7 +1338,8 @@ class PredTransitions {
13281338
PredTransitions(CodeGenSchedModels &sm): SchedModels(sm) {}
13291339

13301340
void substituteVariantOperand(const SmallVectorImpl<unsigned> &RWSeq,
1331-
bool IsRead, unsigned StartIdx);
1341+
bool IsRead, bool IsForAnyCPU,
1342+
unsigned StartIdx);
13321343

13331344
void substituteVariants(const PredTransition &Trans);
13341345

@@ -1568,7 +1579,20 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
15681579
// starts. RWSeq must be applied to all transitions between StartIdx and the end
15691580
// of TransVec.
15701581
void PredTransitions::substituteVariantOperand(
1571-
const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, unsigned StartIdx) {
1582+
const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, bool IsForAnyCPU,
1583+
unsigned StartIdx) {
1584+
1585+
auto CollectAndAddVariants = [&](unsigned TransIdx,
1586+
const CodeGenSchedRW &SchedRW) {
1587+
// Distribute this partial PredTransition across intersecting variants.
1588+
// This will push copies of TransVec[TransIdx] on the back of TransVec.
1589+
std::vector<TransVariant> IntersectingVariants;
1590+
getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
1591+
// Now expand each variant on top of its copy of the transition.
1592+
for (const TransVariant &IV : IntersectingVariants)
1593+
pushVariant(IV, IsRead);
1594+
return !IntersectingVariants.empty();
1595+
};
15721596

15731597
// Visit each original RW within the current sequence.
15741598
for (SmallVectorImpl<unsigned>::const_iterator
@@ -1577,6 +1601,7 @@ void PredTransitions::substituteVariantOperand(
15771601
// Push this RW on all partial PredTransitions or distribute variants.
15781602
// New PredTransitions may be pushed within this loop which should not be
15791603
// revisited (TransEnd must be loop invariant).
1604+
bool HasAliases = false, WasPushed = false;
15801605
for (unsigned TransIdx = StartIdx, TransEnd = TransVec.size();
15811606
TransIdx != TransEnd; ++TransIdx) {
15821607
// In the common case, push RW onto the current operand's sequence.
@@ -1587,17 +1612,22 @@ void PredTransitions::substituteVariantOperand(
15871612
TransVec[TransIdx].WriteSequences.back().push_back(*RWI);
15881613
continue;
15891614
}
1590-
// Distribute this partial PredTransition across intersecting variants.
1591-
// This will push a copies of TransVec[TransIdx] on the back of TransVec.
1592-
std::vector<TransVariant> IntersectingVariants;
1593-
getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
1594-
// Now expand each variant on top of its copy of the transition.
1595-
for (std::vector<TransVariant>::const_iterator
1596-
IVI = IntersectingVariants.begin(),
1597-
IVE = IntersectingVariants.end();
1598-
IVI != IVE; ++IVI) {
1599-
pushVariant(*IVI, IsRead);
1600-
}
1615+
HasAliases = true;
1616+
WasPushed |= CollectAndAddVariants(TransIdx, SchedRW);
1617+
}
1618+
if (IsRead && IsForAnyCPU && HasAliases && !WasPushed) {
1619+
// If we're here this means that in some sched class:
1620+
// a) We have read variant for CPU A
1621+
// b) We have write variant for CPU B
1622+
// c) We don't have write variant for CPU A
1623+
// d) We must expand all read/write variants (IsForAnyCPU is true)
1624+
// e) We couldn't expand SchedRW because TransVec doesn't have
1625+
// any transition with compatible CPU ID.
1626+
// In such case we create new empty transition with zero (AnyCPU)
1627+
// index.
1628+
TransVec.emplace_back(TransVec[StartIdx].PredTerm);
1629+
TransVec.back().ReadSequences.emplace_back();
1630+
CollectAndAddVariants(TransVec.size() - 1, SchedRW);
16011631
}
16021632
}
16031633
}
@@ -1612,10 +1642,9 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
16121642
// Build up a set of partial results starting at the back of
16131643
// PredTransitions. Remember the first new transition.
16141644
unsigned StartIdx = TransVec.size();
1615-
TransVec.emplace_back();
1616-
TransVec.back().PredTerm = Trans.PredTerm;
1617-
TransVec.back().ProcIndices = Trans.ProcIndices;
1645+
TransVec.emplace_back(Trans.PredTerm, Trans.ProcIndices);
16181646

1647+
bool IsForAnyCPU = llvm::count(Trans.ProcIndices, 0);
16191648
// Visit each original write sequence.
16201649
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
16211650
WSI = Trans.WriteSequences.begin(), WSE = Trans.WriteSequences.end();
@@ -1625,7 +1654,7 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
16251654
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
16261655
I->WriteSequences.emplace_back();
16271656
}
1628-
substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx);
1657+
substituteVariantOperand(*WSI, /*IsRead=*/false, IsForAnyCPU, StartIdx);
16291658
}
16301659
// Visit each original read sequence.
16311660
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
@@ -1636,7 +1665,7 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
16361665
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
16371666
I->ReadSequences.emplace_back();
16381667
}
1639-
substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx);
1668+
substituteVariantOperand(*RSI, /*IsRead=*/true, IsForAnyCPU, StartIdx);
16401669
}
16411670
}
16421671

0 commit comments

Comments
 (0)