Skip to content

Commit 993eaf2

Browse files
committed
Recommit [TableGen][SchedModels] Fix read/write variant substitution
Original commit rG112b3cb6ba49 introduced non-determinism in subtarget generator due to iteration over DenseMap. New patch fixes this changing ProcModelMapTy from DenseMap to std::map.
1 parent c55d9af commit 993eaf2

File tree

4 files changed

+83
-77
lines changed

4 files changed

+83
-77
lines changed

llvm/lib/Target/ARM/ARMScheduleA57.td

+1-6
Original file line numberDiff line numberDiff line change
@@ -183,11 +183,6 @@ class A57BranchForm<SchedWriteRes non_br> :
183183
// TODO: according to the doc, conditional uses I0/I1, unconditional uses M
184184
// Why more complex instruction uses more simple pipeline?
185185
// May be an error in doc.
186-
def A57WriteALUsi : SchedWriteVariant<[
187-
// lsl #2, lsl #1, or lsr #1.
188-
SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>,
189-
SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
190-
]>;
191186
def A57WriteALUsr : SchedWriteVariant<[
192187
SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
193188
SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
@@ -200,7 +195,7 @@ def A57ReadALUsr : SchedReadVariant<[
200195
SchedVar<IsPredicatedPred, [ReadDefault]>,
201196
SchedVar<NoSchedPred, [ReadDefault]>
202197
]>;
203-
def : SchedAlias<WriteALUsi, A57WriteALUsi>;
198+
def : SchedAlias<WriteALUsi, CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>>;
204199
def : SchedAlias<WriteALUsr, A57WriteALUsr>;
205200
def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
206201
def : SchedAlias<ReadALUsr, A57ReadALUsr>;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# RUN: llc -mcpu=cortex-a57 -mtriple=thumb -enable-misched -run-pass=machine-scheduler -debug-only=machine-scheduler %s -o - 2>&1 | FileCheck %s
2+
3+
# CHECK-LABEL: ********** MI Scheduling **********
4+
# CHECK: %[[RES:[0-9]+]]:rgpr = t2MLA
5+
# CHECK-NEXT: # preds left
6+
# CHECK-NEXT: # succs left
7+
# CHECK-NEXT: # rdefs left
8+
# CHECK-NEXT: Latency : 3
9+
# CHECK-NEXT: Depth
10+
# CHECK-NEXT: Height
11+
# CHECK-NEXT: Predecessors:
12+
# CHECK-NEXT: SU({{.*}}): Data Latency=1 Reg=
13+
# CHECK-NEXT: SU({{.*}}): Out Latency=
14+
# CHECK-NEXT: SU({{.*}}): Data Latency=1 Reg=
15+
# CHECK-NEXT: Successors:
16+
# CHECK-NEXT: SU([[SMLA_SU:[0-9]+]]): Data Latency=1 Reg=%[[RES]]
17+
# CHECK-NEXT: Pressure Diff
18+
# CHECK-NEXT: Single Issue : false;
19+
# CHECK-NEXT: SU([[SMLA_SU]]): {{.*}} = t2SMLAL %{{[0-9]+}}:rgpr, %{{[0-9]+}}:rgpr, %{{[0-9]+}}:rgpr(tied-def 0), %[[RES]]:rgpr(tied-def 1), 14, $noreg
20+
21+
name: test_smlal_forwarding
22+
tracksRegLiveness: true
23+
body: |
24+
bb.0:
25+
liveins: $r1, $r3, $r4, $r5, $r6
26+
%1:rgpr = COPY $r1
27+
%3:rgpr = COPY $r3
28+
%4:rgpr = COPY $r4
29+
%5:rgpr = COPY $r5
30+
%6:rgpr = COPY $r6
31+
%3:rgpr = t2MLA %4:rgpr, %1:rgpr, %4:rgpr, 14, $noreg
32+
%6:rgpr, %5:rgpr = t2SMLAL %5:rgpr, %6:rgpr, %4:rgpr, %3:rgpr, 14, $noreg
33+
$r0 = COPY %6:rgpr
34+
BX_RET 14, $noreg, implicit $r0

llvm/utils/TableGen/CodeGenSchedule.cpp

+44-70
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ static APInt constructOperandMask(ArrayRef<int64_t> Indices) {
283283

284284
static void
285285
processSTIPredicate(STIPredicateFunction &Fn,
286-
const DenseMap<Record *, unsigned> &ProcModelMap) {
286+
const ProcModelMapTy &ProcModelMap) {
287287
DenseMap<const Record *, unsigned> Opcode2Index;
288288
using OpcodeMapPair = std::pair<const Record *, OpcodeInfo>;
289289
std::vector<OpcodeMapPair> OpcodeMappings;
@@ -1338,8 +1338,7 @@ class PredTransitions {
13381338
PredTransitions(CodeGenSchedModels &sm): SchedModels(sm) {}
13391339

13401340
bool substituteVariantOperand(const SmallVectorImpl<unsigned> &RWSeq,
1341-
bool IsRead, bool IsForAnyCPU,
1342-
unsigned StartIdx);
1341+
bool IsRead, unsigned StartIdx);
13431342

13441343
bool substituteVariants(const PredTransition &Trans);
13451344

@@ -1413,29 +1412,6 @@ bool PredTransitions::mutuallyExclusive(Record *PredDef,
14131412
return false;
14141413
}
14151414

1416-
static bool hasAliasedVariants(const CodeGenSchedRW &RW,
1417-
CodeGenSchedModels &SchedModels) {
1418-
if (RW.HasVariants)
1419-
return true;
1420-
1421-
for (Record *Alias : RW.Aliases) {
1422-
const CodeGenSchedRW &AliasRW =
1423-
SchedModels.getSchedRW(Alias->getValueAsDef("AliasRW"));
1424-
if (AliasRW.HasVariants)
1425-
return true;
1426-
if (AliasRW.IsSequence) {
1427-
IdxVec ExpandedRWs;
1428-
SchedModels.expandRWSequence(AliasRW.Index, ExpandedRWs, AliasRW.IsRead);
1429-
for (unsigned SI : ExpandedRWs) {
1430-
if (hasAliasedVariants(SchedModels.getSchedRW(SI, AliasRW.IsRead),
1431-
SchedModels))
1432-
return true;
1433-
}
1434-
}
1435-
}
1436-
return false;
1437-
}
1438-
14391415
static std::vector<Record *> getAllPredicates(ArrayRef<TransVariant> Variants,
14401416
ArrayRef<unsigned> ProcIndices) {
14411417
std::vector<Record *> Preds;
@@ -1613,21 +1589,7 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
16131589
// starts. RWSeq must be applied to all transitions between StartIdx and the end
16141590
// of TransVec.
16151591
bool PredTransitions::substituteVariantOperand(
1616-
const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, bool IsForAnyCPU,
1617-
unsigned StartIdx) {
1618-
1619-
auto CollectAndAddVariants = [&](unsigned TransIdx,
1620-
const CodeGenSchedRW &SchedRW) {
1621-
// Distribute this partial PredTransition across intersecting variants.
1622-
// This will push a copies of TransVec[TransIdx] on the back of TransVec.
1623-
std::vector<TransVariant> IntersectingVariants;
1624-
getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
1625-
// Now expand each variant on top of its copy of the transition.
1626-
for (const TransVariant &IV : IntersectingVariants)
1627-
pushVariant(IV, IsRead);
1628-
return !IntersectingVariants.empty();
1629-
};
1630-
1592+
const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, unsigned StartIdx) {
16311593
bool Subst = false;
16321594
// Visit each original RW within the current sequence.
16331595
for (SmallVectorImpl<unsigned>::const_iterator
@@ -1636,35 +1598,24 @@ bool PredTransitions::substituteVariantOperand(
16361598
// Push this RW on all partial PredTransitions or distribute variants.
16371599
// New PredTransitions may be pushed within this loop which should not be
16381600
// revisited (TransEnd must be loop invariant).
1639-
bool HasAliases = false, WasPushed = false;
16401601
for (unsigned TransIdx = StartIdx, TransEnd = TransVec.size();
16411602
TransIdx != TransEnd; ++TransIdx) {
1642-
// In the common case, push RW onto the current operand's sequence.
1643-
if (!hasAliasedVariants(SchedRW, SchedModels)) {
1603+
// Distribute this partial PredTransition across intersecting variants.
1604+
// This will push a copies of TransVec[TransIdx] on the back of TransVec.
1605+
std::vector<TransVariant> IntersectingVariants;
1606+
getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
1607+
// Now expand each variant on top of its copy of the transition.
1608+
for (const TransVariant &IV : IntersectingVariants)
1609+
pushVariant(IV, IsRead);
1610+
if (IntersectingVariants.empty()) {
16441611
if (IsRead)
16451612
TransVec[TransIdx].ReadSequences.back().push_back(*RWI);
16461613
else
16471614
TransVec[TransIdx].WriteSequences.back().push_back(*RWI);
16481615
continue;
1616+
} else {
1617+
Subst = true;
16491618
}
1650-
HasAliases = true;
1651-
WasPushed |= CollectAndAddVariants(TransIdx, SchedRW);
1652-
Subst |= WasPushed;
1653-
}
1654-
if (IsRead && IsForAnyCPU && HasAliases && !WasPushed) {
1655-
// If we're here this means that in some sched class:
1656-
// a) We have read variant for CPU A
1657-
// b) We have write variant for CPU B
1658-
// b) We don't have write variant for CPU A
1659-
// d) We must expand all read/write variants (IsForAnyCPU is true)
1660-
// e) We couldn't expand SchedRW because TransVec doesn't have
1661-
// any transition with compatible CPU ID.
1662-
// In such case we create new empty transition with zero (AnyCPU)
1663-
// index.
1664-
TransVec.reserve(TransVec.size() + 1);
1665-
TransVec.emplace_back(TransVec[StartIdx].PredTerm);
1666-
TransVec.back().ReadSequences.emplace_back();
1667-
Subst |= CollectAndAddVariants(TransVec.size() - 1, SchedRW);
16681619
}
16691620
}
16701621
return Subst;
@@ -1683,7 +1634,7 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) {
16831634
bool Subst = false;
16841635
TransVec.emplace_back(Trans.PredTerm, Trans.ProcIndices);
16851636

1686-
bool IsForAnyCPU = llvm::count(Trans.ProcIndices, 0);
1637+
assert(!llvm::count(Trans.ProcIndices, 0));
16871638
// Visit each original write sequence.
16881639
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
16891640
WSI = Trans.WriteSequences.begin(), WSE = Trans.WriteSequences.end();
@@ -1693,8 +1644,7 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) {
16931644
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
16941645
I->WriteSequences.emplace_back();
16951646
}
1696-
Subst |=
1697-
substituteVariantOperand(*WSI, /*IsRead=*/false, IsForAnyCPU, StartIdx);
1647+
Subst |= substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx);
16981648
}
16991649
// Visit each original read sequence.
17001650
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
@@ -1705,8 +1655,7 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) {
17051655
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
17061656
I->ReadSequences.emplace_back();
17071657
}
1708-
Subst |=
1709-
substituteVariantOperand(*RSI, /*IsRead=*/true, IsForAnyCPU, StartIdx);
1658+
Subst |= substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx);
17101659
}
17111660
return Subst;
17121661
}
@@ -1745,6 +1694,10 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
17451694
// requires creating a new SchedClass.
17461695
for (ArrayRef<PredTransition>::iterator
17471696
I = LastTransitions.begin(), E = LastTransitions.end(); I != E; ++I) {
1697+
// Variant expansion (substituteVariants) may create unconditional
1698+
// transitions. We don't need to build sched classes for them.
1699+
if (I->PredTerm.empty())
1700+
continue;
17481701
IdxVec OperWritesVariant, OperReadsVariant;
17491702
addSequences(SchedModels, I->WriteSequences, OperWritesVariant, false);
17501703
addSequences(SchedModels, I->ReadSequences, OperReadsVariant, true);
@@ -1777,6 +1730,26 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
17771730
}
17781731
}
17791732

1733+
std::vector<unsigned> CodeGenSchedModels::getAllProcIndices() const {
1734+
std::vector<unsigned> ProcIdVec;
1735+
for (const auto &PM : ProcModelMap)
1736+
if (PM.second != 0)
1737+
ProcIdVec.push_back(PM.second);
1738+
return ProcIdVec;
1739+
}
1740+
1741+
static std::vector<PredTransition>
1742+
makePerProcessorTransitions(const PredTransition &Trans,
1743+
ArrayRef<unsigned> ProcIndices) {
1744+
std::vector<PredTransition> PerCpuTransVec;
1745+
for (unsigned ProcId : ProcIndices) {
1746+
assert(ProcId != 0);
1747+
PerCpuTransVec.push_back(Trans);
1748+
PerCpuTransVec.back().ProcIndices.assign(1, ProcId);
1749+
}
1750+
return PerCpuTransVec;
1751+
}
1752+
17801753
// Create new SchedClasses for the given ReadWrite list. If any of the
17811754
// ReadWrites refers to a SchedVariant, create a new SchedClass for each variant
17821755
// of the ReadWrite list, following Aliases if necessary.
@@ -1812,6 +1785,10 @@ void CodeGenSchedModels::inferFromRW(ArrayRef<unsigned> OperWrites,
18121785
}
18131786
LLVM_DEBUG(dbgs() << '\n');
18141787

1788+
LastTransitions = makePerProcessorTransitions(
1789+
LastTransitions[0], llvm::count(ProcIndices, 0)
1790+
? ArrayRef<unsigned>(getAllProcIndices())
1791+
: ProcIndices);
18151792
// Collect all PredTransitions for individual operands.
18161793
// Iterate until no variant writes remain.
18171794
bool SubstitutedAny;
@@ -1823,9 +1800,6 @@ void CodeGenSchedModels::inferFromRW(ArrayRef<unsigned> OperWrites,
18231800
LLVM_DEBUG(Transitions.dump());
18241801
LastTransitions.swap(Transitions.TransVec);
18251802
} while (SubstitutedAny);
1826-
// If the first transition has no variants, nothing to do.
1827-
if (LastTransitions[0].PredTerm.empty())
1828-
return;
18291803

18301804
// WARNING: We are about to mutate the SchedClasses vector. Do not refer to
18311805
// OperWrites, OperReads, or ProcIndices after calling inferFromTransitions.

llvm/utils/TableGen/CodeGenSchedule.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/Support/ErrorHandling.h"
2222
#include "llvm/TableGen/Record.h"
2323
#include "llvm/TableGen/SetTheory.h"
24+
#include <map>
2425

2526
namespace llvm {
2627

@@ -409,6 +410,8 @@ class STIPredicateFunction {
409410
ArrayRef<OpcodeGroup> getGroups() const { return Groups; }
410411
};
411412

413+
using ProcModelMapTy = std::map<const Record*, unsigned>;
414+
412415
/// Top level container for machine model data.
413416
class CodeGenSchedModels {
414417
RecordKeeper &Records;
@@ -421,7 +424,6 @@ class CodeGenSchedModels {
421424
std::vector<CodeGenProcModel> ProcModels;
422425

423426
// Map Processor's MachineModel or ProcItin to a CodeGenProcModel index.
424-
using ProcModelMapTy = DenseMap<Record*, unsigned>;
425427
ProcModelMapTy ProcModelMap;
426428

427429
// Per-operand SchedReadWrite types.
@@ -443,6 +445,7 @@ class CodeGenSchedModels {
443445
InstClassMapTy InstrClassMap;
444446

445447
std::vector<STIPredicateFunction> STIPredicates;
448+
std::vector<unsigned> getAllProcIndices() const;
446449

447450
public:
448451
CodeGenSchedModels(RecordKeeper& RK, const CodeGenTarget &TGT);

0 commit comments

Comments
 (0)