
Commit e6a4ba3

[amdgpu] Handle the case where there is no scavenged register.
- When an unconditional branch is expanded into an indirect branch and there is no scavenged register, an SGPR pair needs spilling so that it can hold the destination PC calculation. In addition, before jumping into the destination, that clobbered SGPR pair needs restoring.
- As SGPRs cannot be spilled to or restored from memory directly, the spilling/restoring of that SGPR pair reuses the regular SGPR spilling support, but without spilling it into memory. As the spill and restore points are fully controlled, we only need to spill that SGPR into the temporary VGPR, which needs spilling into its emergency slot.
- The target-specific hook is revised to take an additional restore block, where the restoring code is filled in. After that, branch relaxation places that restore block directly before the destination block and, in any block that previously fell through, inserts an unconditional branch into the destination block.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D106449
1 parent 32d4586 commit e6a4ba3

11 files changed (+1921, -85 lines)
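As a picture of the memory-free spill the message describes: each 32-bit half of the clobbered SGPR pair is parked in one lane of the temporary VGPR (v_writelane) and read back in the restore block (v_readlane), while the VGPR itself goes to its emergency stack slot. Below is a minimal standalone sketch of that round trip, with a plain array standing in for the 64-lane VGPR; the names and values are illustrative, not LLVM code.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t SGPRPair = 0x0123456789ABCDEFULL; // pair about to be clobbered
  uint32_t TmpVGPR[64] = {};                 // one 32-bit value per lane

  // "v_writelane": park the low/high halves in lanes 0 and 1.
  TmpVGPR[0] = static_cast<uint32_t>(SGPRPair);
  TmpVGPR[1] = static_cast<uint32_t>(SGPRPair >> 32);

  // ... the SGPR pair is now free to hold the computed destination PC ...

  // "v_readlane" in the restore block: rebuild the pair from the lanes.
  uint64_t Restored = (static_cast<uint64_t>(TmpVGPR[1]) << 32) | TmpVGPR[0];
  assert(Restored == SGPRPair);
  return 0;
}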

llvm/include/llvm/CodeGen/TargetInstrInfo.h (+7, -8)

@@ -582,15 +582,14 @@ class TargetInstrInfo : public MCInstrInfo {
   }
 
   /// Insert an unconditional indirect branch at the end of \p MBB to \p
-  /// NewDestBB. \p BrOffset indicates the offset of \p NewDestBB relative to
+  /// NewDestBB. Optionally, insert the clobbered register restoring in \p
+  /// RestoreBB. \p BrOffset indicates the offset of \p NewDestBB relative to
   /// the offset of the position to insert the new branch.
-  ///
-  /// \returns The number of bytes added to the block.
-  virtual unsigned insertIndirectBranch(MachineBasicBlock &MBB,
-                                        MachineBasicBlock &NewDestBB,
-                                        const DebugLoc &DL,
-                                        int64_t BrOffset = 0,
-                                        RegScavenger *RS = nullptr) const {
+  virtual void insertIndirectBranch(MachineBasicBlock &MBB,
+                                    MachineBasicBlock &NewDestBB,
+                                    MachineBasicBlock &RestoreBB,
+                                    const DebugLoc &DL, int64_t BrOffset = 0,
+                                    RegScavenger *RS = nullptr) const {
     llvm_unreachable("target did not implement");
   }

llvm/lib/CodeGen/BranchRelaxation.cpp (+40, -2)

@@ -463,10 +463,48 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
 
   DebugLoc DL = MI.getDebugLoc();
   MI.eraseFromParent();
-  BlockInfo[BranchBB->getNumber()].Size += TII->insertIndirectBranch(
-      *BranchBB, *DestBB, DL, DestOffset - SrcOffset, RS.get());
 
+  // Create the optional restore block and, initially, place it at the end of
+  // function. That block will be placed later if it's used; otherwise, it will
+  // be erased.
+  MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back());
+
+  TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL,
+                            DestOffset - SrcOffset, RS.get());
+
+  BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB);
   adjustBlockOffsets(*MBB);
+
+  // If RestoreBB is required, try to place just before DestBB.
+  if (!RestoreBB->empty()) {
+    // TODO: For multiple far branches to the same destination, there are
+    // chances that some restore blocks could be shared if they clobber the
+    // same registers and share the same restore sequence. So far, those
+    // restore blocks are just duplicated for each far branch.
+    assert(!DestBB->isEntryBlock());
+    MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator());
+    if (auto *FT = PrevBB->getFallThrough()) {
+      assert(FT == DestBB);
+      TII->insertUnconditionalBranch(*PrevBB, DestBB, DebugLoc());
+      // Recalculate the block size.
+      BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB);
+    }
+    // Now, RestoreBB could be placed directly before DestBB.
+    MF->splice(DestBB->getIterator(), RestoreBB->getIterator());
+    // Update successors and predecessors.
+    RestoreBB->addSuccessor(DestBB);
+    BranchBB->replaceSuccessor(DestBB, RestoreBB);
+    if (TRI->trackLivenessAfterRegAlloc(*MF))
+      computeAndAddLiveIns(LiveRegs, *RestoreBB);
+    // Compute the restore block size.
+    BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB);
+    // Update the offset starting from the previous block.
+    adjustBlockOffsets(*PrevBB);
+  } else {
+    // Remove restore block if it's not required.
+    MF->erase(RestoreBB);
+  }
+
   return true;
 }
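One subtlety in the placement above: splicing RestoreBB directly in front of DestBB would silently capture any block that used to fall through into DestBB, which is why the pass first gives such a block an explicit unconditional branch. The following toy, LLVM-free model shows that reordering; the block names and the FallsThrough flag are made up for illustration.

#include <cstdio>
#include <string>
#include <vector>

struct Block {
  std::string Name;
  bool FallsThrough; // ends without a terminator and runs into the next block
};

int main() {
  // "prev" currently falls through into "dest", like PrevBB above.
  std::vector<Block> Layout = {{"prev", true}, {"dest", false}};
  const unsigned DestIdx = 1;

  // Mirror TII->insertUnconditionalBranch(*PrevBB, DestBB, DebugLoc()):
  // the fall-through predecessor now branches to "dest" explicitly.
  if (Layout[DestIdx - 1].FallsThrough)
    Layout[DestIdx - 1].FallsThrough = false;

  // Mirror MF->splice(...): the restore block sits right before "dest"
  // and is the one block allowed to fall through into it.
  Layout.insert(Layout.begin() + DestIdx, {"restore", true});

  for (const Block &B : Layout)
    std::printf("%-8s %s\n", B.Name.c_str(),
                B.FallsThrough ? "falls through" : "branches");
}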

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+26, -20)

@@ -2223,15 +2223,17 @@ MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
   return MI.getOperand(0).getMBB();
 }
 
-unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
-                                           MachineBasicBlock &DestBB,
-                                           const DebugLoc &DL,
-                                           int64_t BrOffset,
-                                           RegScavenger *RS) const {
+void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+                                       MachineBasicBlock &DestBB,
+                                       MachineBasicBlock &RestoreBB,
+                                       const DebugLoc &DL, int64_t BrOffset,
+                                       RegScavenger *RS) const {
   assert(RS && "RegScavenger required for long branching");
   assert(MBB.empty() &&
          "new block should be inserted for expanding unconditional branch");
   assert(MBB.pred_size() == 1);
+  assert(RestoreBB.empty() &&
+         "restore block should be inserted for restoring clobbered registers");
 
   MachineFunction *MF = MBB.getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -2268,14 +2270,6 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
   BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
     .addReg(PCReg);
 
-  auto ComputeBlockSize = [](const TargetInstrInfo *TII,
-                             const MachineBasicBlock &MBB) {
-    unsigned Size = 0;
-    for (const MachineInstr &MI : MBB)
-      Size += TII->getInstSizeInBytes(MI);
-    return Size;
-  };
-
   // FIXME: If spilling is necessary, this will fail because this scavenger has
   // no emergency stack slots. It is non-trivial to spill in this situation,
   // because the restore code needs to be specially placed after the
@@ -2314,22 +2308,34 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
 
   RS->enterBasicBlockEnd(MBB);
   Register Scav = RS->scavengeRegisterBackwards(
-      AMDGPU::SReg_64RegClass,
-      MachineBasicBlock::iterator(GetPC), false, 0);
-  MRI.replaceRegWith(PCReg, Scav);
-  MRI.clearVirtRegs();
-  RS->setRegUsed(Scav);
+      AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC),
+      /* RestoreAfter */ false, 0, /* AllowSpill */ false);
+  if (Scav) {
+    RS->setRegUsed(Scav);
+    MRI.replaceRegWith(PCReg, Scav);
+    MRI.clearVirtRegs();
+  } else {
+    // As SGPR needs VGPR to be spilled, we reuse the slot of temporary VGPR for
+    // SGPR spill.
+    const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+    const SIRegisterInfo *TRI = ST.getRegisterInfo();
+    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
+    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
+    MRI.clearVirtRegs();
+  }
 
+  MCSymbol *DestLabel = Scav ? DestBB.getSymbol() : RestoreBB.getSymbol();
   // Now, the distance could be defined.
   auto *Offset = MCBinaryExpr::createSub(
-      MCSymbolRefExpr::create(DestBB.getSymbol(), MCCtx),
+      MCSymbolRefExpr::create(DestLabel, MCCtx),
       MCSymbolRefExpr::create(PostGetPCLabel, MCCtx), MCCtx);
   // Add offset assignments.
   auto *Mask = MCConstantExpr::create(0xFFFFFFFFULL, MCCtx);
   OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx));
   auto *ShAmt = MCConstantExpr::create(32, MCCtx);
   OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx));
-  return ComputeBlockSize(this, MBB);
+
+  return;
 }
 
 unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
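On the OffsetLo/OffsetHi fixups above: the 64-bit label difference is split with an And against 0xFFFFFFFF and an arithmetic right shift by 32; the two halves presumably feed a 32-bit add and add-with-carry that patch the PC pair (only S_SETPC_B64 and the GetPC instruction appear in this hunk, so that part is an assumption). A small standalone check of the split arithmetic, with a made-up offset:

#include <cassert>
#include <cstdint>

int main() {
  // Made-up example; in the pass it is DestLabel - PostGetPCLabel.
  int64_t Offset = -0x12345678LL;

  // Mirror the fixups: lo = offset & 0xFFFFFFFF, hi = offset >> 32
  // (arithmetic shift, so the sign is preserved in the high half).
  uint32_t Lo = static_cast<uint32_t>(Offset & 0xFFFFFFFFLL);
  uint32_t Hi = static_cast<uint32_t>(Offset >> 32);

  // Recombining the halves with plain 64-bit arithmetic recovers the
  // original signed offset.
  uint64_t Recombined = (static_cast<uint64_t>(Hi) << 32) | Lo;
  assert(static_cast<int64_t>(Recombined) == Offset);
  return 0;
}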

llvm/lib/Target/AMDGPU/SIInstrInfo.h (+4, -5)

@@ -275,11 +275,10 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
 
   MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
 
-  unsigned insertIndirectBranch(MachineBasicBlock &MBB,
-                                MachineBasicBlock &NewDestBB,
-                                const DebugLoc &DL,
-                                int64_t BrOffset,
-                                RegScavenger *RS = nullptr) const override;
+  void insertIndirectBranch(MachineBasicBlock &MBB,
+                            MachineBasicBlock &NewDestBB,
+                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+                            int64_t BrOffset, RegScavenger *RS) const override;
 
   bool analyzeBranchImpl(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I,

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp (+103, -22)

@@ -97,7 +97,7 @@ struct SGPRSpillBuilder {
   unsigned EltSize = 4;
 
   RegScavenger *RS;
-  MachineBasicBlock &MBB;
+  MachineBasicBlock *MBB;
   MachineFunction &MF;
   SIMachineFunctionInfo &MFI;
   const SIInstrInfo &TII;
@@ -110,9 +110,14 @@
   SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
                    bool IsWave32, MachineBasicBlock::iterator MI, int Index,
                    RegScavenger *RS)
-      : SuperReg(MI->getOperand(0).getReg()), MI(MI),
-        IsKill(MI->getOperand(0).isKill()), DL(MI->getDebugLoc()), Index(Index),
-        RS(RS), MBB(*MI->getParent()), MF(*MBB.getParent()),
+      : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(),
+                         MI->getOperand(0).isKill(), Index, RS) {}
+
+  SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
+                   bool IsWave32, MachineBasicBlock::iterator MI, Register Reg,
+                   bool IsKill, int Index, RegScavenger *RS)
+      : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc()),
+        Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
         MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
         IsWave32(IsWave32) {
     const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg);
@@ -189,8 +194,9 @@
     if (SavedExecReg) {
       RS->setRegUsed(SavedExecReg);
       // Set exec to needed lanes
-      BuildMI(MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
-      auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
+      BuildMI(*MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
+      auto I =
+          BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
       if (!TmpVGPRLive)
         I.addReg(TmpVGPR, RegState::ImplicitDefine);
       // Spill needed lanes
@@ -201,7 +207,7 @@
       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
                                   /*IsKill*/ false);
       // Spill inactive lanes
-      auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
       if (!TmpVGPRLive)
         I.addReg(TmpVGPR, RegState::ImplicitDefine);
       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
@@ -224,7 +230,7 @@
       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
                                   /*IsKill*/ false);
       // Restore exec
-      auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg)
+      auto I = BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg)
                    .addReg(SavedExecReg, RegState::Kill);
       // Add an implicit use of the load so it is not dead.
       // FIXME This inserts an unnecessary waitcnt
@@ -235,7 +241,7 @@
       // Restore inactive lanes
       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
                                   /*IsKill*/ false);
-      auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
       if (!TmpVGPRLive) {
         I.addReg(TmpVGPR, RegState::ImplicitKill);
       }
@@ -261,11 +267,17 @@
       TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
                                   /*IsKill*/ false);
       // Spill inactive lanes
-      BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
       TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
-      BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
     }
   }
+
+  void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI) {
+    assert(MBB->getParent() == &MF);
+    MI = NewMI;
+    MBB = NewMBB;
+  }
 };
 
 } // namespace llvm
@@ -1337,13 +1349,13 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
   if (IsLoad) {
     unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                           : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
-    buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
+    buildSpillLoadStore(*SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
                         Offset * SB.EltSize, MMO, SB.RS);
   } else {
     unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                           : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
-    buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
-                        Offset * SB.EltSize, MMO, SB.RS);
+    buildSpillLoadStore(*SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill,
+                        FrameReg, Offset * SB.EltSize, MMO, SB.RS);
     // This only ever adds one VGPR spill
    SB.MFI.addToSpilledVGPRs(1);
  }
@@ -1381,8 +1393,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
 
       // Mark the "old value of vgpr" input undef only if this is the first sgpr
       // spill to this specific vgpr in the first basic block.
-      auto MIB = BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
-                         Spill.VGPR)
+      auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
+                         SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
                      .addReg(SubReg, getKillRegState(UseKill))
                      .addImm(Spill.Lane)
                      .addReg(Spill.VGPR);
@@ -1428,7 +1440,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                        : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
 
       MachineInstrBuilder WriteLane =
-          BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
+          BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
                   SB.TmpVGPR)
               .addReg(SubReg, SubKillState)
              .addImm(i % PVD.PerVGPR)
@@ -1490,10 +1502,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                        : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
 
       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
-      auto MIB =
-          BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
-              .addReg(Spill.VGPR)
-              .addImm(Spill.Lane);
+      auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
+                         SubReg)
+                     .addReg(Spill.VGPR)
+                     .addImm(Spill.Lane);
       if (SB.NumSubRegs > 1 && i == 0)
         MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
       if (LIS) {
@@ -1524,7 +1536,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                        : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
 
       bool LastSubReg = (i + 1 == e);
-      auto MIB = BuildMI(SB.MBB, MI, SB.DL,
+      auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
                          SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
                      .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
                      .addImm(i);
@@ -1550,6 +1562,75 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
   return true;
 }
 
+bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
+                                        MachineBasicBlock &RestoreMBB,
+                                        Register SGPR, RegScavenger *RS) const {
+  SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0,
+                      RS);
+  SB.prepare();
+  // Generate the spill of SGPR to SB.TmpVGPR.
+  unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
+  auto PVD = SB.getPerVGPRData();
+  for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
+    unsigned TmpVGPRFlags = RegState::Undef;
+    // Write sub registers into the VGPR
+    for (unsigned i = Offset * PVD.PerVGPR,
+                  e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
+         i < e; ++i) {
+      Register SubReg =
+          SB.NumSubRegs == 1
+              ? SB.SuperReg
+              : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
+
+      MachineInstrBuilder WriteLane =
+          BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
+                  SB.TmpVGPR)
+              .addReg(SubReg, SubKillState)
+              .addImm(i % PVD.PerVGPR)
+              .addReg(SB.TmpVGPR, TmpVGPRFlags);
+      TmpVGPRFlags = 0;
+      // There could be undef components of a spilled super register.
+      // TODO: Can we detect this and skip the spill?
+      if (SB.NumSubRegs > 1) {
+        // The last implicit use of the SB.SuperReg carries the "Kill" flag.
+        unsigned SuperKillState = 0;
+        if (i + 1 == SB.NumSubRegs)
+          SuperKillState |= getKillRegState(SB.IsKill);
+        WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
+      }
+    }
+    // Don't need to write VGPR out.
+  }
+
+  // Restore clobbered registers in the specified restore block.
+  MI = RestoreMBB.end();
+  SB.setMI(&RestoreMBB, MI);
+  // Generate the restore of SGPR from SB.TmpVGPR.
+  for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
+    // Don't need to load VGPR in.
+    // Unpack lanes
+    for (unsigned i = Offset * PVD.PerVGPR,
+                  e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
+         i < e; ++i) {
+      Register SubReg =
+          SB.NumSubRegs == 1
+              ? SB.SuperReg
+              : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
+      bool LastSubReg = (i + 1 == e);
+      auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
+                         SubReg)
+                     .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
+                     .addImm(i);
+      if (SB.NumSubRegs > 1 && i == 0)
+        MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
+    }
+  }
+  SB.restore();
+
+  SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
+  return false;
+}
+
 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
 /// a VGPR and the stack slot can be safely eliminated when all other users are
 /// handled.
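The two loops in spillEmergencySGPR share one layout rule: 32-bit sub-register i of the super-register is written to lane i % PerVGPR of temporary VGPR number i / PerVGPR, and read back from the matching lane in the restore block. Here is a standalone sketch of just that indexing, with made-up sizes; the real values come from SB.getPerVGPRData().

#include <algorithm>
#include <cstdio>

int main() {
  const unsigned NumSubRegs = 8; // e.g. a 256-bit SGPR tuple; a pair would be 2
  const unsigned PerVGPR = 32;   // usable lanes per VGPR (wave32 here)
  const unsigned NumVGPRs = (NumSubRegs + PerVGPR - 1) / PerVGPR;

  // Same loop shape as the spill side of spillEmergencySGPR.
  for (unsigned Offset = 0; Offset < NumVGPRs; ++Offset) {
    unsigned e = std::min((Offset + 1) * PerVGPR, NumSubRegs);
    for (unsigned i = Offset * PerVGPR; i < e; ++i)
      std::printf("sub-register %u -> tmp VGPR %u, lane %u\n", i, Offset,
                  i % PerVGPR);
  }
}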
