@@ -97,7 +97,7 @@ struct SGPRSpillBuilder {
97
97
unsigned EltSize = 4 ;
98
98
99
99
RegScavenger *RS;
100
- MachineBasicBlock & MBB;
100
+ MachineBasicBlock * MBB;
101
101
MachineFunction &MF;
102
102
SIMachineFunctionInfo &MFI;
103
103
const SIInstrInfo &TII;
@@ -110,9 +110,14 @@ struct SGPRSpillBuilder {
110
110
SGPRSpillBuilder (const SIRegisterInfo &TRI, const SIInstrInfo &TII,
111
111
bool IsWave32, MachineBasicBlock::iterator MI, int Index,
112
112
RegScavenger *RS)
113
- : SuperReg(MI->getOperand (0 ).getReg()), MI(MI),
114
- IsKill(MI->getOperand (0 ).isKill()), DL(MI->getDebugLoc ()), Index(Index),
115
- RS(RS), MBB(*MI->getParent ()), MF(*MBB.getParent()),
113
+ : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand (0 ).getReg(),
114
+ MI->getOperand(0 ).isKill(), Index, RS) {}
115
+
116
+ SGPRSpillBuilder (const SIRegisterInfo &TRI, const SIInstrInfo &TII,
117
+ bool IsWave32, MachineBasicBlock::iterator MI, Register Reg,
118
+ bool IsKill, int Index, RegScavenger *RS)
119
+ : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc ()),
120
+ Index(Index), RS(RS), MBB(MI->getParent ()), MF(*MBB->getParent ()),
116
121
MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
117
122
IsWave32(IsWave32) {
118
123
const TargetRegisterClass *RC = TRI.getPhysRegClass (SuperReg);
@@ -189,8 +194,9 @@ struct SGPRSpillBuilder {
189
194
if (SavedExecReg) {
190
195
RS->setRegUsed (SavedExecReg);
191
196
// Set exec to needed lanes
192
- BuildMI (MBB, MI, DL, TII.get (MovOpc), SavedExecReg).addReg (ExecReg);
193
- auto I = BuildMI (MBB, MI, DL, TII.get (MovOpc), ExecReg).addImm (VGPRLanes);
197
+ BuildMI (*MBB, MI, DL, TII.get (MovOpc), SavedExecReg).addReg (ExecReg);
198
+ auto I =
199
+ BuildMI (*MBB, MI, DL, TII.get (MovOpc), ExecReg).addImm (VGPRLanes);
194
200
if (!TmpVGPRLive)
195
201
I.addReg (TmpVGPR, RegState::ImplicitDefine);
196
202
// Spill needed lanes
@@ -201,7 +207,7 @@ struct SGPRSpillBuilder {
201
207
TRI.buildVGPRSpillLoadStore (*this , TmpVGPRIndex, 0 , /* IsLoad*/ false ,
202
208
/* IsKill*/ false );
203
209
// Spill inactive lanes
204
- auto I = BuildMI (MBB, MI, DL, TII.get (NotOpc), ExecReg).addReg (ExecReg);
210
+ auto I = BuildMI (* MBB, MI, DL, TII.get (NotOpc), ExecReg).addReg (ExecReg);
205
211
if (!TmpVGPRLive)
206
212
I.addReg (TmpVGPR, RegState::ImplicitDefine);
207
213
TRI.buildVGPRSpillLoadStore (*this , TmpVGPRIndex, 0 , /* IsLoad*/ false );
@@ -224,7 +230,7 @@ struct SGPRSpillBuilder {
224
230
TRI.buildVGPRSpillLoadStore (*this , TmpVGPRIndex, 0 , /* IsLoad*/ true ,
225
231
/* IsKill*/ false );
226
232
// Restore exec
227
- auto I = BuildMI (MBB, MI, DL, TII.get (MovOpc), ExecReg)
233
+ auto I = BuildMI (* MBB, MI, DL, TII.get (MovOpc), ExecReg)
228
234
.addReg (SavedExecReg, RegState::Kill);
229
235
// Add an implicit use of the load so it is not dead.
230
236
// FIXME This inserts an unnecessary waitcnt
@@ -235,7 +241,7 @@ struct SGPRSpillBuilder {
235
241
// Restore inactive lanes
236
242
TRI.buildVGPRSpillLoadStore (*this , TmpVGPRIndex, 0 , /* IsLoad*/ true ,
237
243
/* IsKill*/ false );
238
- auto I = BuildMI (MBB, MI, DL, TII.get (NotOpc), ExecReg).addReg (ExecReg);
244
+ auto I = BuildMI (* MBB, MI, DL, TII.get (NotOpc), ExecReg).addReg (ExecReg);
239
245
if (!TmpVGPRLive) {
240
246
I.addReg (TmpVGPR, RegState::ImplicitKill);
241
247
}
@@ -261,11 +267,17 @@ struct SGPRSpillBuilder {
261
267
TRI.buildVGPRSpillLoadStore (*this , Index, Offset, IsLoad,
262
268
/* IsKill*/ false );
263
269
// Spill inactive lanes
264
- BuildMI (MBB, MI, DL, TII.get (NotOpc), ExecReg).addReg (ExecReg);
270
+ BuildMI (* MBB, MI, DL, TII.get (NotOpc), ExecReg).addReg (ExecReg);
265
271
TRI.buildVGPRSpillLoadStore (*this , Index, Offset, IsLoad);
266
- BuildMI (MBB, MI, DL, TII.get (NotOpc), ExecReg).addReg (ExecReg);
272
+ BuildMI (* MBB, MI, DL, TII.get (NotOpc), ExecReg).addReg (ExecReg);
267
273
}
268
274
}
275
+
276
+ void setMI (MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI) {
277
+ assert (MBB->getParent () == &MF);
278
+ MI = NewMI;
279
+ MBB = NewMBB;
280
+ }
269
281
};
270
282
271
283
} // namespace llvm
@@ -1337,13 +1349,13 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
1337
1349
if (IsLoad) {
1338
1350
unsigned Opc = ST.enableFlatScratch () ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1339
1351
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1340
- buildSpillLoadStore (SB.MBB , SB.MI , Opc, Index, SB.TmpVGPR , false , FrameReg,
1352
+ buildSpillLoadStore (* SB.MBB , SB.MI , Opc, Index, SB.TmpVGPR , false , FrameReg,
1341
1353
Offset * SB.EltSize , MMO, SB.RS );
1342
1354
} else {
1343
1355
unsigned Opc = ST.enableFlatScratch () ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1344
1356
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1345
- buildSpillLoadStore (SB.MBB , SB.MI , Opc, Index, SB.TmpVGPR , IsKill, FrameReg ,
1346
- Offset * SB.EltSize , MMO, SB.RS );
1357
+ buildSpillLoadStore (* SB.MBB , SB.MI , Opc, Index, SB.TmpVGPR , IsKill,
1358
+ FrameReg, Offset * SB.EltSize , MMO, SB.RS );
1347
1359
// This only ever adds one VGPR spill
1348
1360
SB.MFI .addToSpilledVGPRs (1 );
1349
1361
}
@@ -1381,8 +1393,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
1381
1393
1382
1394
// Mark the "old value of vgpr" input undef only if this is the first sgpr
1383
1395
// spill to this specific vgpr in the first basic block.
1384
- auto MIB = BuildMI (SB.MBB , MI, SB.DL , SB. TII . get (AMDGPU::V_WRITELANE_B32) ,
1385
- Spill.VGPR )
1396
+ auto MIB = BuildMI (* SB.MBB , MI, SB.DL ,
1397
+ SB. TII . get (AMDGPU::V_WRITELANE_B32), Spill.VGPR )
1386
1398
.addReg (SubReg, getKillRegState (UseKill))
1387
1399
.addImm (Spill.Lane )
1388
1400
.addReg (Spill.VGPR );
@@ -1428,7 +1440,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
1428
1440
: Register (getSubReg (SB.SuperReg , SB.SplitParts [i]));
1429
1441
1430
1442
MachineInstrBuilder WriteLane =
1431
- BuildMI (SB.MBB , MI, SB.DL , SB.TII .get (AMDGPU::V_WRITELANE_B32),
1443
+ BuildMI (* SB.MBB , MI, SB.DL , SB.TII .get (AMDGPU::V_WRITELANE_B32),
1432
1444
SB.TmpVGPR )
1433
1445
.addReg (SubReg, SubKillState)
1434
1446
.addImm (i % PVD.PerVGPR )
@@ -1490,10 +1502,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
1490
1502
: Register (getSubReg (SB.SuperReg , SB.SplitParts [i]));
1491
1503
1492
1504
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
1493
- auto MIB =
1494
- BuildMI (SB. MBB , MI, SB. DL , SB. TII . get (AMDGPU::V_READLANE_B32), SubReg)
1495
- .addReg (Spill.VGPR )
1496
- .addImm (Spill.Lane );
1505
+ auto MIB = BuildMI (*SB. MBB , MI, SB. DL , SB. TII . get (AMDGPU::V_READLANE_B32),
1506
+ SubReg)
1507
+ .addReg (Spill.VGPR )
1508
+ .addImm (Spill.Lane );
1497
1509
if (SB.NumSubRegs > 1 && i == 0 )
1498
1510
MIB.addReg (SB.SuperReg , RegState::ImplicitDefine);
1499
1511
if (LIS) {
@@ -1524,7 +1536,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
1524
1536
: Register (getSubReg (SB.SuperReg , SB.SplitParts [i]));
1525
1537
1526
1538
bool LastSubReg = (i + 1 == e);
1527
- auto MIB = BuildMI (SB.MBB , MI, SB.DL ,
1539
+ auto MIB = BuildMI (* SB.MBB , MI, SB.DL ,
1528
1540
SB.TII .get (AMDGPU::V_READLANE_B32), SubReg)
1529
1541
.addReg (SB.TmpVGPR , getKillRegState (LastSubReg))
1530
1542
.addImm (i);
@@ -1550,6 +1562,75 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
1550
1562
return true ;
1551
1563
}
1552
1564
1565
+ bool SIRegisterInfo::spillEmergencySGPR (MachineBasicBlock::iterator MI,
1566
+ MachineBasicBlock &RestoreMBB,
1567
+ Register SGPR, RegScavenger *RS) const {
1568
+ SGPRSpillBuilder SB (*this , *ST.getInstrInfo (), isWave32, MI, SGPR, false , 0 ,
1569
+ RS);
1570
+ SB.prepare ();
1571
+ // Generate the spill of SGPR to SB.TmpVGPR.
1572
+ unsigned SubKillState = getKillRegState ((SB.NumSubRegs == 1 ) && SB.IsKill );
1573
+ auto PVD = SB.getPerVGPRData ();
1574
+ for (unsigned Offset = 0 ; Offset < PVD.NumVGPRs ; ++Offset) {
1575
+ unsigned TmpVGPRFlags = RegState::Undef;
1576
+ // Write sub registers into the VGPR
1577
+ for (unsigned i = Offset * PVD.PerVGPR ,
1578
+ e = std::min ((Offset + 1 ) * PVD.PerVGPR , SB.NumSubRegs );
1579
+ i < e; ++i) {
1580
+ Register SubReg =
1581
+ SB.NumSubRegs == 1
1582
+ ? SB.SuperReg
1583
+ : Register (getSubReg (SB.SuperReg , SB.SplitParts [i]));
1584
+
1585
+ MachineInstrBuilder WriteLane =
1586
+ BuildMI (*SB.MBB , MI, SB.DL , SB.TII .get (AMDGPU::V_WRITELANE_B32),
1587
+ SB.TmpVGPR )
1588
+ .addReg (SubReg, SubKillState)
1589
+ .addImm (i % PVD.PerVGPR )
1590
+ .addReg (SB.TmpVGPR , TmpVGPRFlags);
1591
+ TmpVGPRFlags = 0 ;
1592
+ // There could be undef components of a spilled super register.
1593
+ // TODO: Can we detect this and skip the spill?
1594
+ if (SB.NumSubRegs > 1 ) {
1595
+ // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1596
+ unsigned SuperKillState = 0 ;
1597
+ if (i + 1 == SB.NumSubRegs )
1598
+ SuperKillState |= getKillRegState (SB.IsKill );
1599
+ WriteLane.addReg (SB.SuperReg , RegState::Implicit | SuperKillState);
1600
+ }
1601
+ }
1602
+ // Don't need to write VGPR out.
1603
+ }
1604
+
1605
+ // Restore clobbered registers in the specified restore block.
1606
+ MI = RestoreMBB.end ();
1607
+ SB.setMI (&RestoreMBB, MI);
1608
+ // Generate the restore of SGPR from SB.TmpVGPR.
1609
+ for (unsigned Offset = 0 ; Offset < PVD.NumVGPRs ; ++Offset) {
1610
+ // Don't need to load VGPR in.
1611
+ // Unpack lanes
1612
+ for (unsigned i = Offset * PVD.PerVGPR ,
1613
+ e = std::min ((Offset + 1 ) * PVD.PerVGPR , SB.NumSubRegs );
1614
+ i < e; ++i) {
1615
+ Register SubReg =
1616
+ SB.NumSubRegs == 1
1617
+ ? SB.SuperReg
1618
+ : Register (getSubReg (SB.SuperReg , SB.SplitParts [i]));
1619
+ bool LastSubReg = (i + 1 == e);
1620
+ auto MIB = BuildMI (*SB.MBB , MI, SB.DL , SB.TII .get (AMDGPU::V_READLANE_B32),
1621
+ SubReg)
1622
+ .addReg (SB.TmpVGPR , getKillRegState (LastSubReg))
1623
+ .addImm (i);
1624
+ if (SB.NumSubRegs > 1 && i == 0 )
1625
+ MIB.addReg (SB.SuperReg , RegState::ImplicitDefine);
1626
+ }
1627
+ }
1628
+ SB.restore ();
1629
+
1630
+ SB.MFI .addToSpilledSGPRs (SB.NumSubRegs );
1631
+ return false ;
1632
+ }
1633
+
1553
1634
/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
1554
1635
/// a VGPR and the stack slot can be safely eliminated when all other users are
1555
1636
/// handled.
0 commit comments