Skip to content

Commit 0fa5aac

Browse files
committed
[RISCV] Teach VSETVLI insertion to look through PHIs to prove we don't need to insert a vsetvli.
If an instruction's AVL operand is a PHI node in the same block, we may be able to peek through the PHI to find vsetvli instructions that produce the AVL in other basic blocks. If we can prove those vsetvli instructions have the same VTYPE and were the last vsetvli in their respective blocks, then we don't need to insert a vsetvli for this pseudo instruction. Reviewed By: rogfer01 Differential Revision: https://reviews.llvm.org/D103277
1 parent 2d2a902 commit 0fa5aac

File tree

2 files changed

+59
-10
lines changed

2 files changed

+59
-10
lines changed

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

+56-1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ using namespace llvm;
3333
#define DEBUG_TYPE "riscv-insert-vsetvli"
3434
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"
3535

36+
static cl::opt<bool> DisableInsertVSETVLPHIOpt(
37+
"riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
38+
cl::desc("Disable looking through phis when inserting vsetvlis."));
39+
3640
namespace {
3741

3842
class VSETVLIInfo {
@@ -285,6 +289,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
285289

286290
private:
287291
bool needVSETVLI(const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo);
292+
bool needVSETVLIPHI(const VSETVLIInfo &Require, const MachineBasicBlock &MBB);
288293
void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
289294
const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
290295

@@ -526,6 +531,55 @@ void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
526531
WorkList.push(S);
527532
}
528533

534+
// If we weren't able to prove a vsetvli was directly unneeded, it might still
535+
// be unneeded if the AVL is a phi node where all incoming values are VL
536+
// outputs from the last VSETVLI in their respective basic blocks.
537+
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
538+
const MachineBasicBlock &MBB) {
539+
if (DisableInsertVSETVLPHIOpt)
540+
return true;
541+
542+
if (!Require.hasAVLReg())
543+
return true;
544+
545+
Register AVLReg = Require.getAVLReg();
546+
if (!AVLReg.isVirtual())
547+
return true;
548+
549+
// We need the AVL to be produced by a PHI node in this basic block.
550+
MachineInstr *PHI = MRI->getVRegDef(AVLReg);
551+
if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
552+
return true;
553+
554+
for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
555+
PHIOp += 2) {
556+
Register InReg = PHI->getOperand(PHIOp).getReg();
557+
MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
558+
const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
559+
// If the exit from the predecessor has the VTYPE we are looking for
560+
// we might be able to avoid a VSETVLI.
561+
if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
562+
return true;
563+
564+
// We need the PHI input to be the output of a VSET(I)VLI.
565+
MachineInstr *DefMI = MRI->getVRegDef(InReg);
566+
if (!DefMI || (DefMI->getOpcode() != RISCV::PseudoVSETVLI &&
567+
DefMI->getOpcode() != RISCV::PseudoVSETIVLI))
568+
return true;
569+
570+
// We found a VSET(I)VLI; make sure it matches the output of the
571+
// predecessor block.
572+
VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
573+
if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
574+
!DefInfo.hasSameVTYPE(PBBInfo.Exit))
575+
return true;
576+
}
577+
578+
// If all the incoming values to the PHI checked out, we don't need
579+
// to insert a VSETVLI.
580+
return false;
581+
}
582+
529583
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
530584
VSETVLIInfo CurInfo;
531585

@@ -564,7 +618,8 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
564618
// use the predecessor information.
565619
assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
566620
"Expected a valid predecessor state.");
567-
if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred)) {
621+
if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) &&
622+
needVSETVLIPHI(NewInfo, MBB)) {
568623
insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred);
569624
CurInfo = NewInfo;
570625
}

llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll

+3-9
Original file line numberDiff line numberDiff line change
@@ -83,21 +83,18 @@ if.end: ; preds = %if.else, %if.then
8383
ret <vscale x 1 x double> %3
8484
}
8585

86-
; FIXME: The last vsetvli is redundant, but we need to look through a phi to
87-
; prove it.
8886
define <vscale x 1 x double> @test3(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
8987
; CHECK-LABEL: test3:
9088
; CHECK: # %bb.0: # %entry
9189
; CHECK-NEXT: beqz a1, .LBB2_2
9290
; CHECK-NEXT: # %bb.1: # %if.then
9391
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
9492
; CHECK-NEXT: vfadd.vv v25, v8, v9
95-
; CHECK-NEXT: j .LBB2_3
93+
; CHECK-NEXT: vfmul.vv v8, v25, v8
94+
; CHECK-NEXT: ret
9695
; CHECK-NEXT: .LBB2_2: # %if.else
9796
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
9897
; CHECK-NEXT: vfsub.vv v25, v8, v9
99-
; CHECK-NEXT: .LBB2_3: # %if.end
100-
; CHECK-NEXT: vsetvli zero, a0, e64,m1,ta,mu
10198
; CHECK-NEXT: vfmul.vv v8, v25, v8
10299
; CHECK-NEXT: ret
103100
entry:
@@ -445,8 +442,6 @@ if.end: ; preds = %if.else, %if.then
445442
ret <vscale x 1 x double> %3
446443
}
447444

448-
; FIXME: The vsetvli in for.body can be removed, it's redundant by its
449-
; predecessors, but we need to look through a PHI to prove it.
450445
define void @saxpy_vec(i64 %n, float %a, float* nocapture readonly %x, float* nocapture %y) {
451446
; CHECK-LABEL: saxpy_vec:
452447
; CHECK: # %bb.0: # %entry
@@ -456,12 +451,11 @@ define void @saxpy_vec(i64 %n, float %a, float* nocapture readonly %x, float* no
456451
; CHECK-NEXT: fmv.w.x ft0, a1
457452
; CHECK-NEXT: .LBB8_2: # %for.body
458453
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
459-
; CHECK-NEXT: vsetvli zero, a4, e32,m8,ta,mu
460454
; CHECK-NEXT: vle32.v v8, (a2)
461455
; CHECK-NEXT: vle32.v v16, (a3)
462456
; CHECK-NEXT: slli a1, a4, 2
463457
; CHECK-NEXT: add a2, a2, a1
464-
; CHECK-NEXT: vsetvli zero, zero, e32,m8,tu,mu
458+
; CHECK-NEXT: vsetvli zero, a4, e32,m8,tu,mu
465459
; CHECK-NEXT: vfmacc.vf v16, ft0, v8
466460
; CHECK-NEXT: vsetvli zero, zero, e32,m8,ta,mu
467461
; CHECK-NEXT: vse32.v v16, (a3)

0 commit comments

Comments
 (0)