Skip to content

Commit ac87133

Browse files
committed
[RISCV] Teach vsetvli insertion to remember when predecessors have same AVL and SEW/LMUL ratio if their VTYPEs otherwise mismatch.
Previously we went directly to unknown state on VTYPE mismatch. If we instead remember the partial match, we can use this to still use X0, X0 vsetvli in successors if AVL and needed SEW/LMUL ratio match. Reviewed By: frasercrmck Differential Revision: https://reviews.llvm.org/D104069
1 parent 8c2c972 commit ac87133

File tree

3 files changed

+134
-8
lines changed

3 files changed

+134
-8
lines changed

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

+43-6
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,12 @@ class VSETVLIInfo {
5858
uint8_t TailAgnostic : 1;
5959
uint8_t MaskAgnostic : 1;
6060
uint8_t MaskRegOp : 1;
61+
uint8_t SEWLMULRatioOnly : 1;
6162

6263
public:
6364
VSETVLIInfo()
64-
: AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false) {}
65+
: AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false),
66+
SEWLMULRatioOnly(false) {}
6567

6668
static VSETVLIInfo getUnknown() {
6769
VSETVLIInfo Info;
@@ -127,16 +129,20 @@ class VSETVLIInfo {
127129
}
128130

129131
unsigned encodeVTYPE() const {
130-
assert(isValid() && !isUnknown() &&
132+
assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
131133
"Can't encode VTYPE for uninitialized or unknown");
132134
return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
133135
}
134136

137+
bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
138+
135139
bool hasSameVTYPE(const VSETVLIInfo &Other) const {
136140
assert(isValid() && Other.isValid() &&
137141
"Can't compare invalid VSETVLIInfos");
138142
assert(!isUnknown() && !Other.isUnknown() &&
139143
"Can't compare VTYPE in unknown state");
144+
assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
145+
"Can't compare when only LMUL/SEW ratio is valid.");
140146
return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
141147
std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
142148
Other.MaskAgnostic);
@@ -172,10 +178,16 @@ class VSETVLIInfo {
172178
bool isCompatible(const VSETVLIInfo &InstrInfo) const {
173179
assert(isValid() && InstrInfo.isValid() &&
174180
"Can't compare invalid VSETVLIInfos");
181+
assert(!InstrInfo.SEWLMULRatioOnly &&
182+
"Expected a valid VTYPE for instruction!");
175183
// Nothing is compatible with Unknown.
176184
if (isUnknown() || InstrInfo.isUnknown())
177185
return false;
178186

187+
// If only our VLMAX ratio is valid, then this isn't compatible.
188+
if (SEWLMULRatioOnly)
189+
return false;
190+
179191
// If the instruction doesn't need an AVLReg and the SEW matches, consider
180192
// it compatible.
181193
if (InstrInfo.hasAVLReg() && InstrInfo.AVLReg == RISCV::NoRegister) {
@@ -209,8 +221,19 @@ class VSETVLIInfo {
209221
if (Other.isUnknown())
210222
return isUnknown();
211223

212-
// Otherwise compare the VTYPE and AVL.
213-
return hasSameVTYPE(Other) && hasSameAVL(Other);
224+
if (!hasSameAVL(Other))
225+
return false;
226+
227+
// If only the VLMAX is valid, check that it is the same.
228+
if (SEWLMULRatioOnly && Other.SEWLMULRatioOnly)
229+
return hasSameVLMAX(Other);
230+
231+
// If the full VTYPE is valid, check that it is the same.
232+
if (!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly)
233+
return hasSameVTYPE(Other);
234+
235+
// If the SEWLMULRatioOnly bits are different, then they aren't equal.
236+
return false;
214237
}
215238

216239
// Calculate the VSETVLIInfo visible to a block assuming this and Other are
@@ -224,10 +247,23 @@ class VSETVLIInfo {
224247
if (!isValid())
225248
return Other;
226249

250+
// If either is unknown, the result is unknown.
251+
if (isUnknown() || Other.isUnknown())
252+
return VSETVLIInfo::getUnknown();
253+
254+
// If we have an exact match, return this.
227255
if (*this == Other)
228256
return *this;
229257

230-
// If the configurations don't match, assume unknown.
258+
// Not an exact match, but maybe the AVL and VLMAX are the same. If so,
259+
// return an SEW/LMUL ratio only value.
260+
if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
261+
VSETVLIInfo MergeInfo = *this;
262+
MergeInfo.SEWLMULRatioOnly = true;
263+
return MergeInfo;
264+
}
265+
266+
// Otherwise the result is unknown.
231267
return VSETVLIInfo::getUnknown();
232268
}
233269

@@ -444,7 +480,8 @@ bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
444480
// and the last VL/VTYPE we observed is the same, we don't need a
445481
// VSETVLI here.
446482
if (!CurInfo.isUnknown() && Require.hasAVLReg() &&
447-
Require.getAVLReg().isVirtual() && Require.hasSameVTYPE(CurInfo)) {
483+
Require.getAVLReg().isVirtual() && !CurInfo.hasSEWLMULRatioOnly() &&
484+
Require.hasSameVTYPE(CurInfo)) {
448485
if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
449486
if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
450487
DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -584,7 +584,7 @@ define void @masked_load_v2i32_align1(<2 x i32>* %a, <2 x i32> %m, <2 x i32>* %r
584584
; RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
585585
; RV32-NEXT: vslideup.vi v25, v26, 1
586586
; RV32-NEXT: .LBB8_4: # %else2
587-
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
587+
; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
588588
; RV32-NEXT: vse32.v v25, (a1)
589589
; RV32-NEXT: addi sp, sp, 16
590590
; RV32-NEXT: ret
@@ -644,7 +644,7 @@ define void @masked_load_v2i32_align1(<2 x i32>* %a, <2 x i32> %m, <2 x i32>* %r
644644
; RV64-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
645645
; RV64-NEXT: vslideup.vi v25, v26, 1
646646
; RV64-NEXT: .LBB8_4: # %else2
647-
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
647+
; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
648648
; RV64-NEXT: vse32.v v25, (a1)
649649
; RV64-NEXT: addi sp, sp, 16
650650
; RV64-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll

+89
Original file line numberDiff line numberDiff line change
@@ -496,3 +496,92 @@ declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
496496
declare <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float>* nocapture, i64)
497497
declare <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float>, float, <vscale x 16 x float>, i64)
498498
declare void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float>, <vscale x 16 x float>* nocapture, i64)
499+
500+
; We need a vsetvli in the last block because the predecessors have different
501+
; VTYPEs. The AVL is the same and the SEW/LMUL ratio implies the same VLMAX so
502+
; we don't need to read AVL and can keep VL unchanged.
503+
define <vscale x 2 x i32> @test_vsetvli_x0_x0(<vscale x 2 x i32>* %x, <vscale x 2 x i16>* %y, <vscale x 2 x i32> %z, i64 %vl, i1 %cond) nounwind {
504+
; CHECK-LABEL: test_vsetvli_x0_x0:
505+
; CHECK: # %bb.0: # %entry
506+
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
507+
; CHECK-NEXT: vle32.v v25, (a0)
508+
; CHECK-NEXT: andi a0, a3, 1
509+
; CHECK-NEXT: beqz a0, .LBB9_2
510+
; CHECK-NEXT: # %bb.1: # %if
511+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
512+
; CHECK-NEXT: vle16.v v26, (a1)
513+
; CHECK-NEXT: vwadd.vx v8, v26, zero
514+
; CHECK-NEXT: .LBB9_2: # %if.end
515+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
516+
; CHECK-NEXT: vadd.vv v8, v25, v8
517+
; CHECK-NEXT: ret
518+
entry:
519+
%a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>* %x, i64 %vl)
520+
br i1 %cond, label %if, label %if.end
521+
522+
if:
523+
%b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>* %y, i64 %vl)
524+
%c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i16> %b, i16 0, i64 %vl)
525+
br label %if.end
526+
527+
if.end:
528+
%d = phi <vscale x 2 x i32> [ %z, %entry ], [ %c, %if ]
529+
%e = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %d, i64 %vl)
530+
ret <vscale x 2 x i32> %e
531+
}
532+
declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>*, i64)
533+
declare <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>*, i64)
534+
declare <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i16>, i16, i64)
535+
declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i64)
536+
537+
; We can use X0, X0 vsetvli in if2 and if2.end. The merge point as if.end will
538+
; see two different vtypes with the same SEW/LMUL ratio. At if2.end we will only
539+
; know the SEW/LMUL ratio for the if.end predecessor and the full vtype for
540+
; the if2 predecessor. This makes sure we can merge a SEW/LMUL predecessor with
541+
; a predecessor we know the vtype for.
542+
define <vscale x 2 x i32> @test_vsetvli_x0_x0_2(<vscale x 2 x i32>* %x, <vscale x 2 x i16>* %y, <vscale x 2 x i16>* %z, i64 %vl, i1 %cond, i1 %cond2, <vscale x 2 x i32> %w) nounwind {
543+
; CHECK-LABEL: test_vsetvli_x0_x0_2:
544+
; CHECK: # %bb.0: # %entry
545+
; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu
546+
; CHECK-NEXT: vle32.v v25, (a0)
547+
; CHECK-NEXT: andi a0, a4, 1
548+
; CHECK-NEXT: beqz a0, .LBB10_2
549+
; CHECK-NEXT: # %bb.1: # %if
550+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
551+
; CHECK-NEXT: vle16.v v26, (a1)
552+
; CHECK-NEXT: vwadd.wv v25, v25, v26
553+
; CHECK-NEXT: .LBB10_2: # %if.end
554+
; CHECK-NEXT: andi a0, a5, 1
555+
; CHECK-NEXT: beqz a0, .LBB10_4
556+
; CHECK-NEXT: # %bb.3: # %if2
557+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
558+
; CHECK-NEXT: vle16.v v26, (a2)
559+
; CHECK-NEXT: vwadd.wv v25, v25, v26
560+
; CHECK-NEXT: .LBB10_4: # %if2.end
561+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
562+
; CHECK-NEXT: vadd.vv v8, v25, v8
563+
; CHECK-NEXT: ret
564+
entry:
565+
%a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>* %x, i64 %vl)
566+
br i1 %cond, label %if, label %if.end
567+
568+
if:
569+
%b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>* %y, i64 %vl)
570+
%c = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> %a, <vscale x 2 x i16> %b, i64 %vl)
571+
br label %if.end
572+
573+
if.end:
574+
%d = phi <vscale x 2 x i32> [ %a, %entry ], [ %c, %if ]
575+
br i1 %cond2, label %if2, label %if2.end
576+
577+
if2:
578+
%e = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>* %z, i64 %vl)
579+
%f = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> %d, <vscale x 2 x i16> %e, i64 %vl)
580+
br label %if2.end
581+
582+
if2.end:
583+
%g = phi <vscale x 2 x i32> [ %d, %if.end ], [ %f, %if2 ]
584+
%h = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> %g, <vscale x 2 x i32> %w, i64 %vl)
585+
ret <vscale x 2 x i32> %h
586+
}
587+
declare <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32>, <vscale x 2 x i16>, i64)

0 commit comments

Comments
 (0)