You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
[RISCV] Teach vsetvli insertion to remember when predecessors have same AVL and SEW/LMUL ratio if their VTYPEs otherwise mismatch.
Previously we went directly to unknown state on VTYPE mismatch.
If we instead remember the partial match, we can use this to
still use X0, X0 vsetvli in successors if AVL and needed SEW/LMUL
ratio match.
Reviewed By: frasercrmck
Differential Revision: https://reviews.llvm.org/D104069
declare <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float>* nocapture, i64)
497
497
declare <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float>, float, <vscale x 16 x float>, i64)
498
498
declare void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float>, <vscale x 16 x float>* nocapture, i64)
499
+
500
+
; We need a vsetvli in the last block because the predecessors have different
501
+
; VTYPEs. The AVL is the same and the SEW/LMUL ratio implies the same VLMAX so
502
+
; we don't need to read AVL and can keep VL unchanged.
503
+
define <vscale x 2 x i32> @test_vsetvli_x0_x0(<vscale x 2 x i32>* %x, <vscale x 2 x i16>* %y, <vscale x 2 x i32> %z, i64 %vl, i1 %cond) nounwind {
504
+
; CHECK-LABEL: test_vsetvli_x0_x0:
505
+
; CHECK: # %bb.0: # %entry
506
+
; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
507
+
; CHECK-NEXT: vle32.v v25, (a0)
508
+
; CHECK-NEXT: andi a0, a3, 1
509
+
; CHECK-NEXT: beqz a0, .LBB9_2
510
+
; CHECK-NEXT: # %bb.1: # %if
511
+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
512
+
; CHECK-NEXT: vle16.v v26, (a1)
513
+
; CHECK-NEXT: vwadd.vx v8, v26, zero
514
+
; CHECK-NEXT: .LBB9_2: # %if.end
515
+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
516
+
; CHECK-NEXT: vadd.vv v8, v25, v8
517
+
; CHECK-NEXT: ret
518
+
entry:
519
+
%a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>* %x, i64 %vl)
520
+
br i1 %cond, label %if, label %if.end
521
+
522
+
if:
523
+
%b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>* %y, i64 %vl)
524
+
%c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i16> %b, i16 0, i64 %vl)
525
+
br label %if.end
526
+
527
+
if.end:
528
+
%d = phi <vscale x 2 x i32> [ %z, %entry ], [ %c, %if ]
529
+
%e = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %d, i64 %vl)
530
+
ret <vscale x 2 x i32> %e
531
+
}
532
+
declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>*, i64)
533
+
declare <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>*, i64)
534
+
declare <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i16>, i16, i64)
535
+
declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i64)
536
+
537
+
; We can use X0, X0 vsetvli in if2 and if2.end. The merge point at if.end will
538
+
; see two different vtypes with the same SEW/LMUL ratio. At if2.end we will only
539
+
; know the SEW/LMUL ratio for the if.end predecessor and the full vtype for
540
+
; the if2 predecessor. This makes sure we can merge a SEW/LMUL predecessor with
541
+
; a predecessor we know the vtype for.
542
+
define <vscale x 2 x i32> @test_vsetvli_x0_x0_2(<vscale x 2 x i32>* %x, <vscale x 2 x i16>* %y, <vscale x 2 x i16>* %z, i64 %vl, i1 %cond, i1 %cond2, <vscale x 2 x i32> %w) nounwind {
543
+
; CHECK-LABEL: test_vsetvli_x0_x0_2:
544
+
; CHECK: # %bb.0: # %entry
545
+
; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu
546
+
; CHECK-NEXT: vle32.v v25, (a0)
547
+
; CHECK-NEXT: andi a0, a4, 1
548
+
; CHECK-NEXT: beqz a0, .LBB10_2
549
+
; CHECK-NEXT: # %bb.1: # %if
550
+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
551
+
; CHECK-NEXT: vle16.v v26, (a1)
552
+
; CHECK-NEXT: vwadd.wv v25, v25, v26
553
+
; CHECK-NEXT: .LBB10_2: # %if.end
554
+
; CHECK-NEXT: andi a0, a5, 1
555
+
; CHECK-NEXT: beqz a0, .LBB10_4
556
+
; CHECK-NEXT: # %bb.3: # %if2
557
+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
558
+
; CHECK-NEXT: vle16.v v26, (a2)
559
+
; CHECK-NEXT: vwadd.wv v25, v25, v26
560
+
; CHECK-NEXT: .LBB10_4: # %if2.end
561
+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
562
+
; CHECK-NEXT: vadd.vv v8, v25, v8
563
+
; CHECK-NEXT: ret
564
+
entry:
565
+
%a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>* %x, i64 %vl)
566
+
br i1 %cond, label %if, label %if.end
567
+
568
+
if:
569
+
%b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>* %y, i64 %vl)
570
+
%c = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> %a, <vscale x 2 x i16> %b, i64 %vl)
571
+
br label %if.end
572
+
573
+
if.end:
574
+
%d = phi <vscale x 2 x i32> [ %a, %entry ], [ %c, %if ]
575
+
br i1 %cond2, label %if2, label %if2.end
576
+
577
+
if2:
578
+
%e = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>* %z, i64 %vl)
579
+
%f = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> %d, <vscale x 2 x i16> %e, i64 %vl)
580
+
br label %if2.end
581
+
582
+
if2.end:
583
+
%g = phi <vscale x 2 x i32> [ %d, %if.end ], [ %f, %if2 ]
584
+
%h = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> %g, <vscale x 2 x i32> %w, i64 %vl)
585
+
ret <vscale x 2 x i32> %h
586
+
}
587
+
declare <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32>, <vscale x 2 x i16>, i64)
0 commit comments