Skip to content

Commit b95f66a

Browse files
committed
[X86][SSE] LowerRotate - perform modulo on the amount splat source directly.
If the rotation amount is a known splat, perform the modulo on the splat source and then perform the splat. That way, the amount-extension performed later by LowerScalarVariableShift can fold the splats away without any multiple-use issues. Fixes one of the concerns raised on D104156.
1 parent 087a8ee commit b95f66a

9 files changed

+77
-98
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+12-2
Original file line numberDiff line numberDiff line change
@@ -29034,8 +29034,18 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
2903429034
}
2903529035

2903629036
// ISD::ROT* uses modulo rotate amounts.
29037-
Amt = DAG.getNode(ISD::AND, DL, VT, Amt,
29038-
DAG.getConstant(EltSizeInBits - 1, DL, VT));
29037+
if (SDValue BaseRotAmt = DAG.getSplatValue(Amt)) {
29038+
// If the amount is a splat, perform the modulo BEFORE the splat,
29039+
// this helps LowerScalarVariableShift to remove the splat later.
29040+
Amt = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, BaseRotAmt);
29041+
Amt = DAG.getNode(ISD::AND, DL, VT, Amt,
29042+
DAG.getConstant(EltSizeInBits - 1, DL, VT));
29043+
Amt = DAG.getVectorShuffle(VT, DL, Amt, DAG.getUNDEF(VT),
29044+
SmallVector<int>(NumElts, 0));
29045+
} else {
29046+
Amt = DAG.getNode(ISD::AND, DL, VT, Amt,
29047+
DAG.getConstant(EltSizeInBits - 1, DL, VT));
29048+
}
2903929049

2904029050
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
2904129051
bool LegalVarShifts = SupportedVectorVarShift(VT, Subtarget, ISD::SHL) &&

llvm/test/CodeGen/X86/min-legal-vector-width.ll

-2
Original file line numberDiff line numberDiff line change
@@ -1690,7 +1690,6 @@ define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind "min-leg
16901690
define <32 x i8> @splatvar_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind "min-legal-vector-width"="256" {
16911691
; CHECK-AVX512-LABEL: splatvar_rotate_v32i8:
16921692
; CHECK-AVX512: # %bb.0:
1693-
; CHECK-AVX512-NEXT: vpbroadcastb %xmm1, %xmm1
16941693
; CHECK-AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
16951694
; CHECK-AVX512-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
16961695
; CHECK-AVX512-NEXT: vpsllw %xmm2, %ymm0, %ymm3
@@ -1710,7 +1709,6 @@ define <32 x i8> @splatvar_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind "mi
17101709
;
17111710
; CHECK-VBMI-LABEL: splatvar_rotate_v32i8:
17121711
; CHECK-VBMI: # %bb.0:
1713-
; CHECK-VBMI-NEXT: vpbroadcastb %xmm1, %xmm1
17141712
; CHECK-VBMI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
17151713
; CHECK-VBMI-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
17161714
; CHECK-VBMI-NEXT: vpsllw %xmm2, %ymm0, %ymm3

llvm/test/CodeGen/X86/vector-fshl-256.ll

-2
Original file line numberDiff line numberDiff line change
@@ -1440,14 +1440,12 @@ define void @fancierRotate2(i32* %arr, i8* %control, i32 %rot0, i32 %rot1) {
14401440
; AVX1-NEXT: vmovd %ecx, %xmm3
14411441
; AVX1-NEXT: movq $-1024, %rax # imm = 0xFC00
14421442
; AVX1-NEXT: vpxor %xmm8, %xmm8, %xmm8
1443-
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
14441443
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [31,31,31,31]
14451444
; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm2
14461445
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm9 = xmm2[0],zero,xmm2[1],zero
14471446
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [32,32,32,32]
14481447
; AVX1-NEXT: vpsubd %xmm2, %xmm5, %xmm2
14491448
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm10 = xmm2[0],zero,xmm2[1],zero
1450-
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
14511449
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
14521450
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm11 = xmm4[0],zero,xmm4[1],zero
14531451
; AVX1-NEXT: vpsubd %xmm4, %xmm5, %xmm4

llvm/test/CodeGen/X86/vector-fshl-rot-128.ll

+8-8
Original file line numberDiff line numberDiff line change
@@ -988,12 +988,12 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
988988
; SSE2-LABEL: splatvar_funnnel_v8i16:
989989
; SSE2: # %bb.0:
990990
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
991-
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0]
992-
; SSE2-NEXT: pand %xmm1, %xmm2
993-
; SSE2-NEXT: movdqa %xmm0, %xmm3
994-
; SSE2-NEXT: psllw %xmm2, %xmm3
995991
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
996992
; SSE2-NEXT: psubw %xmm1, %xmm2
993+
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
994+
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
995+
; SSE2-NEXT: movdqa %xmm0, %xmm3
996+
; SSE2-NEXT: psllw %xmm1, %xmm3
997997
; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
998998
; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
999999
; SSE2-NEXT: psrlw %xmm2, %xmm0
@@ -1104,12 +1104,12 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
11041104
; X86-SSE2-LABEL: splatvar_funnnel_v8i16:
11051105
; X86-SSE2: # %bb.0:
11061106
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
1107-
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0]
1108-
; X86-SSE2-NEXT: pand %xmm1, %xmm2
1109-
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
1110-
; X86-SSE2-NEXT: psllw %xmm2, %xmm3
11111107
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
11121108
; X86-SSE2-NEXT: psubw %xmm1, %xmm2
1109+
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1110+
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1111+
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
1112+
; X86-SSE2-NEXT: psllw %xmm1, %xmm3
11131113
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
11141114
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
11151115
; X86-SSE2-NEXT: psrlw %xmm2, %xmm0

llvm/test/CodeGen/X86/vector-fshl-rot-256.ll

+16-29
Original file line numberDiff line numberDiff line change
@@ -686,7 +686,6 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
686686
define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind {
687687
; AVX1-LABEL: splatvar_funnnel_v8i32:
688688
; AVX1: # %bb.0:
689-
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
690689
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
691690
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
692691
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
@@ -704,7 +703,6 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind
704703
;
705704
; AVX2-LABEL: splatvar_funnnel_v8i32:
706705
; AVX2: # %bb.0:
707-
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
708706
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
709707
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
710708
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
@@ -783,8 +781,6 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind
783781
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
784782
; AVX1-LABEL: splatvar_funnnel_v16i16:
785783
; AVX1: # %bb.0:
786-
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
787-
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
788784
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
789785
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
790786
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
@@ -802,7 +798,6 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
802798
;
803799
; AVX2-LABEL: splatvar_funnnel_v16i16:
804800
; AVX2: # %bb.0:
805-
; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
806801
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
807802
; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
808803
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2
@@ -815,7 +810,6 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
815810
;
816811
; AVX512F-LABEL: splatvar_funnnel_v16i16:
817812
; AVX512F: # %bb.0:
818-
; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1
819813
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
820814
; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
821815
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2
@@ -828,7 +822,6 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
828822
;
829823
; AVX512VL-LABEL: splatvar_funnnel_v16i16:
830824
; AVX512VL: # %bb.0:
831-
; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1
832825
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
833826
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
834827
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2
@@ -841,7 +834,6 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
841834
;
842835
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
843836
; AVX512BW: # %bb.0:
844-
; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1
845837
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
846838
; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
847839
; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
@@ -854,7 +846,6 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
854846
;
855847
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
856848
; AVX512VLBW: # %bb.0:
857-
; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1
858849
; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
859850
; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
860851
; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2
@@ -905,35 +896,33 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
905896
define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
906897
; AVX1-LABEL: splatvar_funnnel_v32i8:
907898
; AVX1: # %bb.0:
908-
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
909-
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
899+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
910900
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
911901
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
912-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
913-
; AVX1-NEXT: vpsllw %xmm3, %xmm4, %xmm5
914-
; AVX1-NEXT: vpcmpeqd %xmm6, %xmm6, %xmm6
915-
; AVX1-NEXT: vpsllw %xmm3, %xmm6, %xmm7
916-
; AVX1-NEXT: vpshufb %xmm2, %xmm7, %xmm2
917-
; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm5
902+
; AVX1-NEXT: vpsllw %xmm3, %xmm2, %xmm4
903+
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
904+
; AVX1-NEXT: vpsllw %xmm3, %xmm5, %xmm6
905+
; AVX1-NEXT: vpxor %xmm7, %xmm7, %xmm7
906+
; AVX1-NEXT: vpshufb %xmm7, %xmm6, %xmm6
907+
; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
918908
; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
919909
; AVX1-NEXT: vpsubb %xmm1, %xmm7, %xmm1
920910
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
921-
; AVX1-NEXT: vpsrlw %xmm1, %xmm4, %xmm4
922-
; AVX1-NEXT: vpsrlw %xmm1, %xmm6, %xmm6
923-
; AVX1-NEXT: vpshufb {{.*#+}} xmm6 = xmm6[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
924-
; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
925-
; AVX1-NEXT: vpor %xmm4, %xmm5, %xmm4
911+
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
912+
; AVX1-NEXT: vpsrlw %xmm1, %xmm5, %xmm5
913+
; AVX1-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
914+
; AVX1-NEXT: vpand %xmm5, %xmm2, %xmm2
915+
; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
926916
; AVX1-NEXT: vpsllw %xmm3, %xmm0, %xmm3
927-
; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
917+
; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
928918
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
929-
; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0
930-
; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
931-
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
919+
; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0
920+
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
921+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
932922
; AVX1-NEXT: retq
933923
;
934924
; AVX2-LABEL: splatvar_funnnel_v32i8:
935925
; AVX2: # %bb.0:
936-
; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
937926
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
938927
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
939928
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm3
@@ -954,7 +943,6 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
954943
;
955944
; AVX512F-LABEL: splatvar_funnnel_v32i8:
956945
; AVX512F: # %bb.0:
957-
; AVX512F-NEXT: vpbroadcastb %xmm1, %xmm1
958946
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
959947
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
960948
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm3
@@ -975,7 +963,6 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
975963
;
976964
; AVX512VL-LABEL: splatvar_funnnel_v32i8:
977965
; AVX512VL: # %bb.0:
978-
; AVX512VL-NEXT: vpbroadcastb %xmm1, %xmm1
979966
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
980967
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
981968
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm3

llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll

+4-5
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,8 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
162162
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
163163
; SSE2-LABEL: splatvar_funnnel_v2i32:
164164
; SSE2: # %bb.0:
165-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
166165
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
166+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
167167
; SSE2-NEXT: pslld $23, %xmm1
168168
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
169169
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
@@ -182,9 +182,9 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
182182
;
183183
; SSE41-LABEL: splatvar_funnnel_v2i32:
184184
; SSE41: # %bb.0:
185-
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
186185
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
187186
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
187+
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
188188
; SSE41-NEXT: pslld $23, %xmm1
189189
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
190190
; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
@@ -200,9 +200,9 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
200200
;
201201
; AVX1-LABEL: splatvar_funnnel_v2i32:
202202
; AVX1: # %bb.0:
203-
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
204203
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
205204
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
205+
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
206206
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
207207
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
208208
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
@@ -218,7 +218,6 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
218218
;
219219
; AVX2-LABEL: splatvar_funnnel_v2i32:
220220
; AVX2: # %bb.0:
221-
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
222221
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
223222
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
224223
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
@@ -289,8 +288,8 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
289288
;
290289
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
291290
; X86-SSE2: # %bb.0:
292-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
293291
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
292+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
294293
; X86-SSE2-NEXT: pslld $23, %xmm1
295294
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
296295
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1

llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll

+13-13
Original file line numberDiff line numberDiff line change
@@ -174,13 +174,13 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
174174
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
175175
; SSE2-LABEL: splatvar_funnnel_v2i32:
176176
; SSE2: # %bb.0:
177-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
178177
; SSE2-NEXT: pxor %xmm2, %xmm2
179178
; SSE2-NEXT: psubd %xmm1, %xmm2
180179
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
181-
; SSE2-NEXT: pslld $23, %xmm2
182-
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
183-
; SSE2-NEXT: cvttps2dq %xmm2, %xmm1
180+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
181+
; SSE2-NEXT: pslld $23, %xmm1
182+
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
183+
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
184184
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
185185
; SSE2-NEXT: pmuludq %xmm1, %xmm0
186186
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
@@ -196,13 +196,13 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
196196
;
197197
; SSE41-LABEL: splatvar_funnnel_v2i32:
198198
; SSE41: # %bb.0:
199-
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
200199
; SSE41-NEXT: pxor %xmm2, %xmm2
201200
; SSE41-NEXT: psubd %xmm1, %xmm2
202201
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
203-
; SSE41-NEXT: pslld $23, %xmm2
204-
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
205-
; SSE41-NEXT: cvttps2dq %xmm2, %xmm1
202+
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
203+
; SSE41-NEXT: pslld $23, %xmm1
204+
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
205+
; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
206206
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
207207
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
208208
; SSE41-NEXT: pmuludq %xmm2, %xmm3
@@ -216,10 +216,10 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
216216
;
217217
; AVX1-LABEL: splatvar_funnnel_v2i32:
218218
; AVX1: # %bb.0:
219-
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
220219
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
221220
; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
222221
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
222+
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
223223
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
224224
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
225225
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
@@ -313,13 +313,13 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
313313
;
314314
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
315315
; X86-SSE2: # %bb.0:
316-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
317316
; X86-SSE2-NEXT: pxor %xmm2, %xmm2
318317
; X86-SSE2-NEXT: psubd %xmm1, %xmm2
319318
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
320-
; X86-SSE2-NEXT: pslld $23, %xmm2
321-
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
322-
; X86-SSE2-NEXT: cvttps2dq %xmm2, %xmm1
319+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
320+
; X86-SSE2-NEXT: pslld $23, %xmm1
321+
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
322+
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
323323
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
324324
; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0
325325
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]

llvm/test/CodeGen/X86/vector-rotate-128.ll

+8-8
Original file line numberDiff line numberDiff line change
@@ -955,12 +955,12 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
955955
; SSE2-LABEL: splatvar_rotate_v8i16:
956956
; SSE2: # %bb.0:
957957
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
958-
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0]
959-
; SSE2-NEXT: pand %xmm1, %xmm2
960-
; SSE2-NEXT: movdqa %xmm0, %xmm3
961-
; SSE2-NEXT: psllw %xmm2, %xmm3
962958
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
963959
; SSE2-NEXT: psubw %xmm1, %xmm2
960+
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
961+
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
962+
; SSE2-NEXT: movdqa %xmm0, %xmm3
963+
; SSE2-NEXT: psllw %xmm1, %xmm3
964964
; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
965965
; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
966966
; SSE2-NEXT: psrlw %xmm2, %xmm0
@@ -1071,12 +1071,12 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
10711071
; X86-SSE2-LABEL: splatvar_rotate_v8i16:
10721072
; X86-SSE2: # %bb.0:
10731073
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
1074-
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0]
1075-
; X86-SSE2-NEXT: pand %xmm1, %xmm2
1076-
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
1077-
; X86-SSE2-NEXT: psllw %xmm2, %xmm3
10781074
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
10791075
; X86-SSE2-NEXT: psubw %xmm1, %xmm2
1076+
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1077+
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1078+
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
1079+
; X86-SSE2-NEXT: psllw %xmm1, %xmm3
10801080
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
10811081
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
10821082
; X86-SSE2-NEXT: psrlw %xmm2, %xmm0

0 commit comments

Comments
 (0)