Skip to content

Commit fd42a4a

Browse files
committed
[X86][SSE] Add add(shl(and(x,c1),c2),c3) test case with non-uniform shift value
As mentioned by @nikic on rGef5debac4302, we should merge the guaranteed top zero bits from the shifted value and min shift amount code so they can both set the high bits to zero.
1 parent a43b006 commit fd42a4a

File tree

1 file changed

+37
-0
lines changed

1 file changed

+37
-0
lines changed

llvm/test/CodeGen/X86/combine-shl.ll

+37
Original file line numberDiff line numberDiff line change
@@ -865,6 +865,43 @@ define <4 x i32> @combine_vec_add_shl_nonsplat(<4 x i32> %a0) {
865865
ret <4 x i32> %2
866866
}
867867

868+
; Codegen test for the pattern add(shl(and(x, c1), c2), c3) where the shift
; amount c2 is a non-uniform (per-lane) constant vector.
; Expected output is checked separately under the SSE2, SSE41 and AVX prefixes.
define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) {
869+
; SSE2-LABEL: combine_vec_add_shl_and_nonsplat:
870+
; SSE2: # %bb.0:
871+
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
872+
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4,8,16,32]
873+
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
874+
; SSE2-NEXT: pmuludq %xmm1, %xmm0
875+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
876+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
877+
; SSE2-NEXT: pmuludq %xmm2, %xmm1
878+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
879+
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
880+
; SSE2-NEXT: paddd {{.*}}(%rip), %xmm0
881+
; SSE2-NEXT: retq
882+
;
883+
; SSE41-LABEL: combine_vec_add_shl_and_nonsplat:
884+
; SSE41: # %bb.0:
885+
; SSE41-NEXT: pxor %xmm1, %xmm1
886+
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
887+
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
888+
; SSE41-NEXT: por {{.*}}(%rip), %xmm0
889+
; SSE41-NEXT: retq
890+
;
891+
; AVX-LABEL: combine_vec_add_shl_and_nonsplat:
892+
; AVX: # %bb.0:
893+
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
894+
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
895+
; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
896+
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
897+
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
898+
; AVX-NEXT: retq
899+
; 4294901760 == 0xFFFF0000: the mask clears the low 16 bits of every lane.
%1 = and <4 x i32> %a0, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
900+
; Each lane is shifted left by a different amount (2, 3, 4, 5), i.e. a
; non-splat shift vector.
%2 = shl <4 x i32> %1, <i32 2, i32 3, i32 4, i32 5>
901+
; The low 16 bits are zero after the mask and remain zero after the left
; shift, so adding 15 cannot carry and is equivalent to OR-ing in 15.
; The SSE41 checks above expect por for this step; the SSE2 and AVX checks
; still expect paddd / vpaddd.
%3 = add <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15>
902+
ret <4 x i32> %3
903+
}
904+
868905
define <4 x i32> @combine_vec_add_shuffle_shl(<4 x i32> %a0) {
869906
; SSE2-LABEL: combine_vec_add_shuffle_shl:
870907
; SSE2: # %bb.0:

0 commit comments

Comments
 (0)