Skip to content

Commit 171b74e

Browse files
committed
[x86] add tests for vector fdiv with splat divisor; NFC
llvm-svn: 359006
1 parent 3ba5f66 commit 171b74e

File tree

1 file changed

+101
-0
lines changed

1 file changed

+101
-0
lines changed
+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s --check-prefix=AVX

; Divide a <2 x double> by a scalar that has been splatted to both lanes,
; with the 'fast' flag on the fdiv.
; Baseline codegen recorded below: for both run lines the divisor is
; broadcast (unpcklpd / vmovddup) and a single full-width packed divide
; (divpd / vdivpd) is emitted; no reciprocal-multiply form is used here.
define <2 x double> @splat_fdiv_v2f64(<2 x double> %x, double %y) {
; SSE-LABEL: splat_fdiv_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
; SSE-NEXT: divpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splat_fdiv_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX-NEXT: vdivpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  ; Put %y in lane 0, then replicate lane 0 everywhere (all-zero shuffle mask).
  %vy = insertelement <2 x double> undef, double %y, i32 0
  %splaty = shufflevector <2 x double> %vy, <2 x double> undef, <2 x i32> zeroinitializer
  %r = fdiv fast <2 x double> %x, %splaty
  ret <2 x double> %r
}
; Divide a <4 x double> by a splatted scalar with only the 'arcp' flag.
; Baseline codegen recorded below:
;  - With SSE2 the 256-bit value occupies two xmm registers. A scalar is
;    loaded from memory (presumably the constant 1.0 - confirm against the
;    constant pool) and divided by %y once (divsd); the quotient is then
;    splatted and both halves are multiplied by it (two mulpd).
;  - With AVX the divisor is splatted to a ymm register and a single
;    full-width vdivpd is emitted.
define <4 x double> @splat_fdiv_v4f64(<4 x double> %x, double %y) {
; SSE-LABEL: splat_fdiv_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
; SSE-NEXT: divsd %xmm2, %xmm3
; SSE-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0,0]
; SSE-NEXT: mulpd %xmm3, %xmm0
; SSE-NEXT: mulpd %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: splat_fdiv_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX-NEXT: vdivpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  ; Put %y in lane 0, then replicate lane 0 everywhere (all-zero shuffle mask).
  %vy = insertelement <4 x double> undef, double %y, i32 0
  %splaty = shufflevector <4 x double> %vy, <4 x double> undef, <4 x i32> zeroinitializer
  %r = fdiv arcp <4 x double> %x, %splaty
  ret <4 x double> %r
}
; Divide a <4 x float> by a splatted scalar with the 'arcp reassoc' flags.
; Baseline codegen recorded below is the same shape for both run lines:
; the divisor is splatted, a packed reciprocal estimate is taken
; (rcpps / vrcpps), the estimate r is refined once using a vector of 1.0
; as r + r * (1.0 - y * r), and %x is multiplied by the refined value.
; No divide instruction is emitted.
define <4 x float> @splat_fdiv_v4f32(<4 x float> %x, float %y) {
; SSE-LABEL: splat_fdiv_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE-NEXT: rcpps %xmm1, %xmm2
; SSE-NEXT: mulps %xmm2, %xmm1
; SSE-NEXT: movaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: subps %xmm1, %xmm3
; SSE-NEXT: mulps %xmm2, %xmm3
; SSE-NEXT: addps %xmm2, %xmm3
; SSE-NEXT: mulps %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splat_fdiv_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX-NEXT: vrcpps %xmm1, %xmm2
; AVX-NEXT: vmulps %xmm2, %xmm1, %xmm1
; AVX-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-NEXT: vsubps %xmm1, %xmm3, %xmm1
; AVX-NEXT: vmulps %xmm1, %xmm2, %xmm1
; AVX-NEXT: vaddps %xmm1, %xmm2, %xmm1
; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  ; Put %y in lane 0, then replicate lane 0 everywhere (all-zero shuffle mask).
  %vy = insertelement <4 x float> undef, float %y, i32 0
  %splaty = shufflevector <4 x float> %vy, <4 x float> undef, <4 x i32> zeroinitializer
  %r = fdiv arcp reassoc <4 x float> %x, %splaty
  ret <4 x float> %r
}
; Divide an <8 x float> by a splatted scalar with the 'fast' flag.
; Baseline codegen recorded below:
;  - With SSE2 the 256-bit value occupies two xmm registers. A scalar is
;    loaded from memory (presumably the constant 1.0 - confirm against the
;    constant pool) and divided by %y once (divss); the quotient is then
;    splatted and both halves are multiplied by it (two mulps).
;  - With AVX the divisor is splatted to a ymm register, a packed
;    reciprocal estimate is taken (vrcpps), the estimate r is refined once
;    using a vector of 1.0 as r + r * (1.0 - y * r), and %x is multiplied
;    by the refined value. No divide instruction is emitted.
define <8 x float> @splat_fdiv_v8f32(<8 x float> %x, float %y) {
; SSE-LABEL: splat_fdiv_v8f32:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT: divss %xmm2, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-NEXT: mulps %xmm3, %xmm0
; SSE-NEXT: mulps %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: splat_fdiv_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX-NEXT: vrcpps %ymm1, %ymm2
; AVX-NEXT: vmulps %ymm2, %ymm1, %ymm1
; AVX-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-NEXT: vsubps %ymm1, %ymm3, %ymm1
; AVX-NEXT: vmulps %ymm1, %ymm2, %ymm1
; AVX-NEXT: vaddps %ymm1, %ymm2, %ymm1
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  ; Put %y in lane 0, then replicate lane 0 everywhere (all-zero shuffle mask).
  %vy = insertelement <8 x float> undef, float %y, i32 0
  %splaty = shufflevector <8 x float> %vy, <8 x float> undef, <8 x i32> zeroinitializer
  %r = fdiv fast <8 x float> %x, %splaty
  ret <8 x float> %r
}

0 commit comments

Comments
 (0)