Skip to content

Commit 757c7c2

Browse files
committed
[X86][SSE] Add SSE2 extract-concat tests
Check pre-SSE41 codegen, where fewer PEXTR*/PINSR* instructions are available
1 parent 1e2772c commit 757c7c2

File tree

1 file changed

+56
-40
lines changed

1 file changed

+56
-40
lines changed

llvm/test/CodeGen/X86/extract-concat.ll

Lines changed: 56 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,26 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=SSE42
2+
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3+
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
34
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1
45
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX2
56
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512F
67

78
define void @foo(<4 x float> %in, <4 x i8>* %out) {
9+
; SSE2-LABEL: foo:
10+
; SSE2: # %bb.0:
11+
; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
12+
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
13+
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
14+
; SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
15+
; SSE2-NEXT: shll $8, %ecx
16+
; SSE2-NEXT: orl %eax, %ecx
17+
; SSE2-NEXT: movd %ecx, %xmm0
18+
; SSE2-NEXT: movl $65280, %eax # imm = 0xFF00
19+
; SSE2-NEXT: orl -{{[0-9]+}}(%rsp), %eax
20+
; SSE2-NEXT: pinsrw $1, %eax, %xmm0
21+
; SSE2-NEXT: movd %xmm0, (%rdi)
22+
; SSE2-NEXT: retq
23+
;
824
; SSE42-LABEL: foo:
925
; SSE42: # %bb.0:
1026
; SSE42-NEXT: cvttps2dq %xmm0, %xmm0
@@ -39,22 +55,22 @@ define void @foo(<4 x float> %in, <4 x i8>* %out) {
3955
}
4056

4157
define <16 x i64> @catcat(<4 x i64> %x) {
42-
; SSE42-LABEL: catcat:
43-
; SSE42: # %bb.0:
44-
; SSE42-NEXT: movq %rdi, %rax
45-
; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
46-
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
47-
; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
48-
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
49-
; SSE42-NEXT: movdqa %xmm1, 112(%rdi)
50-
; SSE42-NEXT: movdqa %xmm1, 96(%rdi)
51-
; SSE42-NEXT: movdqa %xmm3, 80(%rdi)
52-
; SSE42-NEXT: movdqa %xmm3, 64(%rdi)
53-
; SSE42-NEXT: movdqa %xmm0, 48(%rdi)
54-
; SSE42-NEXT: movdqa %xmm0, 32(%rdi)
55-
; SSE42-NEXT: movdqa %xmm2, 16(%rdi)
56-
; SSE42-NEXT: movdqa %xmm2, (%rdi)
57-
; SSE42-NEXT: retq
58+
; SSE-LABEL: catcat:
59+
; SSE: # %bb.0:
60+
; SSE-NEXT: movq %rdi, %rax
61+
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
62+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
63+
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
64+
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
65+
; SSE-NEXT: movdqa %xmm1, 112(%rdi)
66+
; SSE-NEXT: movdqa %xmm1, 96(%rdi)
67+
; SSE-NEXT: movdqa %xmm3, 80(%rdi)
68+
; SSE-NEXT: movdqa %xmm3, 64(%rdi)
69+
; SSE-NEXT: movdqa %xmm0, 48(%rdi)
70+
; SSE-NEXT: movdqa %xmm0, 32(%rdi)
71+
; SSE-NEXT: movdqa %xmm2, 16(%rdi)
72+
; SSE-NEXT: movdqa %xmm2, (%rdi)
73+
; SSE-NEXT: retq
5874
;
5975
; AVX1-LABEL: catcat:
6076
; AVX1: # %bb.0:
@@ -93,24 +109,24 @@ define <16 x i64> @catcat(<4 x i64> %x) {
93109
}
94110

95111
define <16 x i64> @load_catcat(<4 x i64>* %p) {
96-
; SSE42-LABEL: load_catcat:
97-
; SSE42: # %bb.0:
98-
; SSE42-NEXT: movq %rdi, %rax
99-
; SSE42-NEXT: movdqa (%rsi), %xmm0
100-
; SSE42-NEXT: movdqa 16(%rsi), %xmm1
101-
; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
102-
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
103-
; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
104-
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
105-
; SSE42-NEXT: movdqa %xmm1, 112(%rdi)
106-
; SSE42-NEXT: movdqa %xmm1, 96(%rdi)
107-
; SSE42-NEXT: movdqa %xmm3, 80(%rdi)
108-
; SSE42-NEXT: movdqa %xmm3, 64(%rdi)
109-
; SSE42-NEXT: movdqa %xmm0, 48(%rdi)
110-
; SSE42-NEXT: movdqa %xmm0, 32(%rdi)
111-
; SSE42-NEXT: movdqa %xmm2, 16(%rdi)
112-
; SSE42-NEXT: movdqa %xmm2, (%rdi)
113-
; SSE42-NEXT: retq
112+
; SSE-LABEL: load_catcat:
113+
; SSE: # %bb.0:
114+
; SSE-NEXT: movq %rdi, %rax
115+
; SSE-NEXT: movdqa (%rsi), %xmm0
116+
; SSE-NEXT: movdqa 16(%rsi), %xmm1
117+
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
118+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
119+
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
120+
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
121+
; SSE-NEXT: movdqa %xmm1, 112(%rdi)
122+
; SSE-NEXT: movdqa %xmm1, 96(%rdi)
123+
; SSE-NEXT: movdqa %xmm3, 80(%rdi)
124+
; SSE-NEXT: movdqa %xmm3, 64(%rdi)
125+
; SSE-NEXT: movdqa %xmm0, 48(%rdi)
126+
; SSE-NEXT: movdqa %xmm0, 32(%rdi)
127+
; SSE-NEXT: movdqa %xmm2, 16(%rdi)
128+
; SSE-NEXT: movdqa %xmm2, (%rdi)
129+
; SSE-NEXT: retq
114130
;
115131
; AVX1-LABEL: load_catcat:
116132
; AVX1: # %bb.0:
@@ -147,11 +163,11 @@ define <16 x i64> @load_catcat(<4 x i64>* %p) {
147163
; the source ops are not an even multiple size of the result.
148164

149165
define <4 x i32> @cat_ext_straddle(<6 x i32>* %px, <6 x i32>* %py) {
150-
; SSE42-LABEL: cat_ext_straddle:
151-
; SSE42: # %bb.0:
152-
; SSE42-NEXT: movaps 16(%rdi), %xmm0
153-
; SSE42-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
154-
; SSE42-NEXT: retq
166+
; SSE-LABEL: cat_ext_straddle:
167+
; SSE: # %bb.0:
168+
; SSE-NEXT: movaps 16(%rdi), %xmm0
169+
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
170+
; SSE-NEXT: retq
155171
;
156172
; AVX-LABEL: cat_ext_straddle:
157173
; AVX: # %bb.0:

0 commit comments

Comments
 (0)