|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2 |
| -; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=SSE42 |
| 2 | +; RUN: llc < %s -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=SSE,SSE2 |
| 3 | +; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 |
3 | 4 | ; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1
|
4 | 5 | ; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX2
|
5 | 6 | ; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512F
|
6 | 7 |
|
7 | 8 | define void @foo(<4 x float> %in, <4 x i8>* %out) {
|
| 9 | +; SSE2-LABEL: foo: |
| 10 | +; SSE2: # %bb.0: |
| 11 | +; SSE2-NEXT: cvttps2dq %xmm0, %xmm0 |
| 12 | +; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| 13 | +; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax |
| 14 | +; SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %ecx |
| 15 | +; SSE2-NEXT: shll $8, %ecx |
| 16 | +; SSE2-NEXT: orl %eax, %ecx |
| 17 | +; SSE2-NEXT: movd %ecx, %xmm0 |
| 18 | +; SSE2-NEXT: movl $65280, %eax # imm = 0xFF00 |
| 19 | +; SSE2-NEXT: orl -{{[0-9]+}}(%rsp), %eax |
| 20 | +; SSE2-NEXT: pinsrw $1, %eax, %xmm0 |
| 21 | +; SSE2-NEXT: movd %xmm0, (%rdi) |
| 22 | +; SSE2-NEXT: retq |
| 23 | +; |
8 | 24 | ; SSE42-LABEL: foo:
|
9 | 25 | ; SSE42: # %bb.0:
|
10 | 26 | ; SSE42-NEXT: cvttps2dq %xmm0, %xmm0
|
@@ -39,22 +55,22 @@ define void @foo(<4 x float> %in, <4 x i8>* %out) {
|
39 | 55 | }
|
40 | 56 |
|
41 | 57 | define <16 x i64> @catcat(<4 x i64> %x) {
|
42 |
| -; SSE42-LABEL: catcat: |
43 |
| -; SSE42: # %bb.0: |
44 |
| -; SSE42-NEXT: movq %rdi, %rax |
45 |
| -; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1] |
46 |
| -; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] |
47 |
| -; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1] |
48 |
| -; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] |
49 |
| -; SSE42-NEXT: movdqa %xmm1, 112(%rdi) |
50 |
| -; SSE42-NEXT: movdqa %xmm1, 96(%rdi) |
51 |
| -; SSE42-NEXT: movdqa %xmm3, 80(%rdi) |
52 |
| -; SSE42-NEXT: movdqa %xmm3, 64(%rdi) |
53 |
| -; SSE42-NEXT: movdqa %xmm0, 48(%rdi) |
54 |
| -; SSE42-NEXT: movdqa %xmm0, 32(%rdi) |
55 |
| -; SSE42-NEXT: movdqa %xmm2, 16(%rdi) |
56 |
| -; SSE42-NEXT: movdqa %xmm2, (%rdi) |
57 |
| -; SSE42-NEXT: retq |
| 58 | +; SSE-LABEL: catcat: |
| 59 | +; SSE: # %bb.0: |
| 60 | +; SSE-NEXT: movq %rdi, %rax |
| 61 | +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1] |
| 62 | +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] |
| 63 | +; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1] |
| 64 | +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] |
| 65 | +; SSE-NEXT: movdqa %xmm1, 112(%rdi) |
| 66 | +; SSE-NEXT: movdqa %xmm1, 96(%rdi) |
| 67 | +; SSE-NEXT: movdqa %xmm3, 80(%rdi) |
| 68 | +; SSE-NEXT: movdqa %xmm3, 64(%rdi) |
| 69 | +; SSE-NEXT: movdqa %xmm0, 48(%rdi) |
| 70 | +; SSE-NEXT: movdqa %xmm0, 32(%rdi) |
| 71 | +; SSE-NEXT: movdqa %xmm2, 16(%rdi) |
| 72 | +; SSE-NEXT: movdqa %xmm2, (%rdi) |
| 73 | +; SSE-NEXT: retq |
58 | 74 | ;
|
59 | 75 | ; AVX1-LABEL: catcat:
|
60 | 76 | ; AVX1: # %bb.0:
|
@@ -93,24 +109,24 @@ define <16 x i64> @catcat(<4 x i64> %x) {
|
93 | 109 | }
|
94 | 110 |
|
95 | 111 | define <16 x i64> @load_catcat(<4 x i64>* %p) {
|
96 |
| -; SSE42-LABEL: load_catcat: |
97 |
| -; SSE42: # %bb.0: |
98 |
| -; SSE42-NEXT: movq %rdi, %rax |
99 |
| -; SSE42-NEXT: movdqa (%rsi), %xmm0 |
100 |
| -; SSE42-NEXT: movdqa 16(%rsi), %xmm1 |
101 |
| -; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1] |
102 |
| -; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] |
103 |
| -; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1] |
104 |
| -; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] |
105 |
| -; SSE42-NEXT: movdqa %xmm1, 112(%rdi) |
106 |
| -; SSE42-NEXT: movdqa %xmm1, 96(%rdi) |
107 |
| -; SSE42-NEXT: movdqa %xmm3, 80(%rdi) |
108 |
| -; SSE42-NEXT: movdqa %xmm3, 64(%rdi) |
109 |
| -; SSE42-NEXT: movdqa %xmm0, 48(%rdi) |
110 |
| -; SSE42-NEXT: movdqa %xmm0, 32(%rdi) |
111 |
| -; SSE42-NEXT: movdqa %xmm2, 16(%rdi) |
112 |
| -; SSE42-NEXT: movdqa %xmm2, (%rdi) |
113 |
| -; SSE42-NEXT: retq |
| 112 | +; SSE-LABEL: load_catcat: |
| 113 | +; SSE: # %bb.0: |
| 114 | +; SSE-NEXT: movq %rdi, %rax |
| 115 | +; SSE-NEXT: movdqa (%rsi), %xmm0 |
| 116 | +; SSE-NEXT: movdqa 16(%rsi), %xmm1 |
| 117 | +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1] |
| 118 | +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] |
| 119 | +; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1] |
| 120 | +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] |
| 121 | +; SSE-NEXT: movdqa %xmm1, 112(%rdi) |
| 122 | +; SSE-NEXT: movdqa %xmm1, 96(%rdi) |
| 123 | +; SSE-NEXT: movdqa %xmm3, 80(%rdi) |
| 124 | +; SSE-NEXT: movdqa %xmm3, 64(%rdi) |
| 125 | +; SSE-NEXT: movdqa %xmm0, 48(%rdi) |
| 126 | +; SSE-NEXT: movdqa %xmm0, 32(%rdi) |
| 127 | +; SSE-NEXT: movdqa %xmm2, 16(%rdi) |
| 128 | +; SSE-NEXT: movdqa %xmm2, (%rdi) |
| 129 | +; SSE-NEXT: retq |
114 | 130 | ;
|
115 | 131 | ; AVX1-LABEL: load_catcat:
|
116 | 132 | ; AVX1: # %bb.0:
|
@@ -147,11 +163,11 @@ define <16 x i64> @load_catcat(<4 x i64>* %p) {
|
147 | 163 | ; the source ops are not an even multiple size of the result.
|
148 | 164 |
|
149 | 165 | define <4 x i32> @cat_ext_straddle(<6 x i32>* %px, <6 x i32>* %py) {
|
150 |
| -; SSE42-LABEL: cat_ext_straddle: |
151 |
| -; SSE42: # %bb.0: |
152 |
| -; SSE42-NEXT: movaps 16(%rdi), %xmm0 |
153 |
| -; SSE42-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] |
154 |
| -; SSE42-NEXT: retq |
| 166 | +; SSE-LABEL: cat_ext_straddle: |
| 167 | +; SSE: # %bb.0: |
| 168 | +; SSE-NEXT: movaps 16(%rdi), %xmm0 |
| 169 | +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] |
| 170 | +; SSE-NEXT: retq |
155 | 171 | ;
|
156 | 172 | ; AVX-LABEL: cat_ext_straddle:
|
157 | 173 | ; AVX: # %bb.0:
|
|
0 commit comments