@@ -86,47 +86,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
86
86
.macro COPY4x8
87
87
ldr q0 , [ A01 ], # 16
88
88
ldr q1 , [ A02 ], # 16
89
- ins v8.s [ 0 ], v0.s [ 0 ]
90
- ins v10.s [ 0 ], v0.s [ 1 ]
91
- ins v12.s [ 0 ], v0.s [ 2 ]
92
- ins v14.s [ 0 ], v0.s [ 3 ]
93
- ins v8.s [ 1 ], v1.s [ 0 ]
94
- ins v10.s [ 1 ], v1.s [ 1 ]
95
- ins v12.s [ 1 ], v1.s [ 2 ]
96
- ins v14.s [ 1 ], v1.s [ 3 ]
97
-
98
89
ldr q2 , [ A03 ], # 16
99
90
ldr q3 , [ A04 ], # 16
100
- ins v8.s [ 2 ], v2.s [ 0 ]
101
- ins v10.s [ 2 ], v2.s [ 1 ]
102
- ins v12.s [ 2 ], v2.s [ 2 ]
103
- ins v14.s [ 2 ], v2.s [ 3 ]
104
- ins v8.s [ 3 ], v3.s [ 0 ]
105
- ins v10.s [ 3 ], v3.s [ 1 ]
106
- ins v12.s [ 3 ], v3.s [ 2 ]
107
- ins v14.s [ 3 ], v3.s [ 3 ]
91
+
92
+ zip1 v16.4s , v0.4s , v1.4s
93
+ zip1 v17.4s , v2.4s , v3.4s
94
+ zip2 v18.4s , v0.4s , v1.4s
95
+ zip2 v19.4s , v2.4s , v3.4s
96
+
97
+ zip1 v8.2d , v16.2d , v17.2d
98
+ zip2 v10.2d , v16.2d , v17.2d
99
+ zip1 v12.2d , v18.2d , v19.2d
100
+ zip2 v14.2d , v18.2d , v19.2d
108
101
109
102
ldr q4 , [ A05 ], # 16
110
103
ldr q5 , [ A06 ], # 16
111
- ins v9.s [ 0 ], v4.s [ 0 ]
112
- ins v11.s [ 0 ], v4.s [ 1 ]
113
- ins v13.s [ 0 ], v4.s [ 2 ]
114
- ins v15.s [ 0 ], v4.s [ 3 ]
115
- ins v9.s [ 1 ], v5.s [ 0 ]
116
- ins v11.s [ 1 ], v5.s [ 1 ]
117
- ins v13.s [ 1 ], v5.s [ 2 ]
118
- ins v15.s [ 1 ], v5.s [ 3 ]
119
-
120
104
ldr q6 , [ A07 ], # 16
121
105
ldr q7 , [ A08 ], # 16
122
- ins v9.s [ 2 ], v6.s [ 0 ]
123
- ins v11.s [ 2 ], v6.s [ 1 ]
124
- ins v13.s [ 2 ], v6.s [ 2 ]
125
- ins v15.s [ 2 ], v6.s [ 3 ]
126
- ins v9.s [ 3 ], v7.s [ 0 ]
127
- ins v11.s [ 3 ], v7.s [ 1 ]
128
- ins v13.s [ 3 ], v7.s [ 2 ]
129
- ins v15.s [ 3 ], v7.s [ 3 ]
106
+
107
+ zip1 v16.4s , v4.4s , v5.4s
108
+ zip1 v17.4s , v6.4s , v7.4s
109
+ zip2 v18.4s , v4.4s , v5.4s
110
+ zip2 v19.4s , v6.4s , v7.4s
111
+
112
+ zip1 v9.2d , v16.2d , v17.2d
113
+ zip2 v11.2d , v16.2d , v17.2d
114
+ zip1 v13.2d , v18.2d , v19.2d
115
+ zip2 v15.2d , v18.2d , v19.2d
130
116
131
117
st1 {v8.4s , v9.4s , v10.4s , v11.4s} , [ B00 ], # 64
132
118
st1 {v12.4s , v13.4s , v14.4s , v15.4s} , [ B00 ], # 64
@@ -135,31 +121,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
135
121
.macro COPY2x8
	// Read two 32-bit elements from each of the eight source pointers
	// A01..A08 (post-incrementing each by 8 bytes), transpose them, and
	// append 64 bytes to [B00] (post-incremented by 64).
	//
	// Stored layout, writing Ak[i] for the i-th element read via A0k:
	//   v8  = { A1[0], A2[0], A3[0], A4[0] }
	//   v9  = { A5[0], A6[0], A7[0], A8[0] }
	//   v10 = { A1[1], A2[1], A3[1], A4[1] }
	//   v11 = { A5[1], A6[1], A7[1], A8[1] }
	//
	// Writes v0-v13 (v12/v13 are scratch).

	// ---- pointers A01..A04 -> v8 / v10 ----
	ldr	d0, [A01], #8
	ldr	d1, [A02], #8
	ldr	d2, [A03], #8
	ldr	d3, [A04], #8

	// Only the low two lanes of each input are populated; zip1 on .4s
	// consumes exactly those lanes.
	zip1	v12.4s, v0.4s, v1.4s		// { A1[0], A2[0], A1[1], A2[1] }
	zip1	v13.4s, v2.4s, v3.4s		// { A3[0], A4[0], A3[1], A4[1] }

	zip1	v8.2d, v12.2d, v13.2d		// low 64-bit halves  -> element 0 row
	zip2	v10.2d, v12.2d, v13.2d		// high 64-bit halves -> element 1 row

	// ---- pointers A05..A08 -> v9 / v11 ----
	ldr	d4, [A05], #8
	ldr	d5, [A06], #8
	ldr	d6, [A07], #8
	ldr	d7, [A08], #8

	zip1	v12.4s, v4.4s, v5.4s		// { A5[0], A6[0], A5[1], A6[1] }
	zip1	v13.4s, v6.4s, v7.4s		// { A7[0], A8[0], A7[1], A8[1] }

	zip1	v9.2d, v12.2d, v13.2d
	zip2	v11.2d, v12.2d, v13.2d

	st1	{v8.4s, v9.4s, v10.4s, v11.4s}, [B00], #64
.endm
@@ -191,43 +171,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
191
171
.macro COPY4x4
	// Read four 32-bit elements from each of A01..A04 (each pointer is
	// post-incremented by 16 bytes), transpose the resulting 4x4 tile and
	// append it (64 bytes) to [B00], which is post-incremented by 64.
	//
	// With a/b/c/d denoting the vectors read via A01/A02/A03/A04:
	//   v8  = { a0, b0, c0, d0 }
	//   v9  = { a1, b1, c1, d1 }
	//   v10 = { a2, b2, c2, d2 }
	//   v11 = { a3, b3, c3, d3 }
	//
	// Writes v0-v3 and v8-v15 (v12-v15 are scratch).
	ldr	q0, [A01], #16
	ldr	q1, [A02], #16
	ldr	q2, [A03], #16
	ldr	q3, [A04], #16

	// Stage 1: interleave 32-bit lanes of neighbouring inputs.
	zip1	v12.4s, v0.4s, v1.4s		// { a0, b0, a1, b1 }
	zip1	v13.4s, v2.4s, v3.4s		// { c0, d0, c1, d1 }
	zip2	v14.4s, v0.4s, v1.4s		// { a2, b2, a3, b3 }
	zip2	v15.4s, v2.4s, v3.4s		// { c2, d2, c3, d3 }

	// Stage 2: interleave 64-bit halves to complete the transpose.
	zip1	v8.2d, v12.2d, v13.2d
	zip2	v9.2d, v12.2d, v13.2d
	zip1	v10.2d, v14.2d, v15.2d
	zip2	v11.2d, v14.2d, v15.2d

	st1	{v8.4s, v9.4s, v10.4s, v11.4s}, [B00], #64
.endm
216
189
217
190
.macro COPY2x4
	// Read two 32-bit elements from each of A01..A04 (each pointer is
	// post-incremented by 8 bytes), transpose them and append 32 bytes to
	// [B00], which is post-incremented by 32.
	//
	// With a/b/c/d denoting the pairs read via A01/A02/A03/A04:
	//   v8 = { a0, b0, c0, d0 }
	//   v9 = { a1, b1, c1, d1 }
	//
	// Writes v0-v3 and v8-v11 (v10/v11 are scratch).
	ldr	d0, [A01], #8
	ldr	d1, [A02], #8
	ldr	d2, [A03], #8
	ldr	d3, [A04], #8

	// Only the low two lanes of each input are populated; zip1 on .4s
	// consumes exactly those lanes.
	zip1	v10.4s, v0.4s, v1.4s		// { a0, b0, a1, b1 }
	zip1	v11.4s, v2.4s, v3.4s		// { c0, d0, c1, d1 }

	zip1	v8.2d, v10.2d, v11.2d		// low 64-bit halves
	zip2	v9.2d, v10.2d, v11.2d		// high 64-bit halves

	st1	{v8.4s, v9.4s}, [B00], #32
.endm
@@ -249,25 +219,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
249
219
.macro COPY4x2
	// Read four 32-bit elements from each of A01 and A02 (each pointer is
	// post-incremented by 16 bytes), interleave them element-wise and
	// append 32 bytes to [B00], which is post-incremented by 32.
	//
	// With a/b denoting the vectors read via A01/A02, the bytes stored are
	//   a0, b0, a1, b1, a2, b2, a3, b3
	//
	// The two zip results already hold these eight elements in exactly
	// that order, so they are stored directly as two full 128-bit
	// registers. Splitting them into four 64-bit halves first and storing
	// those as four .2s registers would write the same 32 bytes.
	//
	// Writes v0, v1, v8 and v9 only.
	ldr	q0, [A01], #16
	ldr	q1, [A02], #16

	zip1	v8.4s, v0.4s, v1.4s		// { a0, b0, a1, b1 }
	zip2	v9.4s, v0.4s, v1.4s		// { a2, b2, a3, b3 }

	st1	{v8.4s, v9.4s}, [B00], #32
.endm
263
233
264
234
.macro COPY2x2
	// Read two 32-bit elements from each of A01 and A02 (each pointer is
	// post-incremented by 8 bytes), transpose the 2x2 tile and append
	// 16 bytes to [B00], which is post-incremented by 16.
	//
	// With a/b denoting the pairs read via A01/A02:
	//   v8 = { a0, b0 }
	//   v9 = { a1, b1 }
	//
	// Writes v0, v1, v8 and v9.
	ldr	d0, [A01], #8
	ldr	d1, [A02], #8

	zip1	v8.2s, v0.2s, v1.2s		// first element of each input
	zip2	v9.2s, v0.2s, v1.2s		// second element of each input

	st1	{v8.2s, v9.2s}, [B00], #16
.endm
0 commit comments