@@ -52,17 +52,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52
52
bge $r0, N, .L999
53
53
bge $r0, INCX, .L999
54
54
li.d TEMP, 1
55
+ ld.d t1, $sp, 0 // Load dummp2
55
56
movgr2fr.d a1, $r0
56
57
FFINT a1, a1
57
58
movgr2fr.d a2, TEMP
58
59
FFINT a2, a2
59
60
slli.d TEMP, TEMP, BASE_SHIFT
60
61
slli.d INCX, INCX, BASE_SHIFT
62
+ slli.d t1, t1, BASE_SHIFT
63
+ CMPEQ $fcc0, ALPHA, a1
64
+ bcnez $fcc0, .L20 //ALPHA==0
61
65
CMPEQ $fcc0, ALPHA, a2
62
66
bcnez $fcc0, .L999 //ALPHA==1 return
63
-
67
+ .L1:
64
68
srai.d I, N, 3
65
- beq INCX, TEMP, .L30 //ALPHA!= 1 and INCX==1
69
+ beq INCX, TEMP, .L30 //ALPHA !=0| 1 and INCX==1
66
70
MTG TEMP, ALPHA
67
71
#ifdef DOUBLE
68
72
xvreplgr2vr.d VALPHA, TEMP
@@ -72,7 +76,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
72
76
move XX, X
73
77
.align 3
74
78
75
- .L10: //ALPHA!= 1 and INCX!=1
79
+ .L10: //ALPHA !=0| 1 and INCX!=1
76
80
bge $r0, I, .L32
77
81
.align 3
78
82
.L11:
@@ -165,6 +169,75 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
165
169
blt $r0, I, .L11
166
170
b .L32
167
171
.align 3
172
+
173
.L20:
    // ALPHA == 0 path: fill X with zeros instead of multiplying.
    // On entry (set up before the ALPHA compare):
    //   a1   = 0.0
    //   TEMP = 1 << BASE_SHIFT
    //   INCX = element stride << BASE_SHIFT (a byte stride)
    //   t1   = stack-passed flag ("dummp2" in the load comment,
    //          presumably the dummy2 argument - confirm) << BASE_SHIFT
    // Comparing against TEMP therefore tests "== 1" for both t1 and INCX.
    // NOTE(review): the flag presumably exists so that 0 * NaN / 0 * Inf can
    // still be produced by the multiply path - confirm against the caller.
    beq     t1, TEMP, .L1       // flag == 1: do not store zeros, scale normally
    srai.d  I, N, 3             // I = N / 8, number of 8-element groups
    beq     INCX, TEMP, .L24    // unit stride: use the vector-store path
    bge     $r0, I, .L22        // fewer than 8 elements: strided tail only
    .align 3

.L21:
    // Strided zero fill, unrolled 8 elements per iteration.
    ST      a1, X, 0
    add.d   X, X, INCX
    ST      a1, X, 0
    add.d   X, X, INCX
    ST      a1, X, 0
    add.d   X, X, INCX
    ST      a1, X, 0
    add.d   X, X, INCX
    ST      a1, X, 0
    add.d   X, X, INCX
    ST      a1, X, 0
    add.d   X, X, INCX
    ST      a1, X, 0
    add.d   X, X, INCX
    ST      a1, X, 0
    add.d   X, X, INCX
    addi.d  I, I, -1
    blt     $r0, I, .L21
    .align 3

.L22:
    // Strided tail: the remaining N % 8 elements.
    andi    I, N, 7
    bge     $r0, I, .L999       // nothing left
    .align 3

.L23:
    ST      a1, X, 0 * SIZE
    addi.d  I, I, -1
    add.d   X, X, INCX
    blt     $r0, I, .L23
    // Leave through the shared exit label, as every other path in view does.
    // NOTE(review): assumes .L999 is the routine's normal exit - it is not
    // visible from this hunk.
    b       .L999
    .align 3

.L24:
    // Unit-stride zero fill with LASX stores, 8 elements per iteration.
    bge     $r0, I, .L26        /*N<8 INCX==1*/
    xvxor.v VX0, VX0, VX0       // VX0 = all-zero bits; loop-invariant, so set once
    .align 3
.L25:
    xvst    VX0, X, 0 * SIZE
#ifdef DOUBLE
    // A second store at 4 * SIZE is only emitted for DOUBLE; presumably one
    // xvst covers 4 doubles or 8 floats - confirm SIZE for each build.
    xvst    VX0, X, 4 * SIZE
#endif
    addi.d  I, I, -1
    addi.d  X, X, 8 * SIZE
    blt     $r0, I, .L25
    .align 3

.L26:
    // Unit-stride tail: the remaining N % 8 elements.
    andi    I, N, 7
    bge     $r0, I, .L999       // nothing left
    .align 3

.L27:
    ST      a1, X, 0 * SIZE
    addi.d  I, I, -1
    addi.d  X, X, SIZE
    blt     $r0, I, .L27
    // Same shared exit as the strided tail above (see the note at .L23).
    b       .L999
    .align 3
240
+
168
241
.L30:
169
242
bge $r0, I, .L32/*N<8 INCX==1*/
170
243
MTG TEMP, ALPHA
0 commit comments