Skip to content

Commit 3f39c8f

Browse files
committed
LoongArch: Fixed numpy CI failure
1 parent f3cebb3 commit 3f39c8f

File tree

3 files changed

+233
-5
lines changed

3 files changed

+233
-5
lines changed

kernel/loongarch64/scal.S

+77
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,86 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5353
PROLOGUE
5454

5555
li.d TEMP, SIZE
56+
ld.d XX, $sp, 0 // Load dummy2
57+
slli.d XX, XX, BASE_SHIFT
5658
MTC a1, $r0
5759
slli.d INCX, INCX, BASE_SHIFT
5860
bge $r0, N, .L999
61+
CMPEQ $fcc0, ALPHA, a1
62+
bceqz $fcc0, .L50
63+
beq XX, TEMP, .L50 // if dummy2 == 1, do not directly copy 0
64+
srai.d I, N, 3
65+
bne INCX, TEMP, .L20
66+
bge $r0, I, .L15
67+
.align 3
68+
69+
.L12:
70+
ST a1, X, 0 * SIZE
71+
ST a1, X, 1 * SIZE
72+
ST a1, X, 2 * SIZE
73+
ST a1, X, 3 * SIZE
74+
ST a1, X, 4 * SIZE
75+
ST a1, X, 5 * SIZE
76+
ST a1, X, 6 * SIZE
77+
ST a1, X, 7 * SIZE
78+
addi.w I, I, -1
79+
addi.d X, X, 8 * SIZE
80+
blt $r0, I, .L12
81+
.align 3
82+
83+
.L15:
84+
andi I, N, 7
85+
bge $r0, I, .L999
86+
.align 3
87+
.L16:
88+
ST a1, X, 0 * SIZE
89+
addi.d I, I, -1
90+
addi.d X, X, SIZE
91+
blt $r0, I, .L16
92+
move $r4, $r17
93+
fmov.d $f0, $f22
94+
jirl $r0, $r1, 0x0
95+
.align 3
96+
97+
.L20:
98+
srai.d I, N, 3
99+
bge $r0, I, .L25
100+
.align 3
101+
102+
.L22:
103+
ST a1, X, 0 * SIZE
104+
add.d X, X, INCX
105+
ST a1, X, 0 * SIZE
106+
add.d X, X, INCX
107+
ST a1, X, 0 * SIZE
108+
add.d X, X, INCX
109+
ST a1, X, 0 * SIZE
110+
add.d X, X, INCX
111+
ST a1, X, 0 * SIZE
112+
add.d X, X, INCX
113+
ST a1, X, 0 * SIZE
114+
add.d X, X, INCX
115+
ST a1, X, 0 * SIZE
116+
add.d X, X, INCX
117+
ST a1, X, 0 * SIZE
118+
addi.d I, I, -1
119+
add.d X, X, INCX
120+
blt $r0, I, .L22
121+
.align 3
122+
123+
.L25:
124+
andi I, N, 7
125+
bge $r0, I, .L999
126+
.align 3
127+
.L26:
128+
addi.d I, I, -1
129+
ST a1, X, 0 * SIZE
130+
add.d X, X, INCX
131+
blt $r0, I, .L26
132+
move $r4, $r17
133+
fmov.d $f0, $f22
134+
jirl $r0, $r1, 0x0
135+
.align 3
59136

60137
.L50:
61138
srai.d I, N, 3

kernel/loongarch64/scal_lasx.S

+76-3
Original file line numberDiff line numberDiff line change
@@ -52,17 +52,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5252
bge $r0, N, .L999
5353
bge $r0, INCX, .L999
5454
li.d TEMP, 1
55+
ld.d t1, $sp, 0 // Load dummy2
5556
movgr2fr.d a1, $r0
5657
FFINT a1, a1
5758
movgr2fr.d a2, TEMP
5859
FFINT a2, a2
5960
slli.d TEMP, TEMP, BASE_SHIFT
6061
slli.d INCX, INCX, BASE_SHIFT
62+
slli.d t1, t1, BASE_SHIFT
63+
CMPEQ $fcc0, ALPHA, a1
64+
bcnez $fcc0, .L20 //ALPHA==0
6165
CMPEQ $fcc0, ALPHA, a2
6266
bcnez $fcc0, .L999 //ALPHA==1 return
63-
67+
.L1:
6468
srai.d I, N, 3
65-
beq INCX, TEMP, .L30 //ALPHA!=1 and INCX==1
69+
beq INCX, TEMP, .L30 //ALPHA !=0|1 and INCX==1
6670
MTG TEMP, ALPHA
6771
#ifdef DOUBLE
6872
xvreplgr2vr.d VALPHA, TEMP
@@ -72,7 +76,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7276
move XX, X
7377
.align 3
7478

75-
.L10: //ALPHA!=1 and INCX!=1
79+
.L10: //ALPHA !=0|1 and INCX!=1
7680
bge $r0, I, .L32
7781
.align 3
7882
.L11:
@@ -165,6 +169,75 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
165169
blt $r0, I, .L11
166170
b .L32
167171
.align 3
172+
173+
.L20:
174+
beq t1, TEMP, .L1 // if dummy2 == 1, do not directly copy 0
175+
srai.d I, N, 3
176+
beq INCX, TEMP, .L24
177+
bge $r0, I, .L22
178+
.align 3
179+
180+
.L21:
181+
ST a1, X, 0
182+
add.d X, X, INCX
183+
ST a1, X, 0
184+
add.d X, X, INCX
185+
ST a1, X, 0
186+
add.d X, X, INCX
187+
ST a1, X, 0
188+
add.d X, X, INCX
189+
ST a1, X, 0
190+
add.d X, X, INCX
191+
ST a1, X, 0
192+
add.d X, X, INCX
193+
ST a1, X, 0
194+
add.d X, X, INCX
195+
ST a1, X, 0
196+
add.d X, X, INCX
197+
addi.d I, I, -1
198+
blt $r0, I, .L21
199+
.align 3
200+
201+
.L22:
202+
andi I, N, 7
203+
bge $r0, I, .L999
204+
.align 3
205+
206+
.L23:
207+
ST a1, X, 0 * SIZE
208+
addi.d I, I, -1
209+
add.d X, X, INCX
210+
blt $r0, I, .L23
211+
jirl $r0, $r1, 0
212+
.align 3
213+
214+
.L24:
215+
bge $r0, I, .L26 /*N<8 INCX==1*/
216+
.align 3
217+
.L25:
218+
xvxor.v VX0, VX0, VX0
219+
xvst VX0, X, 0 * SIZE
220+
#ifdef DOUBLE
221+
xvst VX0, X, 4 * SIZE
222+
#endif
223+
addi.d I, I, -1
224+
addi.d X, X, 8 * SIZE
225+
blt $r0, I, .L25
226+
.align 3
227+
228+
.L26:
229+
andi I, N, 7
230+
bge $r0, I, .L999
231+
.align 3
232+
233+
.L27:
234+
ST a1, X, 0 * SIZE
235+
addi.d I, I, -1
236+
addi.d X, X, SIZE
237+
blt $r0, I, .L27
238+
jirl $r0, $r1, 0
239+
.align 3
240+
168241
.L30:
169242
bge $r0, I, .L32/*N<8 INCX==1*/
170243
MTG TEMP, ALPHA

kernel/loongarch64/scal_lsx.S

+80-2
Original file line numberDiff line numberDiff line change
@@ -51,17 +51,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5151

5252
bge $r0, N, .L999
5353
bge $r0, INCX, .L999
54+
ld.d t1, $sp, 0 // Load dummy2
5455
li.d TEMP, 1
5556
movgr2fr.d a1, $r0
5657
FFINT a1, a1
5758
movgr2fr.d a2, TEMP
5859
FFINT a2, a2
5960
slli.d TEMP, TEMP, BASE_SHIFT
6061
slli.d INCX, INCX, BASE_SHIFT
62+
slli.d t1, t1, BASE_SHIFT
63+
CMPEQ $fcc0, ALPHA, a1
64+
bcnez $fcc0, .L20 //ALPHA==0
6165
CMPEQ $fcc0, ALPHA, a2
6266
bcnez $fcc0, .L999 //ALPHA==1 return
67+
.L1:
6368
srai.d I, N, 3
64-
beq INCX, TEMP, .L30 //ALPHA!=1 and INCX==1
69+
beq INCX, TEMP, .L30 //ALPHA !=0|1 and INCX==1
6570
MTG TEMP, ALPHA
6671
#ifdef DOUBLE
6772
vreplgr2vr.d VALPHA, TEMP
@@ -71,7 +76,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7176
move XX, X
7277
.align 3
7378

74-
.L10: //ALPHA!=1 and INCX!=1
79+
.L10: //ALPHA !=0|1 and INCX!=1
7580
bge $r0, I, .L32
7681
.align 3
7782

@@ -169,6 +174,79 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
169174
b .L32
170175
.align 3
171176

177+
.L20:
178+
beq t1, TEMP, .L1 // if dummy2 == 1, do not directly copy 0
179+
srai.d I, N, 3
180+
beq INCX, TEMP, .L24
181+
bge $r0, I, .L22
182+
.align 3
183+
184+
.L21:
185+
ST a1, X, 0
186+
add.d X, X, INCX
187+
ST a1, X, 0
188+
add.d X, X, INCX
189+
ST a1, X, 0
190+
add.d X, X, INCX
191+
ST a1, X, 0
192+
add.d X, X, INCX
193+
ST a1, X, 0
194+
add.d X, X, INCX
195+
ST a1, X, 0
196+
add.d X, X, INCX
197+
ST a1, X, 0
198+
add.d X, X, INCX
199+
ST a1, X, 0
200+
add.d X, X, INCX
201+
addi.d I, I, -1
202+
blt $r0, I, .L21
203+
.align 3
204+
205+
.L22:
206+
andi I, N, 7
207+
bge $r0, I, .L999
208+
.align 3
209+
210+
.L23:
211+
ST a1, X, 0 * SIZE
212+
addi.d I, I, -1
213+
add.d X, X, INCX
214+
blt $r0, I, .L23
215+
jirl $r0, $r1, 0
216+
.align 3
217+
218+
.L24:
219+
bge $r0, I, .L26 /*N<8 INCX==1*/
220+
.align 3
221+
222+
.L25:
223+
vxor.v VX0, VX0, VX0
224+
vst VX0, X, 0 * SIZE
225+
#ifdef DOUBLE
226+
vst VX0, X, 2 * SIZE
227+
vst VX0, X, 4 * SIZE
228+
vst VX0, X, 6 * SIZE
229+
#else
230+
vst VX0, X, 4 * SIZE
231+
#endif
232+
addi.d I, I, -1
233+
addi.d X, X, 8 * SIZE
234+
blt $r0, I, .L25
235+
.align 3
236+
237+
.L26:
238+
andi I, N, 7
239+
bge $r0, I, .L999
240+
.align 3
241+
242+
.L27:
243+
ST a1, X, 0 * SIZE
244+
addi.d I, I, -1
245+
addi.d X, X, SIZE
246+
blt $r0, I, .L27
247+
jirl $r0, $r1, 0
248+
.align 3
249+
172250
.L30:
173251
bge $r0, I, .L32/*N<8 INCX==1*/
174252
MTG TEMP, ALPHA

0 commit comments

Comments
 (0)