@@ -99,7 +99,17 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
99
99
temp7 += vx1 * va7_1 + vx2 * va7_2 ;
100
100
}
101
101
102
-
102
+ #if defined(POWER8 )
103
+ y [0 ] += alpha * (temp0 [0 ] + temp0 [1 ]+ temp0 [2 ] + temp0 [3 ]);
104
+ y [1 ] += alpha * (temp1 [0 ] + temp1 [1 ]+ temp1 [2 ] + temp1 [3 ]);
105
+ y [2 ] += alpha * (temp2 [0 ] + temp2 [1 ]+ temp2 [2 ] + temp2 [3 ]);
106
+ y [3 ] += alpha * (temp3 [0 ] + temp3 [1 ]+ temp3 [2 ] + temp3 [3 ]);
107
+
108
+ y [4 ] += alpha * (temp4 [0 ] + temp4 [1 ]+ temp4 [2 ] + temp4 [3 ]);
109
+ y [5 ] += alpha * (temp5 [0 ] + temp5 [1 ]+ temp5 [2 ] + temp5 [3 ]);
110
+ y [6 ] += alpha * (temp6 [0 ] + temp6 [1 ]+ temp6 [2 ] + temp6 [3 ]);
111
+ y [7 ] += alpha * (temp7 [0 ] + temp7 [1 ]+ temp7 [2 ] + temp7 [3 ]);
112
+ #else
103
113
register __vector float t0 , t1 , t2 , t3 ;
104
114
register __vector float a = { alpha , alpha , alpha , alpha };
105
115
__vector float * v_y = (__vector float * ) y ;
@@ -126,7 +136,7 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
126
136
127
137
v_y [0 ] += a * temp0 ;
128
138
v_y [1 ] += a * temp4 ;
129
-
139
+ #endif
130
140
}
131
141
132
142
@@ -153,7 +163,13 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
153
163
temp2 += v_x [i ] * va2 [i ] + v_x [i + 1 ] * va2 [i + 1 ];
154
164
temp3 += v_x [i ] * va3 [i ] + v_x [i + 1 ] * va3 [i + 1 ];
155
165
}
156
-
166
+
167
+ #if defined(POWER8 )
168
+ y [0 ] += alpha * (temp0 [0 ] + temp0 [1 ]+ temp0 [2 ] + temp0 [3 ]);
169
+ y [1 ] += alpha * (temp1 [0 ] + temp1 [1 ]+ temp1 [2 ] + temp1 [3 ]);
170
+ y [2 ] += alpha * (temp2 [0 ] + temp2 [1 ]+ temp2 [2 ] + temp2 [3 ]);
171
+ y [3 ] += alpha * (temp3 [0 ] + temp3 [1 ]+ temp3 [2 ] + temp3 [3 ]);
172
+ #else
157
173
register __vector float t0 , t1 , t2 , t3 ;
158
174
register __vector float a = { alpha , alpha , alpha , alpha };
159
175
__vector float * v_y = (__vector float * ) y ;
@@ -169,7 +185,7 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
169
185
temp0 += temp1 + temp2 + temp3 ;
170
186
171
187
v_y [0 ] += a * temp0 ;
172
-
188
+ #endif
173
189
}
174
190
175
191
0 commit comments