-
Notifications
You must be signed in to change notification settings - Fork 1.5k
/
Copy pathgemv_t_loongson3a.c
93 lines (85 loc) · 2.41 KB
/
gemv_t_loongson3a.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#include "common.h"
//These are auto-tuning codes on Loongson-3A platform.
//#define prefetch(x) __builtin_prefetch(x)
//#define prefetch(x) do {_mm_prefetch((char *)(x), _MM_HINT_T0);} while(0)
#define prefetch(x) __asm__ __volatile__("ld $0, %0"::"m"(x))
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#define spec_loop_alpha1 do {Y[k] += A[LDA * j + i] * X[i]; i++;} while(0)
#define spec_loop do {Y[k] += ALPHA * A[LDA * j + i] * X[i]; i++;} while(0)
#define norm_loop_alpha1 do {Y[k] += A[LDA * j + i] * X[h]; i++; h += INCX;} while(0)
#define norm_loop do {Y[k] += ALPHA * A[LDA * j + i] * X[h]; i++; h += INCX;} while(0)
int CNAME(BLASLONG M, BLASLONG N, BLASLONG UNUSED, FLOAT ALPHA, FLOAT *A, BLASLONG LDA, FLOAT *X, BLASLONG INCX, FLOAT *Y, BLASLONG INCY, FLOAT *BUFFER) {
if(!ALPHA)
return 0;
// if(INCX < 0)
// INCX = -INCX;
// if(INCY < 0)
// INCY = -INCY;
BLASLONG fahead = 30;
BLASLONG spec_unroll = 3;
BLASLONG tMQ = M - M % spec_unroll;
BLASLONG j = 0, k = 0;
if(ALPHA == 1) {
if(INCX == 1) {
for(; likely(j < N); j++, k += INCY) {
BLASLONG i = 0;
for(; likely(i < tMQ);) {
prefetch(A[LDA * j + i + fahead]);
prefetch(X[i + fahead]);
/*loop_mark*/ spec_loop_alpha1;
/*loop_mark*/ spec_loop_alpha1;
/*loop_mark*/ spec_loop_alpha1;
}
for(; likely(i < M);) {
spec_loop_alpha1;
}
}
} else {
for(; likely(j < N); j++, k += INCY) {
BLASLONG i = 0, h = 0;
for(; likely(i < tMQ);) {
prefetch(A[LDA * j + i + fahead]);
prefetch(X[h + fahead]);
/*loop_mark*/ norm_loop_alpha1;
/*loop_mark*/ norm_loop_alpha1;
/*loop_mark*/ norm_loop_alpha1;
}
for(; likely(i < M);) {
norm_loop_alpha1;
}
}
}
} else {
if(INCX == 1) {
for(; likely(j < N); j++, k += INCY) {
BLASLONG i = 0;
for(; likely(i < tMQ);) {
prefetch(A[LDA * j + i + fahead]);
prefetch(X[i + fahead]);
/*loop_mark*/ spec_loop;
/*loop_mark*/ spec_loop;
/*loop_mark*/ spec_loop;
}
for(; likely(i < M);) {
spec_loop;
}
}
} else {
for(; likely(j < N); j++, k += INCY) {
BLASLONG i = 0, h = 0;
for(; likely(i < tMQ);) {
prefetch(A[LDA * j + i + fahead]);
prefetch(X[h + fahead]);
/*loop_mark*/ norm_loop;
/*loop_mark*/ norm_loop;
/*loop_mark*/ norm_loop;
}
for(; likely(i < M);) {
norm_loop;
}
}
}
}
return 0;
}