Skip to content

Commit 4e3afa7

Browse files
authored
Merge pull request #5175 from shubhamsvc/dgemv_thread_throttling
Add thread throttling profile for DGEMV on NEOVERSEV1
2 parents 51c244a + 8e289ec commit 4e3afa7

File tree

1 file changed

+18
-7
lines changed

1 file changed

+18
-7
lines changed

interface/gemv.c

+18-7
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,22 @@ static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT
7070

7171
#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
7272
/*
 * Choose how many threads GEMV should use on Neoverse V1 for a given
 * problem size.
 *
 * MN   - size metric compared against the bands below
 *        (NOTE(review): presumably the product of the matrix dimensions;
 *        confirm against the caller).
 * ncpu - upper bound on the returned thread count for every band except
 *        the smallest one.
 *
 * Returns 1 for the smallest band, MIN(ncpu, cap) for each intermediate
 * band, and ncpu once MN reaches the last threshold.  Builds with DOUBLE
 * defined use a finer-grained set of bands than the other precisions.
 */
static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
#ifdef DOUBLE
  /* Double-precision profile: eight throttled bands before using ncpu. */
  if (MN < 8100L) {
    return 1;
  }
  if (MN < 12100L) {
    return MIN(ncpu, 2);
  }
  if (MN < 36100L) {
    return MIN(ncpu, 4);
  }
  if (MN < 84100L) {
    return MIN(ncpu, 8);
  }
  if (MN < 348100L) {
    return MIN(ncpu, 16);
  }
  if (MN < 435600L) {
    return MIN(ncpu, 24);
  }
  if (MN < 810000L) {
    return MIN(ncpu, 32);
  }
  if (MN < 1050625L) {
    return MIN(ncpu, 40);
  }
  return ncpu;
#else
  /* Profile for builds without DOUBLE: three throttled bands before using ncpu. */
  if (MN < 25600L) {
    return 1;
  }
  if (MN < 63001L) {
    return MIN(ncpu, 4);
  }
  if (MN < 459684L) {
    return MIN(ncpu, 16);
  }
  return ncpu;
#endif
}
7990
#endif
8091

@@ -96,11 +107,11 @@ static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
96107
return num_cpu_avail(4);
97108
return 1;
98109
#endif
99-
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
110+
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(BFLOAT16)
100111
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
101112
#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
102113
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu);
103-
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
114+
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(BFLOAT16)
104115
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
105116
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
106117
}

0 commit comments

Comments
 (0)