You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: lib/node_modules/@stdlib/blas/ext/base/sdsnansum/README.md
-8
Original file line number
Diff line number
Diff line change
@@ -165,14 +165,8 @@ console.log( v );
165
165
166
166
<!-- /.examples -->
167
167
168
-
* * *
169
-
170
168
<section class="references">
171
169
172
-
## References
173
-
174
-
- Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050][@higham:1993a].
// Blocksize for pairwise summation (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.):
* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values and using extended accumulation.
38
30
*
39
-
* ## Method
40
-
*
41
-
* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
42
-
*
43
-
* ## References
44
-
*
45
-
* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
46
-
*
47
31
* @param {PositiveInteger} N - number of indexed elements
48
32
* @param {Float32Array} x - input array
49
33
* @param {integer} stride - stride length
@@ -61,95 +45,7 @@ var BLOCKSIZE = 128;
61
45
* // returns 5.0
62
46
*/
63
47
functionsdsnansum(N,x,stride,offset){
64
-
varix;
65
-
vars0;
66
-
vars1;
67
-
vars2;
68
-
vars3;
69
-
vars4;
70
-
vars5;
71
-
vars6;
72
-
vars7;
73
-
varM;
74
-
vars;
75
-
varn;
76
-
vari;
77
-
78
-
if(N<=0){
79
-
return0.0;
80
-
}
81
-
if(N===1||stride===0){
82
-
if(isnanf(x[offset])){
83
-
return0.0;
84
-
}
85
-
returnx[offset];
86
-
}
87
-
ix=offset;
88
-
if(N<8){
89
-
// Use simple summation...
90
-
s=0.0;
91
-
for(i=0;i<N;i++){
92
-
if(isnanf(x[ix])===false){
93
-
s+=x[ix];
94
-
}
95
-
ix+=stride;
96
-
}
97
-
returnfloat64ToFloat32(s);
98
-
}
99
-
if(N<=BLOCKSIZE){
100
-
// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
101
-
s0=(isnanf(x[ix])) ? 0.0 : x[ix];
102
-
ix+=stride;
103
-
s1=(isnanf(x[ix])) ? 0.0 : x[ix];
104
-
ix+=stride;
105
-
s2=(isnanf(x[ix])) ? 0.0 : x[ix];
106
-
ix+=stride;
107
-
s3=(isnanf(x[ix])) ? 0.0 : x[ix];
108
-
ix+=stride;
109
-
s4=(isnanf(x[ix])) ? 0.0 : x[ix];
110
-
ix+=stride;
111
-
s5=(isnanf(x[ix])) ? 0.0 : x[ix];
112
-
ix+=stride;
113
-
s6=(isnanf(x[ix])) ? 0.0 : x[ix];
114
-
ix+=stride;
115
-
s7=(isnanf(x[ix])) ? 0.0 : x[ix];
116
-
ix+=stride;
117
-
118
-
M=N%8;
119
-
for(i=8;i<N-M;i+=8){
120
-
s0+=(isnanf(x[ix])) ? 0.0 : x[ix];
121
-
ix+=stride;
122
-
s1+=(isnanf(x[ix])) ? 0.0 : x[ix];
123
-
ix+=stride;
124
-
s2+=(isnanf(x[ix])) ? 0.0 : x[ix];
125
-
ix+=stride;
126
-
s3+=(isnanf(x[ix])) ? 0.0 : x[ix];
127
-
ix+=stride;
128
-
s4+=(isnanf(x[ix])) ? 0.0 : x[ix];
129
-
ix+=stride;
130
-
s5+=(isnanf(x[ix])) ? 0.0 : x[ix];
131
-
ix+=stride;
132
-
s6+=(isnanf(x[ix])) ? 0.0 : x[ix];
133
-
ix+=stride;
134
-
s7+=(isnanf(x[ix])) ? 0.0 : x[ix];
135
-
ix+=stride;
136
-
}
137
-
// Pairwise sum the accumulators:
138
-
s=((s0+s1)+(s2+s3))+((s4+s5)+(s6+s7));
139
-
140
-
// Clean-up loop...
141
-
for(i;i<N;i++){
142
-
if(isnanf(x[ix])===false){
143
-
s+=x[ix];
144
-
}
145
-
ix+=stride;
146
-
}
147
-
returnfloat64ToFloat32(s);
148
-
}
149
-
// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values and using extended accumulation.
32
30
*
33
-
* ## Method
34
-
*
35
-
* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
36
-
*
37
-
* ## References
38
-
*
39
-
* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
40
-
*
41
31
* @param {PositiveInteger} N - number of indexed elements
42
32
* @param {Float32Array} x - input array
43
33
* @param {integer} stride - stride length
@@ -53,36 +43,7 @@ var sum = require( './ndarray.js' );
Copy file name to clipboard. Expand all lines: lib/node_modules/@stdlib/blas/ext/base/sdsnansum/src/sdsnansum.c
+2-112
Original file line number
Diff line number
Diff line change
@@ -17,127 +17,17 @@
17
17
*/
18
18
19
19
#include"stdlib/blas/ext/base/sdsnansum.h"
20
-
#include"stdlib/math/base/assert/is_nanf.h"
20
+
#include"stdlib/blas/ext/base/sdsnansumpw.h"
21
21
#include<stdint.h>
22
22
23
23
/**
24
24
* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values and using extended accumulation.
25
25
*
26
-
* ## Method
27
-
*
28
-
* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
29
-
*
30
-
* ## References
31
-
*
32
-
* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
// Blocksize for pairwise summation: 128 (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.)
82
-
if ( N <= 128 ) {
83
-
// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
84
-
s0= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
85
-
ix+=stride;
86
-
s1= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
87
-
ix+=stride;
88
-
s2= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
89
-
ix+=stride;
90
-
s3= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
91
-
ix+=stride;
92
-
s4= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
93
-
ix+=stride;
94
-
s5= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
95
-
ix+=stride;
96
-
s6= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
97
-
ix+=stride;
98
-
s7= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
99
-
ix+=stride;
100
-
101
-
M=N % 8;
102
-
for ( i=8; i<N-M; i+=8 ) {
103
-
s0+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
104
-
ix+=stride;
105
-
s1+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
106
-
ix+=stride;
107
-
s2+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
108
-
ix+=stride;
109
-
s3+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
110
-
ix+=stride;
111
-
s4+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
112
-
ix+=stride;
113
-
s5+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
114
-
ix+=stride;
115
-
s6+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
116
-
ix+=stride;
117
-
s7+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
118
-
ix+=stride;
119
-
}
120
-
// Pairwise sum the accumulators:
121
-
sum= ((s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7));
122
-
123
-
// Clean-up loop...
124
-
for (; i<N; i++ ) {
125
-
if ( !stdlib_base_is_nanf( X[ ix ] ) ) {
126
-
sum+=X[ ix ];
127
-
}
128
-
ix+=stride;
129
-
}
130
-
returnsum;
131
-
}
132
-
// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
133
-
n=N / 2;
134
-
n-=n % 8;
135
-
if ( stride<0 ) {
136
-
xp1= (float*)X+ ( (n-N)*stride );
137
-
xp2= (float*)X;
138
-
} else {
139
-
xp1= (float*)X;
140
-
xp2= (float*)X+ ( n*stride );
141
-
}
142
-
returnstdlib_strided_sdsnansum( n, xp1, stride ) +stdlib_strided_sdsnansum( N-n, xp2, stride );
0 commit comments