You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: lib/node_modules/@stdlib/blas/ext/base/dsnansum/README.md
-8
Original file line number
Diff line number
Diff line change
@@ -165,14 +165,8 @@ console.log( v );
165
165
166
166
<!-- /.examples -->
167
167
168
-
* * *
169
-
170
168
<section class="references">
171
169
172
-
## References
173
-
174
-
- Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050][@higham:1993a].
* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using extended accumulation, and returning an extended precision result.
31
30
*
32
-
* ## Method
33
-
*
34
-
* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
35
-
*
36
-
* ## References
37
-
*
38
-
* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
39
-
*
40
31
* @param {PositiveInteger} N - number of indexed elements
41
32
* @param {Float32Array} x - input array
42
33
* @param {integer} stride - stride length
@@ -52,36 +43,7 @@ var sum = require( './ndarray.js' );
// Blocksize for pairwise summation (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.):
* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using extended accumulation, and returning an extended precision result.
37
30
*
38
-
* ## Method
39
-
*
40
-
* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
41
-
*
42
-
* ## References
43
-
*
44
-
* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
45
-
*
46
31
* @param {PositiveInteger} N - number of indexed elements
47
32
* @param {Float32Array} x - input array
48
33
* @param {integer} stride - stride length
@@ -60,95 +45,7 @@ var BLOCKSIZE = 128;
60
45
* // returns 5.0
61
46
*/
62
47
functiondsnansum(N,x,stride,offset){
63
-
varix;
64
-
vars0;
65
-
vars1;
66
-
vars2;
67
-
vars3;
68
-
vars4;
69
-
vars5;
70
-
vars6;
71
-
vars7;
72
-
varM;
73
-
vars;
74
-
varn;
75
-
vari;
76
-
77
-
if(N<=0){
78
-
return0.0;
79
-
}
80
-
if(N===1||stride===0){
81
-
if(isnanf(x[offset])){
82
-
return0.0;
83
-
}
84
-
returnx[offset];
85
-
}
86
-
ix=offset;
87
-
if(N<8){
88
-
// Use simple summation...
89
-
s=0.0;
90
-
for(i=0;i<N;i++){
91
-
if(isnanf(x[ix])===false){
92
-
s+=x[ix];
93
-
}
94
-
ix+=stride;
95
-
}
96
-
returns;
97
-
}
98
-
if(N<=BLOCKSIZE){
99
-
// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
100
-
s0=(isnanf(x[ix])) ? 0.0 : x[ix];
101
-
ix+=stride;
102
-
s1=(isnanf(x[ix])) ? 0.0 : x[ix];
103
-
ix+=stride;
104
-
s2=(isnanf(x[ix])) ? 0.0 : x[ix];
105
-
ix+=stride;
106
-
s3=(isnanf(x[ix])) ? 0.0 : x[ix];
107
-
ix+=stride;
108
-
s4=(isnanf(x[ix])) ? 0.0 : x[ix];
109
-
ix+=stride;
110
-
s5=(isnanf(x[ix])) ? 0.0 : x[ix];
111
-
ix+=stride;
112
-
s6=(isnanf(x[ix])) ? 0.0 : x[ix];
113
-
ix+=stride;
114
-
s7=(isnanf(x[ix])) ? 0.0 : x[ix];
115
-
ix+=stride;
116
-
117
-
M=N%8;
118
-
for(i=8;i<N-M;i+=8){
119
-
s0+=(isnanf(x[ix])) ? 0.0 : x[ix];
120
-
ix+=stride;
121
-
s1+=(isnanf(x[ix])) ? 0.0 : x[ix];
122
-
ix+=stride;
123
-
s2+=(isnanf(x[ix])) ? 0.0 : x[ix];
124
-
ix+=stride;
125
-
s3+=(isnanf(x[ix])) ? 0.0 : x[ix];
126
-
ix+=stride;
127
-
s4+=(isnanf(x[ix])) ? 0.0 : x[ix];
128
-
ix+=stride;
129
-
s5+=(isnanf(x[ix])) ? 0.0 : x[ix];
130
-
ix+=stride;
131
-
s6+=(isnanf(x[ix])) ? 0.0 : x[ix];
132
-
ix+=stride;
133
-
s7+=(isnanf(x[ix])) ? 0.0 : x[ix];
134
-
ix+=stride;
135
-
}
136
-
// Pairwise sum the accumulators:
137
-
s=((s0+s1)+(s2+s3))+((s4+s5)+(s6+s7));
138
-
139
-
// Clean-up loop...
140
-
for(i;i<N;i++){
141
-
if(isnanf(x[ix])===false){
142
-
s+=x[ix];
143
-
}
144
-
ix+=stride;
145
-
}
146
-
returns;
147
-
}
148
-
// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
Copy file name to clipboard. Expand all lines: lib/node_modules/@stdlib/blas/ext/base/dsnansum/src/dsnansum.c
+2-112
Original file line number
Diff line number
Diff line change
@@ -17,127 +17,17 @@
17
17
*/
18
18
19
19
#include"stdlib/blas/ext/base/dsnansum.h"
20
-
#include"stdlib/math/base/assert/is_nanf.h"
20
+
#include"stdlib/blas/ext/base/dsnansumpw.h"
21
21
#include<stdint.h>
22
22
23
23
/**
24
24
* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using extended accumulation, and returning an extended precision result.
25
25
*
26
-
* ## Method
27
-
*
28
-
* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
29
-
*
30
-
* ## References
31
-
*
32
-
* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
// Blocksize for pairwise summation: 128 (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.)
82
-
if ( N <= 128 ) {
83
-
// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
84
-
s0= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
85
-
ix+=stride;
86
-
s1= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
87
-
ix+=stride;
88
-
s2= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
89
-
ix+=stride;
90
-
s3= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
91
-
ix+=stride;
92
-
s4= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
93
-
ix+=stride;
94
-
s5= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
95
-
ix+=stride;
96
-
s6= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
97
-
ix+=stride;
98
-
s7= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
99
-
ix+=stride;
100
-
101
-
M=N % 8;
102
-
for ( i=8; i<N-M; i+=8 ) {
103
-
s0+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
104
-
ix+=stride;
105
-
s1+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
106
-
ix+=stride;
107
-
s2+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
108
-
ix+=stride;
109
-
s3+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
110
-
ix+=stride;
111
-
s4+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
112
-
ix+=stride;
113
-
s5+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
114
-
ix+=stride;
115
-
s6+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
116
-
ix+=stride;
117
-
s7+= ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
118
-
ix+=stride;
119
-
}
120
-
// Pairwise sum the accumulators:
121
-
sum= ((s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7));
122
-
123
-
// Clean-up loop...
124
-
for (; i<N; i++ ) {
125
-
if ( !stdlib_base_is_nanf( X[ ix ] ) ) {
126
-
sum+= (double)X[ ix ];
127
-
}
128
-
ix+=stride;
129
-
}
130
-
returnsum;
131
-
}
132
-
// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
133
-
n=N / 2;
134
-
n-=n % 8;
135
-
if ( stride<0 ) {
136
-
xp1= (float*)X+ ( (n-N)*stride );
137
-
xp2= (float*)X;
138
-
} else {
139
-
xp1= (float*)X;
140
-
xp2= (float*)X+ ( n*stride );
141
-
}
142
-
returnstdlib_strided_dsnansum( n, xp1, stride ) +stdlib_strided_dsnansum( N-n, xp2, stride );
0 commit comments