Skip to content

Commit e099afd

Browse files
committed
Refactor to wrap a specific implementation
1 parent d6d2112 commit e099afd

File tree

6 files changed

+7
-268
lines changed

6 files changed

+7
-268
lines changed

lib/node_modules/@stdlib/blas/ext/base/dsnansum/README.md

-8
Original file line numberDiff line numberDiff line change
@@ -165,14 +165,8 @@ console.log( v );
165165

166166
<!-- /.examples -->
167167

168-
* * *
169-
170168
<section class="references">
171169

172-
## References
173-
174-
- Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050][@higham:1993a].
175-
176170
</section>
177171

178172
<!-- /.references -->
@@ -183,8 +177,6 @@ console.log( v );
183177

184178
[mdn-typed-array]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray
185179

186-
[@higham:1993a]: https://doi.org/10.1137/0914050
187-
188180
</section>
189181

190182
<!-- /.links -->

lib/node_modules/@stdlib/blas/ext/base/dsnansum/lib/dsnansum.js

+2-40
Original file line numberDiff line numberDiff line change
@@ -20,23 +20,14 @@
2020

2121
// MODULES //
2222

23-
var isnanf = require( '@stdlib/math/base/assert/is-nanf' );
24-
var sum = require( './ndarray.js' );
23+
var dsnansumpw = require( '@stdlib/blas/ext/base/dsnansumpw' );
2524

2625

2726
// MAIN //
2827

2928
/**
3029
* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using extended accumulation, and returning an extended precision result.
3130
*
32-
* ## Method
33-
*
34-
* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
35-
*
36-
* ## References
37-
*
38-
* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
39-
*
4031
* @param {PositiveInteger} N - number of indexed elements
4132
* @param {Float32Array} x - input array
4233
* @param {integer} stride - stride length
@@ -52,36 +43,7 @@ var sum = require( './ndarray.js' );
5243
* // returns 1.0
5344
*/
5445
function dsnansum( N, x, stride ) {
55-
var ix;
56-
var s;
57-
var i;
58-
59-
if ( N <= 0 ) {
60-
return 0.0;
61-
}
62-
if ( N === 1 || stride === 0 ) {
63-
if ( isnanf( x[ 0 ] ) ) {
64-
return 0.0;
65-
}
66-
return x[ 0 ];
67-
}
68-
if ( stride < 0 ) {
69-
ix = (1-N) * stride;
70-
} else {
71-
ix = 0;
72-
}
73-
if ( N < 8 ) {
74-
// Use simple summation...
75-
s = 0.0;
76-
for ( i = 0; i < N; i++ ) {
77-
if ( isnanf( x[ ix ] ) === false ) {
78-
s += x[ ix ];
79-
}
80-
ix += stride;
81-
}
82-
return s;
83-
}
84-
return sum( N, x, stride, ix );
46+
return dsnansumpw( N, x, stride );
8547
}
8648

8749

lib/node_modules/@stdlib/blas/ext/base/dsnansum/lib/ndarray.js

+2-105
Original file line numberDiff line numberDiff line change
@@ -20,29 +20,14 @@
2020

2121
// MODULES //
2222

23-
var isnanf = require( '@stdlib/math/base/assert/is-nanf' );
24-
var floor = require( '@stdlib/math/base/special/floor' );
25-
26-
27-
// VARIABLES //
28-
29-
// Blocksize for pairwise summation (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.):
30-
var BLOCKSIZE = 128;
23+
var dsnansumpw = require( '@stdlib/blas/ext/base/dsnansumpw' ).ndarray;
3124

3225

3326
// MAIN //
3427

3528
/**
3629
* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using extended accumulation, and returning an extended precision result.
3730
*
38-
* ## Method
39-
*
40-
* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
41-
*
42-
* ## References
43-
*
44-
* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
45-
*
4631
* @param {PositiveInteger} N - number of indexed elements
4732
* @param {Float32Array} x - input array
4833
* @param {integer} stride - stride length
@@ -60,95 +45,7 @@ var BLOCKSIZE = 128;
6045
* // returns 5.0
6146
*/
6247
function dsnansum( N, x, stride, offset ) {
63-
var ix;
64-
var s0;
65-
var s1;
66-
var s2;
67-
var s3;
68-
var s4;
69-
var s5;
70-
var s6;
71-
var s7;
72-
var M;
73-
var s;
74-
var n;
75-
var i;
76-
77-
if ( N <= 0 ) {
78-
return 0.0;
79-
}
80-
if ( N === 1 || stride === 0 ) {
81-
if ( isnanf( x[ offset ] ) ) {
82-
return 0.0;
83-
}
84-
return x[ offset ];
85-
}
86-
ix = offset;
87-
if ( N < 8 ) {
88-
// Use simple summation...
89-
s = 0.0;
90-
for ( i = 0; i < N; i++ ) {
91-
if ( isnanf( x[ ix ] ) === false ) {
92-
s += x[ ix ];
93-
}
94-
ix += stride;
95-
}
96-
return s;
97-
}
98-
if ( N <= BLOCKSIZE ) {
99-
// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
100-
s0 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
101-
ix += stride;
102-
s1 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
103-
ix += stride;
104-
s2 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
105-
ix += stride;
106-
s3 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
107-
ix += stride;
108-
s4 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
109-
ix += stride;
110-
s5 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
111-
ix += stride;
112-
s6 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
113-
ix += stride;
114-
s7 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
115-
ix += stride;
116-
117-
M = N % 8;
118-
for ( i = 8; i < N-M; i += 8 ) {
119-
s0 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
120-
ix += stride;
121-
s1 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
122-
ix += stride;
123-
s2 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
124-
ix += stride;
125-
s3 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
126-
ix += stride;
127-
s4 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
128-
ix += stride;
129-
s5 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
130-
ix += stride;
131-
s6 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
132-
ix += stride;
133-
s7 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
134-
ix += stride;
135-
}
136-
// Pairwise sum the accumulators:
137-
s = ((s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7));
138-
139-
// Clean-up loop...
140-
for ( i; i < N; i++ ) {
141-
if ( isnanf( x[ ix ] ) === false ) {
142-
s += x[ ix ];
143-
}
144-
ix += stride;
145-
}
146-
return s;
147-
}
148-
// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
149-
n = floor( N/2 );
150-
n -= n % 8;
151-
return dsnansum( n, x, stride, ix ) + dsnansum( N-n, x, stride, ix+(n*stride) ); // eslint-disable-line max-len
48+
return dsnansumpw( N, x, stride, offset );
15249
}
15350

15451

lib/node_modules/@stdlib/blas/ext/base/dsnansum/manifest.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
],
3636
"libpath": [],
3737
"dependencies": [
38-
"@stdlib/math/base/assert/is-nanf"
38+
"@stdlib/blas/ext/base/dsnansumpw"
3939
]
4040
}
4141
]

lib/node_modules/@stdlib/blas/ext/base/dsnansum/package.json

-2
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,6 @@
6363
"sum",
6464
"total",
6565
"summation",
66-
"pairwise",
67-
"pw",
6866
"strided",
6967
"strided array",
7068
"typed",

lib/node_modules/@stdlib/blas/ext/base/dsnansum/src/dsnansum.c

+2-112
Original file line numberDiff line numberDiff line change
@@ -17,127 +17,17 @@
1717
*/
1818

1919
#include "stdlib/blas/ext/base/dsnansum.h"
20-
#include "stdlib/math/base/assert/is_nanf.h"
20+
#include "stdlib/blas/ext/base/dsnansumpw.h"
2121
#include <stdint.h>
2222

2323
/**
2424
* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using extended accumulation, and returning an extended precision result.
2525
*
26-
* ## Method
27-
*
28-
* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
29-
*
30-
* ## References
31-
*
32-
* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
33-
*
3426
* @param N number of indexed elements
3527
* @param X input array
3628
* @param stride stride length
3729
* @return output value
3830
*/
3931
double stdlib_strided_dsnansum( const int64_t N, const float *X, const int64_t stride ) {
40-
float *xp1;
41-
float *xp2;
42-
double sum;
43-
int64_t ix;
44-
int64_t M;
45-
int64_t n;
46-
int64_t i;
47-
double s0;
48-
double s1;
49-
double s2;
50-
double s3;
51-
double s4;
52-
double s5;
53-
double s6;
54-
double s7;
55-
56-
if ( N <= 0 ) {
57-
return 0.0;
58-
}
59-
if ( N == 1 || stride == 0 ) {
60-
if ( stdlib_base_is_nanf( X[ 0 ] ) ) {
61-
return 0.0;
62-
}
63-
return X[ 0 ];
64-
}
65-
if ( stride < 0 ) {
66-
ix = (1-N) * stride;
67-
} else {
68-
ix = 0;
69-
}
70-
if ( N < 8 ) {
71-
// Use simple summation...
72-
sum = 0.0;
73-
for ( i = 0; i < N; i++ ) {
74-
if ( !stdlib_base_is_nanf( X[ ix ] ) ) {
75-
sum += (double)X[ ix ];
76-
}
77-
ix += stride;
78-
}
79-
return sum;
80-
}
81-
// Blocksize for pairwise summation: 128 (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.)
82-
if ( N <= 128 ) {
83-
// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
84-
s0 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
85-
ix += stride;
86-
s1 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
87-
ix += stride;
88-
s2 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
89-
ix += stride;
90-
s3 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
91-
ix += stride;
92-
s4 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
93-
ix += stride;
94-
s5 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
95-
ix += stride;
96-
s6 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
97-
ix += stride;
98-
s7 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
99-
ix += stride;
100-
101-
M = N % 8;
102-
for ( i = 8; i < N-M; i += 8 ) {
103-
s0 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
104-
ix += stride;
105-
s1 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
106-
ix += stride;
107-
s2 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
108-
ix += stride;
109-
s3 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
110-
ix += stride;
111-
s4 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
112-
ix += stride;
113-
s5 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
114-
ix += stride;
115-
s6 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
116-
ix += stride;
117-
s7 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ];
118-
ix += stride;
119-
}
120-
// Pairwise sum the accumulators:
121-
sum = ((s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7));
122-
123-
// Clean-up loop...
124-
for (; i < N; i++ ) {
125-
if ( !stdlib_base_is_nanf( X[ ix ] ) ) {
126-
sum += (double)X[ ix ];
127-
}
128-
ix += stride;
129-
}
130-
return sum;
131-
}
132-
// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
133-
n = N / 2;
134-
n -= n % 8;
135-
if ( stride < 0 ) {
136-
xp1 = (float *)X + ( (n-N)*stride );
137-
xp2 = (float *)X;
138-
} else {
139-
xp1 = (float *)X;
140-
xp2 = (float *)X + ( n*stride );
141-
}
142-
return stdlib_strided_dsnansum( n, xp1, stride ) + stdlib_strided_dsnansum( N-n, xp2, stride );
32+
return stdlib_strided_dsnansumpw( N, X, stride );
14333
}

0 commit comments

Comments
 (0)