Skip to content

Commit 951685e

Browse files
committed
Refactor to wrap specific implementation
1 parent d8d87b3 commit 951685e

File tree

6 files changed

+7
-270
lines changed

6 files changed

+7
-270
lines changed

lib/node_modules/@stdlib/blas/ext/base/sdsnansum/README.md

-8
Original file line numberDiff line numberDiff line change
@@ -165,14 +165,8 @@ console.log( v );
165165

166166
<!-- /.examples -->
167167

168-
* * *
169-
170168
<section class="references">
171169

172-
## References
173-
174-
- Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050][@higham:1993a].
175-
176170
</section>
177171

178172
<!-- /.references -->
@@ -183,8 +177,6 @@ console.log( v );
183177

184178
[mdn-typed-array]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray
185179

186-
[@higham:1993a]: https://doi.org/10.1137/0914050
187-
188180
</section>
189181

190182
<!-- /.links -->

lib/node_modules/@stdlib/blas/ext/base/sdsnansum/lib/ndarray.js

+2-106
Original file line numberDiff line numberDiff line change
@@ -20,30 +20,14 @@
2020

2121
// MODULES //
2222

23-
var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
24-
var isnanf = require( '@stdlib/math/base/assert/is-nanf' );
25-
var floor = require( '@stdlib/math/base/special/floor' );
26-
27-
28-
// VARIABLES //
29-
30-
// Blocksize for pairwise summation (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.):
31-
var BLOCKSIZE = 128;
23+
var sdsnansumpw = require( '@stdlib/blas/ext/base/sdsnansumpw' ).ndarray;
3224

3325

3426
// MAIN //
3527

3628
/**
3729
* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values and using extended accumulation.
3830
*
39-
* ## Method
40-
*
41-
* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
42-
*
43-
* ## References
44-
*
45-
* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
46-
*
4731
* @param {PositiveInteger} N - number of indexed elements
4832
* @param {Float32Array} x - input array
4933
* @param {integer} stride - stride length
@@ -61,95 +45,7 @@ var BLOCKSIZE = 128;
6145
* // returns 5.0
6246
*/
6347
function sdsnansum( N, x, stride, offset ) {
64-
var ix;
65-
var s0;
66-
var s1;
67-
var s2;
68-
var s3;
69-
var s4;
70-
var s5;
71-
var s6;
72-
var s7;
73-
var M;
74-
var s;
75-
var n;
76-
var i;
77-
78-
if ( N <= 0 ) {
79-
return 0.0;
80-
}
81-
if ( N === 1 || stride === 0 ) {
82-
if ( isnanf( x[ offset ] ) ) {
83-
return 0.0;
84-
}
85-
return x[ offset ];
86-
}
87-
ix = offset;
88-
if ( N < 8 ) {
89-
// Use simple summation...
90-
s = 0.0;
91-
for ( i = 0; i < N; i++ ) {
92-
if ( isnanf( x[ ix ] ) === false ) {
93-
s += x[ ix ];
94-
}
95-
ix += stride;
96-
}
97-
return float64ToFloat32( s );
98-
}
99-
if ( N <= BLOCKSIZE ) {
100-
// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
101-
s0 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
102-
ix += stride;
103-
s1 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
104-
ix += stride;
105-
s2 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
106-
ix += stride;
107-
s3 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
108-
ix += stride;
109-
s4 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
110-
ix += stride;
111-
s5 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
112-
ix += stride;
113-
s6 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
114-
ix += stride;
115-
s7 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
116-
ix += stride;
117-
118-
M = N % 8;
119-
for ( i = 8; i < N-M; i += 8 ) {
120-
s0 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
121-
ix += stride;
122-
s1 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
123-
ix += stride;
124-
s2 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
125-
ix += stride;
126-
s3 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
127-
ix += stride;
128-
s4 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
129-
ix += stride;
130-
s5 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
131-
ix += stride;
132-
s6 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
133-
ix += stride;
134-
s7 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
135-
ix += stride;
136-
}
137-
// Pairwise sum the accumulators:
138-
s = ((s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7));
139-
140-
// Clean-up loop...
141-
for ( i; i < N; i++ ) {
142-
if ( isnanf( x[ ix ] ) === false ) {
143-
s += x[ ix ];
144-
}
145-
ix += stride;
146-
}
147-
return float64ToFloat32( s );
148-
}
149-
// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
150-
n = floor( N/2 );
151-
n -= n % 8;
152-
return float64ToFloat32( sdsnansum( n, x, stride, ix ) + sdsnansum( N-n, x, stride, ix+(n*stride) ) ); // eslint-disable-line max-len
48+
return sdsnansumpw( N, x, stride, offset );
15349
}
15450

15551

lib/node_modules/@stdlib/blas/ext/base/sdsnansum/lib/sdsnansum.js

+2-41
Original file line numberDiff line numberDiff line change
@@ -20,24 +20,14 @@
2020

2121
// MODULES //
2222

23-
var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
24-
var isnanf = require( '@stdlib/math/base/assert/is-nanf' );
25-
var sum = require( './ndarray.js' );
23+
var sdsnansumpw = require( '@stdlib/blas/ext/base/sdsnansumpw' );
2624

2725

2826
// MAIN //
2927

3028
/**
3129
* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values and using extended accumulation.
3230
*
33-
* ## Method
34-
*
35-
* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
36-
*
37-
* ## References
38-
*
39-
* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
40-
*
4131
* @param {PositiveInteger} N - number of indexed elements
4232
* @param {Float32Array} x - input array
4333
* @param {integer} stride - stride length
@@ -53,36 +43,7 @@ var sum = require( './ndarray.js' );
5343
* // returns 1.0
5444
*/
5545
function sdsnansum( N, x, stride ) {
56-
var ix;
57-
var s;
58-
var i;
59-
60-
if ( N <= 0 ) {
61-
return 0.0;
62-
}
63-
if ( N === 1 || stride === 0 ) {
64-
if ( isnanf( x[ 0 ] ) ) {
65-
return 0.0;
66-
}
67-
return x[ 0 ];
68-
}
69-
if ( stride < 0 ) {
70-
ix = (1-N) * stride;
71-
} else {
72-
ix = 0;
73-
}
74-
if ( N < 8 ) {
75-
// Use simple summation...
76-
s = 0.0;
77-
for ( i = 0; i < N; i++ ) {
78-
if ( isnanf( x[ ix ] ) === false ) {
79-
s += x[ ix ];
80-
}
81-
ix += stride;
82-
}
83-
return float64ToFloat32( s );
84-
}
85-
return sum( N, x, stride, ix );
46+
return sdsnansumpw( N, x, stride );
8647
}
8748

8849

lib/node_modules/@stdlib/blas/ext/base/sdsnansum/manifest.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
],
3636
"libpath": [],
3737
"dependencies": [
38-
"@stdlib/math/base/assert/is-nanf"
38+
"@stdlib/blas/ext/base/sdsnansumpw"
3939
]
4040
}
4141
]

lib/node_modules/@stdlib/blas/ext/base/sdsnansum/package.json

-2
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,6 @@
6363
"sum",
6464
"total",
6565
"summation",
66-
"pairwise",
67-
"pw",
6866
"strided",
6967
"strided array",
7068
"typed",

lib/node_modules/@stdlib/blas/ext/base/sdsnansum/src/sdsnansum.c

+2-112
Original file line numberDiff line numberDiff line change
@@ -17,127 +17,17 @@
1717
*/
1818

1919
#include "stdlib/blas/ext/base/sdsnansum.h"
20-
#include "stdlib/math/base/assert/is_nanf.h"
20+
#include "stdlib/blas/ext/base/sdsnansumpw.h"
2121
#include <stdint.h>
2222

2323
/**
2424
* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values and using extended accumulation.
2525
*
26-
* ## Method
27-
*
28-
* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
29-
*
30-
* ## References
31-
*
32-
* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
33-
*
3426
* @param N number of indexed elements
3527
* @param X input array
3628
* @param stride stride length
3729
* @return output value
3830
*/
3931
float stdlib_strided_sdsnansum( const int64_t N, const float *X, const int64_t stride ) {
40-
float *xp1;
41-
float *xp2;
42-
double sum;
43-
int64_t ix;
44-
int64_t M;
45-
int64_t n;
46-
int64_t i;
47-
double s0;
48-
double s1;
49-
double s2;
50-
double s3;
51-
double s4;
52-
double s5;
53-
double s6;
54-
double s7;
55-
56-
if ( N <= 0 ) {
57-
return 0.0f;
58-
}
59-
if ( N == 1 || stride == 0 ) {
60-
if ( stdlib_base_is_nanf( X[ 0 ] ) ) {
61-
return 0.0f;
62-
}
63-
return X[ 0 ];
64-
}
65-
if ( stride < 0 ) {
66-
ix = (1-N) * stride;
67-
} else {
68-
ix = 0;
69-
}
70-
if ( N < 8 ) {
71-
// Use simple summation...
72-
sum = 0.0;
73-
for ( i = 0; i < N; i++ ) {
74-
if ( !stdlib_base_is_nanf( X[ ix ] ) ) {
75-
sum += X[ ix ];
76-
}
77-
ix += stride;
78-
}
79-
return sum;
80-
}
81-
// Blocksize for pairwise summation: 128 (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.)
82-
if ( N <= 128 ) {
83-
// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
84-
s0 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
85-
ix += stride;
86-
s1 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
87-
ix += stride;
88-
s2 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
89-
ix += stride;
90-
s3 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
91-
ix += stride;
92-
s4 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
93-
ix += stride;
94-
s5 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
95-
ix += stride;
96-
s6 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
97-
ix += stride;
98-
s7 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
99-
ix += stride;
100-
101-
M = N % 8;
102-
for ( i = 8; i < N-M; i += 8 ) {
103-
s0 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
104-
ix += stride;
105-
s1 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
106-
ix += stride;
107-
s2 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
108-
ix += stride;
109-
s3 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
110-
ix += stride;
111-
s4 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
112-
ix += stride;
113-
s5 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
114-
ix += stride;
115-
s6 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
116-
ix += stride;
117-
s7 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
118-
ix += stride;
119-
}
120-
// Pairwise sum the accumulators:
121-
sum = ((s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7));
122-
123-
// Clean-up loop...
124-
for (; i < N; i++ ) {
125-
if ( !stdlib_base_is_nanf( X[ ix ] ) ) {
126-
sum += X[ ix ];
127-
}
128-
ix += stride;
129-
}
130-
return sum;
131-
}
132-
// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
133-
n = N / 2;
134-
n -= n % 8;
135-
if ( stride < 0 ) {
136-
xp1 = (float *)X + ( (n-N)*stride );
137-
xp2 = (float *)X;
138-
} else {
139-
xp1 = (float *)X;
140-
xp2 = (float *)X + ( n*stride );
141-
}
142-
return stdlib_strided_sdsnansum( n, xp1, stride ) + stdlib_strided_sdsnansum( N-n, xp2, stride );
32+
return stdlib_strided_sdsnansumpw( N, X, stride );
14333
}

0 commit comments

Comments
 (0)