Refactor to wrap specific implementation

kgryte · kgryte · commit 951685e606d1 · 2020-06-17T12:20:01.000-07:00
diff --git a/lib/node_modules/@stdlib/blas/ext/base/sdsnansum/README.md b/lib/node_modules/@stdlib/blas/ext/base/sdsnansum/README.md
@@ -165,14 +165,8 @@ console.log( v );
 
 <!-- /.examples -->
 
-* * *
-
 <section class="references">
 
-## References
-
--   Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050][@higham:1993a].
-
 </section>
 
 <!-- /.references -->
@@ -183,8 +177,6 @@ console.log( v );
 
 [mdn-typed-array]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray
 
-[@higham:1993a]: https://doi.org/10.1137/0914050
-
 </section>
 
 <!-- /.links -->
diff --git a/lib/node_modules/@stdlib/blas/ext/base/sdsnansum/lib/ndarray.js b/lib/node_modules/@stdlib/blas/ext/base/sdsnansum/lib/ndarray.js
@@ -20,30 +20,14 @@
 
 // MODULES //
 
-var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
-var isnanf = require( '@stdlib/math/base/assert/is-nanf' );
-var floor = require( '@stdlib/math/base/special/floor' );
-
-
-// VARIABLES //
-
-// Blocksize for pairwise summation (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.):
-var BLOCKSIZE = 128;
+var sdsnansumpw = require( '@stdlib/blas/ext/base/sdsnansumpw' ).ndarray;
 
 
 // MAIN //
 
 /**
 * Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values and using extended accumulation.
 *
-* ## Method
-*
-* -   This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
-*
-* ## References
-*
-* -   Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
-*
 * @param {PositiveInteger} N - number of indexed elements
 * @param {Float32Array} x - input array
 * @param {integer} stride - stride length
@@ -61,95 +45,7 @@ var BLOCKSIZE = 128;
 * // returns 5.0
 */
 function sdsnansum( N, x, stride, offset ) {
-	var ix;
-	var s0;
-	var s1;
-	var s2;
-	var s3;
-	var s4;
-	var s5;
-	var s6;
-	var s7;
-	var M;
-	var s;
-	var n;
-	var i;
-
-	if ( N <= 0 ) {
-		return 0.0;
-	}
-	if ( N === 1 || stride === 0 ) {
-		if ( isnanf( x[ offset ] ) ) {
-			return 0.0;
-		}
-		return x[ offset ];
-	}
-	ix = offset;
-	if ( N < 8 ) {
-		// Use simple summation...
-		s = 0.0;
-		for ( i = 0; i < N; i++ ) {
-			if ( isnanf( x[ ix ] ) === false ) {
-				s += x[ ix ];
-			}
-			ix += stride;
-		}
-		return float64ToFloat32( s );
-	}
-	if ( N <= BLOCKSIZE ) {
-		// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
-		s0 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-		ix += stride;
-		s1 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-		ix += stride;
-		s2 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-		ix += stride;
-		s3 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-		ix += stride;
-		s4 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-		ix += stride;
-		s5 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-		ix += stride;
-		s6 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-		ix += stride;
-		s7 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-		ix += stride;
-
-		M = N % 8;
-		for ( i = 8; i < N-M; i += 8 ) {
-			s0 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-			ix += stride;
-			s1 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-			ix += stride;
-			s2 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-			ix += stride;
-			s3 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-			ix += stride;
-			s4 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-			ix += stride;
-			s5 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-			ix += stride;
-			s6 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-			ix += stride;
-			s7 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ];
-			ix += stride;
-		}
-		// Pairwise sum the accumulators:
-		s = ((s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7));
-
-		// Clean-up loop...
-		for ( i; i < N; i++ ) {
-			if ( isnanf( x[ ix ] ) === false ) {
-				s += x[ ix ];
-			}
-			ix += stride;
-		}
-		return float64ToFloat32( s );
-	}
-	// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
-	n = floor( N/2 );
-	n -= n % 8;
-	return float64ToFloat32( sdsnansum( n, x, stride, ix ) + sdsnansum( N-n, x, stride, ix+(n*stride) ) ); // eslint-disable-line max-len
+	return sdsnansumpw( N, x, stride, offset );
 }
 
 
diff --git a/lib/node_modules/@stdlib/blas/ext/base/sdsnansum/lib/sdsnansum.js b/lib/node_modules/@stdlib/blas/ext/base/sdsnansum/lib/sdsnansum.js
@@ -20,24 +20,14 @@
 
 // MODULES //
 
-var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
-var isnanf = require( '@stdlib/math/base/assert/is-nanf' );
-var sum = require( './ndarray.js' );
+var sdsnansumpw = require( '@stdlib/blas/ext/base/sdsnansumpw' );
 
 
 // MAIN //
 
 /**
 * Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values and using extended accumulation.
 *
-* ## Method
-*
-* -   This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
-*
-* ## References
-*
-* -   Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
-*
 * @param {PositiveInteger} N - number of indexed elements
 * @param {Float32Array} x - input array
 * @param {integer} stride - stride length
@@ -53,36 +43,7 @@ var sum = require( './ndarray.js' );
 * // returns 1.0
 */
 function sdsnansum( N, x, stride ) {
-	var ix;
-	var s;
-	var i;
-
-	if ( N <= 0 ) {
-		return 0.0;
-	}
-	if ( N === 1 || stride === 0 ) {
-		if ( isnanf( x[ 0 ] ) ) {
-			return 0.0;
-		}
-		return x[ 0 ];
-	}
-	if ( stride < 0 ) {
-		ix = (1-N) * stride;
-	} else {
-		ix = 0;
-	}
-	if ( N < 8 ) {
-		// Use simple summation...
-		s = 0.0;
-		for ( i = 0; i < N; i++ ) {
-			if ( isnanf( x[ ix ] ) === false ) {
-				s += x[ ix ];
-			}
-			ix += stride;
-		}
-		return float64ToFloat32( s );
-	}
-	return sum( N, x, stride, ix );
+	return sdsnansumpw( N, x, stride );
 }
 
 
diff --git a/lib/node_modules/@stdlib/blas/ext/base/sdsnansum/manifest.json b/lib/node_modules/@stdlib/blas/ext/base/sdsnansum/manifest.json
@@ -35,7 +35,7 @@
 			],
 			"libpath": [],
 			"dependencies": [
-				"@stdlib/math/base/assert/is-nanf"
+				"@stdlib/blas/ext/base/sdsnansumpw"
 			]
 		}
 	]
diff --git a/lib/node_modules/@stdlib/blas/ext/base/sdsnansum/package.json b/lib/node_modules/@stdlib/blas/ext/base/sdsnansum/package.json
@@ -63,8 +63,6 @@
     "sum",
     "total",
     "summation",
-    "pairwise",
-    "pw",
     "strided",
     "strided array",
     "typed",
diff --git a/lib/node_modules/@stdlib/blas/ext/base/sdsnansum/src/sdsnansum.c b/lib/node_modules/@stdlib/blas/ext/base/sdsnansum/src/sdsnansum.c
@@ -17,127 +17,17 @@
 */
 
 #include "stdlib/blas/ext/base/sdsnansum.h"
-#include "stdlib/math/base/assert/is_nanf.h"
+#include "stdlib/blas/ext/base/sdsnansumpw.h"
 #include <stdint.h>
 
 /**
 * Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values and using extended accumulation.
 *
-* ## Method
-*
-* -   This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
-*
-* ## References
-*
-* -   Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
-*
 * @param N       number of indexed elements
 * @param X       input array
 * @param stride  stride length
 * @return        output value
 */
 float stdlib_strided_sdsnansum( const int64_t N, const float *X, const int64_t stride ) {
-	float *xp1;
-	float *xp2;
-	double sum;
-	int64_t ix;
-	int64_t M;
-	int64_t n;
-	int64_t i;
-	double s0;
-	double s1;
-	double s2;
-	double s3;
-	double s4;
-	double s5;
-	double s6;
-	double s7;
-
-	if ( N <= 0 ) {
-		return 0.0f;
-	}
-	if ( N == 1 || stride == 0 ) {
-		if ( stdlib_base_is_nanf( X[ 0 ] ) ) {
-			return 0.0f;
-		}
-		return X[ 0 ];
-	}
-	if ( stride < 0 ) {
-		ix = (1-N) * stride;
-	} else {
-		ix = 0;
-	}
-	if ( N < 8 ) {
-		// Use simple summation...
-		sum = 0.0;
-		for ( i = 0; i < N; i++ ) {
-			if ( !stdlib_base_is_nanf( X[ ix ] ) ) {
-				sum += X[ ix ];
-			}
-			ix += stride;
-		}
-		return sum;
-	}
-	// Blocksize for pairwise summation: 128 (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.)
-	if ( N <= 128 ) {
-		// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
-		s0 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-		ix += stride;
-		s1 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-		ix += stride;
-		s2 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-		ix += stride;
-		s3 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-		ix += stride;
-		s4 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-		ix += stride;
-		s5 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-		ix += stride;
-		s6 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-		ix += stride;
-		s7 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-		ix += stride;
-
-		M = N % 8;
-		for ( i = 8; i < N-M; i += 8 ) {
-			s0 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-			ix += stride;
-			s1 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-			ix += stride;
-			s2 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-			ix += stride;
-			s3 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-			ix += stride;
-			s4 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-			ix += stride;
-			s5 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-			ix += stride;
-			s6 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-			ix += stride;
-			s7 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : X[ ix ];
-			ix += stride;
-		}
-		// Pairwise sum the accumulators:
-		sum = ((s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7));
-
-		// Clean-up loop...
-		for (; i < N; i++ ) {
-			if ( !stdlib_base_is_nanf( X[ ix ] ) ) {
-				sum += X[ ix ];
-			}
-			ix += stride;
-		}
-		return sum;
-	}
-	// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
-	n = N / 2;
-	n -= n % 8;
-	if ( stride < 0 ) {
-		xp1 = (float *)X + ( (n-N)*stride );
-		xp2 = (float *)X;
-	} else {
-		xp1 = (float *)X;
-		xp2 = (float *)X + ( n*stride );
-	}
-	return stdlib_strided_sdsnansum( n, xp1, stride ) + stdlib_strided_sdsnansum( N-n, xp2, stride );
+	return stdlib_strided_sdsnansumpw( N, X, stride );
 }

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -35,7 +35,7 @@` |
| `35` | `35` | `],` |
| `36` | `36` | `"libpath": [],` |
| `37` | `37` | `"dependencies": [` |
| `38` |  | `- "@stdlib/math/base/assert/is-nanf"` |
|  | `38` | `+ "@stdlib/blas/ext/base/sdsnansumpw"` |
| `39` | `39` | `]` |
| `40` | `40` | `}` |
| `41` | `41` | `]` |