Skip to content

Commit 5c98c2a

Browse files
committed
Add package to split a string into array of sentences
1 parent 79789b1 commit 5c98c2a

File tree

10 files changed

+742
-0
lines changed

10 files changed

+742
-0
lines changed
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
<!--
2+
3+
@license Apache-2.0
4+
5+
Copyright (c) 2023 The Stdlib Authors.
6+
7+
Licensed under the Apache License, Version 2.0 (the "License");
8+
you may not use this file except in compliance with the License.
9+
You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
19+
-->
20+
21+
# sentencize
22+
23+
> Split a string into an array of sentences.
24+
25+
<section class="intro">
26+
27+
</section>
28+
29+
<!-- /.intro -->
30+
31+
<section class="usage">
32+
33+
## Usage
34+
35+
```javascript
36+
var sentencize = require( '@stdlib/nlp/sentencize' );
37+
```
38+
39+
#### sentencize( str )
40+
41+
Splits a string into an array of sentences.
42+
43+
```javascript
44+
var str = 'Hello Mrs. Maple, could you call me back? I need to talk to you about something.';
45+
var out = sentencize( str );
46+
// returns [ 'Hello Mrs. Maple, could you call me back?', 'I need to talk to you about something.' ]
47+
48+
str = 'As of 2.14.2023, the average price of apples is $1.31 per pound! Inflation has been a strain on the economy.';
49+
out = sentencize( str );
50+
// returns [ 'As of 2.14.2023, the average price of apples is $1.31 per pound!', 'Inflation has been a strain on the economy.' ]
51+
```
52+
53+
</section>
54+
55+
<!-- /.usage -->
56+
57+
<section class="examples">
58+
59+
## Examples
60+
61+
<!-- eslint no-undef: "error" -->
62+
63+
```javascript
64+
var sentencize = require( '@stdlib/nlp/sentencize' );
65+
66+
var sentences = [
67+
'Dr. John Smith, Jr. who is a renowned expert in the field of Artificial Intelligence, and who has published numerous papers on the subject, e.g. "Machine Learning for Dummies", "The Future of AI", etc., gave a lecture at the annual AI conference yesterday and stated that AI technology is rapidly advancing, but we must be cautious of its potential consequences such as job displacement, privacy concerns, and ethical dilemmas.',
68+
'In addition, AI also has many benefits such as improved efficiency and accuracy, and the ability to process large amounts of data.',
69+
'Dr. Smith emphasized that it is crucial for society to have open and honest discussions about the development and deployment of AI to ensure its responsible and beneficial use for all.',
70+
'He also stressed the importance of considering the implications and consequences of AI, i.e. it must not be developed or used in a way that is harmful or unethical.',
71+
'The audience was in agreement, nodding their heads and taking notes throughout the lecture.'
72+
];
73+
var str = sentences.join( ' ' );
74+
75+
var out = sentencize( str );
76+
// returns [...]
77+
78+
var bool = ( out.length === 5 );
79+
// returns true
80+
81+
bool = ( out[ 0 ] === sentences[ 0 ] );
82+
// returns true
83+
```
84+
85+
</section>
86+
87+
<!-- /.examples -->
88+
89+
<!-- Section for related `stdlib` packages. Do not manually edit this section, as it is automatically populated. -->
90+
91+
<section class="related">
92+
93+
</section>
94+
95+
<!-- /.related -->
96+
97+
<!-- Section for all links. Make sure to keep an empty line after the `section` element and another before the `/section` close. -->
98+
99+
<section class="links">
100+
101+
</section>
102+
103+
<!-- /.links -->
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2023 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
// MODULES //
22+
23+
var bench = require( '@stdlib/bench' );
24+
var isArray = require( '@stdlib/assert/is-array' );
25+
var fromCodePoint = require( '@stdlib/string/from-code-point' );
26+
var pkg = require( './../package.json' ).name;
27+
var sentencize = require( './../lib' );
28+
29+
30+
// MAIN //
31+
32+
bench( pkg, function benchmark( b ) {
	var result;
	var text;
	var k;

	// Fixed multi-sentence passage used as the base input for every iteration:
	text = 'To be, or not to be, that is the question. Whether tis nobler in the mind to suffer. The slings and arrows of outrageous fortune. Or to take arms against a sea of troubles. And by opposing end them. To die, to sleep, no more.';

	b.tic();
	for ( k = 0; k < b.iterations; k++ ) {
		// Append a parenthesized character derived from the loop index so that the input differs between consecutive iterations:
		result = sentencize( text + '(' + fromCodePoint( k%126 ) + ')' );

		// Only a cheap `typeof` check runs inside the timed region:
		if ( typeof result !== 'object' ) {
			b.fail( 'should return an array' );
		}
	}
	b.toc();

	// The stricter array check runs once, after timing has stopped:
	if ( !isArray( result ) ) {
		b.fail( 'should return an array' );
	}
	b.pass( 'benchmark finished' );
	b.end();
});
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
2+
{{alias}}( str )
3+
Splits a string into an array of sentences.
4+
5+
Parameters
6+
----------
7+
str: string
8+
Input string.
9+
10+
Returns
11+
-------
12+
out: Array<string>
13+
Array of sentences.
14+
15+
Examples
16+
--------
17+
> var out = {{alias}}( 'Hello Mrs. Maple, could you call me back?' )
18+
[ 'Hello Mrs. Maple, could you call me back?' ]
19+
20+
> out = {{alias}}( 'Hello World! How are you?' )
21+
[ 'Hello World!', 'How are you?' ]
22+
23+
See Also
24+
--------
25+
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2023 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
// TypeScript Version: 2.0
20+
21+
/**
* Tokenizes a string into its constituent sentences.
*
* @param str - string to split
* @returns list of the sentences found in the input string
*
* @example
* var out = sentencize( 'Hello World! How are you?' );
* // returns [ 'Hello World!', 'How are you?' ]
*
* @example
* var out = sentencize( '' );
* // returns []
*
* @example
* var out = sentencize( 'Hello Mrs. Maple, could you call me back?' );
* // returns [ 'Hello Mrs. Maple, could you call me back?' ]
*/
declare function sentencize( str: string ): string[];
43+
44+
45+
// EXPORTS //
46+
47+
export = sentencize;
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2023 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
import sentencize = require( './index' );
20+
21+
22+
// TESTS //
23+
24+
// A string argument yields an array of strings...
{
	sentencize( 'Hello World! How are you?' ); // $ExpectType string[]
}

// Any non-string argument is rejected by the compiler...
{
	sentencize( 3 ); // $ExpectError
	sentencize( true ); // $ExpectError
	sentencize( false ); // $ExpectError
	sentencize( {} ); // $ExpectError
	sentencize( [] ); // $ExpectError
	sentencize( ( x: number ): number => x ); // $ExpectError
}

// Omitting the required argument is rejected by the compiler...
{
	sentencize(); // $ExpectError
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2023 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
var sentencize = require( './../lib' );
22+
23+
// Five sentences containing abbreviations ("Dr.", "Jr.", "e.g.", "etc.", "i.e.") which must not be treated as sentence boundaries:
var sentences = [
	'Dr. John Smith, Jr. who is a renowned expert in the field of Artificial Intelligence, and who has published numerous papers on the subject, e.g. "Machine Learning for Dummies", "The Future of AI", etc., gave a lecture at the annual AI conference yesterday and stated that AI technology is rapidly advancing, but we must be cautious of its potential consequences such as job displacement, privacy concerns, and ethical dilemmas.',
	'In addition, AI also has many benefits such as improved efficiency and accuracy, and the ability to process large amounts of data.',
	'Dr. Smith emphasized that it is crucial for society to have open and honest discussions about the development and deployment of AI to ensure its responsible and beneficial use for all.',
	'He also stressed the importance of considering the implications and consequences of AI, i.e. it must not be developed or used in a way that is harmful or unethical.',
	'The audience was in agreement, nodding their heads and taking notes throughout the lecture.'
];

// Join the sentences into a single paragraph and split the paragraph back apart:
var out = sentencize( sentences.join( ' ' ) );
console.log( out );
// => [...]

// The number of returned sentences is expected to equal the number joined:
console.log( out.length === 5 );
// => true

// The first (abbreviation-heavy) sentence is expected to be returned intact:
console.log( out[ 0 ] === sentences[ 0 ] );
// => true
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2023 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
/**
22+
* Split a string into an array of sentences.
23+
*
24+
* @module @stdlib/nlp/sentencize
25+
*
26+
* @example
27+
* var sentencize = require( '@stdlib/nlp/sentencize' );
28+
*
29+
* var str = 'Hello Mrs. Maple, could you call me back? I need to talk to you about something.';
30+
* var out = sentencize( str );
31+
* // returns [ 'Hello Mrs. Maple, could you call me back?', 'I need to talk to you about something.' ]
32+
*/
33+
34+
// MODULES //
35+
36+
var main = require( './main.js' );
37+
38+
39+
// EXPORTS //
40+
41+
module.exports = main;

0 commit comments

Comments
 (0)