Skip to content

Commit f430c31

Browse files
committed Oct 21, 2019
Add KMP and relevant unit tests
1 parent 43ed1bc commit f430c31

File tree

2 files changed

+130
-0
lines changed

2 files changed

+130
-0
lines changed
 
+91
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,91 @@
1+
2+
/**
 * Build the "longest proper prefix that is also a suffix" (LPS) table used by
 * the Knuth-Morris-Pratt search.
 *
 * For every index i, lps[i] is the length of the longest proper prefix of
 * pattern[0..i] that is also a suffix of pattern[0..i].
 *
 * @param {string} pattern - The pattern to preprocess.
 * @param {number} [patternLength=pattern.length] - Number of leading pattern
 *   characters to process.
 * @param {number[]} [lps=[]] - Array to fill; it is written in place for the
 *   indexes 0..patternLength-1 and returned.
 * @returns {number[]} The same `lps` array that was passed in (or a new one).
 */
const createLPS = (pattern, patternLength = pattern.length, lps = []) => {
  // An empty pattern has no positions, so there is nothing to fill in.
  if (patternLength === 0) {
    return lps;
  }

  // A single character has no proper prefix, so the first entry is always 0.
  lps[0] = 0;
  // Length of the prefix-suffix currently being extended.
  let length = 0;
  let i = 1;

  while (i < patternLength) {
    if (pattern.charAt(i) === pattern.charAt(length)) {
      // The current prefix-suffix can be extended by one character.
      length++;
      lps[i] = length;
      i++;
    } else if (length !== 0) {
      // Fall back to the next shorter candidate prefix-suffix and retry the
      // same position i (i is deliberately not advanced here).
      length = lps[length - 1];
    } else {
      // No prefix of the pattern ends at position i.
      lps[i] = 0;
      i++;
    }
  }
  return lps;
};
34+
35+
/**
 * Knuth-Morris-Pratt substring search.
 *
 * Reports whether `pattern` occurs as a contiguous substring of `text`. The
 * pattern is preprocessed into a prefix-suffix (LPS) table with `createLPS`;
 * on a mismatch the table says how much of the already matched prefix can be
 * reused, so the text index never moves backwards.
 *
 * @param {string} pattern - The string to look for (length often called M).
 * @param {string} text - The string to search in (length often called N).
 * @returns {boolean} true if the pattern is contained in the text, else false.
 *   The empty pattern is contained in every text, including the empty text.
 */
const KMPSearch = (pattern, text) => {
  const patternLength = pattern.length; // Often referred to as M
  const textLength = text.length; // Often referred to as N

  // The empty string is a substring of every string (including '').
  if (patternLength === 0) {
    return true;
  }
  // A pattern longer than the text can never match; skip the preprocessing.
  if (patternLength > textLength) {
    return false;
  }

  // Preprocess the pattern before the text is scanned.
  const lps = createLPS(pattern, patternLength, []);

  let patternIndex = 0; // Referred to as P
  let textIndex = 0; // Referred to as T

  while (textIndex < textLength) {
    if (pattern.charAt(patternIndex) === text.charAt(textIndex)) {
      textIndex++;
      patternIndex++;
      // Every pattern character has been matched: the pattern was found.
      if (patternIndex === patternLength) {
        return true;
      }
    } else if (patternIndex === 0) {
      // Mismatch on the first pattern character: move on to the next text character.
      textIndex++;
    } else {
      // Mismatch after a partial match: reuse the longest prefix of the
      // pattern that is also a suffix of what was matched, and retry the same
      // text character against it.
      patternIndex = lps[patternIndex - 1];
    }
  }

  // The text was exhausted without a complete match.
  return false;
};
88+
89+
module.exports = {
90+
KMPSearch
91+
};
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
const { KMPSearch } = require('.');
2+
3+
// Jest specification for KMPSearch: positive matches at the start, middle and
// end of the text, patterns with repeated characters, and negative cases.
describe('Pattern Matching Classic Algorithm: Knuth-Morris-Pratt', () => {
  describe('KMPSearch', () => {
    it('Should return true when the pattern equals the text', () => {
      expect(KMPSearch('A', 'A')).toEqual(true);
    });
    it('Should return true when the pattern is a single character and is contained within the text', () => {
      expect(KMPSearch('S', 'TEST')).toEqual(true);
    });
    it('Should return true when the pattern is multiple characters and in the middle of the text', () => {
      expect(KMPSearch('WORLD', 'TESTWORLDTEST')).toEqual(true);
    });
    it('Should return true when the pattern is present multiple times within the text', () => {
      expect(KMPSearch('ST', 'TESTWORLDTEST')).toEqual(true);
    });
    it('Should return true when the pattern is a single character and is present at the start of the text', () => {
      expect(KMPSearch('A', 'ABABABAABCABCABC')).toEqual(true);
    });
    it('Should return true when the pattern is multiple characters and is present at the start of the text', () => {
      expect(KMPSearch('AB', 'ABABABAABCABCABC')).toEqual(true);
    });
    it('Should return true when the pattern is multiple characters and is present at the end of the text', () => {
      expect(KMPSearch('TEST', 'WORLDTEST')).toEqual(true);
    });
    it('Should return true when the pattern contains repeating characters, and is present in the middle of the text', () => {
      expect(KMPSearch('AAABAAAA', 'AAAAAAAAAAABAAAAAA')).toEqual(true);
    });
    it('Should return true when the pattern is contained within the text and the pattern contains non alphabetic characters', () => {
      expect(KMPSearch('AAA123! ', 'AAAAAA123! AAAAABAAAAAA')).toEqual(true);
    });
    it('Should return false when the pattern does not equal the text', () => {
      expect(KMPSearch('A', 'B')).toEqual(false);
    });
    it('Should return false when the pattern is not contained within the text', () => {
      expect(KMPSearch('AD', 'ABABABAABCABCABC')).toEqual(false);
    });
    it('Should return false when the pattern is longer than the text', () => {
      expect(KMPSearch('AAAAAAAA', 'AAAAAA')).toEqual(false);
    });
    it('Should return false when the text is empty and the pattern is not', () => {
      expect(KMPSearch('A', '')).toEqual(false);
    });
  });
});

0 commit comments

Comments
 (0)