Skip to content

Commit 7b8fc82

Browse files
authored
Merge pull request knaxus#125 from jonathanmcchesney/master
KMP classic algorithm and unit tests
2 parents 1aaa484 + c1b47dc commit 7b8fc82

File tree

2 files changed

+139
-0
lines changed

2 files changed

+139
-0
lines changed
+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
2+
/* *
3+
* The time complexity of the KMP algorithm is O(n + m) in the worst case (n = text length, m = pattern length)
4+
* Example use case: Pattern = ABCABCACA Text = AAABCBAABCABCACACABBCA
5+
* LPSArray = [ 0, 0, 0, 1, 2, 3, 4, 0, 1 ]
6+
* Found = true, at index 7
7+
* */
8+
9+
/**
 * Builds the "longest proper prefix which is also a suffix" (LPS) table for a pattern.
 *
 * Entry i holds the length of the longest proper prefix of pattern[0..i] that is
 * also a suffix of pattern[0..i]. Example: 'ABCABCACA' -> [0, 0, 0, 1, 2, 3, 4, 0, 1].
 *
 * @param {string} pattern - The pattern to preprocess.
 * @param {number} [patternLength=pattern.length] - Number of leading pattern characters to process.
 * @returns {number[]} The LPS table with exactly patternLength entries (empty for an empty pattern).
 */
const createLPS = (pattern, patternLength = pattern.length) => {
  // Note: `[patternLength]` would create a one-element array containing the length itself,
  // so the table is explicitly built with patternLength zero-filled slots instead.
  const lps = Array.from({ length: patternLength }, () => 0);

  // length: size of the prefix/suffix currently being extended.
  let length = 0;
  // lps[0] is always 0 (a single character has no proper prefix), so start at 1.
  let i = 1;

  while (i < patternLength) {
    if (pattern.charAt(i) === pattern.charAt(length)) {
      // The current prefix/suffix grows by one character; record the incremented length.
      length += 1;
      lps[i] = length;
      i += 1;
    } else if (length !== 0) {
      // Mismatch after a partial match: retry with the next shorter candidate
      // prefix/suffix, without advancing i.
      length = lps[length - 1];
    } else {
      // Mismatch with nothing matched: no prefix/suffix ends at position i.
      lps[i] = 0;
      i += 1;
    }
  }
  return lps;
};
40+
41+
/**
 * Knuth-Morris-Pratt pattern matching: reports whether `pattern` occurs as a
 * contiguous substring of `text`.
 *
 * The precomputed longest-prefix-suffix (LPS) table lets the search resume after a
 * mismatch without moving backwards in the text, so each text character is
 * advanced over at most once. Worst-case time is O(n + m), where n is the text
 * length and m is the pattern length.
 *
 * @param {string} pattern - The string to look for.
 * @param {string} text - The string to search in.
 * @returns {boolean} true if the pattern occurs in the text, otherwise false.
 *   An empty pattern is considered to occur in every text, including the empty text.
 */
const KMPSearch = (pattern, text) => {
  const patternLength = pattern.length; // Often referred to as M
  const textLength = text.length; // Often referred to as N

  // The empty string is a substring of every string (same convention as String.prototype.includes).
  if (patternLength === 0) {
    return true;
  }
  // A pattern longer than the text can never match; skip the preprocessing.
  if (patternLength > textLength) {
    return false;
  }

  // Longest prefix suffix table for every pattern position (preprocessed once).
  const lps = createLPS(pattern, patternLength);

  let patternIndex = 0; // Referred to as P
  let textIndex = 0; // Referred to as T

  while (textIndex < textLength) {
    if (pattern.charAt(patternIndex) === text.charAt(textIndex)) {
      // Characters match: advance both cursors.
      textIndex += 1;
      patternIndex += 1;
      // The whole pattern has been matched, ending just before textIndex.
      if (patternIndex === patternLength) {
        return true;
      }
    } else if (patternIndex === 0) {
      // Mismatch on the first pattern character: move on to the next text character.
      textIndex += 1;
    } else {
      // Mismatch after a partial match: reuse the longest prefix of the pattern that
      // is also a suffix of the matched part, keeping the text cursor where it is.
      patternIndex = lps[patternIndex - 1];
    }
  }

  // Text exhausted without a complete match.
  return false;
};
97+
98+
// Public API of this module: only the search entry point is exposed.
module.exports = { KMPSearch };
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
const { KMPSearch } = require('.');
2+
3+
// Table-driven Jest suite for KMPSearch.
// Each row is: [test title, pattern, text, expected result].
describe('Pattern Matching Classic Algorithm: Knuth-Morris-Pratt', () => {
  describe('KMPSearch', () => {
    const scenarios = [
      ['Should return true when the pattern equals the text', 'A', 'A', true],
      [
        'Should return true when the pattern is a single character and is contained within the text',
        'S',
        'TEST',
        true,
      ],
      [
        'Should return true when the pattern is multiple characters and in the middle of the text',
        'WORLD',
        'TESTWORLDTEST',
        true,
      ],
      [
        'Should return true when the pattern is present multiple times within the text',
        'ST',
        'TESTWORLDTEST',
        true,
      ],
      [
        'Should return true when the pattern is a single character and is present at the start of the text',
        'A',
        'ABABABAABCABCABC',
        true,
      ],
      [
        'Should return true when the pattern is multiple characters and is present at the start of the text',
        'AB',
        'ABABABAABCABCABC',
        true,
      ],
      [
        'Should return true when the pattern contains repeating characters, and is present in the middle of the text',
        'AAABAAAA',
        'AAAAAAAAAAABAAAAAA',
        true,
      ],
      [
        'Should return true when the pattern is contained within the text and the pattern contains non alphabetic characters',
        'AAA123! ',
        'AAAAAA123! AAAAABAAAAAA',
        true,
      ],
      ['Should return false when the pattern does not equal the text', 'A', 'B', false],
      [
        'Should return false when the pattern is not contained within the text',
        'AD',
        'ABABABAABCABCABC',
        false,
      ],
      [
        'Should return false when the pattern is longer than the text',
        'AAAAAAAA',
        'AAAAAA',
        false,
      ],
    ];

    // Register one spec per row, in the same order as the table.
    scenarios.forEach(([title, pattern, text, expected]) => {
      it(title, () => {
        expect(KMPSearch(pattern, text)).toEqual(expected);
      });
    });
  });
});

0 commit comments

Comments
 (0)