Skip to content

Commit c3da55f

Browse files
sozelfist and vil02
authored
Refactor Z Algorithm (TheAlgorithms#807)
ref: refactor z algorithm Co-authored-by: Piotr Idzik <65706193+vil02@users.noreply.github.com>
1 parent 596697c commit c3da55f

File tree

1 file changed

+169
-65
lines changed

1 file changed

+169
-65
lines changed

src/string/z_algorithm.rs

Lines changed: 169 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,83 @@
1+
//! This module provides functionalities to match patterns in strings
2+
//! and compute the Z-array for a given input string.
3+
4+
/// Extends a partial prefix match between `input_string[start_index..]` and `pattern`.
///
/// The first `z_value` elements are assumed to be already known to match and are
/// not compared again; the comparison resumes at offset `z_value`.
///
/// # Parameters
/// - `input_string`: A slice of elements that represents the input string.
/// - `pattern`: A slice of elements representing the pattern to match.
/// - `start_index`: The index in the input string at which the match starts.
/// - `z_value`: The number of leading elements already known to match.
///
/// # Returns
/// `z_value` plus the number of further consecutive positions at which the input
/// and the pattern agree, stopping at the first mismatch or at the end of either
/// slice. If the resume position lies beyond either slice, `z_value` is returned
/// unchanged.
fn calculate_z_value<T: Eq>(
    input_string: &[T],
    pattern: &[T],
    start_index: usize,
    z_value: usize,
) -> usize {
    // `skip` on a slice iterator is a constant-time jump, and zipping the two
    // tails stops at the shorter one, which replaces both manual bounds checks.
    let newly_matched = input_string
        .iter()
        .skip(start_index + z_value)
        .zip(pattern.iter().skip(z_value))
        .take_while(|(text_item, pattern_item)| text_item == pattern_item)
        .count();

    z_value + newly_matched
}
32+
33+
/// Derives the starting Z-value for index `i` from the most recent match window,
/// so that elements already known to match are not compared again.
///
/// The function only reads `z_array`; storing the result is left to the caller.
///
/// # Parameters
/// - `z_array`: The Z-values computed so far.
/// - `i`: The current index in the input string. It must satisfy
///   `last_match <= i <= match_end`, otherwise the subtractions below underflow.
/// - `match_end`: The index of the last element covered by the previous match.
/// - `last_match`: The index at which the previous match starts.
///
/// # Returns
/// The smaller of the Z-value stored at the mirrored position `i - last_match`
/// and the number of elements left in the match window (`match_end - i + 1`).
// NOTE(review): only read access is needed, so `&[usize]` would suffice here once
// the call site stops passing `&mut`.
fn initialize_z_array_from_previous_match(
    z_array: &mut [usize],
    i: usize,
    match_end: usize,
    last_match: usize,
) -> usize {
    let mirrored_z_value = z_array[i - last_match];
    let remaining_in_window = match_end - i + 1;
    mirrored_z_value.min(remaining_in_window)
}
52+
53+
/// Collects the positions whose Z-value equals the pattern length, i.e. the
/// positions at which the whole pattern was matched.
///
/// # Parameters
/// - `z_array`: A slice of the Z-array containing computed Z-values.
/// - `pattern_size`: The length of the pattern; a position counts as a full
///   match when its Z-value is exactly this number.
///
/// # Returns
/// A vector with the indices of all full matches, in increasing order.
fn find_full_matches(z_array: &[usize], pattern_size: usize) -> Vec<usize> {
    z_array
        .iter()
        .enumerate()
        .filter(|&(_, &z_value)| z_value == pattern_size)
        .map(|(position, _)| position)
        .collect()
}
69+
70+
/// Matches the occurrences of a pattern in an input string starting
71+
/// from a specified index.
72+
///
73+
/// # Parameters
74+
/// - `input_string`: A slice of elements to search within.
75+
/// - `pattern`: A slice of elements that represents the pattern to match.
76+
/// - `start_index`: The index in the input string to start the search.
77+
/// - `only_full_matches`: If true, only full matches of the pattern will be returned.
78+
///
79+
/// # Returns
80+
/// The starting indices of the full matches when `only_full_matches` is true;
/// otherwise the computed Z-values themselves, one per position of the input.
181
fn match_with_z_array<T: Eq>(
282
input_string: &[T],
383
pattern: &[T],
@@ -8,41 +88,54 @@ fn match_with_z_array<T: Eq>(
888
let pattern_size = pattern.len();
989
let mut last_match: usize = 0;
1090
let mut match_end: usize = 0;
11-
let mut array = vec![0usize; size];
91+
let mut z_array = vec![0usize; size];
92+
1293
for i in start_index..size {
13-
// getting plain z array of a string requires matching from index
14-
// 1 instead of 0 (which gives a trivial result instead)
1594
if i <= match_end {
16-
array[i] = std::cmp::min(array[i - last_match], match_end - i + 1);
17-
}
18-
while (i + array[i]) < size && array[i] < pattern_size {
19-
if input_string[i + array[i]] != pattern[array[i]] {
20-
break;
21-
}
22-
array[i] += 1;
95+
z_array[i] =
96+
initialize_z_array_from_previous_match(&mut z_array, i, match_end, last_match);
2397
}
24-
if (i + array[i]) > (match_end + 1) {
25-
match_end = i + array[i] - 1;
98+
99+
z_array[i] = calculate_z_value(input_string, pattern, i, z_array[i]);
100+
101+
if i + z_array[i] > match_end + 1 {
102+
match_end = i + z_array[i] - 1;
26103
last_match = i;
27104
}
28105
}
106+
29107
if !only_full_matches {
30-
array
108+
z_array
31109
} else {
32-
let mut answer: Vec<usize> = vec![];
33-
for (idx, number) in array.iter().enumerate() {
34-
if *number == pattern_size {
35-
answer.push(idx);
36-
}
37-
}
38-
answer
110+
find_full_matches(&z_array, pattern_size)
39111
}
40112
}
41113

114+
/// Constructs the Z-array for the given input string.
115+
///
116+
/// The Z-array is an array where the i-th element is the length of the longest
117+
/// substring starting from s[i] that is also a prefix of s.
118+
///
119+
/// # Parameters
120+
/// - `input`: A slice of the input string for which the Z-array is to be constructed.
121+
///
122+
/// # Returns
123+
/// A vector representing the Z-array of the input string.
42124
pub fn z_array<T: Eq>(input: &[T]) -> Vec<usize> {
43125
match_with_z_array(input, input, 1, false)
44126
}
45127

128+
/// Finds every position in `input` at which `pattern` occurs.
///
/// The pattern's own Z-values are computed first; the input is then scanned once
/// while reusing them, so the search runs in `O(input.len() + pattern.len())`.
///
/// The reuse has to be driven by the *pattern's* Z-values. Reusing Z-values that
/// were computed for the input (as `match_with_z_array` does) can skip over a
/// mismatching element and report a false occurrence, e.g. index 2 for the
/// pattern `"aab"` in `"baabb"`.
///
/// # Parameters
/// - `input`: A slice of the input string where the pattern will be searched.
/// - `pattern`: A slice of the pattern to search for in the input string.
///
/// # Returns
/// The starting indices of all (possibly overlapping) occurrences, in increasing
/// order. An empty pattern matches at every index of `input`.
pub fn match_pattern<T: Eq>(input: &[T], pattern: &[T]) -> Vec<usize> {
    let input_size = input.len();
    let pattern_size = pattern.len();
    let pattern_z = pattern_prefix_z_values(pattern);

    let mut matches = Vec::new();
    // Invariant: input[window_start..window_end] == pattern[..window_end - window_start].
    let mut window_start = 0usize;
    let mut window_end = 0usize;

    for i in 0..input_size {
        // Inside the window the input is a copy of a pattern prefix, so the
        // pattern's Z-value at the mirrored offset is a safe lower bound.
        // `i > window_start` always holds here, hence the offset is at least 1.
        let mut matched = if i < window_end {
            pattern_z[i - window_start].min(window_end - i)
        } else {
            0
        };

        while matched < pattern_size
            && i + matched < input_size
            && input[i + matched] == pattern[matched]
        {
            matched += 1;
        }

        if i + matched > window_end {
            window_start = i;
            window_end = i + matched;
        }
        if matched == pattern_size {
            matches.push(i);
        }
    }

    matches
}

/// Computes, for every offset `k >= 1` of `pattern`, the length of the longest
/// common prefix of `pattern[k..]` and `pattern`; offset 0 is left at 0.
fn pattern_prefix_z_values<T: Eq>(pattern: &[T]) -> Vec<usize> {
    let size = pattern.len();
    let mut z_values = vec![0usize; size];
    // Invariant: pattern[window_start..window_end] == pattern[..window_end - window_start].
    let mut window_start = 0usize;
    let mut window_end = 0usize;

    for i in 1..size {
        let mut matched = if i < window_end {
            z_values[i - window_start].min(window_end - i)
        } else {
            0
        };

        while i + matched < size && pattern[i + matched] == pattern[matched] {
            matched += 1;
        }

        if i + matched > window_end {
            window_start = i;
            window_end = i + matched;
        }
        z_values[i] = matched;
    }

    z_values
}
@@ -51,56 +144,67 @@ pub fn match_pattern<T: Eq>(input: &[T], pattern: &[T]) -> Vec<usize> {
51144
mod tests {
52145
use super::*;
53146

54-
#[test]
55-
fn test_z_array() {
56-
let string = "aabaabab";
57-
let array = z_array(string.as_bytes());
58-
assert_eq!(array, vec![0, 1, 0, 4, 1, 0, 1, 0]);
147+
macro_rules! test_match_pattern {
148+
($($name:ident: ($input:expr, $pattern:expr, $expected:expr),)*) => {
149+
$(
150+
#[test]
151+
fn $name() {
152+
let (input, pattern, expected) = ($input, $pattern, $expected);
153+
assert_eq!(match_pattern(input.as_bytes(), pattern.as_bytes()), expected);
154+
}
155+
)*
156+
};
59157
}
60158

61-
#[test]
62-
fn pattern_in_text() {
63-
let text: &str = concat!(
64-
"lorem ipsum dolor sit amet, consectetur ",
65-
"adipiscing elit, sed do eiusmod tempor ",
66-
"incididunt ut labore et dolore magna aliqua"
67-
);
68-
let pattern1 = "rem";
69-
let pattern2 = "em";
70-
let pattern3 = ";alksdjfoiwer";
71-
let pattern4 = "m";
72-
73-
assert_eq!(match_pattern(text.as_bytes(), pattern1.as_bytes()), vec![2]);
74-
assert_eq!(
75-
match_pattern(text.as_bytes(), pattern2.as_bytes()),
76-
vec![3, 73]
77-
);
78-
assert_eq!(match_pattern(text.as_bytes(), pattern3.as_bytes()), vec![]);
79-
assert_eq!(
80-
match_pattern(text.as_bytes(), pattern4.as_bytes()),
81-
vec![4, 10, 23, 68, 74, 110]
82-
);
159+
macro_rules! test_z_array_cases {
160+
($($name:ident: ($input:expr, $expected:expr),)*) => {
161+
$(
162+
#[test]
163+
fn $name() {
164+
let (input, expected) = ($input, $expected);
165+
assert_eq!(z_array(input.as_bytes()), expected);
166+
}
167+
)*
168+
};
169+
}
83170

84-
let text2 = "aaaaaaaa";
85-
let pattern5 = "aaa";
86-
assert_eq!(
87-
match_pattern(text2.as_bytes(), pattern5.as_bytes()),
171+
test_match_pattern! {
172+
simple_match: ("abcabcabc", "abc", vec![0, 3, 6]),
173+
no_match: ("abcdef", "xyz", vec![]),
174+
single_char_match: ("aaaaaa", "a", vec![0, 1, 2, 3, 4, 5]),
175+
overlapping_match: ("abababa", "aba", vec![0, 2, 4]),
176+
full_string_match: ("pattern", "pattern", vec![0]),
177+
empty_pattern: ("nonempty", " ", vec![]),
178+
pattern_larger_than_text: ("small", "largerpattern", vec![]),
179+
repeated_pattern_in_text: (
180+
"aaaaaaaa",
181+
"aaa",
88182
vec![0, 1, 2, 3, 4, 5]
89-
)
183+
),
184+
pattern_not_in_lipsum: (
185+
concat!(
186+
"lorem ipsum dolor sit amet, consectetur ",
187+
"adipiscing elit, sed do eiusmod tempor ",
188+
"incididunt ut labore et dolore magna aliqua"
189+
),
190+
";alksdjfoiwer",
191+
vec![]
192+
),
193+
pattern_in_lipsum: (
194+
concat!(
195+
"lorem ipsum dolor sit amet, consectetur ",
196+
"adipiscing elit, sed do eiusmod tempor ",
197+
"incididunt ut labore et dolore magna aliqua"
198+
),
199+
"m",
200+
vec![4, 10, 23, 68, 74, 110]
201+
),
90202
}
91203

92-
#[test]
93-
fn long_pattern_in_text() {
94-
let text = vec![65u8; 1e5 as usize];
95-
let pattern = vec![65u8; 5e4 as usize];
96-
97-
let mut expected_answer = vec![0usize; (1e5 - 5e4 + 1f64) as usize];
98-
for (idx, i) in expected_answer.iter_mut().enumerate() {
99-
*i = idx;
100-
}
101-
assert_eq!(
102-
match_pattern(text.as_slice(), pattern.as_slice()),
103-
expected_answer
104-
);
204+
test_z_array_cases! {
205+
basic_z_array: ("aabaabab", vec![0, 1, 0, 4, 1, 0, 1, 0]),
206+
empty_string: ("", vec![]),
207+
single_char_z_array: ("a", vec![0]),
208+
repeated_char_z_array: ("aaaaaa", vec![0, 5, 4, 3, 2, 1]),
105209
}
106210
}

0 commit comments

Comments
 (0)