1
+ //! This module provides functionalities to match patterns in strings
2
+ //! and compute the Z-array for a given input string.
3
+
4
/// Extends a partial match between `input_string` and `pattern`.
///
/// Starting from `z_value` elements that are taken as already matching,
/// compares `input_string[start_index + z_value..]` with
/// `pattern[z_value..]` element by element and stops at the first mismatch
/// or as soon as either slice runs out.
///
/// # Parameters
/// - `input_string`: The slice being scanned.
/// - `pattern`: The slice whose prefix is compared against `input_string`.
/// - `start_index`: The position in `input_string` where the match begins.
/// - `z_value`: The number of leading elements already known to match;
///   comparison resumes immediately after them.
///
/// # Returns
/// The length of the match that begins at `start_index`. The result is never
/// smaller than the `z_value` passed in.
fn calculate_z_value<T: Eq>(
    input_string: &[T],
    pattern: &[T],
    start_index: usize,
    z_value: usize,
) -> usize {
    // Skipping past the end of a slice yields an empty iterator, so an
    // out-of-range start simply contributes no further matches.
    let text_tail = input_string
        .iter()
        .skip(start_index.saturating_add(z_value));
    let pattern_tail = pattern.iter().skip(z_value);

    let newly_matched = text_tail
        .zip(pattern_tail)
        .take_while(|(text_item, pattern_item)| text_item == pattern_item)
        .count();

    z_value + newly_matched
}
32
+
33
/// Derives a starting Z-value for index `i` from an earlier matched window.
///
/// The value is the smaller of the Z-value already stored at the mirrored
/// position `i - last_match` and the number of positions from `i` through
/// `match_end` inclusive. The slice is only read, never modified.
///
/// # Parameters
/// - `z_array`: The Z-values computed so far.
/// - `i`: The index whose starting Z-value is wanted.
/// - `match_end`: The inclusive end index of the previously matched window.
/// - `last_match`: The start index of that window.
///
/// # Returns
/// The starting Z-value for index `i`.
///
/// # Panics
/// Panics if `i - last_match` is not a valid index into `z_array`. The
/// subtractions are unsigned, so callers must ensure
/// `last_match <= i <= match_end`.
fn initialize_z_array_from_previous_match(
    z_array: &[usize],
    i: usize,
    match_end: usize,
    last_match: usize,
) -> usize {
    let mirrored_value = z_array[i - last_match];
    let remaining_in_window = match_end - i + 1;
    mirrored_value.min(remaining_in_window)
}
52
+
53
/// Collects the positions whose Z-value equals the pattern length.
///
/// # Parameters
/// - `z_array`: The computed Z-values, one per position.
/// - `pattern_size`: The length of the pattern; a position counts as a full
///   match exactly when its Z-value equals this number.
///
/// # Returns
/// The matching positions in ascending order.
fn find_full_matches(z_array: &[usize], pattern_size: usize) -> Vec<usize> {
    z_array
        .iter()
        .enumerate()
        .filter(|&(_, &z_value)| z_value == pattern_size)
        .map(|(position, _)| position)
        .collect()
}
69
+
70
+ /// Matches the occurrences of a pattern in an input string starting
71
+ /// from a specified index.
72
+ ///
73
+ /// # Parameters
74
+ /// - `input_string`: A slice of elements to search within.
75
+ /// - `pattern`: A slice of elements that represents the pattern to match.
76
+ /// - `start_index`: The index in the input string to start the search.
77
+ /// - `only_full_matches`: If true, only full matches of the pattern will be returned.
78
+ ///
79
+ /// # Returns
80
+ /// The starting indices of full matches when `only_full_matches` is true; otherwise the entire Z-array.
1
81
fn match_with_z_array < T : Eq > (
2
82
input_string : & [ T ] ,
3
83
pattern : & [ T ] ,
@@ -8,41 +88,54 @@ fn match_with_z_array<T: Eq>(
8
88
let pattern_size = pattern. len ( ) ;
9
89
let mut last_match: usize = 0 ;
10
90
let mut match_end: usize = 0 ;
11
- let mut array = vec ! [ 0usize ; size] ;
91
+ let mut z_array = vec ! [ 0usize ; size] ;
92
+
12
93
for i in start_index..size {
13
- // getting plain z array of a string requires matching from index
14
- // 1 instead of 0 (which gives a trivial result instead)
15
94
if i <= match_end {
16
- array[ i] = std:: cmp:: min ( array[ i - last_match] , match_end - i + 1 ) ;
17
- }
18
- while ( i + array[ i] ) < size && array[ i] < pattern_size {
19
- if input_string[ i + array[ i] ] != pattern[ array[ i] ] {
20
- break ;
21
- }
22
- array[ i] += 1 ;
95
+ z_array[ i] =
96
+ initialize_z_array_from_previous_match ( & mut z_array, i, match_end, last_match) ;
23
97
}
24
- if ( i + array[ i] ) > ( match_end + 1 ) {
25
- match_end = i + array[ i] - 1 ;
98
+
99
+ z_array[ i] = calculate_z_value ( input_string, pattern, i, z_array[ i] ) ;
100
+
101
+ if i + z_array[ i] > match_end + 1 {
102
+ match_end = i + z_array[ i] - 1 ;
26
103
last_match = i;
27
104
}
28
105
}
106
+
29
107
if !only_full_matches {
30
- array
108
+ z_array
31
109
} else {
32
- let mut answer: Vec < usize > = vec ! [ ] ;
33
- for ( idx, number) in array. iter ( ) . enumerate ( ) {
34
- if * number == pattern_size {
35
- answer. push ( idx) ;
36
- }
37
- }
38
- answer
110
+ find_full_matches ( & z_array, pattern_size)
39
111
}
40
112
}
41
113
114
+ /// Constructs the Z-array for the given input string.
115
+ ///
116
+ /// The Z-array is an array where the i-th element is the length of the longest
117
+ /// substring starting from s[i] that is also a prefix of s.
118
+ ///
119
+ /// # Parameters
120
+ /// - `input`: A slice of the input string for which the Z-array is to be constructed.
121
+ ///
122
+ /// # Returns
123
+ /// A vector representing the Z-array of the input string.
42
124
pub fn z_array < T : Eq > ( input : & [ T ] ) -> Vec < usize > {
43
125
match_with_z_array ( input, input, 1 , false )
44
126
}
45
127
128
+ /// Matches the occurrences of a given pattern in an input string.
129
+ ///
130
+ /// This function acts as a wrapper around `match_with_z_array` to provide a simpler
131
+ /// interface for pattern matching, returning only full matches.
132
+ ///
133
+ /// # Parameters
134
+ /// - `input`: A slice of the input string where the pattern will be searched.
135
+ /// - `pattern`: A slice of the pattern to search for in the input string.
136
+ ///
137
+ /// # Returns
138
+ /// A vector of indices where the pattern matches the input string.
46
139
pub fn match_pattern < T : Eq > ( input : & [ T ] , pattern : & [ T ] ) -> Vec < usize > {
47
140
match_with_z_array ( input, pattern, 0 , true )
48
141
}
@@ -51,56 +144,67 @@ pub fn match_pattern<T: Eq>(input: &[T], pattern: &[T]) -> Vec<usize> {
51
144
mod tests {
    use super::*;

    /// Shared text for the lorem-ipsum cases below.
    const LIPSUM: &str = concat!(
        "lorem ipsum dolor sit amet, consectetur ",
        "adipiscing elit, sed do eiusmod tempor ",
        "incididunt ut labore et dolore magna aliqua"
    );

    // Generates one `#[test]` per `name: (input, pattern, expected)` entry,
    // checking `match_pattern` on the byte representation of the strings.
    macro_rules! test_match_pattern {
        ($($name:ident: ($input:expr, $pattern:expr, $expected:expr),)*) => {
            $(
                #[test]
                fn $name() {
                    let (input, pattern, expected) = ($input, $pattern, $expected);
                    assert_eq!(match_pattern(input.as_bytes(), pattern.as_bytes()), expected);
                }
            )*
        };
    }

    // Generates one `#[test]` per `name: (input, expected)` entry,
    // checking `z_array` on the byte representation of the string.
    macro_rules! test_z_array_cases {
        ($($name:ident: ($input:expr, $expected:expr),)*) => {
            $(
                #[test]
                fn $name() {
                    let (input, expected) = ($input, $expected);
                    assert_eq!(z_array(input.as_bytes()), expected);
                }
            )*
        };
    }

    test_match_pattern! {
        simple_match: ("abcabcabc", "abc", vec![0, 3, 6]),
        no_match: ("abcdef", "xyz", vec![]),
        single_char_match: ("aaaaaa", "a", vec![0, 1, 2, 3, 4, 5]),
        overlapping_match: ("abababa", "aba", vec![0, 2, 4]),
        full_string_match: ("pattern", "pattern", vec![0]),
        // The pattern here is a single space, not an empty string.
        space_pattern_not_in_text: ("nonempty", " ", vec![]),
        pattern_larger_than_text: ("small", "largerpattern", vec![]),
        repeated_pattern_in_text: ("aaaaaaaa", "aaa", vec![0, 1, 2, 3, 4, 5]),
        pattern_not_in_lipsum: (LIPSUM, ";alksdjfoiwer", vec![]),
        pattern_in_lipsum: (LIPSUM, "m", vec![4, 10, 23, 68, 74, 110]),
    }

    test_z_array_cases! {
        basic_z_array: ("aabaabab", vec![0, 1, 0, 4, 1, 0, 1, 0]),
        empty_string: ("", vec![]),
        single_char_z_array: ("a", vec![0]),
        repeated_char_z_array: ("aaaaaa", vec![0, 5, 4, 3, 2, 1]),
    }
}
0 commit comments