@@ -8,6 +8,13 @@ use std::cmp::min;
8
8
///
9
9
/// This function iterates over the bytes in the string, so it may not behave
10
10
/// entirely as expected for non-ASCII strings.
11
+ ///
12
+ /// # Complexity
13
+ ///
14
+ /// - time complexity: O(nm),
15
+ /// - space complexity: O(nm),
16
+ ///
17
+ /// where n and m are lengths of `str_a` and `str_b`
11
18
pub fn edit_distance ( str_a : & str , str_b : & str ) -> u32 {
12
19
// distances[i][j] = distance between a[..i] and b[..j]
13
20
let mut distances = vec ! [ vec![ 0 ; str_b. len( ) + 1 ] ; str_a. len( ) + 1 ] ;
@@ -31,27 +38,79 @@ pub fn edit_distance(str_a: &str, str_b: &str) -> u32 {
31
38
distances[ str_a. len ( ) ] [ str_b. len ( ) ]
32
39
}
33
40
41
/// The space-efficient version of the `edit_distance` algorithm.
///
/// Instead of storing the whole `(m + 1) * (n + 1)` matrix explicitly, only one
/// row is stored. The row keeps overwriting itself based on its previous values
/// with the help of two scalars, gradually turning into the last row of the
/// matrix; the result is then the last cell of that row.
///
/// Because the edit distance is symmetric, the stored row is laid out along the
/// shorter of the two inputs, so the extra memory is proportional to the shorter
/// string only.
///
/// Like `edit_distance`, this function iterates over the bytes of the strings,
/// so it may not behave entirely as expected for non-ASCII strings.
///
/// Distances are counted in `u32` (the return type); inputs longer than
/// `u32::MAX` bytes are not supported.
///
/// # Complexity
///
/// - time complexity: O(nm),
/// - space complexity: O(min(n, m)),
///
/// where m and n are the byte lengths of `str_a` and `str_b`.
pub fn edit_distance_se(str_a: &str, str_b: &str) -> u32 {
    // `outer` drives the rows, `inner` (the shorter input) drives the columns
    // and therefore determines the length of the single stored row.
    let (outer, inner) = if str_a.len() < str_b.len() {
        (str_b.as_bytes(), str_a.as_bytes())
    } else {
        (str_a.as_bytes(), str_b.as_bytes())
    };

    // Row 0: turning the empty prefix into inner[..j] takes j insertions.
    let mut distances: Vec<u32> = (0u32..).take(inner.len() + 1).collect();

    // Rows 1..=outer.len(); `row` is the 1-based row index i.
    for (&a, row) in outer.iter().zip(1u32..) {
        // diagonal = distances[i - 1][j - 1]; for j = 1 that is column 0 of the
        // previous row, which still sits in distances[0].
        let mut diagonal = distances[0];
        // Column 0 of row i: turning outer[..i] into the empty prefix takes i
        // deletions. Keeping this cell up to date is what makes the result
        // correct when `inner` is empty and distances[0] is the returned cell.
        distances[0] = row;
        // left = distances[i][j - 1]
        let mut left = row;

        for (&b, cell) in inner.iter().zip(distances.iter_mut().skip(1)) {
            // `cell` has not been overwritten yet, so it still holds
            // distances[i - 1][j].
            let above = *cell;
            let current = min(diagonal + u32::from(a != b), min(left, above) + 1);
            // Slide the window one column to the right.
            diagonal = above;
            left = current;
            // The cell now holds distances[i][j]; it becomes distances[i - 1][j]
            // for the next row.
            *cell = current;
        }
    }

    distances[inner.len()]
}
84
+
34
85
#[cfg(test)]
mod tests {
    use super::*;

    /// Identical inputs must be at distance zero for both implementations.
    #[test]
    fn equal_strings() {
        let samples = ["Hello, world!", "Test_Case_#1"];
        for &text in &samples {
            assert_eq!(0, edit_distance(text, text));
            assert_eq!(0, edit_distance_se(text, text));
        }
    }

    /// Pairs that differ by exactly one insertion, deletion or substitution.
    #[test]
    fn one_edit_difference() {
        let pairs = [
            ("Hello, world!", "Hell, world!"),
            ("Test_Case_#1", "Test_Case_#2"),
            ("Test_Case_#1", "Test_Case_#10"),
        ];
        // Full-matrix implementation first, then the single-row one.
        for &(left, right) in &pairs {
            assert_eq!(1, edit_distance(left, right));
        }
        for &(left, right) in &pairs {
            assert_eq!(1, edit_distance_se(left, right));
        }
    }

    /// Pairs that need more than one edit, with their expected distances.
    #[test]
    fn several_differences() {
        let cases = [
            (2, "My Cat", "My Case"),
            (7, "Hello, world!", "Goodbye, world!"),
            (6, "Test_Case_#3", "Case #3"),
        ];
        // Full-matrix implementation first, then the single-row one.
        for &(expected, left, right) in &cases {
            assert_eq!(expected, edit_distance(left, right));
        }
        for &(expected, left, right) in &cases {
            assert_eq!(expected, edit_distance_se(left, right));
        }
    }
}
0 commit comments