Skip to content

Commit 427ffe8

Browse files
space efficient edit distance algorithm (TheAlgorithms#149)
* space efficient edit distance algorithm * more docs * more comments * fmt
1 parent 56b7814 commit 427ffe8

File tree

1 file changed

+60
-1
lines changed

1 file changed

+60
-1
lines changed

src/dynamic_programming/edit_distance.rs

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@ use std::cmp::min;
88
///
99
/// This function iterates over the bytes in the string, so it may not behave
1010
/// entirely as expected for non-ASCII strings.
11+
///
12+
/// # Complexity
13+
///
14+
/// - time complexity: O(nm),
15+
/// - space complexity: O(nm),
16+
///
17+
/// where n and m are lengths of `str_a` and `str_b`
1118
pub fn edit_distance(str_a: &str, str_b: &str) -> u32 {
1219
// distances[i][j] = distance between a[..i] and b[..j]
1320
let mut distances = vec![vec![0; str_b.len() + 1]; str_a.len() + 1];
@@ -31,27 +38,79 @@ pub fn edit_distance(str_a: &str, str_b: &str) -> u32 {
3138
distances[str_a.len()][str_b.len()]
3239
}
3340

41+
/// The space-efficient version of the above algorithm.
///
/// Instead of storing the whole `(m + 1) * (n + 1)` matrix explicitly, only one row is stored.
/// The row keeps overwriting itself based on its previous values with the help of two scalars,
/// gradually turning into the last row of the matrix. The result is the last entry of that row.
///
/// The edit distance is symmetric in its two arguments, so the stored row is laid out along
/// the shorter input to keep the allocation as small as possible.
///
/// This function compares the bytes of the strings, so it may not behave entirely as
/// expected for non-ASCII strings.
///
/// # Complexity
///
/// - time complexity: O(nm),
/// - space complexity: O(min(n, m)),
///
/// where n and m are lengths of `str_a` and `str_b`
pub fn edit_distance_se(str_a: &str, str_b: &str) -> u32 {
    let (bytes_a, bytes_b) = (str_a.as_bytes(), str_b.as_bytes());

    // `outer` drives the rows, `inner` the columns; the row buffer follows the shorter input.
    let (outer, inner) = if bytes_a.len() < bytes_b.len() {
        (bytes_b, bytes_a)
    } else {
        (bytes_a, bytes_b)
    };

    // Row 0: turning the empty prefix into inner[..j] takes j insertions.
    let mut row: Vec<u32> = (0u32..).take(inner.len() + 1).collect();

    for &a in outer {
        // `diag` is the cell up-left of the one being computed (previous row, previous column).
        let mut diag = row[0];
        // `left` is the cell just left of the one being computed (current row).
        // Column 0 of every row is the row index: i deletions turn outer[..i] into "".
        let mut left = diag + 1;
        // Keeping column 0 up to date makes the result correct even when `inner` is empty.
        row[0] = left;

        for (cell, &b) in row[1..].iter_mut().zip(inner) {
            // `*cell` still holds the previous row's value, i.e. the cell directly above.
            let up = *cell;
            let substitution = diag + u32::from(a != b);
            let current = substitution.min(left + 1).min(up + 1);
            // The cell above becomes the up-left neighbour of the next column.
            diag = up;
            // The freshly computed cell becomes the left neighbour of the next column.
            left = current;
            // Store the new value; it becomes the "above" cell for the next row.
            *cell = current;
        }
    }

    row[inner.len()]
}
84+
3485
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that both the full-matrix and the space-efficient implementation
    /// report `expected` as the distance between `a` and `b`.
    fn assert_distance(expected: u32, a: &str, b: &str) {
        assert_eq!(expected, edit_distance(a, b));
        assert_eq!(expected, edit_distance_se(a, b));
    }

    #[test]
    fn equal_strings() {
        // Identical inputs need no edits.
        assert_distance(0, "Hello, world!", "Hello, world!");
        assert_distance(0, "Test_Case_#1", "Test_Case_#1");
    }

    #[test]
    fn one_edit_difference() {
        // One deletion, one substitution and one insertion respectively.
        assert_distance(1, "Hello, world!", "Hell, world!");
        assert_distance(1, "Test_Case_#1", "Test_Case_#2");
        assert_distance(1, "Test_Case_#1", "Test_Case_#10");
    }

    #[test]
    fn several_differences() {
        assert_distance(2, "My Cat", "My Case");
        assert_distance(7, "Hello, world!", "Goodbye, world!");
        assert_distance(6, "Test_Case_#3", "Case #3");
    }
}

0 commit comments

Comments
 (0)