Skip to content

Commit 2e3a6f9

Browse files
authored
add suffix tree algorithm (TheAlgorithms#381)
1 parent 9f5bd84 commit 2e3a6f9

File tree

3 files changed

+155
-0
lines changed

3 files changed

+155
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ These are for demonstration purposes only.
118118
- [x] [Reverse](./src/string/reverse.rs)
119119
- [x] [Run Length Encoding](./src/string/run_length_encoding.rs)
120120
- [x] [Hamming Distance](./src/string/hamming_distance.rs)
121+
- [x] [Suffix Tree](./src/string/suffix_tree.rs)
121122

122123
## [General](./src/general)
123124

src/string/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ mod manacher;
66
mod rabin_karp;
77
mod reverse;
88
mod run_length_encoding;
9+
mod suffix_tree;
910
mod z_algorithm;
1011

1112
pub use self::aho_corasick::AhoCorasick;
@@ -18,5 +19,6 @@ pub use self::manacher::manacher;
1819
pub use self::rabin_karp::rabin_karp;
1920
pub use self::reverse::reverse;
2021
pub use self::run_length_encoding::{run_length_decoding, run_length_encoding};
22+
pub use self::suffix_tree::{Node, SuffixTree};
2123
pub use self::z_algorithm::match_pattern;
2224
pub use self::z_algorithm::z_array;

src/string/suffix_tree.rs

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
// In computer science, a suffix tree (also called PAT tree or, in an earlier form, position tree)
2+
// is a compressed trie containing all the suffixes of the given text as their keys and positions
3+
// in the text as their values. Suffix trees allow particularly fast implementations of many
4+
// important string operations. Source: https://en.wikipedia.org/wiki/Suffix_tree
5+
6+
/// A single node of a [`SuffixTree`].
///
/// Nodes are stored in `SuffixTree::nodes` and refer to each other by index into that vector.
#[derive(Debug, Default, PartialEq, Eq, Clone)]
pub struct Node {
    /// Edge label: the substring of the input text that leads into this node.
    pub sub: String,
    /// Indices (into `SuffixTree::nodes`) of this node's children, in insertion order.
    pub ch: Vec<usize>,
}

impl Node {
    /// Creates a node labelled `sub` that owns the given child indices.
    fn new(sub: String, children: Vec<usize>) -> Self {
        // `children` is already owned, so it is moved in rather than copied.
        Node { sub, ch: children }
    }

    /// Creates a node with an empty label and no children (this is what the root looks like
    /// before any suffix is inserted).
    pub fn empty() -> Self {
        Node::default()
    }
}

/// A suffix tree stored as a flat vector of nodes.
///
/// `nodes[0]` is the root (empty label). Every other node is reachable through the `ch`
/// indices of its parent. The tree is built naively, by inserting the suffixes of the text one
/// after another, starting with the longest.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct SuffixTree {
    /// All nodes of the tree; index `0` is the root.
    pub nodes: Vec<Node>,
}

impl SuffixTree {
    /// Builds the suffix tree of `s`.
    ///
    /// Suffixes start at every character of `s`, so text containing multi-byte UTF-8
    /// characters is handled without slicing inside a character. An empty `s` yields a tree
    /// that contains only the root node.
    pub fn new(s: String) -> Self {
        let mut suf_tree = SuffixTree {
            nodes: vec![Node::empty()],
        };
        // `char_indices` yields only valid char boundaries, so the slice below cannot panic.
        for (start, _) in s.char_indices() {
            suf_tree.add_suffix(&s[start..]);
        }
        suf_tree
    }

    /// Inserts one suffix into the tree, splitting an existing edge where the suffix diverges
    /// from it.
    fn add_suffix(&mut self, suf: &str) {
        let mut n = 0; // index of the node we are currently standing on
        let mut i = 0; // byte offset into `suf`; always on a char boundary
        while i < suf.len() {
            let rest = &suf[i..];
            let first = rest.chars().next();

            // Look for the child whose label starts with the next character of the suffix.
            let slot = {
                let nodes = &self.nodes;
                nodes[n]
                    .ch
                    .iter()
                    .position(|&c| nodes[c].sub.chars().next() == first)
            };
            let x2 = match slot {
                Some(x2) => x2,
                None => {
                    // No matching child: the remainder of the suffix becomes a new leaf.
                    let leaf = self.nodes.len();
                    self.nodes.push(Node::new(rest.to_string(), vec![]));
                    self.nodes[n].ch.push(leaf);
                    return;
                }
            };

            let child = self.nodes[n].ch[x2];
            let common = Self::common_prefix_len(&self.nodes[child].sub, rest);
            let next = if common < self.nodes[child].sub.len() {
                // The suffix leaves the edge part-way through: insert a new internal node
                // carrying the shared prefix and keep the old node (with its children)
                // below it, labelled with the rest of the old edge.
                let (head, tail) = {
                    let (head, tail) = self.nodes[child].sub.split_at(common);
                    (head.to_string(), tail.to_string())
                };
                let split = self.nodes.len();
                self.nodes.push(Node::new(head, vec![child]));
                self.nodes[child].sub = tail;
                self.nodes[n].ch[x2] = split;
                split
            } else {
                // The whole edge label matched; simply descend.
                child
            };
            // The first character matched, so `common > 0` and the loop always advances.
            i += common;
            n = next;
        }
    }

    /// Returns the length in bytes of the longest common prefix of `a` and `b`, measured in
    /// whole characters so the result is a valid char boundary in both strings.
    fn common_prefix_len(a: &str, b: &str) -> usize {
        a.chars()
            .zip(b.chars())
            .take_while(|(x, y)| x == y)
            .map(|(x, _)| x.len_utf8())
            .sum()
    }
}
94+
95+
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an expected node with the given edge label and child indices.
    fn node(sub: &str, ch: &[usize]) -> Node {
        Node {
            sub: sub.to_string(),
            ch: ch.to_vec(),
        }
    }

    /// Checks the exact node layout produced for the classic `banana$` example.
    #[test]
    fn test_suffix_tree() {
        let expected = vec![
            node("", &[1, 8, 6, 10]),
            node("banana$", &[]),
            node("na$", &[]),
            node("na$", &[]),
            node("na", &[2, 5]),
            node("$", &[]),
            node("na", &[3, 7]),
            node("$", &[]),
            node("a", &[4, 9]),
            node("$", &[]),
            node("$", &[]),
        ];
        let suf_tree = SuffixTree::new("banana$".to_string());
        assert_eq!(suf_tree.nodes, expected);
    }
}

0 commit comments

Comments
 (0)