Skip to content

[pull] master from TheAlgorithms:master #25

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ These are for demonstration purposes only.
- [x] [Reverse](./src/string/reverse.rs)
- [x] [Run Length Encoding](./src/string/run_length_encoding.rs)
- [x] [Hamming Distance](./src/string/hamming_distance.rs)
- [x] [Suffix Tree](./src/string/suffix_tree.rs)

## [General](./src/general)

Expand Down
39 changes: 37 additions & 2 deletions src/string/aho_corasick.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ impl AhoCorasick {
pub fn search<'a>(&self, s: &'a str) -> Vec<&'a str> {
let mut ans = vec![];
let mut cur = Rc::clone(&self.root);
for (i, c) in s.chars().enumerate() {
let mut position: usize = 0;
for (_, c) in s.chars().enumerate() {
loop {
if let Some(child) = Rc::clone(&cur).borrow().trans.get(&c) {
cur = Rc::clone(child);
Expand All @@ -76,8 +77,9 @@ impl AhoCorasick {
None => break,
}
}
position += c.len_utf8();
for &len in &cur.borrow().lengths {
ans.push(&s[i + 1 - len..=i]);
ans.push(&s[position - len..position]);
}
}
ans
Expand All @@ -95,4 +97,37 @@ mod tests {
let res = ac.search("ababcxyzacxy12678acxy6543");
assert_eq!(res, ["abc", "xyz", "acxy", "678", "acxy", "6543",]);
}

/// Searches a haystack that mixes ASCII with multi-byte UTF-8 text (CJK and
/// katakana) and checks that the reported matches equal the expected list,
/// in the listed order.
#[test]
fn test_aho_corasick_with_utf8() {
    let patterns = [
        "abc",
        "中文",
        "abc中",
        "abcd",
        "xyz",
        "acxy",
        "efg",
        "123",
        "678",
        "6543",
        "ハンバーガー",
    ];
    let expected = [
        "abc",
        "abc中",
        "xyz",
        "acxy",
        "678",
        "acxy",
        "ハンバーガー",
        "6543",
        "中文",
    ];

    let automaton = AhoCorasick::new(&patterns);
    let found = automaton.search("ababc中xyzacxy12678acxyハンバーガー6543中文");

    assert_eq!(found, expected);
}
}
2 changes: 2 additions & 0 deletions src/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mod manacher;
mod rabin_karp;
mod reverse;
mod run_length_encoding;
mod suffix_tree;
mod z_algorithm;

pub use self::aho_corasick::AhoCorasick;
Expand All @@ -18,5 +19,6 @@ pub use self::manacher::manacher;
pub use self::rabin_karp::rabin_karp;
pub use self::reverse::reverse;
pub use self::run_length_encoding::{run_length_decoding, run_length_encoding};
pub use self::suffix_tree::{Node, SuffixTree};
pub use self::z_algorithm::match_pattern;
pub use self::z_algorithm::z_array;
152 changes: 152 additions & 0 deletions src/string/suffix_tree.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
// In computer science, a suffix tree (also called PAT tree or, in an earlier form, position tree)
// is a compressed trie containing all the suffixes of the given text as their keys and positions
// in the text as their values. Suffix trees allow particularly fast implementations of many
// important string operations. Source: https://en.wikipedia.org/wiki/Suffix_tree

/// A single node of a [`SuffixTree`].
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Node {
    /// Edge label: the piece of the input text that leads into this node.
    pub sub: String,
    /// Indices (into [`SuffixTree::nodes`]) of this node's children, in insertion order.
    pub ch: Vec<usize>,
}

impl Node {
    /// Creates a node with the given edge label and child indices.
    fn new(sub: String, children: Vec<usize>) -> Self {
        // `children` is already owned, so it is moved in rather than copied.
        Node { sub, ch: children }
    }

    /// Creates a node with an empty label and no children (used as the root).
    pub fn empty() -> Self {
        Node {
            sub: String::new(),
            ch: Vec::new(),
        }
    }
}

/// A suffix tree stored as a flat vector of nodes; index `0` is the root.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct SuffixTree {
    /// All nodes of the tree. Children are referenced by their index in this vector.
    pub nodes: Vec<Node>,
}

impl SuffixTree {
    /// Builds the suffix tree of `s` by inserting every suffix, longest first.
    ///
    /// Suffixes start at character boundaries, so text containing multi-byte
    /// UTF-8 characters is handled without panicking. An empty `s` yields a
    /// tree that contains only the root node.
    pub fn new(s: String) -> Self {
        let mut suf_tree = SuffixTree {
            nodes: vec![Node::empty()],
        };
        // `char_indices` yields only valid character boundaries, unlike `0..s.len()`.
        for (start, _) in s.char_indices() {
            suf_tree.add_suffix(&s[start..]);
        }
        suf_tree
    }

    /// Inserts one suffix, walking down from the root and splitting an edge
    /// where the suffix diverges from an existing label.
    fn add_suffix(&mut self, suf: &str) {
        let mut n = 0; // index of the node we are currently standing on
        let mut i = 0; // byte offset into `suf` of the part not yet matched
        while i < suf.len() {
            // `i` only ever advances by whole characters, so this slice is valid.
            let rest = &suf[i..];
            let first = rest.chars().next();

            // Look for the first child whose label begins with the next character.
            let slot = self.nodes[n]
                .ch
                .iter()
                .position(|&child| self.nodes[child].sub.chars().next() == first);
            let x2 = match slot {
                Some(x2) => x2,
                None => {
                    // No child matches: the remainder becomes a new leaf.
                    let leaf = self.nodes.len();
                    self.nodes.push(Node::new(rest.to_string(), vec![]));
                    self.nodes[n].ch.push(leaf);
                    return;
                }
            };
            let mut n2 = self.nodes[n].ch[x2];

            // Byte length of the common prefix, measured in whole characters so
            // that any split below lands on a character boundary.
            let common: usize = self.nodes[n2]
                .sub
                .chars()
                .zip(rest.chars())
                .take_while(|(a, b)| a == b)
                .map(|(a, _)| a.len_utf8())
                .sum();

            if common < self.nodes[n2].sub.len() {
                // The suffix diverges (or ends) inside this label: split the edge.
                // The existing node keeps the tail; a new intermediate node
                // takes the shared head and adopts the existing node as a child.
                let tail = self.nodes[n2].sub.split_off(common);
                let head = std::mem::replace(&mut self.nodes[n2].sub, tail);
                let split = self.nodes.len();
                self.nodes.push(Node::new(head, vec![n2]));
                self.nodes[n].ch[x2] = split;
                n2 = split;
            }

            i += common;
            n = n2;
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for spelling out an expected node.
    fn node(sub: &str, ch: &[usize]) -> Node {
        Node {
            sub: sub.to_string(),
            ch: ch.to_vec(),
        }
    }

    /// Builds the tree for `banana$` and compares the full node vector.
    #[test]
    fn test_suffix_tree() {
        let expected = vec![
            node("", &[1, 8, 6, 10]),
            node("banana$", &[]),
            node("na$", &[]),
            node("na$", &[]),
            node("na", &[2, 5]),
            node("$", &[]),
            node("na", &[3, 7]),
            node("$", &[]),
            node("a", &[4, 9]),
            node("$", &[]),
            node("$", &[]),
        ];

        let suf_tree = SuffixTree::new("banana$".to_string());

        assert_eq!(suf_tree.nodes, expected);
    }
}