# | Likes | Tech tags | Title | Creator | Created date |
---|---|---|---|---|---|
1 | 0 |
2022-11-20 21:51
|
A commented implementation of the suffix automaton data structure. Construction is done in \(O(n)\) time, and the automaton allows for queries of whether some other string is a substring of the base string in \(O(m)\) (\(n\) is the length of the base string, \(m\) is the length of the query string).
#include<map>
#include<deque>
#include<string>
struct SuffixAutomaton {
struct Node {
int index;
// Leftmost end-position of a maximal substring of this node.
// If string is 'dabcbcx', and node contains substrings 'bc', and 'c', then end_pos = 4 and max_len = 2
int end_pos;
int max_len; //
std::map<char, Node*> arcs; // Arcs for each character
Node* link = NULL; // Suffix link
Node(int index = 0, int end_pos = 0, int max_len = 0, Node* link = NULL) {
this->index = index;
this->end_pos = end_pos;
this->max_len = max_len;
this->link = link;
}
};
std::deque<Node> nodes_;
Node* terminal_node_;
SuffixAutomaton() {
nodes_.resize(1);
terminal_node_ = &nodes_.back();
}
Node* get_root() {
return &nodes_[0];
}
void add_char(char c) {
Node* cur_node = terminal_node_;
nodes_.push_back(Node(nodes_.size(), cur_node->end_pos+1, cur_node->max_len+1, &nodes_[0]));
terminal_node_ = &nodes_.back();
while(cur_node != NULL) {
if(cur_node->arcs.count(c) == 0) { // No arc from this node for the added character
cur_node->arcs[c] = terminal_node_; // Add arc to terminal
cur_node = cur_node->link;
} else {
Node* next = cur_node->arcs[c]; // Arc exists, so follow it
terminal_node_->link = next;
if(next->max_len > cur_node->max_len + 1) { // Terminal adds a new end position
// Thus we need to create a new node that accounts for the larger set of end positions
nodes_.push_back(*next);
Node* copy = &nodes_.back();
copy->index = nodes_.size()-1;
copy->max_len = cur_node->max_len + 1;
next->link = copy; // Copy contains suffixes of the previous arced node, so create a link
terminal_node_->link = copy;
while(cur_node != NULL && cur_node->arcs.count(c) && cur_node->arcs[c] == next) {
// Update the linked suffixes of the node that preceeded character addition
// They need to point to the new node that contains the updated set of possible endpoints
cur_node->arcs[c] = copy;
cur_node = cur_node->link;
}
}
return;
}
}
}
void init(std::string str) {
for(char c : str) {
add_char(c);
}
}
bool contains(std::string substr) {
Node* cur_node = get_root();
for(char c : substr) {
if(!cur_node->arcs.count(c)) {
return false;
}
cur_node = cur_node->arcs[c];
}
return true;
}
};
int main() {
SuffixAutomaton automaton;
automaton.init("ababcbdab");
bool result = automaton.contains("abcb");
return 0;
}
classes | ||
std::deque |
en.cppreference.com | cplusplus.com |
std::map |
en.cppreference.com | cplusplus.com |
std::string |
en.cppreference.com | cplusplus.com |
functions | ||
std::deque::back |
en.cppreference.com | cplusplus.com |
std::deque::operator[] |
en.cppreference.com | cplusplus.com |
std::deque::push_back |
en.cppreference.com | cplusplus.com |
std::deque::resize |
en.cppreference.com | cplusplus.com |
std::deque::size |
en.cppreference.com | cplusplus.com |
std::map::count |
en.cppreference.com | cplusplus.com |
std::map::operator[] |
en.cppreference.com | cplusplus.com |
Construct a suffix automaton for some string, and then use it to find whether some other string exists as a substring in the first one.