Algoteka
Log in to submit your own samples!

Suffix Automaton

Problem by oml1111
# Tech tags Title Creator Created date
1 0
2022-11-20 21:51
View all samples for this language (1 verified and 0 unverified)

O(n) construction O(m) query | C++ |

By oml1111 |
0 likes

A commented implementation of the suffix automaton data structure. Construction is done in \(O(n)\) time, and the automaton allows for queries of whether some other string is a substring of the base string in \(O(m)\) (\(n\) is the length of the base string, \(m\) is the length of the query string).

Code

#include<map>
#include<deque>
#include<string>


struct SuffixAutomaton {
    struct Node {
        int index;
        // Leftmost end-position of a maximal substring of this node.
        // If string is 'dabcbcx', and node contains substrings 'bc', and 'c', then end_pos = 4 and max_len = 2
        int end_pos;
        int max_len;  //
        std::map<char, Node*> arcs;  // Arcs for each character
        Node* link = NULL;  // Suffix link
        
        Node(int index = 0, int end_pos = 0, int max_len = 0, Node* link = NULL) {
            this->index = index;
            this->end_pos = end_pos;
            this->max_len = max_len;
            this->link = link;
        }
    };
    
    std::deque<Node> nodes_;
    Node* terminal_node_;
    
    SuffixAutomaton() {
        nodes_.resize(1);
        terminal_node_ = &nodes_.back();
    }
    
    Node* get_root() {
        return &nodes_[0];
    }
    
    void add_char(char c) {
        Node* cur_node = terminal_node_;
        nodes_.push_back(Node(nodes_.size(), cur_node->end_pos+1, cur_node->max_len+1, &nodes_[0]));
        terminal_node_ = &nodes_.back();
        
        while(cur_node != NULL) {
            if(cur_node->arcs.count(c) == 0) {  // No arc from this node for the added character
                cur_node->arcs[c] = terminal_node_;  // Add arc to terminal
                cur_node = cur_node->link;
            } else {
                Node* next = cur_node->arcs[c];  // Arc exists, so follow it
                terminal_node_->link = next;
                
                if(next->max_len > cur_node->max_len + 1) {  // Terminal adds a new end position
                    // Thus we need to create a new node that accounts for the larger set of end positions 
                    nodes_.push_back(*next);
                    Node* copy = &nodes_.back();
                    copy->index = nodes_.size()-1;
                    
                    copy->max_len = cur_node->max_len + 1;
                    next->link = copy;  // Copy contains suffixes of the previous arced node, so create a link
                    terminal_node_->link = copy;
                    
                    while(cur_node != NULL && cur_node->arcs.count(c) && cur_node->arcs[c] == next) {
                        // Update the linked suffixes of the node that preceeded character addition
                        // They need to point to the new node that contains the updated set of possible endpoints
                        cur_node->arcs[c] = copy;
                        cur_node = cur_node->link;
                    }
                }
                return;
            }
        }
    }
    
    void init(std::string str) {
        for(char c : str) {
            add_char(c);
        }
    }
    
    bool contains(std::string substr) {
        Node* cur_node = get_root();
        
        for(char c : substr) {
            if(!cur_node->arcs.count(c)) {
                return false;
            }
            cur_node = cur_node->arcs[c];
        }
        return true;
    }
};


int main() {
    SuffixAutomaton automaton;
    automaton.init("ababcbdab");
    bool result = automaton.contains("abcb");

    return 0;
}

Further reading

An Illustrated Tutorial on Suffix Automata - medium.com
Suffix Automation - cp-algorithms.com

References

classes
std::deque en.cppreference.com cplusplus.com
std::map en.cppreference.com cplusplus.com
std::string en.cppreference.com cplusplus.com
functions
std::deque::back en.cppreference.com cplusplus.com
std::deque::operator[] en.cppreference.com cplusplus.com
std::deque::push_back en.cppreference.com cplusplus.com
std::deque::resize en.cppreference.com cplusplus.com
std::deque::size en.cppreference.com cplusplus.com
std::map::count en.cppreference.com cplusplus.com
std::map::operator[] en.cppreference.com cplusplus.com

Problem Description

Construct a suffix automaton for some string, and then use it to find whether some other string exists as a substring in the first one.

Suffix automaton - wikipedia.org

View sample discussion (0 comments)
View problem discussion (0 comments)