1*c8dee2aaSAndroid Build Coastguard Worker /* 2*c8dee2aaSAndroid Build Coastguard Worker * Copyright 2017 Google Inc. 3*c8dee2aaSAndroid Build Coastguard Worker * 4*c8dee2aaSAndroid Build Coastguard Worker * Use of this source code is governed by a BSD-style license that can be 5*c8dee2aaSAndroid Build Coastguard Worker * found in the LICENSE file. 6*c8dee2aaSAndroid Build Coastguard Worker */ 7*c8dee2aaSAndroid Build Coastguard Worker #ifndef NFAtoDFA_DEFINED 8*c8dee2aaSAndroid Build Coastguard Worker #define NFAtoDFA_DEFINED 9*c8dee2aaSAndroid Build Coastguard Worker 10*c8dee2aaSAndroid Build Coastguard Worker #include "src/sksl/lex/DFA.h" 11*c8dee2aaSAndroid Build Coastguard Worker #include "src/sksl/lex/DFAState.h" 12*c8dee2aaSAndroid Build Coastguard Worker #include "src/sksl/lex/NFA.h" 13*c8dee2aaSAndroid Build Coastguard Worker #include "src/sksl/lex/NFAState.h" 14*c8dee2aaSAndroid Build Coastguard Worker 15*c8dee2aaSAndroid Build Coastguard Worker #include <algorithm> 16*c8dee2aaSAndroid Build Coastguard Worker #include <climits> 17*c8dee2aaSAndroid Build Coastguard Worker #include <memory> 18*c8dee2aaSAndroid Build Coastguard Worker #include <unordered_map> 19*c8dee2aaSAndroid Build Coastguard Worker #include <set> 20*c8dee2aaSAndroid Build Coastguard Worker #include <vector> 21*c8dee2aaSAndroid Build Coastguard Worker 22*c8dee2aaSAndroid Build Coastguard Worker /** 23*c8dee2aaSAndroid Build Coastguard Worker * Converts a nondeterministic finite automaton to a deterministic finite automaton. Since NFAs and 24*c8dee2aaSAndroid Build Coastguard Worker * DFAs differ only in that an NFA allows multiple states at the same time, we can find each 25*c8dee2aaSAndroid Build Coastguard Worker * possible combination of simultaneous NFA states and give this combination a label. These labelled 26*c8dee2aaSAndroid Build Coastguard Worker * nodes are our DFA nodes, since we can only be in one such unique set of NFA states at a time. 27*c8dee2aaSAndroid Build Coastguard Worker * 28*c8dee2aaSAndroid Build Coastguard Worker * As an NFA can end up in multiple accept states at the same time (for instance, the token "while" 29*c8dee2aaSAndroid Build Coastguard Worker * is valid for both WHILE and IDENTIFIER), we disambiguate by preferring the first matching regex 30*c8dee2aaSAndroid Build Coastguard Worker * (in terms of the order in which they were added to the NFA). 31*c8dee2aaSAndroid Build Coastguard Worker */ 32*c8dee2aaSAndroid Build Coastguard Worker class NFAtoDFA { 33*c8dee2aaSAndroid Build Coastguard Worker public: 34*c8dee2aaSAndroid Build Coastguard Worker inline static constexpr char START_CHAR = 9; 35*c8dee2aaSAndroid Build Coastguard Worker inline static constexpr char END_CHAR = 126; 36*c8dee2aaSAndroid Build Coastguard Worker NFAtoDFA(NFA * nfa)37*c8dee2aaSAndroid Build Coastguard Worker NFAtoDFA(NFA* nfa) 38*c8dee2aaSAndroid Build Coastguard Worker : fNFA(*nfa) {} 39*c8dee2aaSAndroid Build Coastguard Worker 40*c8dee2aaSAndroid Build Coastguard Worker /** 41*c8dee2aaSAndroid Build Coastguard Worker * Returns a DFA created from the NFA. 42*c8dee2aaSAndroid Build Coastguard Worker */ convert()43*c8dee2aaSAndroid Build Coastguard Worker DFA convert() { 44*c8dee2aaSAndroid Build Coastguard Worker // create state 0, the "reject" state 45*c8dee2aaSAndroid Build Coastguard Worker getState(DFAState::Label({})); 46*c8dee2aaSAndroid Build Coastguard Worker // create a state representing being in all of the NFA's start states at once 47*c8dee2aaSAndroid Build Coastguard Worker std::vector<int> startStates = fNFA.fStartStates; 48*c8dee2aaSAndroid Build Coastguard Worker std::sort(startStates.begin(), startStates.end()); 49*c8dee2aaSAndroid Build Coastguard Worker // this becomes state 1, our start state 50*c8dee2aaSAndroid Build Coastguard Worker DFAState* start = getState(DFAState::Label(startStates)); 51*c8dee2aaSAndroid Build Coastguard Worker this->scanState(start); 52*c8dee2aaSAndroid Build Coastguard Worker 53*c8dee2aaSAndroid Build Coastguard Worker this->computeMappings(); 54*c8dee2aaSAndroid Build Coastguard Worker 55*c8dee2aaSAndroid Build Coastguard Worker int stateCount = 0; 56*c8dee2aaSAndroid Build Coastguard Worker for (const auto& row : fTransitions) { 57*c8dee2aaSAndroid Build Coastguard Worker stateCount = std::max(stateCount, (int) row.size()); 58*c8dee2aaSAndroid Build Coastguard Worker } 59*c8dee2aaSAndroid Build Coastguard Worker return DFA(fCharMappings, fTransitions, fAccepts); 60*c8dee2aaSAndroid Build Coastguard Worker } 61*c8dee2aaSAndroid Build Coastguard Worker 62*c8dee2aaSAndroid Build Coastguard Worker private: 63*c8dee2aaSAndroid Build Coastguard Worker /** 64*c8dee2aaSAndroid Build Coastguard Worker * Returns an existing state with the given label, or creates a new one and returns it. 65*c8dee2aaSAndroid Build Coastguard Worker */ getState(DFAState::Label label)66*c8dee2aaSAndroid Build Coastguard Worker DFAState* getState(DFAState::Label label) { 67*c8dee2aaSAndroid Build Coastguard Worker auto found = fStates.find(label); 68*c8dee2aaSAndroid Build Coastguard Worker if (found == fStates.end()) { 69*c8dee2aaSAndroid Build Coastguard Worker int id = fStates.size(); 70*c8dee2aaSAndroid Build Coastguard Worker fStates[label] = std::unique_ptr<DFAState>(new DFAState(id, label)); 71*c8dee2aaSAndroid Build Coastguard Worker return fStates[label].get(); 72*c8dee2aaSAndroid Build Coastguard Worker } 73*c8dee2aaSAndroid Build Coastguard Worker return found->second.get(); 74*c8dee2aaSAndroid Build Coastguard Worker } 75*c8dee2aaSAndroid Build Coastguard Worker add(int nfaState,std::vector<int> * states)76*c8dee2aaSAndroid Build Coastguard Worker void add(int nfaState, std::vector<int>* states) { 77*c8dee2aaSAndroid Build Coastguard Worker NFAState state = fNFA.fStates[nfaState]; 78*c8dee2aaSAndroid Build Coastguard Worker if (state.fKind == NFAState::kRemapped_Kind) { 79*c8dee2aaSAndroid Build Coastguard Worker for (int next : state.fData) { 80*c8dee2aaSAndroid Build Coastguard Worker this->add(next, states); 81*c8dee2aaSAndroid Build Coastguard Worker } 82*c8dee2aaSAndroid Build Coastguard Worker } else { 83*c8dee2aaSAndroid Build Coastguard Worker for (int entry : *states) { 84*c8dee2aaSAndroid Build Coastguard Worker if (nfaState == entry) { 85*c8dee2aaSAndroid Build Coastguard Worker return; 86*c8dee2aaSAndroid Build Coastguard Worker } 87*c8dee2aaSAndroid Build Coastguard Worker } 88*c8dee2aaSAndroid Build Coastguard Worker states->push_back(nfaState); 89*c8dee2aaSAndroid Build Coastguard Worker } 90*c8dee2aaSAndroid Build Coastguard Worker } 91*c8dee2aaSAndroid Build Coastguard Worker addTransition(char c,int start,int next)92*c8dee2aaSAndroid Build Coastguard Worker void addTransition(char c, int start, int next) { 93*c8dee2aaSAndroid Build Coastguard Worker while (fTransitions.size() <= (size_t) c) { 94*c8dee2aaSAndroid Build Coastguard Worker fTransitions.push_back(std::vector<int>()); 95*c8dee2aaSAndroid Build Coastguard Worker } 96*c8dee2aaSAndroid Build Coastguard Worker std::vector<int>& row = fTransitions[c]; 97*c8dee2aaSAndroid Build Coastguard Worker while (row.size() <= (size_t) start) { 98*c8dee2aaSAndroid Build Coastguard Worker row.push_back(INVALID); 99*c8dee2aaSAndroid Build Coastguard Worker } 100*c8dee2aaSAndroid Build Coastguard Worker row[start] = next; 101*c8dee2aaSAndroid Build Coastguard Worker } 102*c8dee2aaSAndroid Build Coastguard Worker scanState(DFAState * state)103*c8dee2aaSAndroid Build Coastguard Worker void scanState(DFAState* state) { 104*c8dee2aaSAndroid Build Coastguard Worker state->fIsScanned = true; 105*c8dee2aaSAndroid Build Coastguard Worker for (char c = START_CHAR; c <= END_CHAR; ++c) { 106*c8dee2aaSAndroid Build Coastguard Worker std::vector<int> next; 107*c8dee2aaSAndroid Build Coastguard Worker int bestAccept = INT_MAX; 108*c8dee2aaSAndroid Build Coastguard Worker for (int idx : state->fLabel.fStates) { 109*c8dee2aaSAndroid Build Coastguard Worker const NFAState& nfaState = fNFA.fStates[idx]; 110*c8dee2aaSAndroid Build Coastguard Worker if (nfaState.accept(c)) { 111*c8dee2aaSAndroid Build Coastguard Worker for (int nextState : nfaState.fNext) { 112*c8dee2aaSAndroid Build Coastguard Worker if (fNFA.fStates[nextState].fKind == NFAState::kAccept_Kind) { 113*c8dee2aaSAndroid Build Coastguard Worker bestAccept = std::min(bestAccept, fNFA.fStates[nextState].fData[0]); 114*c8dee2aaSAndroid Build Coastguard Worker } 115*c8dee2aaSAndroid Build Coastguard Worker this->add(nextState, &next); 116*c8dee2aaSAndroid Build Coastguard Worker } 117*c8dee2aaSAndroid Build Coastguard Worker } 118*c8dee2aaSAndroid Build Coastguard Worker } 119*c8dee2aaSAndroid Build Coastguard Worker std::sort(next.begin(), next.end()); 120*c8dee2aaSAndroid Build Coastguard Worker DFAState* nextState = this->getState(DFAState::Label(next)); 121*c8dee2aaSAndroid Build Coastguard Worker this->addTransition(c, state->fId, nextState->fId); 122*c8dee2aaSAndroid Build Coastguard Worker if (bestAccept != INT_MAX) { 123*c8dee2aaSAndroid Build Coastguard Worker while (fAccepts.size() <= (size_t) nextState->fId) { 124*c8dee2aaSAndroid Build Coastguard Worker fAccepts.push_back(INVALID); 125*c8dee2aaSAndroid Build Coastguard Worker } 126*c8dee2aaSAndroid Build Coastguard Worker fAccepts[nextState->fId] = bestAccept; 127*c8dee2aaSAndroid Build Coastguard Worker } 128*c8dee2aaSAndroid Build Coastguard Worker if (!nextState->fIsScanned) { 129*c8dee2aaSAndroid Build Coastguard Worker this->scanState(nextState); 130*c8dee2aaSAndroid Build Coastguard Worker } 131*c8dee2aaSAndroid Build Coastguard Worker } 132*c8dee2aaSAndroid Build Coastguard Worker } 133*c8dee2aaSAndroid Build Coastguard Worker 134*c8dee2aaSAndroid Build Coastguard Worker // collapse rows with the same transitions to a single row. This is common, as each row 135*c8dee2aaSAndroid Build Coastguard Worker // represents a character and often there are many characters for which all transitions are 136*c8dee2aaSAndroid Build Coastguard Worker // identical (e.g. [0-9] are treated the same way by all lexer rules) computeMappings()137*c8dee2aaSAndroid Build Coastguard Worker void computeMappings() { 138*c8dee2aaSAndroid Build Coastguard Worker // mappings[<input row>] = <output row> 139*c8dee2aaSAndroid Build Coastguard Worker std::vector<std::vector<int>*> uniques; 140*c8dee2aaSAndroid Build Coastguard Worker // this could be done more efficiently, but O(n^2) is plenty fast for our purposes 141*c8dee2aaSAndroid Build Coastguard Worker for (size_t i = 0; i < fTransitions.size(); ++i) { 142*c8dee2aaSAndroid Build Coastguard Worker int found = -1; 143*c8dee2aaSAndroid Build Coastguard Worker for (size_t j = 0; j < uniques.size(); ++j) { 144*c8dee2aaSAndroid Build Coastguard Worker if (*uniques[j] == fTransitions[i]) { 145*c8dee2aaSAndroid Build Coastguard Worker found = j; 146*c8dee2aaSAndroid Build Coastguard Worker break; 147*c8dee2aaSAndroid Build Coastguard Worker } 148*c8dee2aaSAndroid Build Coastguard Worker } 149*c8dee2aaSAndroid Build Coastguard Worker if (found == -1) { 150*c8dee2aaSAndroid Build Coastguard Worker found = (int) uniques.size(); 151*c8dee2aaSAndroid Build Coastguard Worker uniques.push_back(&fTransitions[i]); 152*c8dee2aaSAndroid Build Coastguard Worker } 153*c8dee2aaSAndroid Build Coastguard Worker fCharMappings.push_back(found); 154*c8dee2aaSAndroid Build Coastguard Worker } 155*c8dee2aaSAndroid Build Coastguard Worker std::vector<std::vector<int>> newTransitions; 156*c8dee2aaSAndroid Build Coastguard Worker for (std::vector<int>* row : uniques) { 157*c8dee2aaSAndroid Build Coastguard Worker newTransitions.push_back(*row); 158*c8dee2aaSAndroid Build Coastguard Worker } 159*c8dee2aaSAndroid Build Coastguard Worker fTransitions = newTransitions; 160*c8dee2aaSAndroid Build Coastguard Worker } 161*c8dee2aaSAndroid Build Coastguard Worker 162*c8dee2aaSAndroid Build Coastguard Worker const NFA& fNFA; 163*c8dee2aaSAndroid Build Coastguard Worker std::unordered_map<DFAState::Label, std::unique_ptr<DFAState>> fStates; 164*c8dee2aaSAndroid Build Coastguard Worker std::vector<std::vector<int>> fTransitions; 165*c8dee2aaSAndroid Build Coastguard Worker std::vector<int> fCharMappings; 166*c8dee2aaSAndroid Build Coastguard Worker std::vector<int> fAccepts; 167*c8dee2aaSAndroid Build Coastguard Worker }; 168*c8dee2aaSAndroid Build Coastguard Worker #endif // NFAtoDFA_DEFINED 169