| 1 | //===--------------------- DfaEmitter.h -----------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // Defines a generic automaton builder. This takes a set of transitions and |
| 9 | // states that represent a nondeterministic finite state automaton (NFA) and |
| 10 | // emits a determinized DFA in a form that include/llvm/Support/Automaton.h can |
| 11 | // drive. |
| 12 | // |
| 13 | // See file llvm/TableGen/Automaton.td for the TableGen API definition. |
| 14 | // |
| 15 | //===----------------------------------------------------------------------===// |
| 16 | |
| 17 | #ifndef LLVM_UTILS_TABLEGEN_DFAEMITTER_H |
| 18 | #define LLVM_UTILS_TABLEGEN_DFAEMITTER_H |
| 19 | |
| 20 | #include "llvm/ADT/SmallVector.h" |
| 21 | #include "llvm/ADT/UniqueVector.h" |
| 22 | #include <map> |
| 23 | #include <set> |
| 24 | #include <utility> |
| 25 | #include <vector> |
| 26 | |
| 27 | namespace llvm { |
| 28 | |
| 29 | class raw_ostream; |
| 30 | class StringRef; |
| 31 | |
| 32 | /// Construct a deterministic finite state automaton from possible |
| 33 | /// nondeterministic state and transition data. |
| 34 | /// |
| 35 | /// The state type is a 64-bit unsigned integer. The generated automaton is |
| 36 | /// invariant to the sparsity of the state representation - its size is only |
| 37 | /// a function of the cardinality of the set of states. |
| 38 | /// |
| 39 | /// The inputs to this emitter are considered to define a nondeterministic |
| 40 | /// finite state automaton (NFA). This is then converted to a DFA during |
| 41 | /// emission. The emitted tables can be used to by |
| 42 | /// include/llvm/Support/Automaton.h. |
| 43 | class DfaEmitter { |
| 44 | public: |
| 45 | // The type of an NFA state. The initial state is always zero. |
| 46 | using state_type = uint64_t; |
| 47 | // The type of an action. |
| 48 | using action_type = uint64_t; |
| 49 | |
| 50 | DfaEmitter() = default; |
| 51 | virtual ~DfaEmitter() = default; |
| 52 | |
| 53 | void addTransition(state_type From, state_type To, action_type A); |
| 54 | void emit(StringRef Name, raw_ostream &OS); |
| 55 | |
| 56 | protected: |
| 57 | /// Emit the C++ type of an action to OS. |
| 58 | virtual void printActionType(raw_ostream &OS); |
| 59 | /// Emit the C++ value of an action A to OS. |
| 60 | virtual void printActionValue(action_type A, raw_ostream &OS); |
| 61 | |
| 62 | private: |
| 63 | /// The state type of deterministic states. These are only used internally to |
| 64 | /// this class. This is an ID into the DfaStates UniqueVector. |
| 65 | using dfa_state_type = unsigned; |
| 66 | |
| 67 | /// The actual representation of a DFA state, which is a union of one or more |
| 68 | /// NFA states. |
| 69 | using DfaState = SmallVector<state_type, 4>; |
| 70 | |
| 71 | /// A DFA transition consists of a set of NFA states transitioning to a |
| 72 | /// new set of NFA states. The DfaTransitionInfo tracks, for every |
| 73 | /// transitioned-from NFA state, a set of valid transitioned-to states. |
| 74 | /// |
| 75 | /// Emission of this transition relation allows algorithmic determination of |
| 76 | /// the possible candidate NFA paths taken under a given input sequence to |
| 77 | /// reach a given DFA state. |
| 78 | using DfaTransitionInfo = SmallVector<std::pair<state_type, state_type>, 4>; |
| 79 | |
| 80 | /// The set of all possible actions. |
| 81 | std::set<action_type> Actions; |
| 82 | |
| 83 | /// The set of nondeterministic transitions. A state-action pair can |
| 84 | /// transition to multiple target states. |
| 85 | std::map<std::pair<state_type, action_type>, std::vector<state_type>> |
| 86 | NfaTransitions; |
| 87 | std::set<state_type> NfaStates; |
| 88 | unsigned NumNfaTransitions = 0; |
| 89 | |
| 90 | /// The set of deterministic states. DfaStates.getId(DfaState) returns an ID, |
| 91 | /// which is dfa_state_type. Note that because UniqueVector reserves state |
| 92 | /// zero, the initial DFA state is always 1. |
| 93 | UniqueVector<DfaState> DfaStates; |
| 94 | /// The set of deterministic transitions. A state-action pair has only a |
| 95 | /// single target state. |
| 96 | std::map<std::pair<dfa_state_type, action_type>, |
| 97 | std::pair<dfa_state_type, DfaTransitionInfo>> |
| 98 | DfaTransitions; |
| 99 | |
| 100 | /// Visit all NFA states and construct the DFA. |
| 101 | void constructDfa(); |
| 102 | /// Visit a single DFA state and construct all possible transitions to new DFA |
| 103 | /// states. |
| 104 | void visitDfaState(const DfaState &DS); |
| 105 | }; |
| 106 | |
| 107 | } // namespace llvm |
| 108 | |
| 109 | #endif |
| 110 | |