1 | //===--------------------- DfaEmitter.h -----------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // Defines a generic automaton builder. This takes a set of transitions and |
9 | // states that represent a nondeterministic finite state automaton (NFA) and |
10 | // emits a determinized DFA in a form that include/llvm/Support/Automaton.h can |
11 | // drive. |
12 | // |
13 | // See file llvm/TableGen/Automaton.td for the TableGen API definition. |
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #ifndef LLVM_UTILS_TABLEGEN_DFAEMITTER_H |
18 | #define LLVM_UTILS_TABLEGEN_DFAEMITTER_H |
19 | |
20 | #include "llvm/ADT/SmallVector.h" |
21 | #include "llvm/ADT/UniqueVector.h" |
22 | #include <map> |
23 | #include <set> |
24 | #include <utility> |
25 | #include <vector> |
26 | |
27 | namespace llvm { |
28 | |
29 | class raw_ostream; |
30 | class StringRef; |
31 | |
32 | /// Construct a deterministic finite state automaton from possible |
33 | /// nondeterministic state and transition data. |
34 | /// |
35 | /// The state type is a 64-bit unsigned integer. The generated automaton is |
36 | /// invariant to the sparsity of the state representation - its size is only |
37 | /// a function of the cardinality of the set of states. |
38 | /// |
39 | /// The inputs to this emitter are considered to define a nondeterministic |
40 | /// finite state automaton (NFA). This is then converted to a DFA during |
41 | /// emission. The emitted tables can be used to by |
42 | /// include/llvm/Support/Automaton.h. |
43 | class DfaEmitter { |
44 | public: |
45 | // The type of an NFA state. The initial state is always zero. |
46 | using state_type = uint64_t; |
47 | // The type of an action. |
48 | using action_type = uint64_t; |
49 | |
50 | DfaEmitter() = default; |
51 | virtual ~DfaEmitter() = default; |
52 | |
53 | void addTransition(state_type From, state_type To, action_type A); |
54 | void emit(StringRef Name, raw_ostream &OS); |
55 | |
56 | protected: |
57 | /// Emit the C++ type of an action to OS. |
58 | virtual void printActionType(raw_ostream &OS); |
59 | /// Emit the C++ value of an action A to OS. |
60 | virtual void printActionValue(action_type A, raw_ostream &OS); |
61 | |
62 | private: |
63 | /// The state type of deterministic states. These are only used internally to |
64 | /// this class. This is an ID into the DfaStates UniqueVector. |
65 | using dfa_state_type = unsigned; |
66 | |
67 | /// The actual representation of a DFA state, which is a union of one or more |
68 | /// NFA states. |
69 | using DfaState = SmallVector<state_type, 4>; |
70 | |
71 | /// A DFA transition consists of a set of NFA states transitioning to a |
72 | /// new set of NFA states. The DfaTransitionInfo tracks, for every |
73 | /// transitioned-from NFA state, a set of valid transitioned-to states. |
74 | /// |
75 | /// Emission of this transition relation allows algorithmic determination of |
76 | /// the possible candidate NFA paths taken under a given input sequence to |
77 | /// reach a given DFA state. |
78 | using DfaTransitionInfo = SmallVector<std::pair<state_type, state_type>, 4>; |
79 | |
80 | /// The set of all possible actions. |
81 | std::set<action_type> Actions; |
82 | |
83 | /// The set of nondeterministic transitions. A state-action pair can |
84 | /// transition to multiple target states. |
85 | std::map<std::pair<state_type, action_type>, std::vector<state_type>> |
86 | NfaTransitions; |
87 | std::set<state_type> NfaStates; |
88 | unsigned NumNfaTransitions = 0; |
89 | |
90 | /// The set of deterministic states. DfaStates.getId(DfaState) returns an ID, |
91 | /// which is dfa_state_type. Note that because UniqueVector reserves state |
92 | /// zero, the initial DFA state is always 1. |
93 | UniqueVector<DfaState> DfaStates; |
94 | /// The set of deterministic transitions. A state-action pair has only a |
95 | /// single target state. |
96 | std::map<std::pair<dfa_state_type, action_type>, |
97 | std::pair<dfa_state_type, DfaTransitionInfo>> |
98 | DfaTransitions; |
99 | |
100 | /// Visit all NFA states and construct the DFA. |
101 | void constructDfa(); |
102 | /// Visit a single DFA state and construct all possible transitions to new DFA |
103 | /// states. |
104 | void visitDfaState(const DfaState &DS); |
105 | }; |
106 | |
107 | } // namespace llvm |
108 | |
109 | #endif |
110 | |