1 | //===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file declares the function that lexes the machine instruction source |
10 | // string. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H |
15 | #define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H |
16 | |
17 | #include "llvm/ADT/APSInt.h" |
18 | #include "llvm/ADT/StringRef.h" |
19 | #include <string> |
20 | |
21 | namespace llvm { |
22 | |
23 | class Twine; |
24 | |
25 | /// A token produced by the machine instruction lexer. |
26 | struct MIToken { |
27 | enum TokenKind { |
28 | // Markers |
29 | Eof, |
30 | Error, |
31 | Newline, |
32 | |
33 | // Tokens with no info. |
34 | comma, |
35 | equal, |
36 | underscore, |
37 | colon, |
38 | coloncolon, |
39 | dot, |
40 | exclaim, |
41 | lparen, |
42 | rparen, |
43 | lbrace, |
44 | rbrace, |
45 | plus, |
46 | minus, |
47 | less, |
48 | greater, |
49 | |
50 | // Keywords |
51 | kw_implicit, |
52 | kw_implicit_define, |
53 | kw_def, |
54 | kw_dead, |
55 | kw_dereferenceable, |
56 | kw_killed, |
57 | kw_undef, |
58 | kw_internal, |
59 | kw_early_clobber, |
60 | kw_debug_use, |
61 | kw_renamable, |
62 | kw_tied_def, |
63 | kw_frame_setup, |
64 | kw_frame_destroy, |
65 | kw_nnan, |
66 | kw_ninf, |
67 | kw_nsz, |
68 | kw_arcp, |
69 | kw_contract, |
70 | kw_afn, |
71 | kw_reassoc, |
72 | kw_nusw, |
73 | kw_nuw, |
74 | kw_nsw, |
75 | kw_exact, |
76 | kw_nofpexcept, |
77 | kw_unpredictable, |
78 | kw_nneg, |
79 | kw_disjoint, |
80 | kw_debug_location, |
81 | kw_debug_instr_number, |
82 | kw_dbg_instr_ref, |
83 | kw_cfi_same_value, |
84 | kw_cfi_offset, |
85 | kw_cfi_rel_offset, |
86 | kw_cfi_def_cfa_register, |
87 | kw_cfi_def_cfa_offset, |
88 | kw_cfi_adjust_cfa_offset, |
89 | kw_cfi_escape, |
90 | kw_cfi_def_cfa, |
91 | kw_cfi_llvm_def_aspace_cfa, |
92 | kw_cfi_register, |
93 | kw_cfi_remember_state, |
94 | kw_cfi_restore, |
95 | kw_cfi_restore_state, |
96 | kw_cfi_undefined, |
97 | kw_cfi_window_save, |
98 | kw_cfi_aarch64_negate_ra_sign_state, |
99 | kw_blockaddress, |
100 | kw_intrinsic, |
101 | kw_target_index, |
102 | kw_half, |
103 | kw_bfloat, |
104 | kw_float, |
105 | kw_double, |
106 | kw_x86_fp80, |
107 | kw_fp128, |
108 | kw_ppc_fp128, |
109 | kw_target_flags, |
110 | kw_volatile, |
111 | kw_non_temporal, |
112 | kw_invariant, |
113 | kw_align, |
114 | kw_basealign, |
115 | kw_addrspace, |
116 | kw_stack, |
117 | kw_got, |
118 | kw_jump_table, |
119 | kw_constant_pool, |
120 | kw_call_entry, |
121 | kw_custom, |
122 | kw_liveout, |
123 | kw_landing_pad, |
124 | kw_inlineasm_br_indirect_target, |
125 | kw_ehfunclet_entry, |
126 | kw_liveins, |
127 | kw_successors, |
128 | kw_floatpred, |
129 | kw_intpred, |
130 | kw_shufflemask, |
131 | kw_pre_instr_symbol, |
132 | kw_post_instr_symbol, |
133 | kw_heap_alloc_marker, |
134 | kw_pcsections, |
135 | kw_cfi_type, |
136 | kw_bbsections, |
137 | kw_bb_id, |
138 | kw_unknown_size, |
139 | kw_unknown_address, |
140 | kw_ir_block_address_taken, |
141 | kw_machine_block_address_taken, |
142 | kw_call_frame_size, |
143 | kw_noconvergent, |
144 | |
145 | // Metadata types. |
146 | kw_distinct, |
147 | |
148 | // Named metadata keywords |
149 | md_tbaa, |
150 | md_alias_scope, |
151 | md_noalias, |
152 | md_range, |
153 | md_diexpr, |
154 | md_dilocation, |
155 | |
156 | // Identifier tokens |
157 | Identifier, |
158 | NamedRegister, |
159 | NamedVirtualRegister, |
160 | MachineBasicBlockLabel, |
161 | MachineBasicBlock, |
162 | StackObject, |
163 | FixedStackObject, |
164 | NamedGlobalValue, |
165 | GlobalValue, |
166 | ExternalSymbol, |
167 | MCSymbol, |
168 | |
169 | // Other tokens |
170 | IntegerLiteral, |
171 | FloatingPointLiteral, |
172 | HexLiteral, |
173 | VectorLiteral, |
174 | VirtualRegister, |
175 | ConstantPoolItem, |
176 | JumpTableIndex, |
177 | NamedIRBlock, |
178 | IRBlock, |
179 | NamedIRValue, |
180 | IRValue, |
181 | QuotedIRValue, // `<constant value>` |
182 | SubRegisterIndex, |
183 | StringConstant |
184 | }; |
185 | |
186 | private: |
187 | TokenKind Kind = Error; |
188 | StringRef Range; |
189 | StringRef StringValue; |
190 | std::string StringValueStorage; |
191 | APSInt IntVal; |
192 | |
193 | public: |
194 | MIToken() = default; |
195 | |
196 | MIToken &reset(TokenKind Kind, StringRef Range); |
197 | |
198 | MIToken &setStringValue(StringRef StrVal); |
199 | MIToken &setOwnedStringValue(std::string StrVal); |
200 | MIToken &setIntegerValue(APSInt IntVal); |
201 | |
202 | TokenKind kind() const { return Kind; } |
203 | |
204 | bool isError() const { return Kind == Error; } |
205 | |
206 | bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; } |
207 | |
208 | bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; } |
209 | |
210 | bool isRegister() const { |
211 | return Kind == NamedRegister || Kind == underscore || |
212 | Kind == NamedVirtualRegister || Kind == VirtualRegister; |
213 | } |
214 | |
215 | bool isRegisterFlag() const { |
216 | return Kind == kw_implicit || Kind == kw_implicit_define || |
217 | Kind == kw_def || Kind == kw_dead || Kind == kw_killed || |
218 | Kind == kw_undef || Kind == kw_internal || |
219 | Kind == kw_early_clobber || Kind == kw_debug_use || |
220 | Kind == kw_renamable; |
221 | } |
222 | |
223 | bool isMemoryOperandFlag() const { |
224 | return Kind == kw_volatile || Kind == kw_non_temporal || |
225 | Kind == kw_dereferenceable || Kind == kw_invariant || |
226 | Kind == StringConstant; |
227 | } |
228 | |
229 | bool is(TokenKind K) const { return Kind == K; } |
230 | |
231 | bool isNot(TokenKind K) const { return Kind != K; } |
232 | |
233 | StringRef::iterator location() const { return Range.begin(); } |
234 | |
235 | StringRef range() const { return Range; } |
236 | |
237 | /// Return the token's string value. |
238 | StringRef stringValue() const { return StringValue; } |
239 | |
240 | const APSInt &integerValue() const { return IntVal; } |
241 | |
242 | bool hasIntegerValue() const { |
243 | return Kind == IntegerLiteral || Kind == MachineBasicBlock || |
244 | Kind == MachineBasicBlockLabel || Kind == StackObject || |
245 | Kind == FixedStackObject || Kind == GlobalValue || |
246 | Kind == VirtualRegister || Kind == ConstantPoolItem || |
247 | Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue; |
248 | } |
249 | }; |
250 | |
/// Consume a single machine instruction token in the given source and return
/// the remaining source string.
///
/// \param Source the text a token is lexed from.
/// \param Token non-const reference to the token object; presumably filled in
///        with the lexed token (the definition is not in this header --
///        confirm against the implementation file).
/// \param ErrorCallback callable taking a position in the source
///        (\c StringRef::iterator) and a message (\c Twine); presumably
///        invoked to report lexing errors -- confirm against the
///        implementation file.
/// \returns the portion of the source that remains after the consumed token.
StringRef lexMIToken(
    StringRef Source, MIToken &Token,
    function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
256 | |
257 | } // end namespace llvm |
258 | |
259 | #endif // LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H |
260 | |