1 | //===- MILexer.cpp - Machine instructions lexer implementation ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the lexing of machine instructions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "MILexer.h" |
14 | #include "llvm/ADT/StringExtras.h" |
15 | #include "llvm/ADT/StringSwitch.h" |
16 | #include "llvm/ADT/Twine.h" |
17 | #include <cassert> |
18 | #include <cctype> |
19 | #include <string> |
20 | |
21 | using namespace llvm; |
22 | |
23 | namespace { |
24 | |
25 | using ErrorCallbackType = |
26 | function_ref<void(StringRef::iterator Loc, const Twine &)>; |
27 | |
28 | /// This class provides a way to iterate and get characters from the source |
29 | /// string. |
30 | class Cursor { |
31 | const char *Ptr = nullptr; |
32 | const char *End = nullptr; |
33 | |
34 | public: |
35 | Cursor(std::nullopt_t) {} |
36 | |
37 | explicit Cursor(StringRef Str) { |
38 | Ptr = Str.data(); |
39 | End = Ptr + Str.size(); |
40 | } |
41 | |
42 | bool isEOF() const { return Ptr == End; } |
43 | |
44 | char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } |
45 | |
46 | void advance(unsigned I = 1) { Ptr += I; } |
47 | |
48 | StringRef remaining() const { return StringRef(Ptr, End - Ptr); } |
49 | |
50 | StringRef upto(Cursor C) const { |
51 | assert(C.Ptr >= Ptr && C.Ptr <= End); |
52 | return StringRef(Ptr, C.Ptr - Ptr); |
53 | } |
54 | |
55 | StringRef::iterator location() const { return Ptr; } |
56 | |
57 | operator bool() const { return Ptr != nullptr; } |
58 | }; |
59 | |
60 | } // end anonymous namespace |
61 | |
62 | MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { |
63 | this->Kind = Kind; |
64 | this->Range = Range; |
65 | return *this; |
66 | } |
67 | |
68 | MIToken &MIToken::setStringValue(StringRef StrVal) { |
69 | StringValue = StrVal; |
70 | return *this; |
71 | } |
72 | |
73 | MIToken &MIToken::setOwnedStringValue(std::string StrVal) { |
74 | StringValueStorage = std::move(StrVal); |
75 | StringValue = StringValueStorage; |
76 | return *this; |
77 | } |
78 | |
79 | MIToken &MIToken::setIntegerValue(APSInt IntVal) { |
80 | this->IntVal = std::move(IntVal); |
81 | return *this; |
82 | } |
83 | |
84 | /// Skip the leading whitespace characters and return the updated cursor. |
85 | static Cursor skipWhitespace(Cursor C) { |
86 | while (isblank(C.peek())) |
87 | C.advance(); |
88 | return C; |
89 | } |
90 | |
/// Returns true if \p C is a line feed or a carriage return.
static bool isNewlineChar(char C) {
  switch (C) {
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
92 | |
93 | /// Skip a line comment and return the updated cursor. |
94 | static Cursor (Cursor C) { |
95 | if (C.peek() != ';') |
96 | return C; |
97 | while (!isNewlineChar(C: C.peek()) && !C.isEOF()) |
98 | C.advance(); |
99 | return C; |
100 | } |
101 | |
102 | /// Machine operands can have comments, enclosed between /* and */. |
103 | /// This eats up all tokens, including /* and */. |
104 | static Cursor skipMachineOperandComment(Cursor C) { |
105 | if (C.peek() != '/' || C.peek(I: 1) != '*') |
106 | return C; |
107 | |
108 | while (C.peek() != '*' || C.peek(I: 1) != '/') |
109 | C.advance(); |
110 | |
111 | C.advance(); |
112 | C.advance(); |
113 | return C; |
114 | } |
115 | |
/// Return true if the given character satisfies the following regular
/// expression: [-a-zA-Z$._0-9]
static bool isIdentifierChar(char C) {
  // <cctype> classifiers are only defined for unsigned char values (or EOF);
  // a plain char holding a non-ASCII byte may be negative.
  const auto UC = static_cast<unsigned char>(C);
  return isalpha(UC) || isdigit(UC) || C == '_' || C == '-' || C == '.' ||
         C == '$';
}
122 | |
123 | /// Unescapes the given string value. |
124 | /// |
125 | /// Expects the string value to be quoted. |
126 | static std::string unescapeQuotedString(StringRef Value) { |
127 | assert(Value.front() == '"' && Value.back() == '"'); |
128 | Cursor C = Cursor(Value.substr(Start: 1, N: Value.size() - 2)); |
129 | |
130 | std::string Str; |
131 | Str.reserve(res_arg: C.remaining().size()); |
132 | while (!C.isEOF()) { |
133 | char Char = C.peek(); |
134 | if (Char == '\\') { |
135 | if (C.peek(I: 1) == '\\') { |
136 | // Two '\' become one |
137 | Str += '\\'; |
138 | C.advance(I: 2); |
139 | continue; |
140 | } |
141 | if (isxdigit(C.peek(I: 1)) && isxdigit(C.peek(I: 2))) { |
142 | Str += hexDigitValue(C: C.peek(I: 1)) * 16 + hexDigitValue(C: C.peek(I: 2)); |
143 | C.advance(I: 3); |
144 | continue; |
145 | } |
146 | } |
147 | Str += Char; |
148 | C.advance(); |
149 | } |
150 | return Str; |
151 | } |
152 | |
153 | /// Lex a string constant using the following regular expression: \"[^\"]*\" |
154 | static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) { |
155 | assert(C.peek() == '"'); |
156 | for (C.advance(); C.peek() != '"'; C.advance()) { |
157 | if (C.isEOF() || isNewlineChar(C: C.peek())) { |
158 | ErrorCallback( |
159 | C.location(), |
160 | "end of machine instruction reached before the closing '\"'" ); |
161 | return std::nullopt; |
162 | } |
163 | } |
164 | C.advance(); |
165 | return C; |
166 | } |
167 | |
168 | static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, |
169 | unsigned PrefixLength, ErrorCallbackType ErrorCallback) { |
170 | auto Range = C; |
171 | C.advance(I: PrefixLength); |
172 | if (C.peek() == '"') { |
173 | if (Cursor R = lexStringConstant(C, ErrorCallback)) { |
174 | StringRef String = Range.upto(C: R); |
175 | Token.reset(Kind: Type, Range: String) |
176 | .setOwnedStringValue( |
177 | unescapeQuotedString(Value: String.drop_front(N: PrefixLength))); |
178 | return R; |
179 | } |
180 | Token.reset(Kind: MIToken::Error, Range: Range.remaining()); |
181 | return Range; |
182 | } |
183 | while (isIdentifierChar(C: C.peek())) |
184 | C.advance(); |
185 | Token.reset(Kind: Type, Range: Range.upto(C)) |
186 | .setStringValue(Range.upto(C).drop_front(N: PrefixLength)); |
187 | return C; |
188 | } |
189 | |
190 | static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { |
191 | return StringSwitch<MIToken::TokenKind>(Identifier) |
192 | .Case(S: "_" , Value: MIToken::underscore) |
193 | .Case(S: "implicit" , Value: MIToken::kw_implicit) |
194 | .Case(S: "implicit-def" , Value: MIToken::kw_implicit_define) |
195 | .Case(S: "def" , Value: MIToken::kw_def) |
196 | .Case(S: "dead" , Value: MIToken::kw_dead) |
197 | .Case(S: "killed" , Value: MIToken::kw_killed) |
198 | .Case(S: "undef" , Value: MIToken::kw_undef) |
199 | .Case(S: "internal" , Value: MIToken::kw_internal) |
200 | .Case(S: "early-clobber" , Value: MIToken::kw_early_clobber) |
201 | .Case(S: "debug-use" , Value: MIToken::kw_debug_use) |
202 | .Case(S: "renamable" , Value: MIToken::kw_renamable) |
203 | .Case(S: "tied-def" , Value: MIToken::kw_tied_def) |
204 | .Case(S: "frame-setup" , Value: MIToken::kw_frame_setup) |
205 | .Case(S: "frame-destroy" , Value: MIToken::kw_frame_destroy) |
206 | .Case(S: "nnan" , Value: MIToken::kw_nnan) |
207 | .Case(S: "ninf" , Value: MIToken::kw_ninf) |
208 | .Case(S: "nsz" , Value: MIToken::kw_nsz) |
209 | .Case(S: "arcp" , Value: MIToken::kw_arcp) |
210 | .Case(S: "contract" , Value: MIToken::kw_contract) |
211 | .Case(S: "afn" , Value: MIToken::kw_afn) |
212 | .Case(S: "reassoc" , Value: MIToken::kw_reassoc) |
213 | .Case(S: "nuw" , Value: MIToken::kw_nuw) |
214 | .Case(S: "nsw" , Value: MIToken::kw_nsw) |
215 | .Case(S: "nusw" , Value: MIToken::kw_nusw) |
216 | .Case(S: "exact" , Value: MIToken::kw_exact) |
217 | .Case(S: "nneg" , Value: MIToken::kw_nneg) |
218 | .Case(S: "disjoint" , Value: MIToken::kw_disjoint) |
219 | .Case(S: "samesign" , Value: MIToken::kw_samesign) |
220 | .Case(S: "nofpexcept" , Value: MIToken::kw_nofpexcept) |
221 | .Case(S: "unpredictable" , Value: MIToken::kw_unpredictable) |
222 | .Case(S: "debug-location" , Value: MIToken::kw_debug_location) |
223 | .Case(S: "debug-instr-number" , Value: MIToken::kw_debug_instr_number) |
224 | .Case(S: "dbg-instr-ref" , Value: MIToken::kw_dbg_instr_ref) |
225 | .Case(S: "same_value" , Value: MIToken::kw_cfi_same_value) |
226 | .Case(S: "offset" , Value: MIToken::kw_cfi_offset) |
227 | .Case(S: "rel_offset" , Value: MIToken::kw_cfi_rel_offset) |
228 | .Case(S: "def_cfa_register" , Value: MIToken::kw_cfi_def_cfa_register) |
229 | .Case(S: "def_cfa_offset" , Value: MIToken::kw_cfi_def_cfa_offset) |
230 | .Case(S: "adjust_cfa_offset" , Value: MIToken::kw_cfi_adjust_cfa_offset) |
231 | .Case(S: "escape" , Value: MIToken::kw_cfi_escape) |
232 | .Case(S: "def_cfa" , Value: MIToken::kw_cfi_def_cfa) |
233 | .Case(S: "llvm_def_aspace_cfa" , Value: MIToken::kw_cfi_llvm_def_aspace_cfa) |
234 | .Case(S: "remember_state" , Value: MIToken::kw_cfi_remember_state) |
235 | .Case(S: "restore" , Value: MIToken::kw_cfi_restore) |
236 | .Case(S: "restore_state" , Value: MIToken::kw_cfi_restore_state) |
237 | .Case(S: "undefined" , Value: MIToken::kw_cfi_undefined) |
238 | .Case(S: "register" , Value: MIToken::kw_cfi_register) |
239 | .Case(S: "window_save" , Value: MIToken::kw_cfi_window_save) |
240 | .Case(S: "negate_ra_sign_state" , |
241 | Value: MIToken::kw_cfi_aarch64_negate_ra_sign_state) |
242 | .Case(S: "negate_ra_sign_state_with_pc" , |
243 | Value: MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc) |
244 | .Case(S: "blockaddress" , Value: MIToken::kw_blockaddress) |
245 | .Case(S: "intrinsic" , Value: MIToken::kw_intrinsic) |
246 | .Case(S: "target-index" , Value: MIToken::kw_target_index) |
247 | .Case(S: "half" , Value: MIToken::kw_half) |
248 | .Case(S: "bfloat" , Value: MIToken::kw_bfloat) |
249 | .Case(S: "float" , Value: MIToken::kw_float) |
250 | .Case(S: "double" , Value: MIToken::kw_double) |
251 | .Case(S: "x86_fp80" , Value: MIToken::kw_x86_fp80) |
252 | .Case(S: "fp128" , Value: MIToken::kw_fp128) |
253 | .Case(S: "ppc_fp128" , Value: MIToken::kw_ppc_fp128) |
254 | .Case(S: "target-flags" , Value: MIToken::kw_target_flags) |
255 | .Case(S: "volatile" , Value: MIToken::kw_volatile) |
256 | .Case(S: "non-temporal" , Value: MIToken::kw_non_temporal) |
257 | .Case(S: "dereferenceable" , Value: MIToken::kw_dereferenceable) |
258 | .Case(S: "invariant" , Value: MIToken::kw_invariant) |
259 | .Case(S: "align" , Value: MIToken::kw_align) |
260 | .Case(S: "basealign" , Value: MIToken::kw_basealign) |
261 | .Case(S: "addrspace" , Value: MIToken::kw_addrspace) |
262 | .Case(S: "stack" , Value: MIToken::kw_stack) |
263 | .Case(S: "got" , Value: MIToken::kw_got) |
264 | .Case(S: "jump-table" , Value: MIToken::kw_jump_table) |
265 | .Case(S: "constant-pool" , Value: MIToken::kw_constant_pool) |
266 | .Case(S: "call-entry" , Value: MIToken::kw_call_entry) |
267 | .Case(S: "custom" , Value: MIToken::kw_custom) |
268 | .Case(S: "liveout" , Value: MIToken::kw_liveout) |
269 | .Case(S: "landing-pad" , Value: MIToken::kw_landing_pad) |
270 | .Case(S: "inlineasm-br-indirect-target" , |
271 | Value: MIToken::kw_inlineasm_br_indirect_target) |
272 | .Case(S: "ehfunclet-entry" , Value: MIToken::kw_ehfunclet_entry) |
273 | .Case(S: "liveins" , Value: MIToken::kw_liveins) |
274 | .Case(S: "successors" , Value: MIToken::kw_successors) |
275 | .Case(S: "floatpred" , Value: MIToken::kw_floatpred) |
276 | .Case(S: "intpred" , Value: MIToken::kw_intpred) |
277 | .Case(S: "shufflemask" , Value: MIToken::kw_shufflemask) |
278 | .Case(S: "pre-instr-symbol" , Value: MIToken::kw_pre_instr_symbol) |
279 | .Case(S: "post-instr-symbol" , Value: MIToken::kw_post_instr_symbol) |
280 | .Case(S: "heap-alloc-marker" , Value: MIToken::kw_heap_alloc_marker) |
281 | .Case(S: "pcsections" , Value: MIToken::kw_pcsections) |
282 | .Case(S: "cfi-type" , Value: MIToken::kw_cfi_type) |
283 | .Case(S: "bbsections" , Value: MIToken::kw_bbsections) |
284 | .Case(S: "bb_id" , Value: MIToken::kw_bb_id) |
285 | .Case(S: "unknown-size" , Value: MIToken::kw_unknown_size) |
286 | .Case(S: "unknown-address" , Value: MIToken::kw_unknown_address) |
287 | .Case(S: "distinct" , Value: MIToken::kw_distinct) |
288 | .Case(S: "ir-block-address-taken" , Value: MIToken::kw_ir_block_address_taken) |
289 | .Case(S: "machine-block-address-taken" , |
290 | Value: MIToken::kw_machine_block_address_taken) |
291 | .Case(S: "call-frame-size" , Value: MIToken::kw_call_frame_size) |
292 | .Case(S: "noconvergent" , Value: MIToken::kw_noconvergent) |
293 | .Default(Value: MIToken::Identifier); |
294 | } |
295 | |
296 | static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { |
297 | if (!isalpha(C.peek()) && C.peek() != '_') |
298 | return std::nullopt; |
299 | auto Range = C; |
300 | while (isIdentifierChar(C: C.peek())) |
301 | C.advance(); |
302 | auto Identifier = Range.upto(C); |
303 | Token.reset(Kind: getIdentifierKind(Identifier), Range: Identifier) |
304 | .setStringValue(Identifier); |
305 | return C; |
306 | } |
307 | |
308 | static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token, |
309 | ErrorCallbackType ErrorCallback) { |
310 | bool IsReference = C.remaining().starts_with(Prefix: "%bb." ); |
311 | if (!IsReference && !C.remaining().starts_with(Prefix: "bb." )) |
312 | return std::nullopt; |
313 | auto Range = C; |
314 | unsigned PrefixLength = IsReference ? 4 : 3; |
315 | C.advance(I: PrefixLength); // Skip '%bb.' or 'bb.' |
316 | if (!isdigit(C.peek())) { |
317 | Token.reset(Kind: MIToken::Error, Range: C.remaining()); |
318 | ErrorCallback(C.location(), "expected a number after '%bb.'" ); |
319 | return C; |
320 | } |
321 | auto NumberRange = C; |
322 | while (isdigit(C.peek())) |
323 | C.advance(); |
324 | StringRef Number = NumberRange.upto(C); |
325 | unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' |
326 | // TODO: The format bb.<id>.<irname> is supported only when it's not a |
327 | // reference. Once we deprecate the format where the irname shows up, we |
328 | // should only lex forward if it is a reference. |
329 | if (C.peek() == '.') { |
330 | C.advance(); // Skip '.' |
331 | ++StringOffset; |
332 | while (isIdentifierChar(C: C.peek())) |
333 | C.advance(); |
334 | } |
335 | Token.reset(Kind: IsReference ? MIToken::MachineBasicBlock |
336 | : MIToken::MachineBasicBlockLabel, |
337 | Range: Range.upto(C)) |
338 | .setIntegerValue(APSInt(Number)) |
339 | .setStringValue(Range.upto(C).drop_front(N: StringOffset)); |
340 | return C; |
341 | } |
342 | |
343 | static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, |
344 | MIToken::TokenKind Kind) { |
345 | if (!C.remaining().starts_with(Prefix: Rule) || !isdigit(C.peek(I: Rule.size()))) |
346 | return std::nullopt; |
347 | auto Range = C; |
348 | C.advance(I: Rule.size()); |
349 | auto NumberRange = C; |
350 | while (isdigit(C.peek())) |
351 | C.advance(); |
352 | Token.reset(Kind, Range: Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); |
353 | return C; |
354 | } |
355 | |
356 | static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, |
357 | MIToken::TokenKind Kind) { |
358 | if (!C.remaining().starts_with(Prefix: Rule) || !isdigit(C.peek(I: Rule.size()))) |
359 | return std::nullopt; |
360 | auto Range = C; |
361 | C.advance(I: Rule.size()); |
362 | auto NumberRange = C; |
363 | while (isdigit(C.peek())) |
364 | C.advance(); |
365 | StringRef Number = NumberRange.upto(C); |
366 | unsigned StringOffset = Rule.size() + Number.size(); |
367 | if (C.peek() == '.') { |
368 | C.advance(); |
369 | ++StringOffset; |
370 | while (isIdentifierChar(C: C.peek())) |
371 | C.advance(); |
372 | } |
373 | Token.reset(Kind, Range: Range.upto(C)) |
374 | .setIntegerValue(APSInt(Number)) |
375 | .setStringValue(Range.upto(C).drop_front(N: StringOffset)); |
376 | return C; |
377 | } |
378 | |
379 | static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { |
380 | return maybeLexIndex(C, Token, Rule: "%jump-table." , Kind: MIToken::JumpTableIndex); |
381 | } |
382 | |
383 | static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { |
384 | return maybeLexIndexAndName(C, Token, Rule: "%stack." , Kind: MIToken::StackObject); |
385 | } |
386 | |
387 | static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { |
388 | return maybeLexIndex(C, Token, Rule: "%fixed-stack." , Kind: MIToken::FixedStackObject); |
389 | } |
390 | |
391 | static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { |
392 | return maybeLexIndex(C, Token, Rule: "%const." , Kind: MIToken::ConstantPoolItem); |
393 | } |
394 | |
395 | static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token, |
396 | ErrorCallbackType ErrorCallback) { |
397 | const StringRef Rule = "%subreg." ; |
398 | if (!C.remaining().starts_with(Prefix: Rule)) |
399 | return std::nullopt; |
400 | return lexName(C, Token, Type: MIToken::SubRegisterIndex, PrefixLength: Rule.size(), |
401 | ErrorCallback); |
402 | } |
403 | |
404 | static Cursor maybeLexIRBlock(Cursor C, MIToken &Token, |
405 | ErrorCallbackType ErrorCallback) { |
406 | const StringRef Rule = "%ir-block." ; |
407 | if (!C.remaining().starts_with(Prefix: Rule)) |
408 | return std::nullopt; |
409 | if (isdigit(C.peek(I: Rule.size()))) |
410 | return maybeLexIndex(C, Token, Rule, Kind: MIToken::IRBlock); |
411 | return lexName(C, Token, Type: MIToken::NamedIRBlock, PrefixLength: Rule.size(), ErrorCallback); |
412 | } |
413 | |
414 | static Cursor maybeLexIRValue(Cursor C, MIToken &Token, |
415 | ErrorCallbackType ErrorCallback) { |
416 | const StringRef Rule = "%ir." ; |
417 | if (!C.remaining().starts_with(Prefix: Rule)) |
418 | return std::nullopt; |
419 | if (isdigit(C.peek(I: Rule.size()))) |
420 | return maybeLexIndex(C, Token, Rule, Kind: MIToken::IRValue); |
421 | return lexName(C, Token, Type: MIToken::NamedIRValue, PrefixLength: Rule.size(), ErrorCallback); |
422 | } |
423 | |
424 | static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, |
425 | ErrorCallbackType ErrorCallback) { |
426 | if (C.peek() != '"') |
427 | return std::nullopt; |
428 | return lexName(C, Token, Type: MIToken::StringConstant, /*PrefixLength=*/0, |
429 | ErrorCallback); |
430 | } |
431 | |
432 | static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { |
433 | auto Range = C; |
434 | C.advance(); // Skip '%' |
435 | auto NumberRange = C; |
436 | while (isdigit(C.peek())) |
437 | C.advance(); |
438 | Token.reset(Kind: MIToken::VirtualRegister, Range: Range.upto(C)) |
439 | .setIntegerValue(APSInt(NumberRange.upto(C))); |
440 | return C; |
441 | } |
442 | |
443 | /// Returns true for a character allowed in a register name. |
444 | static bool isRegisterChar(char C) { |
445 | return isIdentifierChar(C) && C != '.'; |
446 | } |
447 | |
448 | static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) { |
449 | Cursor Range = C; |
450 | C.advance(); // Skip '%' |
451 | while (isRegisterChar(C: C.peek())) |
452 | C.advance(); |
453 | Token.reset(Kind: MIToken::NamedVirtualRegister, Range: Range.upto(C)) |
454 | .setStringValue(Range.upto(C).drop_front(N: 1)); // Drop the '%' |
455 | return C; |
456 | } |
457 | |
458 | static Cursor maybeLexRegister(Cursor C, MIToken &Token, |
459 | ErrorCallbackType ErrorCallback) { |
460 | if (C.peek() != '%' && C.peek() != '$') |
461 | return std::nullopt; |
462 | |
463 | if (C.peek() == '%') { |
464 | if (isdigit(C.peek(I: 1))) |
465 | return lexVirtualRegister(C, Token); |
466 | |
467 | if (isRegisterChar(C: C.peek(I: 1))) |
468 | return lexNamedVirtualRegister(C, Token); |
469 | |
470 | return std::nullopt; |
471 | } |
472 | |
473 | assert(C.peek() == '$'); |
474 | auto Range = C; |
475 | C.advance(); // Skip '$' |
476 | while (isRegisterChar(C: C.peek())) |
477 | C.advance(); |
478 | Token.reset(Kind: MIToken::NamedRegister, Range: Range.upto(C)) |
479 | .setStringValue(Range.upto(C).drop_front(N: 1)); // Drop the '$' |
480 | return C; |
481 | } |
482 | |
483 | static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, |
484 | ErrorCallbackType ErrorCallback) { |
485 | if (C.peek() != '@') |
486 | return std::nullopt; |
487 | if (!isdigit(C.peek(I: 1))) |
488 | return lexName(C, Token, Type: MIToken::NamedGlobalValue, /*PrefixLength=*/1, |
489 | ErrorCallback); |
490 | auto Range = C; |
491 | C.advance(I: 1); // Skip the '@' |
492 | auto NumberRange = C; |
493 | while (isdigit(C.peek())) |
494 | C.advance(); |
495 | Token.reset(Kind: MIToken::GlobalValue, Range: Range.upto(C)) |
496 | .setIntegerValue(APSInt(NumberRange.upto(C))); |
497 | return C; |
498 | } |
499 | |
500 | static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, |
501 | ErrorCallbackType ErrorCallback) { |
502 | if (C.peek() != '&') |
503 | return std::nullopt; |
504 | return lexName(C, Token, Type: MIToken::ExternalSymbol, /*PrefixLength=*/1, |
505 | ErrorCallback); |
506 | } |
507 | |
508 | static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token, |
509 | ErrorCallbackType ErrorCallback) { |
510 | const StringRef Rule = "<mcsymbol " ; |
511 | if (!C.remaining().starts_with(Prefix: Rule)) |
512 | return std::nullopt; |
513 | auto Start = C; |
514 | C.advance(I: Rule.size()); |
515 | |
516 | // Try a simple unquoted name. |
517 | if (C.peek() != '"') { |
518 | while (isIdentifierChar(C: C.peek())) |
519 | C.advance(); |
520 | StringRef String = Start.upto(C).drop_front(N: Rule.size()); |
521 | if (C.peek() != '>') { |
522 | ErrorCallback(C.location(), |
523 | "expected the '<mcsymbol ...' to be closed by a '>'" ); |
524 | Token.reset(Kind: MIToken::Error, Range: Start.remaining()); |
525 | return Start; |
526 | } |
527 | C.advance(); |
528 | |
529 | Token.reset(Kind: MIToken::MCSymbol, Range: Start.upto(C)).setStringValue(String); |
530 | return C; |
531 | } |
532 | |
533 | // Otherwise lex out a quoted name. |
534 | Cursor R = lexStringConstant(C, ErrorCallback); |
535 | if (!R) { |
536 | ErrorCallback(C.location(), |
537 | "unable to parse quoted string from opening quote" ); |
538 | Token.reset(Kind: MIToken::Error, Range: Start.remaining()); |
539 | return Start; |
540 | } |
541 | StringRef String = Start.upto(C: R).drop_front(N: Rule.size()); |
542 | if (R.peek() != '>') { |
543 | ErrorCallback(R.location(), |
544 | "expected the '<mcsymbol ...' to be closed by a '>'" ); |
545 | Token.reset(Kind: MIToken::Error, Range: Start.remaining()); |
546 | return Start; |
547 | } |
548 | R.advance(); |
549 | |
550 | Token.reset(Kind: MIToken::MCSymbol, Range: Start.upto(C: R)) |
551 | .setOwnedStringValue(unescapeQuotedString(Value: String)); |
552 | return R; |
553 | } |
554 | |
/// Returns true if \p C is one of the letters 'H', 'K', 'L', 'M' or 'R', the
/// prefixes accepted after '0x' for a hexadecimal floating-point literal.
static bool isValidHexFloatingPointPrefix(char C) {
  switch (C) {
  case 'H':
  case 'K':
  case 'L':
  case 'M':
  case 'R':
    return true;
  default:
    return false;
  }
}
558 | |
559 | static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { |
560 | C.advance(); |
561 | // Skip over [0-9]*([eE][-+]?[0-9]+)? |
562 | while (isdigit(C.peek())) |
563 | C.advance(); |
564 | if ((C.peek() == 'e' || C.peek() == 'E') && |
565 | (isdigit(C.peek(I: 1)) || |
566 | ((C.peek(I: 1) == '-' || C.peek(I: 1) == '+') && isdigit(C.peek(I: 2))))) { |
567 | C.advance(I: 2); |
568 | while (isdigit(C.peek())) |
569 | C.advance(); |
570 | } |
571 | Token.reset(Kind: MIToken::FloatingPointLiteral, Range: Range.upto(C)); |
572 | return C; |
573 | } |
574 | |
575 | static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) { |
576 | if (C.peek() != '0' || (C.peek(I: 1) != 'x' && C.peek(I: 1) != 'X')) |
577 | return std::nullopt; |
578 | Cursor Range = C; |
579 | C.advance(I: 2); |
580 | unsigned PrefLen = 2; |
581 | if (isValidHexFloatingPointPrefix(C: C.peek())) { |
582 | C.advance(); |
583 | PrefLen++; |
584 | } |
585 | while (isxdigit(C.peek())) |
586 | C.advance(); |
587 | StringRef StrVal = Range.upto(C); |
588 | if (StrVal.size() <= PrefLen) |
589 | return std::nullopt; |
590 | if (PrefLen == 2) |
591 | Token.reset(Kind: MIToken::HexLiteral, Range: Range.upto(C)); |
592 | else // It must be 3, which means that there was a floating-point prefix. |
593 | Token.reset(Kind: MIToken::FloatingPointLiteral, Range: Range.upto(C)); |
594 | return C; |
595 | } |
596 | |
597 | static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { |
598 | if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(I: 1)))) |
599 | return std::nullopt; |
600 | auto Range = C; |
601 | C.advance(); |
602 | while (isdigit(C.peek())) |
603 | C.advance(); |
604 | if (C.peek() == '.') |
605 | return lexFloatingPointLiteral(Range, C, Token); |
606 | StringRef StrVal = Range.upto(C); |
607 | Token.reset(Kind: MIToken::IntegerLiteral, Range: StrVal).setIntegerValue(APSInt(StrVal)); |
608 | return C; |
609 | } |
610 | |
611 | static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { |
612 | return StringSwitch<MIToken::TokenKind>(Identifier) |
613 | .Case(S: "!tbaa" , Value: MIToken::md_tbaa) |
614 | .Case(S: "!alias.scope" , Value: MIToken::md_alias_scope) |
615 | .Case(S: "!noalias" , Value: MIToken::md_noalias) |
616 | .Case(S: "!range" , Value: MIToken::md_range) |
617 | .Case(S: "!DIExpression" , Value: MIToken::md_diexpr) |
618 | .Case(S: "!DILocation" , Value: MIToken::md_dilocation) |
619 | .Default(Value: MIToken::Error); |
620 | } |
621 | |
622 | static Cursor maybeLexExclaim(Cursor C, MIToken &Token, |
623 | ErrorCallbackType ErrorCallback) { |
624 | if (C.peek() != '!') |
625 | return std::nullopt; |
626 | auto Range = C; |
627 | C.advance(I: 1); |
628 | if (isdigit(C.peek()) || !isIdentifierChar(C: C.peek())) { |
629 | Token.reset(Kind: MIToken::exclaim, Range: Range.upto(C)); |
630 | return C; |
631 | } |
632 | while (isIdentifierChar(C: C.peek())) |
633 | C.advance(); |
634 | StringRef StrVal = Range.upto(C); |
635 | Token.reset(Kind: getMetadataKeywordKind(Identifier: StrVal), Range: StrVal); |
636 | if (Token.isError()) |
637 | ErrorCallback(Token.location(), |
638 | "use of unknown metadata keyword '" + StrVal + "'" ); |
639 | return C; |
640 | } |
641 | |
642 | static MIToken::TokenKind symbolToken(char C) { |
643 | switch (C) { |
644 | case ',': |
645 | return MIToken::comma; |
646 | case '.': |
647 | return MIToken::dot; |
648 | case '=': |
649 | return MIToken::equal; |
650 | case ':': |
651 | return MIToken::colon; |
652 | case '(': |
653 | return MIToken::lparen; |
654 | case ')': |
655 | return MIToken::rparen; |
656 | case '{': |
657 | return MIToken::lbrace; |
658 | case '}': |
659 | return MIToken::rbrace; |
660 | case '+': |
661 | return MIToken::plus; |
662 | case '-': |
663 | return MIToken::minus; |
664 | case '<': |
665 | return MIToken::less; |
666 | case '>': |
667 | return MIToken::greater; |
668 | default: |
669 | return MIToken::Error; |
670 | } |
671 | } |
672 | |
673 | static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { |
674 | MIToken::TokenKind Kind; |
675 | unsigned Length = 1; |
676 | if (C.peek() == ':' && C.peek(I: 1) == ':') { |
677 | Kind = MIToken::coloncolon; |
678 | Length = 2; |
679 | } else |
680 | Kind = symbolToken(C: C.peek()); |
681 | if (Kind == MIToken::Error) |
682 | return std::nullopt; |
683 | auto Range = C; |
684 | C.advance(I: Length); |
685 | Token.reset(Kind, Range: Range.upto(C)); |
686 | return C; |
687 | } |
688 | |
689 | static Cursor maybeLexNewline(Cursor C, MIToken &Token) { |
690 | if (!isNewlineChar(C: C.peek())) |
691 | return std::nullopt; |
692 | auto Range = C; |
693 | C.advance(); |
694 | Token.reset(Kind: MIToken::Newline, Range: Range.upto(C)); |
695 | return C; |
696 | } |
697 | |
698 | static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token, |
699 | ErrorCallbackType ErrorCallback) { |
700 | if (C.peek() != '`') |
701 | return std::nullopt; |
702 | auto Range = C; |
703 | C.advance(); |
704 | auto StrRange = C; |
705 | while (C.peek() != '`') { |
706 | if (C.isEOF() || isNewlineChar(C: C.peek())) { |
707 | ErrorCallback( |
708 | C.location(), |
709 | "end of machine instruction reached before the closing '`'" ); |
710 | Token.reset(Kind: MIToken::Error, Range: Range.remaining()); |
711 | return C; |
712 | } |
713 | C.advance(); |
714 | } |
715 | StringRef Value = StrRange.upto(C); |
716 | C.advance(); |
717 | Token.reset(Kind: MIToken::QuotedIRValue, Range: Range.upto(C)).setStringValue(Value); |
718 | return C; |
719 | } |
720 | |
721 | StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, |
722 | ErrorCallbackType ErrorCallback) { |
723 | auto C = skipComment(C: skipWhitespace(C: Cursor(Source))); |
724 | if (C.isEOF()) { |
725 | Token.reset(Kind: MIToken::Eof, Range: C.remaining()); |
726 | return C.remaining(); |
727 | } |
728 | |
729 | C = skipWhitespace(C: skipMachineOperandComment(C)); |
730 | |
731 | if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) |
732 | return R.remaining(); |
733 | if (Cursor R = maybeLexIdentifier(C, Token)) |
734 | return R.remaining(); |
735 | if (Cursor R = maybeLexJumpTableIndex(C, Token)) |
736 | return R.remaining(); |
737 | if (Cursor R = maybeLexStackObject(C, Token)) |
738 | return R.remaining(); |
739 | if (Cursor R = maybeLexFixedStackObject(C, Token)) |
740 | return R.remaining(); |
741 | if (Cursor R = maybeLexConstantPoolItem(C, Token)) |
742 | return R.remaining(); |
743 | if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback)) |
744 | return R.remaining(); |
745 | if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) |
746 | return R.remaining(); |
747 | if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) |
748 | return R.remaining(); |
749 | if (Cursor R = maybeLexRegister(C, Token, ErrorCallback)) |
750 | return R.remaining(); |
751 | if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) |
752 | return R.remaining(); |
753 | if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) |
754 | return R.remaining(); |
755 | if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback)) |
756 | return R.remaining(); |
757 | if (Cursor R = maybeLexHexadecimalLiteral(C, Token)) |
758 | return R.remaining(); |
759 | if (Cursor R = maybeLexNumericalLiteral(C, Token)) |
760 | return R.remaining(); |
761 | if (Cursor R = maybeLexExclaim(C, Token, ErrorCallback)) |
762 | return R.remaining(); |
763 | if (Cursor R = maybeLexSymbol(C, Token)) |
764 | return R.remaining(); |
765 | if (Cursor R = maybeLexNewline(C, Token)) |
766 | return R.remaining(); |
767 | if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) |
768 | return R.remaining(); |
769 | if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback)) |
770 | return R.remaining(); |
771 | |
772 | Token.reset(Kind: MIToken::Error, Range: C.remaining()); |
773 | ErrorCallback(C.location(), |
774 | Twine("unexpected character '" ) + Twine(C.peek()) + "'" ); |
775 | return C.remaining(); |
776 | } |
777 | |