1 | //===- MILexer.cpp - Machine instructions lexer implementation ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the lexing of machine instructions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "MILexer.h" |
14 | #include "llvm/ADT/StringExtras.h" |
15 | #include "llvm/ADT/StringSwitch.h" |
16 | #include "llvm/ADT/Twine.h" |
17 | #include <cassert> |
18 | #include <cctype> |
19 | #include <string> |
20 | |
21 | using namespace llvm; |
22 | |
23 | namespace { |
24 | |
25 | using ErrorCallbackType = |
26 | function_ref<void(StringRef::iterator Loc, const Twine &)>; |
27 | |
28 | /// This class provides a way to iterate and get characters from the source |
29 | /// string. |
30 | class Cursor { |
31 | const char *Ptr = nullptr; |
32 | const char *End = nullptr; |
33 | |
34 | public: |
35 | Cursor(std::nullopt_t) {} |
36 | |
37 | explicit Cursor(StringRef Str) { |
38 | Ptr = Str.data(); |
39 | End = Ptr + Str.size(); |
40 | } |
41 | |
42 | bool isEOF() const { return Ptr == End; } |
43 | |
44 | char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } |
45 | |
46 | void advance(unsigned I = 1) { Ptr += I; } |
47 | |
48 | StringRef remaining() const { return StringRef(Ptr, End - Ptr); } |
49 | |
50 | StringRef upto(Cursor C) const { |
51 | assert(C.Ptr >= Ptr && C.Ptr <= End); |
52 | return StringRef(Ptr, C.Ptr - Ptr); |
53 | } |
54 | |
55 | StringRef::iterator location() const { return Ptr; } |
56 | |
57 | operator bool() const { return Ptr != nullptr; } |
58 | }; |
59 | |
60 | } // end anonymous namespace |
61 | |
62 | MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { |
63 | this->Kind = Kind; |
64 | this->Range = Range; |
65 | return *this; |
66 | } |
67 | |
68 | MIToken &MIToken::setStringValue(StringRef StrVal) { |
69 | StringValue = StrVal; |
70 | return *this; |
71 | } |
72 | |
73 | MIToken &MIToken::setOwnedStringValue(std::string StrVal) { |
74 | StringValueStorage = std::move(StrVal); |
75 | StringValue = StringValueStorage; |
76 | return *this; |
77 | } |
78 | |
79 | MIToken &MIToken::setIntegerValue(APSInt IntVal) { |
80 | this->IntVal = std::move(IntVal); |
81 | return *this; |
82 | } |
83 | |
84 | /// Skip the leading whitespace characters and return the updated cursor. |
85 | static Cursor skipWhitespace(Cursor C) { |
86 | while (isblank(C.peek())) |
87 | C.advance(); |
88 | return C; |
89 | } |
90 | |
/// Return true if the given character is a line feed or a carriage return.
static bool isNewlineChar(char C) {
  switch (C) {
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
92 | |
93 | /// Skip a line comment and return the updated cursor. |
94 | static Cursor (Cursor C) { |
95 | if (C.peek() != ';') |
96 | return C; |
97 | while (!isNewlineChar(C: C.peek()) && !C.isEOF()) |
98 | C.advance(); |
99 | return C; |
100 | } |
101 | |
102 | /// Machine operands can have comments, enclosed between /* and */. |
103 | /// This eats up all tokens, including /* and */. |
104 | static Cursor skipMachineOperandComment(Cursor C) { |
105 | if (C.peek() != '/' || C.peek(I: 1) != '*') |
106 | return C; |
107 | |
108 | while (C.peek() != '*' || C.peek(I: 1) != '/') |
109 | C.advance(); |
110 | |
111 | C.advance(); |
112 | C.advance(); |
113 | return C; |
114 | } |
115 | |
/// Return true if the given character satisfies the following regular
/// expression: [-a-zA-Z$._0-9]
static bool isIdentifierChar(char C) {
  // <cctype> predicates are undefined for negative arguments; a plain char
  // holding a byte >= 0x80 may be negative, so classify it as unsigned char.
  const unsigned char UC = static_cast<unsigned char>(C);
  return isalpha(UC) || isdigit(UC) || C == '_' || C == '-' || C == '.' ||
         C == '$';
}
122 | |
123 | /// Unescapes the given string value. |
124 | /// |
125 | /// Expects the string value to be quoted. |
126 | static std::string unescapeQuotedString(StringRef Value) { |
127 | assert(Value.front() == '"' && Value.back() == '"'); |
128 | Cursor C = Cursor(Value.substr(Start: 1, N: Value.size() - 2)); |
129 | |
130 | std::string Str; |
131 | Str.reserve(res_arg: C.remaining().size()); |
132 | while (!C.isEOF()) { |
133 | char Char = C.peek(); |
134 | if (Char == '\\') { |
135 | if (C.peek(I: 1) == '\\') { |
136 | // Two '\' become one |
137 | Str += '\\'; |
138 | C.advance(I: 2); |
139 | continue; |
140 | } |
141 | if (isxdigit(C.peek(I: 1)) && isxdigit(C.peek(I: 2))) { |
142 | Str += hexDigitValue(C: C.peek(I: 1)) * 16 + hexDigitValue(C: C.peek(I: 2)); |
143 | C.advance(I: 3); |
144 | continue; |
145 | } |
146 | } |
147 | Str += Char; |
148 | C.advance(); |
149 | } |
150 | return Str; |
151 | } |
152 | |
153 | /// Lex a string constant using the following regular expression: \"[^\"]*\" |
154 | static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) { |
155 | assert(C.peek() == '"'); |
156 | for (C.advance(); C.peek() != '"'; C.advance()) { |
157 | if (C.isEOF() || isNewlineChar(C: C.peek())) { |
158 | ErrorCallback( |
159 | C.location(), |
160 | "end of machine instruction reached before the closing '\"'" ); |
161 | return std::nullopt; |
162 | } |
163 | } |
164 | C.advance(); |
165 | return C; |
166 | } |
167 | |
168 | static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, |
169 | unsigned PrefixLength, ErrorCallbackType ErrorCallback) { |
170 | auto Range = C; |
171 | C.advance(I: PrefixLength); |
172 | if (C.peek() == '"') { |
173 | if (Cursor R = lexStringConstant(C, ErrorCallback)) { |
174 | StringRef String = Range.upto(C: R); |
175 | Token.reset(Kind: Type, Range: String) |
176 | .setOwnedStringValue( |
177 | unescapeQuotedString(Value: String.drop_front(N: PrefixLength))); |
178 | return R; |
179 | } |
180 | Token.reset(Kind: MIToken::Error, Range: Range.remaining()); |
181 | return Range; |
182 | } |
183 | while (isIdentifierChar(C: C.peek())) |
184 | C.advance(); |
185 | Token.reset(Kind: Type, Range: Range.upto(C)) |
186 | .setStringValue(Range.upto(C).drop_front(N: PrefixLength)); |
187 | return C; |
188 | } |
189 | |
190 | static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { |
191 | return StringSwitch<MIToken::TokenKind>(Identifier) |
192 | .Case(S: "_" , Value: MIToken::underscore) |
193 | .Case(S: "implicit" , Value: MIToken::kw_implicit) |
194 | .Case(S: "implicit-def" , Value: MIToken::kw_implicit_define) |
195 | .Case(S: "def" , Value: MIToken::kw_def) |
196 | .Case(S: "dead" , Value: MIToken::kw_dead) |
197 | .Case(S: "killed" , Value: MIToken::kw_killed) |
198 | .Case(S: "undef" , Value: MIToken::kw_undef) |
199 | .Case(S: "internal" , Value: MIToken::kw_internal) |
200 | .Case(S: "early-clobber" , Value: MIToken::kw_early_clobber) |
201 | .Case(S: "debug-use" , Value: MIToken::kw_debug_use) |
202 | .Case(S: "renamable" , Value: MIToken::kw_renamable) |
203 | .Case(S: "tied-def" , Value: MIToken::kw_tied_def) |
204 | .Case(S: "frame-setup" , Value: MIToken::kw_frame_setup) |
205 | .Case(S: "frame-destroy" , Value: MIToken::kw_frame_destroy) |
206 | .Case(S: "nnan" , Value: MIToken::kw_nnan) |
207 | .Case(S: "ninf" , Value: MIToken::kw_ninf) |
208 | .Case(S: "nsz" , Value: MIToken::kw_nsz) |
209 | .Case(S: "arcp" , Value: MIToken::kw_arcp) |
210 | .Case(S: "contract" , Value: MIToken::kw_contract) |
211 | .Case(S: "afn" , Value: MIToken::kw_afn) |
212 | .Case(S: "reassoc" , Value: MIToken::kw_reassoc) |
213 | .Case(S: "nuw" , Value: MIToken::kw_nuw) |
214 | .Case(S: "nsw" , Value: MIToken::kw_nsw) |
215 | .Case(S: "nusw" , Value: MIToken::kw_nusw) |
216 | .Case(S: "exact" , Value: MIToken::kw_exact) |
217 | .Case(S: "nneg" , Value: MIToken::kw_nneg) |
218 | .Case(S: "disjoint" , Value: MIToken::kw_disjoint) |
219 | .Case(S: "nofpexcept" , Value: MIToken::kw_nofpexcept) |
220 | .Case(S: "unpredictable" , Value: MIToken::kw_unpredictable) |
221 | .Case(S: "debug-location" , Value: MIToken::kw_debug_location) |
222 | .Case(S: "debug-instr-number" , Value: MIToken::kw_debug_instr_number) |
223 | .Case(S: "dbg-instr-ref" , Value: MIToken::kw_dbg_instr_ref) |
224 | .Case(S: "same_value" , Value: MIToken::kw_cfi_same_value) |
225 | .Case(S: "offset" , Value: MIToken::kw_cfi_offset) |
226 | .Case(S: "rel_offset" , Value: MIToken::kw_cfi_rel_offset) |
227 | .Case(S: "def_cfa_register" , Value: MIToken::kw_cfi_def_cfa_register) |
228 | .Case(S: "def_cfa_offset" , Value: MIToken::kw_cfi_def_cfa_offset) |
229 | .Case(S: "adjust_cfa_offset" , Value: MIToken::kw_cfi_adjust_cfa_offset) |
230 | .Case(S: "escape" , Value: MIToken::kw_cfi_escape) |
231 | .Case(S: "def_cfa" , Value: MIToken::kw_cfi_def_cfa) |
232 | .Case(S: "llvm_def_aspace_cfa" , Value: MIToken::kw_cfi_llvm_def_aspace_cfa) |
233 | .Case(S: "remember_state" , Value: MIToken::kw_cfi_remember_state) |
234 | .Case(S: "restore" , Value: MIToken::kw_cfi_restore) |
235 | .Case(S: "restore_state" , Value: MIToken::kw_cfi_restore_state) |
236 | .Case(S: "undefined" , Value: MIToken::kw_cfi_undefined) |
237 | .Case(S: "register" , Value: MIToken::kw_cfi_register) |
238 | .Case(S: "window_save" , Value: MIToken::kw_cfi_window_save) |
239 | .Case(S: "negate_ra_sign_state" , |
240 | Value: MIToken::kw_cfi_aarch64_negate_ra_sign_state) |
241 | .Case(S: "blockaddress" , Value: MIToken::kw_blockaddress) |
242 | .Case(S: "intrinsic" , Value: MIToken::kw_intrinsic) |
243 | .Case(S: "target-index" , Value: MIToken::kw_target_index) |
244 | .Case(S: "half" , Value: MIToken::kw_half) |
245 | .Case(S: "bfloat" , Value: MIToken::kw_bfloat) |
246 | .Case(S: "float" , Value: MIToken::kw_float) |
247 | .Case(S: "double" , Value: MIToken::kw_double) |
248 | .Case(S: "x86_fp80" , Value: MIToken::kw_x86_fp80) |
249 | .Case(S: "fp128" , Value: MIToken::kw_fp128) |
250 | .Case(S: "ppc_fp128" , Value: MIToken::kw_ppc_fp128) |
251 | .Case(S: "target-flags" , Value: MIToken::kw_target_flags) |
252 | .Case(S: "volatile" , Value: MIToken::kw_volatile) |
253 | .Case(S: "non-temporal" , Value: MIToken::kw_non_temporal) |
254 | .Case(S: "dereferenceable" , Value: MIToken::kw_dereferenceable) |
255 | .Case(S: "invariant" , Value: MIToken::kw_invariant) |
256 | .Case(S: "align" , Value: MIToken::kw_align) |
257 | .Case(S: "basealign" , Value: MIToken::kw_basealign) |
258 | .Case(S: "addrspace" , Value: MIToken::kw_addrspace) |
259 | .Case(S: "stack" , Value: MIToken::kw_stack) |
260 | .Case(S: "got" , Value: MIToken::kw_got) |
261 | .Case(S: "jump-table" , Value: MIToken::kw_jump_table) |
262 | .Case(S: "constant-pool" , Value: MIToken::kw_constant_pool) |
263 | .Case(S: "call-entry" , Value: MIToken::kw_call_entry) |
264 | .Case(S: "custom" , Value: MIToken::kw_custom) |
265 | .Case(S: "liveout" , Value: MIToken::kw_liveout) |
266 | .Case(S: "landing-pad" , Value: MIToken::kw_landing_pad) |
267 | .Case(S: "inlineasm-br-indirect-target" , |
268 | Value: MIToken::kw_inlineasm_br_indirect_target) |
269 | .Case(S: "ehfunclet-entry" , Value: MIToken::kw_ehfunclet_entry) |
270 | .Case(S: "liveins" , Value: MIToken::kw_liveins) |
271 | .Case(S: "successors" , Value: MIToken::kw_successors) |
272 | .Case(S: "floatpred" , Value: MIToken::kw_floatpred) |
273 | .Case(S: "intpred" , Value: MIToken::kw_intpred) |
274 | .Case(S: "shufflemask" , Value: MIToken::kw_shufflemask) |
275 | .Case(S: "pre-instr-symbol" , Value: MIToken::kw_pre_instr_symbol) |
276 | .Case(S: "post-instr-symbol" , Value: MIToken::kw_post_instr_symbol) |
277 | .Case(S: "heap-alloc-marker" , Value: MIToken::kw_heap_alloc_marker) |
278 | .Case(S: "pcsections" , Value: MIToken::kw_pcsections) |
279 | .Case(S: "cfi-type" , Value: MIToken::kw_cfi_type) |
280 | .Case(S: "bbsections" , Value: MIToken::kw_bbsections) |
281 | .Case(S: "bb_id" , Value: MIToken::kw_bb_id) |
282 | .Case(S: "unknown-size" , Value: MIToken::kw_unknown_size) |
283 | .Case(S: "unknown-address" , Value: MIToken::kw_unknown_address) |
284 | .Case(S: "distinct" , Value: MIToken::kw_distinct) |
285 | .Case(S: "ir-block-address-taken" , Value: MIToken::kw_ir_block_address_taken) |
286 | .Case(S: "machine-block-address-taken" , |
287 | Value: MIToken::kw_machine_block_address_taken) |
288 | .Case(S: "call-frame-size" , Value: MIToken::kw_call_frame_size) |
289 | .Case(S: "noconvergent" , Value: MIToken::kw_noconvergent) |
290 | .Default(Value: MIToken::Identifier); |
291 | } |
292 | |
293 | static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { |
294 | if (!isalpha(C.peek()) && C.peek() != '_') |
295 | return std::nullopt; |
296 | auto Range = C; |
297 | while (isIdentifierChar(C: C.peek())) |
298 | C.advance(); |
299 | auto Identifier = Range.upto(C); |
300 | Token.reset(Kind: getIdentifierKind(Identifier), Range: Identifier) |
301 | .setStringValue(Identifier); |
302 | return C; |
303 | } |
304 | |
305 | static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token, |
306 | ErrorCallbackType ErrorCallback) { |
307 | bool IsReference = C.remaining().starts_with(Prefix: "%bb." ); |
308 | if (!IsReference && !C.remaining().starts_with(Prefix: "bb." )) |
309 | return std::nullopt; |
310 | auto Range = C; |
311 | unsigned PrefixLength = IsReference ? 4 : 3; |
312 | C.advance(I: PrefixLength); // Skip '%bb.' or 'bb.' |
313 | if (!isdigit(C.peek())) { |
314 | Token.reset(Kind: MIToken::Error, Range: C.remaining()); |
315 | ErrorCallback(C.location(), "expected a number after '%bb.'" ); |
316 | return C; |
317 | } |
318 | auto NumberRange = C; |
319 | while (isdigit(C.peek())) |
320 | C.advance(); |
321 | StringRef Number = NumberRange.upto(C); |
322 | unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' |
323 | // TODO: The format bb.<id>.<irname> is supported only when it's not a |
324 | // reference. Once we deprecate the format where the irname shows up, we |
325 | // should only lex forward if it is a reference. |
326 | if (C.peek() == '.') { |
327 | C.advance(); // Skip '.' |
328 | ++StringOffset; |
329 | while (isIdentifierChar(C: C.peek())) |
330 | C.advance(); |
331 | } |
332 | Token.reset(Kind: IsReference ? MIToken::MachineBasicBlock |
333 | : MIToken::MachineBasicBlockLabel, |
334 | Range: Range.upto(C)) |
335 | .setIntegerValue(APSInt(Number)) |
336 | .setStringValue(Range.upto(C).drop_front(N: StringOffset)); |
337 | return C; |
338 | } |
339 | |
340 | static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, |
341 | MIToken::TokenKind Kind) { |
342 | if (!C.remaining().starts_with(Prefix: Rule) || !isdigit(C.peek(I: Rule.size()))) |
343 | return std::nullopt; |
344 | auto Range = C; |
345 | C.advance(I: Rule.size()); |
346 | auto NumberRange = C; |
347 | while (isdigit(C.peek())) |
348 | C.advance(); |
349 | Token.reset(Kind, Range: Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); |
350 | return C; |
351 | } |
352 | |
353 | static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, |
354 | MIToken::TokenKind Kind) { |
355 | if (!C.remaining().starts_with(Prefix: Rule) || !isdigit(C.peek(I: Rule.size()))) |
356 | return std::nullopt; |
357 | auto Range = C; |
358 | C.advance(I: Rule.size()); |
359 | auto NumberRange = C; |
360 | while (isdigit(C.peek())) |
361 | C.advance(); |
362 | StringRef Number = NumberRange.upto(C); |
363 | unsigned StringOffset = Rule.size() + Number.size(); |
364 | if (C.peek() == '.') { |
365 | C.advance(); |
366 | ++StringOffset; |
367 | while (isIdentifierChar(C: C.peek())) |
368 | C.advance(); |
369 | } |
370 | Token.reset(Kind, Range: Range.upto(C)) |
371 | .setIntegerValue(APSInt(Number)) |
372 | .setStringValue(Range.upto(C).drop_front(N: StringOffset)); |
373 | return C; |
374 | } |
375 | |
376 | static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { |
377 | return maybeLexIndex(C, Token, Rule: "%jump-table." , Kind: MIToken::JumpTableIndex); |
378 | } |
379 | |
380 | static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { |
381 | return maybeLexIndexAndName(C, Token, Rule: "%stack." , Kind: MIToken::StackObject); |
382 | } |
383 | |
384 | static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { |
385 | return maybeLexIndex(C, Token, Rule: "%fixed-stack." , Kind: MIToken::FixedStackObject); |
386 | } |
387 | |
388 | static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { |
389 | return maybeLexIndex(C, Token, Rule: "%const." , Kind: MIToken::ConstantPoolItem); |
390 | } |
391 | |
392 | static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token, |
393 | ErrorCallbackType ErrorCallback) { |
394 | const StringRef Rule = "%subreg." ; |
395 | if (!C.remaining().starts_with(Prefix: Rule)) |
396 | return std::nullopt; |
397 | return lexName(C, Token, Type: MIToken::SubRegisterIndex, PrefixLength: Rule.size(), |
398 | ErrorCallback); |
399 | } |
400 | |
401 | static Cursor maybeLexIRBlock(Cursor C, MIToken &Token, |
402 | ErrorCallbackType ErrorCallback) { |
403 | const StringRef Rule = "%ir-block." ; |
404 | if (!C.remaining().starts_with(Prefix: Rule)) |
405 | return std::nullopt; |
406 | if (isdigit(C.peek(I: Rule.size()))) |
407 | return maybeLexIndex(C, Token, Rule, Kind: MIToken::IRBlock); |
408 | return lexName(C, Token, Type: MIToken::NamedIRBlock, PrefixLength: Rule.size(), ErrorCallback); |
409 | } |
410 | |
411 | static Cursor maybeLexIRValue(Cursor C, MIToken &Token, |
412 | ErrorCallbackType ErrorCallback) { |
413 | const StringRef Rule = "%ir." ; |
414 | if (!C.remaining().starts_with(Prefix: Rule)) |
415 | return std::nullopt; |
416 | if (isdigit(C.peek(I: Rule.size()))) |
417 | return maybeLexIndex(C, Token, Rule, Kind: MIToken::IRValue); |
418 | return lexName(C, Token, Type: MIToken::NamedIRValue, PrefixLength: Rule.size(), ErrorCallback); |
419 | } |
420 | |
421 | static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, |
422 | ErrorCallbackType ErrorCallback) { |
423 | if (C.peek() != '"') |
424 | return std::nullopt; |
425 | return lexName(C, Token, Type: MIToken::StringConstant, /*PrefixLength=*/0, |
426 | ErrorCallback); |
427 | } |
428 | |
429 | static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { |
430 | auto Range = C; |
431 | C.advance(); // Skip '%' |
432 | auto NumberRange = C; |
433 | while (isdigit(C.peek())) |
434 | C.advance(); |
435 | Token.reset(Kind: MIToken::VirtualRegister, Range: Range.upto(C)) |
436 | .setIntegerValue(APSInt(NumberRange.upto(C))); |
437 | return C; |
438 | } |
439 | |
440 | /// Returns true for a character allowed in a register name. |
441 | static bool isRegisterChar(char C) { |
442 | return isIdentifierChar(C) && C != '.'; |
443 | } |
444 | |
445 | static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) { |
446 | Cursor Range = C; |
447 | C.advance(); // Skip '%' |
448 | while (isRegisterChar(C: C.peek())) |
449 | C.advance(); |
450 | Token.reset(Kind: MIToken::NamedVirtualRegister, Range: Range.upto(C)) |
451 | .setStringValue(Range.upto(C).drop_front(N: 1)); // Drop the '%' |
452 | return C; |
453 | } |
454 | |
455 | static Cursor maybeLexRegister(Cursor C, MIToken &Token, |
456 | ErrorCallbackType ErrorCallback) { |
457 | if (C.peek() != '%' && C.peek() != '$') |
458 | return std::nullopt; |
459 | |
460 | if (C.peek() == '%') { |
461 | if (isdigit(C.peek(I: 1))) |
462 | return lexVirtualRegister(C, Token); |
463 | |
464 | if (isRegisterChar(C: C.peek(I: 1))) |
465 | return lexNamedVirtualRegister(C, Token); |
466 | |
467 | return std::nullopt; |
468 | } |
469 | |
470 | assert(C.peek() == '$'); |
471 | auto Range = C; |
472 | C.advance(); // Skip '$' |
473 | while (isRegisterChar(C: C.peek())) |
474 | C.advance(); |
475 | Token.reset(Kind: MIToken::NamedRegister, Range: Range.upto(C)) |
476 | .setStringValue(Range.upto(C).drop_front(N: 1)); // Drop the '$' |
477 | return C; |
478 | } |
479 | |
480 | static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, |
481 | ErrorCallbackType ErrorCallback) { |
482 | if (C.peek() != '@') |
483 | return std::nullopt; |
484 | if (!isdigit(C.peek(I: 1))) |
485 | return lexName(C, Token, Type: MIToken::NamedGlobalValue, /*PrefixLength=*/1, |
486 | ErrorCallback); |
487 | auto Range = C; |
488 | C.advance(I: 1); // Skip the '@' |
489 | auto NumberRange = C; |
490 | while (isdigit(C.peek())) |
491 | C.advance(); |
492 | Token.reset(Kind: MIToken::GlobalValue, Range: Range.upto(C)) |
493 | .setIntegerValue(APSInt(NumberRange.upto(C))); |
494 | return C; |
495 | } |
496 | |
497 | static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, |
498 | ErrorCallbackType ErrorCallback) { |
499 | if (C.peek() != '&') |
500 | return std::nullopt; |
501 | return lexName(C, Token, Type: MIToken::ExternalSymbol, /*PrefixLength=*/1, |
502 | ErrorCallback); |
503 | } |
504 | |
505 | static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token, |
506 | ErrorCallbackType ErrorCallback) { |
507 | const StringRef Rule = "<mcsymbol " ; |
508 | if (!C.remaining().starts_with(Prefix: Rule)) |
509 | return std::nullopt; |
510 | auto Start = C; |
511 | C.advance(I: Rule.size()); |
512 | |
513 | // Try a simple unquoted name. |
514 | if (C.peek() != '"') { |
515 | while (isIdentifierChar(C: C.peek())) |
516 | C.advance(); |
517 | StringRef String = Start.upto(C).drop_front(N: Rule.size()); |
518 | if (C.peek() != '>') { |
519 | ErrorCallback(C.location(), |
520 | "expected the '<mcsymbol ...' to be closed by a '>'" ); |
521 | Token.reset(Kind: MIToken::Error, Range: Start.remaining()); |
522 | return Start; |
523 | } |
524 | C.advance(); |
525 | |
526 | Token.reset(Kind: MIToken::MCSymbol, Range: Start.upto(C)).setStringValue(String); |
527 | return C; |
528 | } |
529 | |
530 | // Otherwise lex out a quoted name. |
531 | Cursor R = lexStringConstant(C, ErrorCallback); |
532 | if (!R) { |
533 | ErrorCallback(C.location(), |
534 | "unable to parse quoted string from opening quote" ); |
535 | Token.reset(Kind: MIToken::Error, Range: Start.remaining()); |
536 | return Start; |
537 | } |
538 | StringRef String = Start.upto(C: R).drop_front(N: Rule.size()); |
539 | if (R.peek() != '>') { |
540 | ErrorCallback(R.location(), |
541 | "expected the '<mcsymbol ...' to be closed by a '>'" ); |
542 | Token.reset(Kind: MIToken::Error, Range: Start.remaining()); |
543 | return Start; |
544 | } |
545 | R.advance(); |
546 | |
547 | Token.reset(Kind: MIToken::MCSymbol, Range: Start.upto(C: R)) |
548 | .setOwnedStringValue(unescapeQuotedString(Value: String)); |
549 | return R; |
550 | } |
551 | |
/// Return true if the given character is one of the letters accepted after
/// '0x' as a hexadecimal floating-point prefix: H, K, L, M or R.
static bool isValidHexFloatingPointPrefix(char C) {
  switch (C) {
  case 'H':
  case 'K':
  case 'L':
  case 'M':
  case 'R':
    return true;
  default:
    return false;
  }
}
555 | |
556 | static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { |
557 | C.advance(); |
558 | // Skip over [0-9]*([eE][-+]?[0-9]+)? |
559 | while (isdigit(C.peek())) |
560 | C.advance(); |
561 | if ((C.peek() == 'e' || C.peek() == 'E') && |
562 | (isdigit(C.peek(I: 1)) || |
563 | ((C.peek(I: 1) == '-' || C.peek(I: 1) == '+') && isdigit(C.peek(I: 2))))) { |
564 | C.advance(I: 2); |
565 | while (isdigit(C.peek())) |
566 | C.advance(); |
567 | } |
568 | Token.reset(Kind: MIToken::FloatingPointLiteral, Range: Range.upto(C)); |
569 | return C; |
570 | } |
571 | |
572 | static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) { |
573 | if (C.peek() != '0' || (C.peek(I: 1) != 'x' && C.peek(I: 1) != 'X')) |
574 | return std::nullopt; |
575 | Cursor Range = C; |
576 | C.advance(I: 2); |
577 | unsigned PrefLen = 2; |
578 | if (isValidHexFloatingPointPrefix(C: C.peek())) { |
579 | C.advance(); |
580 | PrefLen++; |
581 | } |
582 | while (isxdigit(C.peek())) |
583 | C.advance(); |
584 | StringRef StrVal = Range.upto(C); |
585 | if (StrVal.size() <= PrefLen) |
586 | return std::nullopt; |
587 | if (PrefLen == 2) |
588 | Token.reset(Kind: MIToken::HexLiteral, Range: Range.upto(C)); |
589 | else // It must be 3, which means that there was a floating-point prefix. |
590 | Token.reset(Kind: MIToken::FloatingPointLiteral, Range: Range.upto(C)); |
591 | return C; |
592 | } |
593 | |
594 | static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { |
595 | if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(I: 1)))) |
596 | return std::nullopt; |
597 | auto Range = C; |
598 | C.advance(); |
599 | while (isdigit(C.peek())) |
600 | C.advance(); |
601 | if (C.peek() == '.') |
602 | return lexFloatingPointLiteral(Range, C, Token); |
603 | StringRef StrVal = Range.upto(C); |
604 | Token.reset(Kind: MIToken::IntegerLiteral, Range: StrVal).setIntegerValue(APSInt(StrVal)); |
605 | return C; |
606 | } |
607 | |
608 | static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { |
609 | return StringSwitch<MIToken::TokenKind>(Identifier) |
610 | .Case(S: "!tbaa" , Value: MIToken::md_tbaa) |
611 | .Case(S: "!alias.scope" , Value: MIToken::md_alias_scope) |
612 | .Case(S: "!noalias" , Value: MIToken::md_noalias) |
613 | .Case(S: "!range" , Value: MIToken::md_range) |
614 | .Case(S: "!DIExpression" , Value: MIToken::md_diexpr) |
615 | .Case(S: "!DILocation" , Value: MIToken::md_dilocation) |
616 | .Default(Value: MIToken::Error); |
617 | } |
618 | |
619 | static Cursor maybeLexExclaim(Cursor C, MIToken &Token, |
620 | ErrorCallbackType ErrorCallback) { |
621 | if (C.peek() != '!') |
622 | return std::nullopt; |
623 | auto Range = C; |
624 | C.advance(I: 1); |
625 | if (isdigit(C.peek()) || !isIdentifierChar(C: C.peek())) { |
626 | Token.reset(Kind: MIToken::exclaim, Range: Range.upto(C)); |
627 | return C; |
628 | } |
629 | while (isIdentifierChar(C: C.peek())) |
630 | C.advance(); |
631 | StringRef StrVal = Range.upto(C); |
632 | Token.reset(Kind: getMetadataKeywordKind(Identifier: StrVal), Range: StrVal); |
633 | if (Token.isError()) |
634 | ErrorCallback(Token.location(), |
635 | "use of unknown metadata keyword '" + StrVal + "'" ); |
636 | return C; |
637 | } |
638 | |
639 | static MIToken::TokenKind symbolToken(char C) { |
640 | switch (C) { |
641 | case ',': |
642 | return MIToken::comma; |
643 | case '.': |
644 | return MIToken::dot; |
645 | case '=': |
646 | return MIToken::equal; |
647 | case ':': |
648 | return MIToken::colon; |
649 | case '(': |
650 | return MIToken::lparen; |
651 | case ')': |
652 | return MIToken::rparen; |
653 | case '{': |
654 | return MIToken::lbrace; |
655 | case '}': |
656 | return MIToken::rbrace; |
657 | case '+': |
658 | return MIToken::plus; |
659 | case '-': |
660 | return MIToken::minus; |
661 | case '<': |
662 | return MIToken::less; |
663 | case '>': |
664 | return MIToken::greater; |
665 | default: |
666 | return MIToken::Error; |
667 | } |
668 | } |
669 | |
670 | static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { |
671 | MIToken::TokenKind Kind; |
672 | unsigned Length = 1; |
673 | if (C.peek() == ':' && C.peek(I: 1) == ':') { |
674 | Kind = MIToken::coloncolon; |
675 | Length = 2; |
676 | } else |
677 | Kind = symbolToken(C: C.peek()); |
678 | if (Kind == MIToken::Error) |
679 | return std::nullopt; |
680 | auto Range = C; |
681 | C.advance(I: Length); |
682 | Token.reset(Kind, Range: Range.upto(C)); |
683 | return C; |
684 | } |
685 | |
686 | static Cursor maybeLexNewline(Cursor C, MIToken &Token) { |
687 | if (!isNewlineChar(C: C.peek())) |
688 | return std::nullopt; |
689 | auto Range = C; |
690 | C.advance(); |
691 | Token.reset(Kind: MIToken::Newline, Range: Range.upto(C)); |
692 | return C; |
693 | } |
694 | |
695 | static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token, |
696 | ErrorCallbackType ErrorCallback) { |
697 | if (C.peek() != '`') |
698 | return std::nullopt; |
699 | auto Range = C; |
700 | C.advance(); |
701 | auto StrRange = C; |
702 | while (C.peek() != '`') { |
703 | if (C.isEOF() || isNewlineChar(C: C.peek())) { |
704 | ErrorCallback( |
705 | C.location(), |
706 | "end of machine instruction reached before the closing '`'" ); |
707 | Token.reset(Kind: MIToken::Error, Range: Range.remaining()); |
708 | return C; |
709 | } |
710 | C.advance(); |
711 | } |
712 | StringRef Value = StrRange.upto(C); |
713 | C.advance(); |
714 | Token.reset(Kind: MIToken::QuotedIRValue, Range: Range.upto(C)).setStringValue(Value); |
715 | return C; |
716 | } |
717 | |
718 | StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, |
719 | ErrorCallbackType ErrorCallback) { |
720 | auto C = skipComment(C: skipWhitespace(C: Cursor(Source))); |
721 | if (C.isEOF()) { |
722 | Token.reset(Kind: MIToken::Eof, Range: C.remaining()); |
723 | return C.remaining(); |
724 | } |
725 | |
726 | C = skipMachineOperandComment(C); |
727 | |
728 | if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) |
729 | return R.remaining(); |
730 | if (Cursor R = maybeLexIdentifier(C, Token)) |
731 | return R.remaining(); |
732 | if (Cursor R = maybeLexJumpTableIndex(C, Token)) |
733 | return R.remaining(); |
734 | if (Cursor R = maybeLexStackObject(C, Token)) |
735 | return R.remaining(); |
736 | if (Cursor R = maybeLexFixedStackObject(C, Token)) |
737 | return R.remaining(); |
738 | if (Cursor R = maybeLexConstantPoolItem(C, Token)) |
739 | return R.remaining(); |
740 | if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback)) |
741 | return R.remaining(); |
742 | if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) |
743 | return R.remaining(); |
744 | if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) |
745 | return R.remaining(); |
746 | if (Cursor R = maybeLexRegister(C, Token, ErrorCallback)) |
747 | return R.remaining(); |
748 | if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) |
749 | return R.remaining(); |
750 | if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) |
751 | return R.remaining(); |
752 | if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback)) |
753 | return R.remaining(); |
754 | if (Cursor R = maybeLexHexadecimalLiteral(C, Token)) |
755 | return R.remaining(); |
756 | if (Cursor R = maybeLexNumericalLiteral(C, Token)) |
757 | return R.remaining(); |
758 | if (Cursor R = maybeLexExclaim(C, Token, ErrorCallback)) |
759 | return R.remaining(); |
760 | if (Cursor R = maybeLexSymbol(C, Token)) |
761 | return R.remaining(); |
762 | if (Cursor R = maybeLexNewline(C, Token)) |
763 | return R.remaining(); |
764 | if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) |
765 | return R.remaining(); |
766 | if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback)) |
767 | return R.remaining(); |
768 | |
769 | Token.reset(Kind: MIToken::Error, Range: C.remaining()); |
770 | ErrorCallback(C.location(), |
771 | Twine("unexpected character '" ) + Twine(C.peek()) + "'" ); |
772 | return C.remaining(); |
773 | } |
774 | |