| 1 | //===- MILexer.cpp - Machine instructions lexer implementation ------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the lexing of machine instructions. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "MILexer.h" |
| 14 | #include "llvm/ADT/StringExtras.h" |
| 15 | #include "llvm/ADT/StringSwitch.h" |
| 16 | #include "llvm/ADT/Twine.h" |
| 17 | #include <cassert> |
| 18 | #include <cctype> |
| 19 | #include <string> |
| 20 | |
| 21 | using namespace llvm; |
| 22 | |
| 23 | namespace { |
| 24 | |
| 25 | using ErrorCallbackType = |
| 26 | function_ref<void(StringRef::iterator Loc, const Twine &)>; |
| 27 | |
| 28 | /// This class provides a way to iterate and get characters from the source |
| 29 | /// string. |
| 30 | class Cursor { |
| 31 | const char *Ptr = nullptr; |
| 32 | const char *End = nullptr; |
| 33 | |
| 34 | public: |
| 35 | Cursor(std::nullopt_t) {} |
| 36 | |
| 37 | explicit Cursor(StringRef Str) { |
| 38 | Ptr = Str.data(); |
| 39 | End = Ptr + Str.size(); |
| 40 | } |
| 41 | |
| 42 | bool isEOF() const { return Ptr == End; } |
| 43 | |
| 44 | char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } |
| 45 | |
| 46 | void advance(unsigned I = 1) { Ptr += I; } |
| 47 | |
| 48 | StringRef remaining() const { return StringRef(Ptr, End - Ptr); } |
| 49 | |
| 50 | StringRef upto(Cursor C) const { |
| 51 | assert(C.Ptr >= Ptr && C.Ptr <= End); |
| 52 | return StringRef(Ptr, C.Ptr - Ptr); |
| 53 | } |
| 54 | |
| 55 | StringRef::iterator location() const { return Ptr; } |
| 56 | |
| 57 | operator bool() const { return Ptr != nullptr; } |
| 58 | }; |
| 59 | |
| 60 | } // end anonymous namespace |
| 61 | |
| 62 | MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { |
| 63 | this->Kind = Kind; |
| 64 | this->Range = Range; |
| 65 | return *this; |
| 66 | } |
| 67 | |
| 68 | MIToken &MIToken::setStringValue(StringRef StrVal) { |
| 69 | StringValue = StrVal; |
| 70 | return *this; |
| 71 | } |
| 72 | |
| 73 | MIToken &MIToken::setOwnedStringValue(std::string StrVal) { |
| 74 | StringValueStorage = std::move(StrVal); |
| 75 | StringValue = StringValueStorage; |
| 76 | return *this; |
| 77 | } |
| 78 | |
| 79 | MIToken &MIToken::setIntegerValue(APSInt IntVal) { |
| 80 | this->IntVal = std::move(IntVal); |
| 81 | return *this; |
| 82 | } |
| 83 | |
| 84 | /// Skip the leading whitespace characters and return the updated cursor. |
| 85 | static Cursor skipWhitespace(Cursor C) { |
| 86 | while (isblank(C.peek())) |
| 87 | C.advance(); |
| 88 | return C; |
| 89 | } |
| 90 | |
/// Returns true if \p C is a line feed or a carriage return.
static bool isNewlineChar(char C) {
  switch (C) {
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
| 92 | |
| 93 | /// Skip a line comment and return the updated cursor. |
| 94 | static Cursor (Cursor C) { |
| 95 | if (C.peek() != ';') |
| 96 | return C; |
| 97 | while (!isNewlineChar(C: C.peek()) && !C.isEOF()) |
| 98 | C.advance(); |
| 99 | return C; |
| 100 | } |
| 101 | |
| 102 | /// Machine operands can have comments, enclosed between /* and */. |
| 103 | /// This eats up all tokens, including /* and */. |
| 104 | static Cursor skipMachineOperandComment(Cursor C) { |
| 105 | if (C.peek() != '/' || C.peek(I: 1) != '*') |
| 106 | return C; |
| 107 | |
| 108 | while (C.peek() != '*' || C.peek(I: 1) != '/') |
| 109 | C.advance(); |
| 110 | |
| 111 | C.advance(); |
| 112 | C.advance(); |
| 113 | return C; |
| 114 | } |
| 115 | |
/// Return true if the given character satisfies the following regular
/// expression: [-a-zA-Z$._0-9]
static bool isIdentifierChar(char C) {
  // The <cctype> classifiers have undefined behavior for negative arguments
  // other than EOF, so the (possibly signed) char goes through unsigned char.
  const auto UC = static_cast<unsigned char>(C);
  return isalpha(UC) || isdigit(UC) || C == '_' || C == '-' || C == '.' ||
         C == '$';
}
| 122 | |
| 123 | /// Unescapes the given string value. |
| 124 | /// |
| 125 | /// Expects the string value to be quoted. |
| 126 | static std::string unescapeQuotedString(StringRef Value) { |
| 127 | assert(Value.front() == '"' && Value.back() == '"'); |
| 128 | Cursor C = Cursor(Value.substr(Start: 1, N: Value.size() - 2)); |
| 129 | |
| 130 | std::string Str; |
| 131 | Str.reserve(res_arg: C.remaining().size()); |
| 132 | while (!C.isEOF()) { |
| 133 | char Char = C.peek(); |
| 134 | if (Char == '\\') { |
| 135 | if (C.peek(I: 1) == '\\') { |
| 136 | // Two '\' become one |
| 137 | Str += '\\'; |
| 138 | C.advance(I: 2); |
| 139 | continue; |
| 140 | } |
| 141 | if (isxdigit(C.peek(I: 1)) && isxdigit(C.peek(I: 2))) { |
| 142 | Str += hexDigitValue(C: C.peek(I: 1)) * 16 + hexDigitValue(C: C.peek(I: 2)); |
| 143 | C.advance(I: 3); |
| 144 | continue; |
| 145 | } |
| 146 | } |
| 147 | Str += Char; |
| 148 | C.advance(); |
| 149 | } |
| 150 | return Str; |
| 151 | } |
| 152 | |
| 153 | /// Lex a string constant using the following regular expression: \"[^\"]*\" |
| 154 | static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) { |
| 155 | assert(C.peek() == '"'); |
| 156 | for (C.advance(); C.peek() != '"'; C.advance()) { |
| 157 | if (C.isEOF() || isNewlineChar(C: C.peek())) { |
| 158 | ErrorCallback( |
| 159 | C.location(), |
| 160 | "end of machine instruction reached before the closing '\"'" ); |
| 161 | return std::nullopt; |
| 162 | } |
| 163 | } |
| 164 | C.advance(); |
| 165 | return C; |
| 166 | } |
| 167 | |
| 168 | static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, |
| 169 | unsigned PrefixLength, ErrorCallbackType ErrorCallback) { |
| 170 | auto Range = C; |
| 171 | C.advance(I: PrefixLength); |
| 172 | if (C.peek() == '"') { |
| 173 | if (Cursor R = lexStringConstant(C, ErrorCallback)) { |
| 174 | StringRef String = Range.upto(C: R); |
| 175 | Token.reset(Kind: Type, Range: String) |
| 176 | .setOwnedStringValue( |
| 177 | unescapeQuotedString(Value: String.drop_front(N: PrefixLength))); |
| 178 | return R; |
| 179 | } |
| 180 | Token.reset(Kind: MIToken::Error, Range: Range.remaining()); |
| 181 | return Range; |
| 182 | } |
| 183 | while (isIdentifierChar(C: C.peek())) |
| 184 | C.advance(); |
| 185 | Token.reset(Kind: Type, Range: Range.upto(C)) |
| 186 | .setStringValue(Range.upto(C).drop_front(N: PrefixLength)); |
| 187 | return C; |
| 188 | } |
| 189 | |
| 190 | static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { |
| 191 | return StringSwitch<MIToken::TokenKind>(Identifier) |
| 192 | .Case(S: "_" , Value: MIToken::underscore) |
| 193 | .Case(S: "implicit" , Value: MIToken::kw_implicit) |
| 194 | .Case(S: "implicit-def" , Value: MIToken::kw_implicit_define) |
| 195 | .Case(S: "def" , Value: MIToken::kw_def) |
| 196 | .Case(S: "dead" , Value: MIToken::kw_dead) |
| 197 | .Case(S: "killed" , Value: MIToken::kw_killed) |
| 198 | .Case(S: "undef" , Value: MIToken::kw_undef) |
| 199 | .Case(S: "internal" , Value: MIToken::kw_internal) |
| 200 | .Case(S: "early-clobber" , Value: MIToken::kw_early_clobber) |
| 201 | .Case(S: "debug-use" , Value: MIToken::kw_debug_use) |
| 202 | .Case(S: "renamable" , Value: MIToken::kw_renamable) |
| 203 | .Case(S: "tied-def" , Value: MIToken::kw_tied_def) |
| 204 | .Case(S: "frame-setup" , Value: MIToken::kw_frame_setup) |
| 205 | .Case(S: "frame-destroy" , Value: MIToken::kw_frame_destroy) |
| 206 | .Case(S: "nnan" , Value: MIToken::kw_nnan) |
| 207 | .Case(S: "ninf" , Value: MIToken::kw_ninf) |
| 208 | .Case(S: "nsz" , Value: MIToken::kw_nsz) |
| 209 | .Case(S: "arcp" , Value: MIToken::kw_arcp) |
| 210 | .Case(S: "contract" , Value: MIToken::kw_contract) |
| 211 | .Case(S: "afn" , Value: MIToken::kw_afn) |
| 212 | .Case(S: "reassoc" , Value: MIToken::kw_reassoc) |
| 213 | .Case(S: "nuw" , Value: MIToken::kw_nuw) |
| 214 | .Case(S: "nsw" , Value: MIToken::kw_nsw) |
| 215 | .Case(S: "nusw" , Value: MIToken::kw_nusw) |
| 216 | .Case(S: "exact" , Value: MIToken::kw_exact) |
| 217 | .Case(S: "nneg" , Value: MIToken::kw_nneg) |
| 218 | .Case(S: "disjoint" , Value: MIToken::kw_disjoint) |
| 219 | .Case(S: "samesign" , Value: MIToken::kw_samesign) |
| 220 | .Case(S: "nofpexcept" , Value: MIToken::kw_nofpexcept) |
| 221 | .Case(S: "unpredictable" , Value: MIToken::kw_unpredictable) |
| 222 | .Case(S: "debug-location" , Value: MIToken::kw_debug_location) |
| 223 | .Case(S: "debug-instr-number" , Value: MIToken::kw_debug_instr_number) |
| 224 | .Case(S: "dbg-instr-ref" , Value: MIToken::kw_dbg_instr_ref) |
| 225 | .Case(S: "same_value" , Value: MIToken::kw_cfi_same_value) |
| 226 | .Case(S: "offset" , Value: MIToken::kw_cfi_offset) |
| 227 | .Case(S: "rel_offset" , Value: MIToken::kw_cfi_rel_offset) |
| 228 | .Case(S: "def_cfa_register" , Value: MIToken::kw_cfi_def_cfa_register) |
| 229 | .Case(S: "def_cfa_offset" , Value: MIToken::kw_cfi_def_cfa_offset) |
| 230 | .Case(S: "adjust_cfa_offset" , Value: MIToken::kw_cfi_adjust_cfa_offset) |
| 231 | .Case(S: "escape" , Value: MIToken::kw_cfi_escape) |
| 232 | .Case(S: "def_cfa" , Value: MIToken::kw_cfi_def_cfa) |
| 233 | .Case(S: "llvm_def_aspace_cfa" , Value: MIToken::kw_cfi_llvm_def_aspace_cfa) |
| 234 | .Case(S: "remember_state" , Value: MIToken::kw_cfi_remember_state) |
| 235 | .Case(S: "restore" , Value: MIToken::kw_cfi_restore) |
| 236 | .Case(S: "restore_state" , Value: MIToken::kw_cfi_restore_state) |
| 237 | .Case(S: "undefined" , Value: MIToken::kw_cfi_undefined) |
| 238 | .Case(S: "register" , Value: MIToken::kw_cfi_register) |
| 239 | .Case(S: "window_save" , Value: MIToken::kw_cfi_window_save) |
| 240 | .Case(S: "negate_ra_sign_state" , |
| 241 | Value: MIToken::kw_cfi_aarch64_negate_ra_sign_state) |
| 242 | .Case(S: "negate_ra_sign_state_with_pc" , |
| 243 | Value: MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc) |
| 244 | .Case(S: "blockaddress" , Value: MIToken::kw_blockaddress) |
| 245 | .Case(S: "intrinsic" , Value: MIToken::kw_intrinsic) |
| 246 | .Case(S: "target-index" , Value: MIToken::kw_target_index) |
| 247 | .Case(S: "half" , Value: MIToken::kw_half) |
| 248 | .Case(S: "bfloat" , Value: MIToken::kw_bfloat) |
| 249 | .Case(S: "float" , Value: MIToken::kw_float) |
| 250 | .Case(S: "double" , Value: MIToken::kw_double) |
| 251 | .Case(S: "x86_fp80" , Value: MIToken::kw_x86_fp80) |
| 252 | .Case(S: "fp128" , Value: MIToken::kw_fp128) |
| 253 | .Case(S: "ppc_fp128" , Value: MIToken::kw_ppc_fp128) |
| 254 | .Case(S: "target-flags" , Value: MIToken::kw_target_flags) |
| 255 | .Case(S: "volatile" , Value: MIToken::kw_volatile) |
| 256 | .Case(S: "non-temporal" , Value: MIToken::kw_non_temporal) |
| 257 | .Case(S: "dereferenceable" , Value: MIToken::kw_dereferenceable) |
| 258 | .Case(S: "invariant" , Value: MIToken::kw_invariant) |
| 259 | .Case(S: "align" , Value: MIToken::kw_align) |
| 260 | .Case(S: "basealign" , Value: MIToken::kw_basealign) |
| 261 | .Case(S: "addrspace" , Value: MIToken::kw_addrspace) |
| 262 | .Case(S: "stack" , Value: MIToken::kw_stack) |
| 263 | .Case(S: "got" , Value: MIToken::kw_got) |
| 264 | .Case(S: "jump-table" , Value: MIToken::kw_jump_table) |
| 265 | .Case(S: "constant-pool" , Value: MIToken::kw_constant_pool) |
| 266 | .Case(S: "call-entry" , Value: MIToken::kw_call_entry) |
| 267 | .Case(S: "custom" , Value: MIToken::kw_custom) |
| 268 | .Case(S: "liveout" , Value: MIToken::kw_liveout) |
| 269 | .Case(S: "landing-pad" , Value: MIToken::kw_landing_pad) |
| 270 | .Case(S: "inlineasm-br-indirect-target" , |
| 271 | Value: MIToken::kw_inlineasm_br_indirect_target) |
| 272 | .Case(S: "ehfunclet-entry" , Value: MIToken::kw_ehfunclet_entry) |
| 273 | .Case(S: "liveins" , Value: MIToken::kw_liveins) |
| 274 | .Case(S: "successors" , Value: MIToken::kw_successors) |
| 275 | .Case(S: "floatpred" , Value: MIToken::kw_floatpred) |
| 276 | .Case(S: "intpred" , Value: MIToken::kw_intpred) |
| 277 | .Case(S: "shufflemask" , Value: MIToken::kw_shufflemask) |
| 278 | .Case(S: "pre-instr-symbol" , Value: MIToken::kw_pre_instr_symbol) |
| 279 | .Case(S: "post-instr-symbol" , Value: MIToken::kw_post_instr_symbol) |
| 280 | .Case(S: "heap-alloc-marker" , Value: MIToken::kw_heap_alloc_marker) |
| 281 | .Case(S: "pcsections" , Value: MIToken::kw_pcsections) |
| 282 | .Case(S: "cfi-type" , Value: MIToken::kw_cfi_type) |
| 283 | .Case(S: "bbsections" , Value: MIToken::kw_bbsections) |
| 284 | .Case(S: "bb_id" , Value: MIToken::kw_bb_id) |
| 285 | .Case(S: "unknown-size" , Value: MIToken::kw_unknown_size) |
| 286 | .Case(S: "unknown-address" , Value: MIToken::kw_unknown_address) |
| 287 | .Case(S: "distinct" , Value: MIToken::kw_distinct) |
| 288 | .Case(S: "ir-block-address-taken" , Value: MIToken::kw_ir_block_address_taken) |
| 289 | .Case(S: "machine-block-address-taken" , |
| 290 | Value: MIToken::kw_machine_block_address_taken) |
| 291 | .Case(S: "call-frame-size" , Value: MIToken::kw_call_frame_size) |
| 292 | .Case(S: "noconvergent" , Value: MIToken::kw_noconvergent) |
| 293 | .Default(Value: MIToken::Identifier); |
| 294 | } |
| 295 | |
| 296 | static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { |
| 297 | if (!isalpha(C.peek()) && C.peek() != '_') |
| 298 | return std::nullopt; |
| 299 | auto Range = C; |
| 300 | while (isIdentifierChar(C: C.peek())) |
| 301 | C.advance(); |
| 302 | auto Identifier = Range.upto(C); |
| 303 | Token.reset(Kind: getIdentifierKind(Identifier), Range: Identifier) |
| 304 | .setStringValue(Identifier); |
| 305 | return C; |
| 306 | } |
| 307 | |
| 308 | static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token, |
| 309 | ErrorCallbackType ErrorCallback) { |
| 310 | bool IsReference = C.remaining().starts_with(Prefix: "%bb." ); |
| 311 | if (!IsReference && !C.remaining().starts_with(Prefix: "bb." )) |
| 312 | return std::nullopt; |
| 313 | auto Range = C; |
| 314 | unsigned PrefixLength = IsReference ? 4 : 3; |
| 315 | C.advance(I: PrefixLength); // Skip '%bb.' or 'bb.' |
| 316 | if (!isdigit(C.peek())) { |
| 317 | Token.reset(Kind: MIToken::Error, Range: C.remaining()); |
| 318 | ErrorCallback(C.location(), "expected a number after '%bb.'" ); |
| 319 | return C; |
| 320 | } |
| 321 | auto NumberRange = C; |
| 322 | while (isdigit(C.peek())) |
| 323 | C.advance(); |
| 324 | StringRef Number = NumberRange.upto(C); |
| 325 | unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' |
| 326 | // TODO: The format bb.<id>.<irname> is supported only when it's not a |
| 327 | // reference. Once we deprecate the format where the irname shows up, we |
| 328 | // should only lex forward if it is a reference. |
| 329 | if (C.peek() == '.') { |
| 330 | C.advance(); // Skip '.' |
| 331 | ++StringOffset; |
| 332 | while (isIdentifierChar(C: C.peek())) |
| 333 | C.advance(); |
| 334 | } |
| 335 | Token.reset(Kind: IsReference ? MIToken::MachineBasicBlock |
| 336 | : MIToken::MachineBasicBlockLabel, |
| 337 | Range: Range.upto(C)) |
| 338 | .setIntegerValue(APSInt(Number)) |
| 339 | .setStringValue(Range.upto(C).drop_front(N: StringOffset)); |
| 340 | return C; |
| 341 | } |
| 342 | |
| 343 | static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, |
| 344 | MIToken::TokenKind Kind) { |
| 345 | if (!C.remaining().starts_with(Prefix: Rule) || !isdigit(C.peek(I: Rule.size()))) |
| 346 | return std::nullopt; |
| 347 | auto Range = C; |
| 348 | C.advance(I: Rule.size()); |
| 349 | auto NumberRange = C; |
| 350 | while (isdigit(C.peek())) |
| 351 | C.advance(); |
| 352 | Token.reset(Kind, Range: Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); |
| 353 | return C; |
| 354 | } |
| 355 | |
| 356 | static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, |
| 357 | MIToken::TokenKind Kind) { |
| 358 | if (!C.remaining().starts_with(Prefix: Rule) || !isdigit(C.peek(I: Rule.size()))) |
| 359 | return std::nullopt; |
| 360 | auto Range = C; |
| 361 | C.advance(I: Rule.size()); |
| 362 | auto NumberRange = C; |
| 363 | while (isdigit(C.peek())) |
| 364 | C.advance(); |
| 365 | StringRef Number = NumberRange.upto(C); |
| 366 | unsigned StringOffset = Rule.size() + Number.size(); |
| 367 | if (C.peek() == '.') { |
| 368 | C.advance(); |
| 369 | ++StringOffset; |
| 370 | while (isIdentifierChar(C: C.peek())) |
| 371 | C.advance(); |
| 372 | } |
| 373 | Token.reset(Kind, Range: Range.upto(C)) |
| 374 | .setIntegerValue(APSInt(Number)) |
| 375 | .setStringValue(Range.upto(C).drop_front(N: StringOffset)); |
| 376 | return C; |
| 377 | } |
| 378 | |
| 379 | static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { |
| 380 | return maybeLexIndex(C, Token, Rule: "%jump-table." , Kind: MIToken::JumpTableIndex); |
| 381 | } |
| 382 | |
| 383 | static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { |
| 384 | return maybeLexIndexAndName(C, Token, Rule: "%stack." , Kind: MIToken::StackObject); |
| 385 | } |
| 386 | |
| 387 | static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { |
| 388 | return maybeLexIndex(C, Token, Rule: "%fixed-stack." , Kind: MIToken::FixedStackObject); |
| 389 | } |
| 390 | |
| 391 | static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { |
| 392 | return maybeLexIndex(C, Token, Rule: "%const." , Kind: MIToken::ConstantPoolItem); |
| 393 | } |
| 394 | |
| 395 | static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token, |
| 396 | ErrorCallbackType ErrorCallback) { |
| 397 | const StringRef Rule = "%subreg." ; |
| 398 | if (!C.remaining().starts_with(Prefix: Rule)) |
| 399 | return std::nullopt; |
| 400 | return lexName(C, Token, Type: MIToken::SubRegisterIndex, PrefixLength: Rule.size(), |
| 401 | ErrorCallback); |
| 402 | } |
| 403 | |
| 404 | static Cursor maybeLexIRBlock(Cursor C, MIToken &Token, |
| 405 | ErrorCallbackType ErrorCallback) { |
| 406 | const StringRef Rule = "%ir-block." ; |
| 407 | if (!C.remaining().starts_with(Prefix: Rule)) |
| 408 | return std::nullopt; |
| 409 | if (isdigit(C.peek(I: Rule.size()))) |
| 410 | return maybeLexIndex(C, Token, Rule, Kind: MIToken::IRBlock); |
| 411 | return lexName(C, Token, Type: MIToken::NamedIRBlock, PrefixLength: Rule.size(), ErrorCallback); |
| 412 | } |
| 413 | |
| 414 | static Cursor maybeLexIRValue(Cursor C, MIToken &Token, |
| 415 | ErrorCallbackType ErrorCallback) { |
| 416 | const StringRef Rule = "%ir." ; |
| 417 | if (!C.remaining().starts_with(Prefix: Rule)) |
| 418 | return std::nullopt; |
| 419 | if (isdigit(C.peek(I: Rule.size()))) |
| 420 | return maybeLexIndex(C, Token, Rule, Kind: MIToken::IRValue); |
| 421 | return lexName(C, Token, Type: MIToken::NamedIRValue, PrefixLength: Rule.size(), ErrorCallback); |
| 422 | } |
| 423 | |
| 424 | static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, |
| 425 | ErrorCallbackType ErrorCallback) { |
| 426 | if (C.peek() != '"') |
| 427 | return std::nullopt; |
| 428 | return lexName(C, Token, Type: MIToken::StringConstant, /*PrefixLength=*/0, |
| 429 | ErrorCallback); |
| 430 | } |
| 431 | |
| 432 | static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { |
| 433 | auto Range = C; |
| 434 | C.advance(); // Skip '%' |
| 435 | auto NumberRange = C; |
| 436 | while (isdigit(C.peek())) |
| 437 | C.advance(); |
| 438 | Token.reset(Kind: MIToken::VirtualRegister, Range: Range.upto(C)) |
| 439 | .setIntegerValue(APSInt(NumberRange.upto(C))); |
| 440 | return C; |
| 441 | } |
| 442 | |
| 443 | /// Returns true for a character allowed in a register name. |
| 444 | static bool isRegisterChar(char C) { |
| 445 | return isIdentifierChar(C) && C != '.'; |
| 446 | } |
| 447 | |
| 448 | static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) { |
| 449 | Cursor Range = C; |
| 450 | C.advance(); // Skip '%' |
| 451 | while (isRegisterChar(C: C.peek())) |
| 452 | C.advance(); |
| 453 | Token.reset(Kind: MIToken::NamedVirtualRegister, Range: Range.upto(C)) |
| 454 | .setStringValue(Range.upto(C).drop_front(N: 1)); // Drop the '%' |
| 455 | return C; |
| 456 | } |
| 457 | |
| 458 | static Cursor maybeLexRegister(Cursor C, MIToken &Token, |
| 459 | ErrorCallbackType ErrorCallback) { |
| 460 | if (C.peek() != '%' && C.peek() != '$') |
| 461 | return std::nullopt; |
| 462 | |
| 463 | if (C.peek() == '%') { |
| 464 | if (isdigit(C.peek(I: 1))) |
| 465 | return lexVirtualRegister(C, Token); |
| 466 | |
| 467 | if (isRegisterChar(C: C.peek(I: 1))) |
| 468 | return lexNamedVirtualRegister(C, Token); |
| 469 | |
| 470 | return std::nullopt; |
| 471 | } |
| 472 | |
| 473 | assert(C.peek() == '$'); |
| 474 | auto Range = C; |
| 475 | C.advance(); // Skip '$' |
| 476 | while (isRegisterChar(C: C.peek())) |
| 477 | C.advance(); |
| 478 | Token.reset(Kind: MIToken::NamedRegister, Range: Range.upto(C)) |
| 479 | .setStringValue(Range.upto(C).drop_front(N: 1)); // Drop the '$' |
| 480 | return C; |
| 481 | } |
| 482 | |
| 483 | static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, |
| 484 | ErrorCallbackType ErrorCallback) { |
| 485 | if (C.peek() != '@') |
| 486 | return std::nullopt; |
| 487 | if (!isdigit(C.peek(I: 1))) |
| 488 | return lexName(C, Token, Type: MIToken::NamedGlobalValue, /*PrefixLength=*/1, |
| 489 | ErrorCallback); |
| 490 | auto Range = C; |
| 491 | C.advance(I: 1); // Skip the '@' |
| 492 | auto NumberRange = C; |
| 493 | while (isdigit(C.peek())) |
| 494 | C.advance(); |
| 495 | Token.reset(Kind: MIToken::GlobalValue, Range: Range.upto(C)) |
| 496 | .setIntegerValue(APSInt(NumberRange.upto(C))); |
| 497 | return C; |
| 498 | } |
| 499 | |
| 500 | static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, |
| 501 | ErrorCallbackType ErrorCallback) { |
| 502 | if (C.peek() != '&') |
| 503 | return std::nullopt; |
| 504 | return lexName(C, Token, Type: MIToken::ExternalSymbol, /*PrefixLength=*/1, |
| 505 | ErrorCallback); |
| 506 | } |
| 507 | |
| 508 | static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token, |
| 509 | ErrorCallbackType ErrorCallback) { |
| 510 | const StringRef Rule = "<mcsymbol " ; |
| 511 | if (!C.remaining().starts_with(Prefix: Rule)) |
| 512 | return std::nullopt; |
| 513 | auto Start = C; |
| 514 | C.advance(I: Rule.size()); |
| 515 | |
| 516 | // Try a simple unquoted name. |
| 517 | if (C.peek() != '"') { |
| 518 | while (isIdentifierChar(C: C.peek())) |
| 519 | C.advance(); |
| 520 | StringRef String = Start.upto(C).drop_front(N: Rule.size()); |
| 521 | if (C.peek() != '>') { |
| 522 | ErrorCallback(C.location(), |
| 523 | "expected the '<mcsymbol ...' to be closed by a '>'" ); |
| 524 | Token.reset(Kind: MIToken::Error, Range: Start.remaining()); |
| 525 | return Start; |
| 526 | } |
| 527 | C.advance(); |
| 528 | |
| 529 | Token.reset(Kind: MIToken::MCSymbol, Range: Start.upto(C)).setStringValue(String); |
| 530 | return C; |
| 531 | } |
| 532 | |
| 533 | // Otherwise lex out a quoted name. |
| 534 | Cursor R = lexStringConstant(C, ErrorCallback); |
| 535 | if (!R) { |
| 536 | ErrorCallback(C.location(), |
| 537 | "unable to parse quoted string from opening quote" ); |
| 538 | Token.reset(Kind: MIToken::Error, Range: Start.remaining()); |
| 539 | return Start; |
| 540 | } |
| 541 | StringRef String = Start.upto(C: R).drop_front(N: Rule.size()); |
| 542 | if (R.peek() != '>') { |
| 543 | ErrorCallback(R.location(), |
| 544 | "expected the '<mcsymbol ...' to be closed by a '>'" ); |
| 545 | Token.reset(Kind: MIToken::Error, Range: Start.remaining()); |
| 546 | return Start; |
| 547 | } |
| 548 | R.advance(); |
| 549 | |
| 550 | Token.reset(Kind: MIToken::MCSymbol, Range: Start.upto(C: R)) |
| 551 | .setOwnedStringValue(unescapeQuotedString(Value: String)); |
| 552 | return R; |
| 553 | } |
| 554 | |
/// Returns true if \p C is one of the letters accepted right after '0x' in a
/// hexadecimal floating-point literal: H, K, L, M or R.
static bool isValidHexFloatingPointPrefix(char C) {
  switch (C) {
  case 'H':
  case 'K':
  case 'L':
  case 'M':
  case 'R':
    return true;
  default:
    return false;
  }
}
| 558 | |
| 559 | static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { |
| 560 | C.advance(); |
| 561 | // Skip over [0-9]*([eE][-+]?[0-9]+)? |
| 562 | while (isdigit(C.peek())) |
| 563 | C.advance(); |
| 564 | if ((C.peek() == 'e' || C.peek() == 'E') && |
| 565 | (isdigit(C.peek(I: 1)) || |
| 566 | ((C.peek(I: 1) == '-' || C.peek(I: 1) == '+') && isdigit(C.peek(I: 2))))) { |
| 567 | C.advance(I: 2); |
| 568 | while (isdigit(C.peek())) |
| 569 | C.advance(); |
| 570 | } |
| 571 | Token.reset(Kind: MIToken::FloatingPointLiteral, Range: Range.upto(C)); |
| 572 | return C; |
| 573 | } |
| 574 | |
| 575 | static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) { |
| 576 | if (C.peek() != '0' || (C.peek(I: 1) != 'x' && C.peek(I: 1) != 'X')) |
| 577 | return std::nullopt; |
| 578 | Cursor Range = C; |
| 579 | C.advance(I: 2); |
| 580 | unsigned PrefLen = 2; |
| 581 | if (isValidHexFloatingPointPrefix(C: C.peek())) { |
| 582 | C.advance(); |
| 583 | PrefLen++; |
| 584 | } |
| 585 | while (isxdigit(C.peek())) |
| 586 | C.advance(); |
| 587 | StringRef StrVal = Range.upto(C); |
| 588 | if (StrVal.size() <= PrefLen) |
| 589 | return std::nullopt; |
| 590 | if (PrefLen == 2) |
| 591 | Token.reset(Kind: MIToken::HexLiteral, Range: Range.upto(C)); |
| 592 | else // It must be 3, which means that there was a floating-point prefix. |
| 593 | Token.reset(Kind: MIToken::FloatingPointLiteral, Range: Range.upto(C)); |
| 594 | return C; |
| 595 | } |
| 596 | |
| 597 | static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { |
| 598 | if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(I: 1)))) |
| 599 | return std::nullopt; |
| 600 | auto Range = C; |
| 601 | C.advance(); |
| 602 | while (isdigit(C.peek())) |
| 603 | C.advance(); |
| 604 | if (C.peek() == '.') |
| 605 | return lexFloatingPointLiteral(Range, C, Token); |
| 606 | StringRef StrVal = Range.upto(C); |
| 607 | Token.reset(Kind: MIToken::IntegerLiteral, Range: StrVal).setIntegerValue(APSInt(StrVal)); |
| 608 | return C; |
| 609 | } |
| 610 | |
| 611 | static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { |
| 612 | return StringSwitch<MIToken::TokenKind>(Identifier) |
| 613 | .Case(S: "!tbaa" , Value: MIToken::md_tbaa) |
| 614 | .Case(S: "!alias.scope" , Value: MIToken::md_alias_scope) |
| 615 | .Case(S: "!noalias" , Value: MIToken::md_noalias) |
| 616 | .Case(S: "!range" , Value: MIToken::md_range) |
| 617 | .Case(S: "!DIExpression" , Value: MIToken::md_diexpr) |
| 618 | .Case(S: "!DILocation" , Value: MIToken::md_dilocation) |
| 619 | .Default(Value: MIToken::Error); |
| 620 | } |
| 621 | |
| 622 | static Cursor maybeLexExclaim(Cursor C, MIToken &Token, |
| 623 | ErrorCallbackType ErrorCallback) { |
| 624 | if (C.peek() != '!') |
| 625 | return std::nullopt; |
| 626 | auto Range = C; |
| 627 | C.advance(I: 1); |
| 628 | if (isdigit(C.peek()) || !isIdentifierChar(C: C.peek())) { |
| 629 | Token.reset(Kind: MIToken::exclaim, Range: Range.upto(C)); |
| 630 | return C; |
| 631 | } |
| 632 | while (isIdentifierChar(C: C.peek())) |
| 633 | C.advance(); |
| 634 | StringRef StrVal = Range.upto(C); |
| 635 | Token.reset(Kind: getMetadataKeywordKind(Identifier: StrVal), Range: StrVal); |
| 636 | if (Token.isError()) |
| 637 | ErrorCallback(Token.location(), |
| 638 | "use of unknown metadata keyword '" + StrVal + "'" ); |
| 639 | return C; |
| 640 | } |
| 641 | |
| 642 | static MIToken::TokenKind symbolToken(char C) { |
| 643 | switch (C) { |
| 644 | case ',': |
| 645 | return MIToken::comma; |
| 646 | case '.': |
| 647 | return MIToken::dot; |
| 648 | case '=': |
| 649 | return MIToken::equal; |
| 650 | case ':': |
| 651 | return MIToken::colon; |
| 652 | case '(': |
| 653 | return MIToken::lparen; |
| 654 | case ')': |
| 655 | return MIToken::rparen; |
| 656 | case '{': |
| 657 | return MIToken::lbrace; |
| 658 | case '}': |
| 659 | return MIToken::rbrace; |
| 660 | case '+': |
| 661 | return MIToken::plus; |
| 662 | case '-': |
| 663 | return MIToken::minus; |
| 664 | case '<': |
| 665 | return MIToken::less; |
| 666 | case '>': |
| 667 | return MIToken::greater; |
| 668 | default: |
| 669 | return MIToken::Error; |
| 670 | } |
| 671 | } |
| 672 | |
| 673 | static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { |
| 674 | MIToken::TokenKind Kind; |
| 675 | unsigned Length = 1; |
| 676 | if (C.peek() == ':' && C.peek(I: 1) == ':') { |
| 677 | Kind = MIToken::coloncolon; |
| 678 | Length = 2; |
| 679 | } else |
| 680 | Kind = symbolToken(C: C.peek()); |
| 681 | if (Kind == MIToken::Error) |
| 682 | return std::nullopt; |
| 683 | auto Range = C; |
| 684 | C.advance(I: Length); |
| 685 | Token.reset(Kind, Range: Range.upto(C)); |
| 686 | return C; |
| 687 | } |
| 688 | |
| 689 | static Cursor maybeLexNewline(Cursor C, MIToken &Token) { |
| 690 | if (!isNewlineChar(C: C.peek())) |
| 691 | return std::nullopt; |
| 692 | auto Range = C; |
| 693 | C.advance(); |
| 694 | Token.reset(Kind: MIToken::Newline, Range: Range.upto(C)); |
| 695 | return C; |
| 696 | } |
| 697 | |
| 698 | static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token, |
| 699 | ErrorCallbackType ErrorCallback) { |
| 700 | if (C.peek() != '`') |
| 701 | return std::nullopt; |
| 702 | auto Range = C; |
| 703 | C.advance(); |
| 704 | auto StrRange = C; |
| 705 | while (C.peek() != '`') { |
| 706 | if (C.isEOF() || isNewlineChar(C: C.peek())) { |
| 707 | ErrorCallback( |
| 708 | C.location(), |
| 709 | "end of machine instruction reached before the closing '`'" ); |
| 710 | Token.reset(Kind: MIToken::Error, Range: Range.remaining()); |
| 711 | return C; |
| 712 | } |
| 713 | C.advance(); |
| 714 | } |
| 715 | StringRef Value = StrRange.upto(C); |
| 716 | C.advance(); |
| 717 | Token.reset(Kind: MIToken::QuotedIRValue, Range: Range.upto(C)).setStringValue(Value); |
| 718 | return C; |
| 719 | } |
| 720 | |
| 721 | StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, |
| 722 | ErrorCallbackType ErrorCallback) { |
| 723 | auto C = skipComment(C: skipWhitespace(C: Cursor(Source))); |
| 724 | if (C.isEOF()) { |
| 725 | Token.reset(Kind: MIToken::Eof, Range: C.remaining()); |
| 726 | return C.remaining(); |
| 727 | } |
| 728 | |
| 729 | C = skipWhitespace(C: skipMachineOperandComment(C)); |
| 730 | |
| 731 | if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) |
| 732 | return R.remaining(); |
| 733 | if (Cursor R = maybeLexIdentifier(C, Token)) |
| 734 | return R.remaining(); |
| 735 | if (Cursor R = maybeLexJumpTableIndex(C, Token)) |
| 736 | return R.remaining(); |
| 737 | if (Cursor R = maybeLexStackObject(C, Token)) |
| 738 | return R.remaining(); |
| 739 | if (Cursor R = maybeLexFixedStackObject(C, Token)) |
| 740 | return R.remaining(); |
| 741 | if (Cursor R = maybeLexConstantPoolItem(C, Token)) |
| 742 | return R.remaining(); |
| 743 | if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback)) |
| 744 | return R.remaining(); |
| 745 | if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) |
| 746 | return R.remaining(); |
| 747 | if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) |
| 748 | return R.remaining(); |
| 749 | if (Cursor R = maybeLexRegister(C, Token, ErrorCallback)) |
| 750 | return R.remaining(); |
| 751 | if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) |
| 752 | return R.remaining(); |
| 753 | if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) |
| 754 | return R.remaining(); |
| 755 | if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback)) |
| 756 | return R.remaining(); |
| 757 | if (Cursor R = maybeLexHexadecimalLiteral(C, Token)) |
| 758 | return R.remaining(); |
| 759 | if (Cursor R = maybeLexNumericalLiteral(C, Token)) |
| 760 | return R.remaining(); |
| 761 | if (Cursor R = maybeLexExclaim(C, Token, ErrorCallback)) |
| 762 | return R.remaining(); |
| 763 | if (Cursor R = maybeLexSymbol(C, Token)) |
| 764 | return R.remaining(); |
| 765 | if (Cursor R = maybeLexNewline(C, Token)) |
| 766 | return R.remaining(); |
| 767 | if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) |
| 768 | return R.remaining(); |
| 769 | if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback)) |
| 770 | return R.remaining(); |
| 771 | |
| 772 | Token.reset(Kind: MIToken::Error, Range: C.remaining()); |
| 773 | ErrorCallback(C.location(), |
| 774 | Twine("unexpected character '" ) + Twine(C.peek()) + "'" ); |
| 775 | return C.remaining(); |
| 776 | } |
| 777 | |