| 1 | //===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Implement the Lexer for .ll files. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "llvm/AsmParser/LLLexer.h" |
| 14 | #include "llvm/ADT/APInt.h" |
| 15 | #include "llvm/ADT/STLExtras.h" |
| 16 | #include "llvm/ADT/StringExtras.h" |
| 17 | #include "llvm/ADT/Twine.h" |
| 18 | #include "llvm/IR/DerivedTypes.h" |
| 19 | #include "llvm/IR/Instruction.h" |
| 20 | #include "llvm/Support/ErrorHandling.h" |
| 21 | #include "llvm/Support/SourceMgr.h" |
| 22 | #include <cassert> |
| 23 | #include <cctype> |
| 24 | #include <cstdio> |
| 25 | |
| 26 | using namespace llvm; |
| 27 | |
| 28 | // Both the lexer and parser can issue error messages. If the lexer issues a |
| 29 | // lexer error, since we do not terminate execution immediately, usually that |
| 30 | // is followed by the parser issuing a parser error. However, the error issued |
| 31 | // by the lexer is more relevant in that case as opposed to potentially more |
| 32 | // generic parser error. So instead of always recording the last error message |
| 33 | // use the `Priority` to establish a priority, with Lexer > Parser > None. We |
| 34 | // record the issued message only if the message has same or higher priority |
| 35 | // than the existing one. This prevents lexer errors from being overwritten by |
| 36 | // parser errors. |
| 37 | void LLLexer::Error(LocTy ErrorLoc, const Twine &Msg, |
| 38 | LLLexer::ErrorPriority Priority) { |
| 39 | if (Priority < ErrorInfo.Priority) |
| 40 | return; |
| 41 | ErrorInfo.Error = SM.GetMessage(Loc: ErrorLoc, Kind: SourceMgr::DK_Error, Msg); |
| 42 | ErrorInfo.Priority = Priority; |
| 43 | } |
| 44 | |
| 45 | void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const { |
| 46 | SM.PrintMessage(Loc: WarningLoc, Kind: SourceMgr::DK_Warning, Msg); |
| 47 | } |
| 48 | |
| 49 | //===----------------------------------------------------------------------===// |
| 50 | // Helper functions. |
| 51 | //===----------------------------------------------------------------------===// |
| 52 | |
| 53 | // atoull - Convert an ascii string of decimal digits into the unsigned long |
| 54 | // long representation... this does not have to do input error checking, |
| 55 | // because we know that the input will be matched by a suitable regex... |
| 56 | // |
| 57 | uint64_t LLLexer::atoull(const char *Buffer, const char *End) { |
| 58 | uint64_t Result = 0; |
| 59 | for (; Buffer != End; Buffer++) { |
| 60 | uint64_t OldRes = Result; |
| 61 | Result *= 10; |
| 62 | Result += *Buffer-'0'; |
| 63 | if (Result < OldRes) { // overflow detected. |
| 64 | LexError(Msg: "constant bigger than 64 bits detected" ); |
| 65 | return 0; |
| 66 | } |
| 67 | } |
| 68 | return Result; |
| 69 | } |
| 70 | |
| 71 | uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { |
| 72 | uint64_t Result = 0; |
| 73 | for (; Buffer != End; ++Buffer) { |
| 74 | uint64_t OldRes = Result; |
| 75 | Result *= 16; |
| 76 | Result += hexDigitValue(C: *Buffer); |
| 77 | |
| 78 | if (Result < OldRes) { // overflow detected. |
| 79 | LexError(Msg: "constant bigger than 64 bits detected" ); |
| 80 | return 0; |
| 81 | } |
| 82 | } |
| 83 | return Result; |
| 84 | } |
| 85 | |
| 86 | void LLLexer::HexToIntPair(const char *Buffer, const char *End, |
| 87 | uint64_t Pair[2]) { |
| 88 | Pair[0] = 0; |
| 89 | if (End - Buffer >= 16) { |
| 90 | for (int i = 0; i < 16; i++, Buffer++) { |
| 91 | assert(Buffer != End); |
| 92 | Pair[0] *= 16; |
| 93 | Pair[0] += hexDigitValue(C: *Buffer); |
| 94 | } |
| 95 | } |
| 96 | Pair[1] = 0; |
| 97 | for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) { |
| 98 | Pair[1] *= 16; |
| 99 | Pair[1] += hexDigitValue(C: *Buffer); |
| 100 | } |
| 101 | if (Buffer != End) |
| 102 | LexError(Msg: "constant bigger than 128 bits detected" ); |
| 103 | } |
| 104 | |
| 105 | /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into |
| 106 | /// { low64, high16 } as usual for an APInt. |
| 107 | void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, |
| 108 | uint64_t Pair[2]) { |
| 109 | Pair[1] = 0; |
| 110 | for (int i=0; i<4 && Buffer != End; i++, Buffer++) { |
| 111 | assert(Buffer != End); |
| 112 | Pair[1] *= 16; |
| 113 | Pair[1] += hexDigitValue(C: *Buffer); |
| 114 | } |
| 115 | Pair[0] = 0; |
| 116 | for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) { |
| 117 | Pair[0] *= 16; |
| 118 | Pair[0] += hexDigitValue(C: *Buffer); |
| 119 | } |
| 120 | if (Buffer != End) |
| 121 | LexError(Msg: "constant bigger than 128 bits detected" ); |
| 122 | } |
| 123 | |
| 124 | // UnEscapeLexed - Run through the specified buffer and change \xx codes to the |
| 125 | // appropriate character. |
| 126 | static void UnEscapeLexed(std::string &Str) { |
| 127 | if (Str.empty()) return; |
| 128 | |
| 129 | char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size(); |
| 130 | char *BOut = Buffer; |
| 131 | for (char *BIn = Buffer; BIn != EndBuffer; ) { |
| 132 | if (BIn[0] == '\\') { |
| 133 | if (BIn < EndBuffer-1 && BIn[1] == '\\') { |
| 134 | *BOut++ = '\\'; // Two \ becomes one |
| 135 | BIn += 2; |
| 136 | } else if (BIn < EndBuffer-2 && |
| 137 | isxdigit(static_cast<unsigned char>(BIn[1])) && |
| 138 | isxdigit(static_cast<unsigned char>(BIn[2]))) { |
| 139 | *BOut = hexDigitValue(C: BIn[1]) * 16 + hexDigitValue(C: BIn[2]); |
| 140 | BIn += 3; // Skip over handled chars |
| 141 | ++BOut; |
| 142 | } else { |
| 143 | *BOut++ = *BIn++; |
| 144 | } |
| 145 | } else { |
| 146 | *BOut++ = *BIn++; |
| 147 | } |
| 148 | } |
| 149 | Str.resize(n: BOut-Buffer); |
| 150 | } |
| 151 | |
/// isLabelChar - Return true if C may appear in a label, i.e. it matches
/// [-a-zA-Z$._0-9].
static bool isLabelChar(char C) {
  switch (C) {
  case '-':
  case '$':
  case '.':
  case '_':
    return true;
  default:
    // Go through unsigned char so that bytes with the high bit set are passed
    // to isalnum as valid arguments.
    return isalnum(static_cast<unsigned char>(C)) != 0;
  }
}
| 157 | |
| 158 | /// isLabelTail - Return true if this pointer points to a valid end of a label. |
| 159 | static const char *isLabelTail(const char *CurPtr) { |
| 160 | while (true) { |
| 161 | if (CurPtr[0] == ':') return CurPtr+1; |
| 162 | if (!isLabelChar(C: CurPtr[0])) return nullptr; |
| 163 | ++CurPtr; |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | //===----------------------------------------------------------------------===// |
| 168 | // Lexer definition. |
| 169 | //===----------------------------------------------------------------------===// |
| 170 | |
| 171 | LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err, |
| 172 | LLVMContext &C) |
| 173 | : CurBuf(StartBuf), ErrorInfo(Err), SM(SM), Context(C) { |
| 174 | CurPtr = CurBuf.begin(); |
| 175 | } |
| 176 | |
| 177 | int LLLexer::getNextChar() { |
| 178 | char CurChar = *CurPtr++; |
| 179 | switch (CurChar) { |
| 180 | default: return (unsigned char)CurChar; |
| 181 | case 0: |
| 182 | // A nul character in the stream is either the end of the current buffer or |
| 183 | // a random nul in the file. Disambiguate that here. |
| 184 | if (CurPtr-1 != CurBuf.end()) |
| 185 | return 0; // Just whitespace. |
| 186 | |
| 187 | // Otherwise, return end of file. |
| 188 | --CurPtr; // Another call to lex will return EOF again. |
| 189 | return EOF; |
| 190 | } |
| 191 | } |
| 192 | |
| 193 | lltok::Kind LLLexer::LexToken() { |
| 194 | while (true) { |
| 195 | TokStart = CurPtr; |
| 196 | |
| 197 | int CurChar = getNextChar(); |
| 198 | switch (CurChar) { |
| 199 | default: |
| 200 | // Handle letters: [a-zA-Z_] |
| 201 | if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_') |
| 202 | return LexIdentifier(); |
| 203 | return lltok::Error; |
| 204 | case EOF: return lltok::Eof; |
| 205 | case 0: |
| 206 | case ' ': |
| 207 | case '\t': |
| 208 | case '\n': |
| 209 | case '\r': |
| 210 | // Ignore whitespace. |
| 211 | continue; |
| 212 | case '+': return LexPositive(); |
| 213 | case '@': return LexAt(); |
| 214 | case '$': return LexDollar(); |
| 215 | case '%': return LexPercent(); |
| 216 | case '"': return LexQuote(); |
| 217 | case '.': |
| 218 | if (const char *Ptr = isLabelTail(CurPtr)) { |
| 219 | CurPtr = Ptr; |
| 220 | StrVal.assign(first: TokStart, last: CurPtr-1); |
| 221 | return lltok::LabelStr; |
| 222 | } |
| 223 | if (CurPtr[0] == '.' && CurPtr[1] == '.') { |
| 224 | CurPtr += 2; |
| 225 | return lltok::dotdotdot; |
| 226 | } |
| 227 | return lltok::Error; |
| 228 | case ';': |
| 229 | SkipLineComment(); |
| 230 | continue; |
| 231 | case '!': return LexExclaim(); |
| 232 | case '^': |
| 233 | return LexCaret(); |
| 234 | case ':': |
| 235 | return lltok::colon; |
| 236 | case '#': return LexHash(); |
| 237 | case '0': case '1': case '2': case '3': case '4': |
| 238 | case '5': case '6': case '7': case '8': case '9': |
| 239 | case '-': |
| 240 | return LexDigitOrNegative(); |
| 241 | case '=': return lltok::equal; |
| 242 | case '[': return lltok::lsquare; |
| 243 | case ']': return lltok::rsquare; |
| 244 | case '{': return lltok::lbrace; |
| 245 | case '}': return lltok::rbrace; |
| 246 | case '<': return lltok::less; |
| 247 | case '>': return lltok::greater; |
| 248 | case '(': return lltok::lparen; |
| 249 | case ')': return lltok::rparen; |
| 250 | case ',': return lltok::comma; |
| 251 | case '*': return lltok::star; |
| 252 | case '|': return lltok::bar; |
| 253 | case '/': |
| 254 | if (getNextChar() != '*') |
| 255 | return lltok::Error; |
| 256 | if (SkipCComment()) |
| 257 | return lltok::Error; |
| 258 | continue; |
| 259 | } |
| 260 | } |
| 261 | } |
| 262 | |
| 263 | void LLLexer::() { |
| 264 | while (true) { |
| 265 | if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF) |
| 266 | return; |
| 267 | } |
| 268 | } |
| 269 | |
| 270 | /// This skips C-style /**/ comments. Returns true if there |
| 271 | /// was an error. |
| 272 | bool LLLexer::() { |
| 273 | while (true) { |
| 274 | int CurChar = getNextChar(); |
| 275 | switch (CurChar) { |
| 276 | case EOF: |
| 277 | LexError(Msg: "unterminated comment" ); |
| 278 | return true; |
| 279 | case '*': |
| 280 | // End of the comment? |
| 281 | CurChar = getNextChar(); |
| 282 | if (CurChar == '/') |
| 283 | return false; |
| 284 | if (CurChar == EOF) { |
| 285 | LexError(Msg: "unterminated comment" ); |
| 286 | return true; |
| 287 | } |
| 288 | } |
| 289 | } |
| 290 | } |
| 291 | |
| 292 | /// Lex all tokens that start with an @ character. |
| 293 | /// GlobalVar @\"[^\"]*\" |
| 294 | /// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]* |
| 295 | /// GlobalVarID @[0-9]+ |
| 296 | lltok::Kind LLLexer::LexAt() { |
| 297 | return LexVar(Var: lltok::GlobalVar, VarID: lltok::GlobalID); |
| 298 | } |
| 299 | |
| 300 | lltok::Kind LLLexer::LexDollar() { |
| 301 | if (const char *Ptr = isLabelTail(CurPtr: TokStart)) { |
| 302 | CurPtr = Ptr; |
| 303 | StrVal.assign(first: TokStart, last: CurPtr - 1); |
| 304 | return lltok::LabelStr; |
| 305 | } |
| 306 | |
| 307 | // Handle DollarStringConstant: $\"[^\"]*\" |
| 308 | if (CurPtr[0] == '"') { |
| 309 | ++CurPtr; |
| 310 | |
| 311 | while (true) { |
| 312 | int CurChar = getNextChar(); |
| 313 | |
| 314 | if (CurChar == EOF) { |
| 315 | LexError(Msg: "end of file in COMDAT variable name" ); |
| 316 | return lltok::Error; |
| 317 | } |
| 318 | if (CurChar == '"') { |
| 319 | StrVal.assign(first: TokStart + 2, last: CurPtr - 1); |
| 320 | UnEscapeLexed(Str&: StrVal); |
| 321 | if (StringRef(StrVal).contains(C: 0)) { |
| 322 | LexError(Msg: "NUL character is not allowed in names" ); |
| 323 | return lltok::Error; |
| 324 | } |
| 325 | return lltok::ComdatVar; |
| 326 | } |
| 327 | } |
| 328 | } |
| 329 | |
| 330 | // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]* |
| 331 | if (ReadVarName()) |
| 332 | return lltok::ComdatVar; |
| 333 | |
| 334 | return lltok::Error; |
| 335 | } |
| 336 | |
| 337 | /// ReadString - Read a string until the closing quote. |
| 338 | lltok::Kind LLLexer::ReadString(lltok::Kind kind) { |
| 339 | const char *Start = CurPtr; |
| 340 | while (true) { |
| 341 | int CurChar = getNextChar(); |
| 342 | |
| 343 | if (CurChar == EOF) { |
| 344 | LexError(Msg: "end of file in string constant" ); |
| 345 | return lltok::Error; |
| 346 | } |
| 347 | if (CurChar == '"') { |
| 348 | StrVal.assign(first: Start, last: CurPtr-1); |
| 349 | UnEscapeLexed(Str&: StrVal); |
| 350 | return kind; |
| 351 | } |
| 352 | } |
| 353 | } |
| 354 | |
| 355 | /// ReadVarName - Read the rest of a token containing a variable name. |
| 356 | bool LLLexer::ReadVarName() { |
| 357 | const char *NameStart = CurPtr; |
| 358 | if (isalpha(static_cast<unsigned char>(CurPtr[0])) || |
| 359 | CurPtr[0] == '-' || CurPtr[0] == '$' || |
| 360 | CurPtr[0] == '.' || CurPtr[0] == '_') { |
| 361 | ++CurPtr; |
| 362 | while (isalnum(static_cast<unsigned char>(CurPtr[0])) || |
| 363 | CurPtr[0] == '-' || CurPtr[0] == '$' || |
| 364 | CurPtr[0] == '.' || CurPtr[0] == '_') |
| 365 | ++CurPtr; |
| 366 | |
| 367 | StrVal.assign(first: NameStart, last: CurPtr); |
| 368 | return true; |
| 369 | } |
| 370 | return false; |
| 371 | } |
| 372 | |
| 373 | // Lex an ID: [0-9]+. On success, the ID is stored in UIntVal and Token is |
| 374 | // returned, otherwise the Error token is returned. |
| 375 | lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) { |
| 376 | if (!isdigit(static_cast<unsigned char>(CurPtr[0]))) |
| 377 | return lltok::Error; |
| 378 | |
| 379 | for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr) |
| 380 | /*empty*/; |
| 381 | |
| 382 | uint64_t Val = atoull(Buffer: TokStart + 1, End: CurPtr); |
| 383 | if ((unsigned)Val != Val) |
| 384 | LexError(Msg: "invalid value number (too large)" ); |
| 385 | UIntVal = unsigned(Val); |
| 386 | return Token; |
| 387 | } |
| 388 | |
| 389 | lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) { |
| 390 | // Handle StringConstant: \"[^\"]*\" |
| 391 | if (CurPtr[0] == '"') { |
| 392 | ++CurPtr; |
| 393 | |
| 394 | while (true) { |
| 395 | int CurChar = getNextChar(); |
| 396 | |
| 397 | if (CurChar == EOF) { |
| 398 | LexError(Msg: "end of file in global variable name" ); |
| 399 | return lltok::Error; |
| 400 | } |
| 401 | if (CurChar == '"') { |
| 402 | StrVal.assign(first: TokStart+2, last: CurPtr-1); |
| 403 | UnEscapeLexed(Str&: StrVal); |
| 404 | if (StringRef(StrVal).contains(C: 0)) { |
| 405 | LexError(Msg: "NUL character is not allowed in names" ); |
| 406 | return lltok::Error; |
| 407 | } |
| 408 | return Var; |
| 409 | } |
| 410 | } |
| 411 | } |
| 412 | |
| 413 | // Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]* |
| 414 | if (ReadVarName()) |
| 415 | return Var; |
| 416 | |
| 417 | // Handle VarID: [0-9]+ |
| 418 | return LexUIntID(Token: VarID); |
| 419 | } |
| 420 | |
| 421 | /// Lex all tokens that start with a % character. |
| 422 | /// LocalVar ::= %\"[^\"]*\" |
| 423 | /// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]* |
| 424 | /// LocalVarID ::= %[0-9]+ |
| 425 | lltok::Kind LLLexer::LexPercent() { |
| 426 | return LexVar(Var: lltok::LocalVar, VarID: lltok::LocalVarID); |
| 427 | } |
| 428 | |
| 429 | /// Lex all tokens that start with a " character. |
| 430 | /// QuoteLabel "[^"]+": |
| 431 | /// StringConstant "[^"]*" |
| 432 | lltok::Kind LLLexer::LexQuote() { |
| 433 | lltok::Kind kind = ReadString(kind: lltok::StringConstant); |
| 434 | if (kind == lltok::Error || kind == lltok::Eof) |
| 435 | return kind; |
| 436 | |
| 437 | if (CurPtr[0] == ':') { |
| 438 | ++CurPtr; |
| 439 | if (StringRef(StrVal).contains(C: 0)) { |
| 440 | LexError(Msg: "NUL character is not allowed in names" ); |
| 441 | kind = lltok::Error; |
| 442 | } else { |
| 443 | kind = lltok::LabelStr; |
| 444 | } |
| 445 | } |
| 446 | |
| 447 | return kind; |
| 448 | } |
| 449 | |
| 450 | /// Lex all tokens that start with a ! character. |
| 451 | /// !foo |
| 452 | /// ! |
| 453 | lltok::Kind LLLexer::LexExclaim() { |
| 454 | // Lex a metadata name as a MetadataVar. |
| 455 | if (isalpha(static_cast<unsigned char>(CurPtr[0])) || |
| 456 | CurPtr[0] == '-' || CurPtr[0] == '$' || |
| 457 | CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') { |
| 458 | ++CurPtr; |
| 459 | while (isalnum(static_cast<unsigned char>(CurPtr[0])) || |
| 460 | CurPtr[0] == '-' || CurPtr[0] == '$' || |
| 461 | CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') |
| 462 | ++CurPtr; |
| 463 | |
| 464 | StrVal.assign(first: TokStart+1, last: CurPtr); // Skip ! |
| 465 | UnEscapeLexed(Str&: StrVal); |
| 466 | return lltok::MetadataVar; |
| 467 | } |
| 468 | return lltok::exclaim; |
| 469 | } |
| 470 | |
| 471 | /// Lex all tokens that start with a ^ character. |
| 472 | /// SummaryID ::= ^[0-9]+ |
| 473 | lltok::Kind LLLexer::LexCaret() { |
| 474 | // Handle SummaryID: ^[0-9]+ |
| 475 | return LexUIntID(Token: lltok::SummaryID); |
| 476 | } |
| 477 | |
| 478 | /// Lex all tokens that start with a # character. |
| 479 | /// AttrGrpID ::= #[0-9]+ |
| 480 | /// Hash ::= # |
| 481 | lltok::Kind LLLexer::LexHash() { |
| 482 | // Handle AttrGrpID: #[0-9]+ |
| 483 | if (isdigit(static_cast<unsigned char>(CurPtr[0]))) |
| 484 | return LexUIntID(Token: lltok::AttrGrpID); |
| 485 | return lltok::hash; |
| 486 | } |
| 487 | |
| 488 | /// Lex a label, integer type, keyword, or hexadecimal integer constant. |
| 489 | /// Label [-a-zA-Z$._0-9]+: |
| 490 | /// IntegerType i[0-9]+ |
| 491 | /// Keyword sdiv, float, ... |
| 492 | /// HexIntConstant [us]0x[0-9A-Fa-f]+ |
| 493 | lltok::Kind LLLexer::LexIdentifier() { |
| 494 | const char *StartChar = CurPtr; |
| 495 | const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar; |
| 496 | const char *KeywordEnd = nullptr; |
| 497 | |
| 498 | for (; isLabelChar(C: *CurPtr); ++CurPtr) { |
| 499 | // If we decide this is an integer, remember the end of the sequence. |
| 500 | if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr))) |
| 501 | IntEnd = CurPtr; |
| 502 | if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) && |
| 503 | *CurPtr != '_') |
| 504 | KeywordEnd = CurPtr; |
| 505 | } |
| 506 | |
| 507 | // If we stopped due to a colon, unless we were directed to ignore it, |
| 508 | // this really is a label. |
| 509 | if (!IgnoreColonInIdentifiers && *CurPtr == ':') { |
| 510 | StrVal.assign(first: StartChar-1, last: CurPtr++); |
| 511 | return lltok::LabelStr; |
| 512 | } |
| 513 | |
| 514 | // Otherwise, this wasn't a label. If this was valid as an integer type, |
| 515 | // return it. |
| 516 | if (!IntEnd) IntEnd = CurPtr; |
| 517 | if (IntEnd != StartChar) { |
| 518 | CurPtr = IntEnd; |
| 519 | uint64_t NumBits = atoull(Buffer: StartChar, End: CurPtr); |
| 520 | if (NumBits < IntegerType::MIN_INT_BITS || |
| 521 | NumBits > IntegerType::MAX_INT_BITS) { |
| 522 | LexError(Msg: "bitwidth for integer type out of range" ); |
| 523 | return lltok::Error; |
| 524 | } |
| 525 | TyVal = IntegerType::get(C&: Context, NumBits); |
| 526 | return lltok::Type; |
| 527 | } |
| 528 | |
| 529 | // Otherwise, this was a letter sequence. See which keyword this is. |
| 530 | if (!KeywordEnd) KeywordEnd = CurPtr; |
| 531 | CurPtr = KeywordEnd; |
| 532 | --StartChar; |
| 533 | StringRef Keyword(StartChar, CurPtr - StartChar); |
| 534 | |
| 535 | #define KEYWORD(STR) \ |
| 536 | do { \ |
| 537 | if (Keyword == #STR) \ |
| 538 | return lltok::kw_##STR; \ |
| 539 | } while (false) |
| 540 | |
| 541 | KEYWORD(true); KEYWORD(false); |
| 542 | KEYWORD(declare); KEYWORD(define); |
| 543 | KEYWORD(global); KEYWORD(constant); |
| 544 | |
| 545 | KEYWORD(dso_local); |
| 546 | KEYWORD(dso_preemptable); |
| 547 | |
| 548 | KEYWORD(private); |
| 549 | KEYWORD(internal); |
| 550 | KEYWORD(available_externally); |
| 551 | KEYWORD(linkonce); |
| 552 | KEYWORD(linkonce_odr); |
| 553 | KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg". |
| 554 | KEYWORD(weak_odr); |
| 555 | KEYWORD(appending); |
| 556 | KEYWORD(dllimport); |
| 557 | KEYWORD(dllexport); |
| 558 | KEYWORD(common); |
| 559 | KEYWORD(default); |
| 560 | KEYWORD(hidden); |
| 561 | KEYWORD(protected); |
| 562 | KEYWORD(unnamed_addr); |
| 563 | KEYWORD(local_unnamed_addr); |
| 564 | KEYWORD(externally_initialized); |
| 565 | KEYWORD(extern_weak); |
| 566 | KEYWORD(external); |
| 567 | KEYWORD(thread_local); |
| 568 | KEYWORD(localdynamic); |
| 569 | KEYWORD(initialexec); |
| 570 | KEYWORD(localexec); |
| 571 | KEYWORD(zeroinitializer); |
| 572 | KEYWORD(undef); |
| 573 | KEYWORD(null); |
| 574 | KEYWORD(none); |
| 575 | KEYWORD(poison); |
| 576 | KEYWORD(to); |
| 577 | KEYWORD(caller); |
| 578 | KEYWORD(within); |
| 579 | KEYWORD(from); |
| 580 | KEYWORD(tail); |
| 581 | KEYWORD(musttail); |
| 582 | KEYWORD(notail); |
| 583 | KEYWORD(target); |
| 584 | KEYWORD(triple); |
| 585 | KEYWORD(source_filename); |
| 586 | KEYWORD(unwind); |
| 587 | KEYWORD(datalayout); |
| 588 | KEYWORD(volatile); |
| 589 | KEYWORD(atomic); |
| 590 | KEYWORD(unordered); |
| 591 | KEYWORD(monotonic); |
| 592 | KEYWORD(acquire); |
| 593 | KEYWORD(release); |
| 594 | KEYWORD(acq_rel); |
| 595 | KEYWORD(seq_cst); |
| 596 | KEYWORD(syncscope); |
| 597 | |
| 598 | KEYWORD(nnan); |
| 599 | KEYWORD(ninf); |
| 600 | KEYWORD(nsz); |
| 601 | KEYWORD(arcp); |
| 602 | KEYWORD(contract); |
| 603 | KEYWORD(reassoc); |
| 604 | KEYWORD(afn); |
| 605 | KEYWORD(fast); |
| 606 | KEYWORD(nuw); |
| 607 | KEYWORD(nsw); |
| 608 | KEYWORD(nusw); |
| 609 | KEYWORD(exact); |
| 610 | KEYWORD(disjoint); |
| 611 | KEYWORD(inbounds); |
| 612 | KEYWORD(nneg); |
| 613 | KEYWORD(samesign); |
| 614 | KEYWORD(inrange); |
| 615 | KEYWORD(addrspace); |
| 616 | KEYWORD(section); |
| 617 | KEYWORD(partition); |
| 618 | KEYWORD(code_model); |
| 619 | KEYWORD(alias); |
| 620 | KEYWORD(ifunc); |
| 621 | KEYWORD(module); |
| 622 | KEYWORD(asm); |
| 623 | KEYWORD(sideeffect); |
| 624 | KEYWORD(inteldialect); |
| 625 | KEYWORD(gc); |
| 626 | KEYWORD(prefix); |
| 627 | KEYWORD(prologue); |
| 628 | |
| 629 | KEYWORD(no_sanitize_address); |
| 630 | KEYWORD(no_sanitize_hwaddress); |
| 631 | KEYWORD(sanitize_address_dyninit); |
| 632 | |
| 633 | KEYWORD(ccc); |
| 634 | KEYWORD(fastcc); |
| 635 | KEYWORD(coldcc); |
| 636 | KEYWORD(cfguard_checkcc); |
| 637 | KEYWORD(x86_stdcallcc); |
| 638 | KEYWORD(x86_fastcallcc); |
| 639 | KEYWORD(x86_thiscallcc); |
| 640 | KEYWORD(x86_vectorcallcc); |
| 641 | KEYWORD(arm_apcscc); |
| 642 | KEYWORD(arm_aapcscc); |
| 643 | KEYWORD(arm_aapcs_vfpcc); |
| 644 | KEYWORD(aarch64_vector_pcs); |
| 645 | KEYWORD(aarch64_sve_vector_pcs); |
| 646 | KEYWORD(aarch64_sme_preservemost_from_x0); |
| 647 | KEYWORD(aarch64_sme_preservemost_from_x1); |
| 648 | KEYWORD(aarch64_sme_preservemost_from_x2); |
| 649 | KEYWORD(msp430_intrcc); |
| 650 | KEYWORD(avr_intrcc); |
| 651 | KEYWORD(avr_signalcc); |
| 652 | KEYWORD(ptx_kernel); |
| 653 | KEYWORD(ptx_device); |
| 654 | KEYWORD(spir_kernel); |
| 655 | KEYWORD(spir_func); |
| 656 | KEYWORD(intel_ocl_bicc); |
| 657 | KEYWORD(x86_64_sysvcc); |
| 658 | KEYWORD(win64cc); |
| 659 | KEYWORD(x86_regcallcc); |
| 660 | KEYWORD(swiftcc); |
| 661 | KEYWORD(swifttailcc); |
| 662 | KEYWORD(anyregcc); |
| 663 | KEYWORD(preserve_mostcc); |
| 664 | KEYWORD(preserve_allcc); |
| 665 | KEYWORD(preserve_nonecc); |
| 666 | KEYWORD(ghccc); |
| 667 | KEYWORD(x86_intrcc); |
| 668 | KEYWORD(hhvmcc); |
| 669 | KEYWORD(hhvm_ccc); |
| 670 | KEYWORD(cxx_fast_tlscc); |
| 671 | KEYWORD(amdgpu_vs); |
| 672 | KEYWORD(amdgpu_ls); |
| 673 | KEYWORD(amdgpu_hs); |
| 674 | KEYWORD(amdgpu_es); |
| 675 | KEYWORD(amdgpu_gs); |
| 676 | KEYWORD(amdgpu_ps); |
| 677 | KEYWORD(amdgpu_cs); |
| 678 | KEYWORD(amdgpu_cs_chain); |
| 679 | KEYWORD(amdgpu_cs_chain_preserve); |
| 680 | KEYWORD(amdgpu_kernel); |
| 681 | KEYWORD(amdgpu_gfx); |
| 682 | KEYWORD(tailcc); |
| 683 | KEYWORD(m68k_rtdcc); |
| 684 | KEYWORD(graalcc); |
| 685 | KEYWORD(riscv_vector_cc); |
| 686 | KEYWORD(riscv_vls_cc); |
| 687 | |
| 688 | KEYWORD(cc); |
| 689 | KEYWORD(c); |
| 690 | |
| 691 | KEYWORD(attributes); |
| 692 | KEYWORD(sync); |
| 693 | KEYWORD(async); |
| 694 | |
| 695 | #define GET_ATTR_NAMES |
| 696 | #define ATTRIBUTE_ENUM(ENUM_NAME, DISPLAY_NAME) \ |
| 697 | KEYWORD(DISPLAY_NAME); |
| 698 | #include "llvm/IR/Attributes.inc" |
| 699 | |
| 700 | KEYWORD(read); |
| 701 | KEYWORD(write); |
| 702 | KEYWORD(readwrite); |
| 703 | KEYWORD(argmem); |
| 704 | KEYWORD(inaccessiblemem); |
| 705 | KEYWORD(errnomem); |
| 706 | KEYWORD(argmemonly); |
| 707 | KEYWORD(inaccessiblememonly); |
| 708 | KEYWORD(inaccessiblemem_or_argmemonly); |
| 709 | KEYWORD(nocapture); |
| 710 | KEYWORD(address_is_null); |
| 711 | KEYWORD(address); |
| 712 | KEYWORD(provenance); |
| 713 | KEYWORD(read_provenance); |
| 714 | |
| 715 | // nofpclass attribute |
| 716 | KEYWORD(all); |
| 717 | KEYWORD(nan); |
| 718 | KEYWORD(snan); |
| 719 | KEYWORD(qnan); |
| 720 | KEYWORD(inf); |
| 721 | // ninf already a keyword |
| 722 | KEYWORD(pinf); |
| 723 | KEYWORD(norm); |
| 724 | KEYWORD(nnorm); |
| 725 | KEYWORD(pnorm); |
| 726 | // sub already a keyword |
| 727 | KEYWORD(nsub); |
| 728 | KEYWORD(psub); |
| 729 | KEYWORD(zero); |
| 730 | KEYWORD(nzero); |
| 731 | KEYWORD(pzero); |
| 732 | |
| 733 | KEYWORD(type); |
| 734 | KEYWORD(opaque); |
| 735 | |
| 736 | KEYWORD(comdat); |
| 737 | |
| 738 | // Comdat types |
| 739 | KEYWORD(any); |
| 740 | KEYWORD(exactmatch); |
| 741 | KEYWORD(largest); |
| 742 | KEYWORD(nodeduplicate); |
| 743 | KEYWORD(samesize); |
| 744 | |
| 745 | KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); |
| 746 | KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); |
| 747 | KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole); |
| 748 | KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une); |
| 749 | |
| 750 | KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax); |
| 751 | KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin); |
| 752 | KEYWORD(fmaximum); |
| 753 | KEYWORD(fminimum); |
| 754 | KEYWORD(uinc_wrap); |
| 755 | KEYWORD(udec_wrap); |
| 756 | KEYWORD(usub_cond); |
| 757 | KEYWORD(usub_sat); |
| 758 | |
| 759 | KEYWORD(splat); |
| 760 | KEYWORD(vscale); |
| 761 | KEYWORD(x); |
| 762 | KEYWORD(blockaddress); |
| 763 | KEYWORD(dso_local_equivalent); |
| 764 | KEYWORD(no_cfi); |
| 765 | KEYWORD(ptrauth); |
| 766 | |
| 767 | // Metadata types. |
| 768 | KEYWORD(distinct); |
| 769 | |
| 770 | // Use-list order directives. |
| 771 | KEYWORD(uselistorder); |
| 772 | KEYWORD(uselistorder_bb); |
| 773 | |
| 774 | KEYWORD(personality); |
| 775 | KEYWORD(cleanup); |
| 776 | KEYWORD(catch); |
| 777 | KEYWORD(filter); |
| 778 | |
| 779 | // Summary index keywords. |
| 780 | KEYWORD(path); |
| 781 | KEYWORD(hash); |
| 782 | KEYWORD(gv); |
| 783 | KEYWORD(guid); |
| 784 | KEYWORD(name); |
| 785 | KEYWORD(summaries); |
| 786 | KEYWORD(flags); |
| 787 | KEYWORD(blockcount); |
| 788 | KEYWORD(linkage); |
| 789 | KEYWORD(visibility); |
| 790 | KEYWORD(notEligibleToImport); |
| 791 | KEYWORD(live); |
| 792 | KEYWORD(dsoLocal); |
| 793 | KEYWORD(canAutoHide); |
| 794 | KEYWORD(importType); |
| 795 | KEYWORD(definition); |
| 796 | KEYWORD(declaration); |
| 797 | KEYWORD(function); |
| 798 | KEYWORD(insts); |
| 799 | KEYWORD(funcFlags); |
| 800 | KEYWORD(readNone); |
| 801 | KEYWORD(readOnly); |
| 802 | KEYWORD(noRecurse); |
| 803 | KEYWORD(returnDoesNotAlias); |
| 804 | KEYWORD(noInline); |
| 805 | KEYWORD(alwaysInline); |
| 806 | KEYWORD(noUnwind); |
| 807 | KEYWORD(mayThrow); |
| 808 | KEYWORD(hasUnknownCall); |
| 809 | KEYWORD(mustBeUnreachable); |
| 810 | KEYWORD(calls); |
| 811 | KEYWORD(callee); |
| 812 | KEYWORD(params); |
| 813 | KEYWORD(param); |
| 814 | KEYWORD(hotness); |
| 815 | KEYWORD(unknown); |
| 816 | KEYWORD(critical); |
| 817 | KEYWORD(relbf); |
| 818 | KEYWORD(variable); |
| 819 | KEYWORD(vTableFuncs); |
| 820 | KEYWORD(virtFunc); |
| 821 | KEYWORD(aliasee); |
| 822 | KEYWORD(refs); |
| 823 | KEYWORD(typeIdInfo); |
| 824 | KEYWORD(typeTests); |
| 825 | KEYWORD(typeTestAssumeVCalls); |
| 826 | KEYWORD(typeCheckedLoadVCalls); |
| 827 | KEYWORD(typeTestAssumeConstVCalls); |
| 828 | KEYWORD(typeCheckedLoadConstVCalls); |
| 829 | KEYWORD(vFuncId); |
| 830 | KEYWORD(offset); |
| 831 | KEYWORD(args); |
| 832 | KEYWORD(typeid); |
| 833 | KEYWORD(typeidCompatibleVTable); |
| 834 | KEYWORD(summary); |
| 835 | KEYWORD(typeTestRes); |
| 836 | KEYWORD(kind); |
| 837 | KEYWORD(unsat); |
| 838 | KEYWORD(byteArray); |
| 839 | KEYWORD(inline); |
| 840 | KEYWORD(single); |
| 841 | KEYWORD(allOnes); |
| 842 | KEYWORD(sizeM1BitWidth); |
| 843 | KEYWORD(alignLog2); |
| 844 | KEYWORD(sizeM1); |
| 845 | KEYWORD(bitMask); |
| 846 | KEYWORD(inlineBits); |
| 847 | KEYWORD(vcall_visibility); |
| 848 | KEYWORD(wpdResolutions); |
| 849 | KEYWORD(wpdRes); |
| 850 | KEYWORD(indir); |
| 851 | KEYWORD(singleImpl); |
| 852 | KEYWORD(branchFunnel); |
| 853 | KEYWORD(singleImplName); |
| 854 | KEYWORD(resByArg); |
| 855 | KEYWORD(byArg); |
| 856 | KEYWORD(uniformRetVal); |
| 857 | KEYWORD(uniqueRetVal); |
| 858 | KEYWORD(virtualConstProp); |
| 859 | KEYWORD(info); |
| 860 | KEYWORD(byte); |
| 861 | KEYWORD(bit); |
| 862 | KEYWORD(varFlags); |
| 863 | KEYWORD(callsites); |
| 864 | KEYWORD(clones); |
| 865 | KEYWORD(stackIds); |
| 866 | KEYWORD(allocs); |
| 867 | KEYWORD(versions); |
| 868 | KEYWORD(memProf); |
| 869 | KEYWORD(notcold); |
| 870 | |
| 871 | #undef KEYWORD |
| 872 | |
| 873 | // Keywords for types. |
| 874 | #define TYPEKEYWORD(STR, LLVMTY) \ |
| 875 | do { \ |
| 876 | if (Keyword == STR) { \ |
| 877 | TyVal = LLVMTY; \ |
| 878 | return lltok::Type; \ |
| 879 | } \ |
| 880 | } while (false) |
| 881 | |
| 882 | TYPEKEYWORD("void" , Type::getVoidTy(Context)); |
| 883 | TYPEKEYWORD("half" , Type::getHalfTy(Context)); |
| 884 | TYPEKEYWORD("bfloat" , Type::getBFloatTy(Context)); |
| 885 | TYPEKEYWORD("float" , Type::getFloatTy(Context)); |
| 886 | TYPEKEYWORD("double" , Type::getDoubleTy(Context)); |
| 887 | TYPEKEYWORD("x86_fp80" , Type::getX86_FP80Ty(Context)); |
| 888 | TYPEKEYWORD("fp128" , Type::getFP128Ty(Context)); |
| 889 | TYPEKEYWORD("ppc_fp128" , Type::getPPC_FP128Ty(Context)); |
| 890 | TYPEKEYWORD("label" , Type::getLabelTy(Context)); |
| 891 | TYPEKEYWORD("metadata" , Type::getMetadataTy(Context)); |
| 892 | TYPEKEYWORD("x86_amx" , Type::getX86_AMXTy(Context)); |
| 893 | TYPEKEYWORD("token" , Type::getTokenTy(Context)); |
| 894 | TYPEKEYWORD("ptr" , PointerType::getUnqual(Context)); |
| 895 | |
| 896 | #undef TYPEKEYWORD |
| 897 | |
| 898 | // Keywords for instructions. |
| 899 | #define INSTKEYWORD(STR, Enum) \ |
| 900 | do { \ |
| 901 | if (Keyword == #STR) { \ |
| 902 | UIntVal = Instruction::Enum; \ |
| 903 | return lltok::kw_##STR; \ |
| 904 | } \ |
| 905 | } while (false) |
| 906 | |
| 907 | INSTKEYWORD(fneg, FNeg); |
| 908 | |
| 909 | INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd); |
| 910 | INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub); |
| 911 | INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul); |
| 912 | INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv); |
| 913 | INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem); |
| 914 | INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr); |
| 915 | INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor); |
| 916 | INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp); |
| 917 | |
| 918 | INSTKEYWORD(phi, PHI); |
| 919 | INSTKEYWORD(call, Call); |
| 920 | INSTKEYWORD(trunc, Trunc); |
| 921 | INSTKEYWORD(zext, ZExt); |
| 922 | INSTKEYWORD(sext, SExt); |
| 923 | INSTKEYWORD(fptrunc, FPTrunc); |
| 924 | INSTKEYWORD(fpext, FPExt); |
| 925 | INSTKEYWORD(uitofp, UIToFP); |
| 926 | INSTKEYWORD(sitofp, SIToFP); |
| 927 | INSTKEYWORD(fptoui, FPToUI); |
| 928 | INSTKEYWORD(fptosi, FPToSI); |
| 929 | INSTKEYWORD(inttoptr, IntToPtr); |
| 930 | INSTKEYWORD(ptrtoint, PtrToInt); |
| 931 | INSTKEYWORD(bitcast, BitCast); |
| 932 | INSTKEYWORD(addrspacecast, AddrSpaceCast); |
| 933 | INSTKEYWORD(select, Select); |
| 934 | INSTKEYWORD(va_arg, VAArg); |
| 935 | INSTKEYWORD(ret, Ret); |
| 936 | INSTKEYWORD(br, Br); |
| 937 | INSTKEYWORD(switch, Switch); |
| 938 | INSTKEYWORD(indirectbr, IndirectBr); |
| 939 | INSTKEYWORD(invoke, Invoke); |
| 940 | INSTKEYWORD(resume, Resume); |
| 941 | INSTKEYWORD(unreachable, Unreachable); |
| 942 | INSTKEYWORD(callbr, CallBr); |
| 943 | |
| 944 | INSTKEYWORD(alloca, Alloca); |
| 945 | INSTKEYWORD(load, Load); |
| 946 | INSTKEYWORD(store, Store); |
| 947 | INSTKEYWORD(cmpxchg, AtomicCmpXchg); |
| 948 | INSTKEYWORD(atomicrmw, AtomicRMW); |
| 949 | INSTKEYWORD(fence, Fence); |
| 950 | INSTKEYWORD(getelementptr, GetElementPtr); |
| 951 | |
| 952 | INSTKEYWORD(extractelement, ExtractElement); |
| 953 | INSTKEYWORD(insertelement, InsertElement); |
| 954 | INSTKEYWORD(shufflevector, ShuffleVector); |
| 955 | INSTKEYWORD(extractvalue, ExtractValue); |
| 956 | INSTKEYWORD(insertvalue, InsertValue); |
| 957 | INSTKEYWORD(landingpad, LandingPad); |
| 958 | INSTKEYWORD(cleanupret, CleanupRet); |
| 959 | INSTKEYWORD(catchret, CatchRet); |
| 960 | INSTKEYWORD(catchswitch, CatchSwitch); |
| 961 | INSTKEYWORD(catchpad, CatchPad); |
| 962 | INSTKEYWORD(cleanuppad, CleanupPad); |
| 963 | |
| 964 | INSTKEYWORD(freeze, Freeze); |
| 965 | |
| 966 | #undef INSTKEYWORD |
| 967 | |
| 968 | #define DWKEYWORD(TYPE, TOKEN) \ |
| 969 | do { \ |
| 970 | if (Keyword.starts_with("DW_" #TYPE "_")) { \ |
| 971 | StrVal.assign(Keyword.begin(), Keyword.end()); \ |
| 972 | return lltok::TOKEN; \ |
| 973 | } \ |
| 974 | } while (false) |
| 975 | |
| 976 | DWKEYWORD(TAG, DwarfTag); |
| 977 | DWKEYWORD(ATE, DwarfAttEncoding); |
| 978 | DWKEYWORD(VIRTUALITY, DwarfVirtuality); |
| 979 | DWKEYWORD(LANG, DwarfLang); |
| 980 | DWKEYWORD(CC, DwarfCC); |
| 981 | DWKEYWORD(OP, DwarfOp); |
| 982 | DWKEYWORD(MACINFO, DwarfMacinfo); |
| 983 | DWKEYWORD(APPLE_ENUM_KIND, DwarfEnumKind); |
| 984 | |
| 985 | #undef DWKEYWORD |
| 986 | |
| 987 | // Keywords for debug record types. |
| 988 | #define DBGRECORDTYPEKEYWORD(STR) \ |
| 989 | do { \ |
| 990 | if (Keyword == "dbg_" #STR) { \ |
| 991 | StrVal = #STR; \ |
| 992 | return lltok::DbgRecordType; \ |
| 993 | } \ |
| 994 | } while (false) |
| 995 | |
| 996 | DBGRECORDTYPEKEYWORD(value); |
| 997 | DBGRECORDTYPEKEYWORD(declare); |
| 998 | DBGRECORDTYPEKEYWORD(assign); |
| 999 | DBGRECORDTYPEKEYWORD(label); |
| 1000 | #undef DBGRECORDTYPEKEYWORD |
| 1001 | |
| 1002 | if (Keyword.starts_with(Prefix: "DIFlag" )) { |
| 1003 | StrVal.assign(first: Keyword.begin(), last: Keyword.end()); |
| 1004 | return lltok::DIFlag; |
| 1005 | } |
| 1006 | |
| 1007 | if (Keyword.starts_with(Prefix: "DISPFlag" )) { |
| 1008 | StrVal.assign(first: Keyword.begin(), last: Keyword.end()); |
| 1009 | return lltok::DISPFlag; |
| 1010 | } |
| 1011 | |
| 1012 | if (Keyword.starts_with(Prefix: "CSK_" )) { |
| 1013 | StrVal.assign(first: Keyword.begin(), last: Keyword.end()); |
| 1014 | return lltok::ChecksumKind; |
| 1015 | } |
| 1016 | |
| 1017 | if (Keyword == "NoDebug" || Keyword == "FullDebug" || |
| 1018 | Keyword == "LineTablesOnly" || Keyword == "DebugDirectivesOnly" ) { |
| 1019 | StrVal.assign(first: Keyword.begin(), last: Keyword.end()); |
| 1020 | return lltok::EmissionKind; |
| 1021 | } |
| 1022 | |
| 1023 | if (Keyword == "GNU" || Keyword == "Apple" || Keyword == "None" || |
| 1024 | Keyword == "Default" ) { |
| 1025 | StrVal.assign(first: Keyword.begin(), last: Keyword.end()); |
| 1026 | return lltok::NameTableKind; |
| 1027 | } |
| 1028 | |
| 1029 | if (Keyword == "Binary" || Keyword == "Decimal" || Keyword == "Rational" ) { |
| 1030 | StrVal.assign(first: Keyword.begin(), last: Keyword.end()); |
| 1031 | return lltok::FixedPointKind; |
| 1032 | } |
| 1033 | |
| 1034 | // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by |
| 1035 | // the CFE to avoid forcing it to deal with 64-bit numbers. |
| 1036 | if ((TokStart[0] == 'u' || TokStart[0] == 's') && |
| 1037 | TokStart[1] == '0' && TokStart[2] == 'x' && |
| 1038 | isxdigit(static_cast<unsigned char>(TokStart[3]))) { |
| 1039 | int len = CurPtr-TokStart-3; |
| 1040 | uint32_t bits = len * 4; |
| 1041 | StringRef HexStr(TokStart + 3, len); |
| 1042 | if (!all_of(Range&: HexStr, P: isxdigit)) { |
| 1043 | // Bad token, return it as an error. |
| 1044 | CurPtr = TokStart+3; |
| 1045 | return lltok::Error; |
| 1046 | } |
| 1047 | APInt Tmp(bits, HexStr, 16); |
| 1048 | uint32_t activeBits = Tmp.getActiveBits(); |
| 1049 | if (activeBits > 0 && activeBits < bits) |
| 1050 | Tmp = Tmp.trunc(width: activeBits); |
| 1051 | APSIntVal = APSInt(Tmp, TokStart[0] == 'u'); |
| 1052 | return lltok::APSInt; |
| 1053 | } |
| 1054 | |
| 1055 | // If this is "cc1234", return this as just "cc". |
| 1056 | if (TokStart[0] == 'c' && TokStart[1] == 'c') { |
| 1057 | CurPtr = TokStart+2; |
| 1058 | return lltok::kw_cc; |
| 1059 | } |
| 1060 | |
| 1061 | // Finally, if this isn't known, return an error. |
| 1062 | CurPtr = TokStart+1; |
| 1063 | return lltok::Error; |
| 1064 | } |
| 1065 | |
| 1066 | /// Lex all tokens that start with a 0x prefix, knowing they match and are not |
| 1067 | /// labels. |
| 1068 | /// HexFPConstant 0x[0-9A-Fa-f]+ |
| 1069 | /// HexFP80Constant 0xK[0-9A-Fa-f]+ |
| 1070 | /// HexFP128Constant 0xL[0-9A-Fa-f]+ |
| 1071 | /// HexPPC128Constant 0xM[0-9A-Fa-f]+ |
| 1072 | /// HexHalfConstant 0xH[0-9A-Fa-f]+ |
| 1073 | /// HexBFloatConstant 0xR[0-9A-Fa-f]+ |
| 1074 | lltok::Kind LLLexer::Lex0x() { |
| 1075 | CurPtr = TokStart + 2; |
| 1076 | |
| 1077 | char Kind; |
| 1078 | if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' || |
| 1079 | CurPtr[0] == 'R') { |
| 1080 | Kind = *CurPtr++; |
| 1081 | } else { |
| 1082 | Kind = 'J'; |
| 1083 | } |
| 1084 | |
| 1085 | if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) { |
| 1086 | // Bad token, return it as an error. |
| 1087 | CurPtr = TokStart+1; |
| 1088 | return lltok::Error; |
| 1089 | } |
| 1090 | |
| 1091 | while (isxdigit(static_cast<unsigned char>(CurPtr[0]))) |
| 1092 | ++CurPtr; |
| 1093 | |
| 1094 | if (Kind == 'J') { |
| 1095 | // HexFPConstant - Floating point constant represented in IEEE format as a |
| 1096 | // hexadecimal number for when exponential notation is not precise enough. |
| 1097 | // Half, BFloat, Float, and double only. |
| 1098 | APFloatVal = APFloat(APFloat::IEEEdouble(), |
| 1099 | APInt(64, HexIntToVal(Buffer: TokStart + 2, End: CurPtr))); |
| 1100 | return lltok::APFloat; |
| 1101 | } |
| 1102 | |
| 1103 | uint64_t Pair[2]; |
| 1104 | switch (Kind) { |
| 1105 | default: llvm_unreachable("Unknown kind!" ); |
| 1106 | case 'K': |
| 1107 | // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) |
| 1108 | FP80HexToIntPair(Buffer: TokStart+3, End: CurPtr, Pair); |
| 1109 | APFloatVal = APFloat(APFloat::x87DoubleExtended(), APInt(80, Pair)); |
| 1110 | return lltok::APFloat; |
| 1111 | case 'L': |
| 1112 | // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) |
| 1113 | HexToIntPair(Buffer: TokStart+3, End: CurPtr, Pair); |
| 1114 | APFloatVal = APFloat(APFloat::IEEEquad(), APInt(128, Pair)); |
| 1115 | return lltok::APFloat; |
| 1116 | case 'M': |
| 1117 | // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) |
| 1118 | HexToIntPair(Buffer: TokStart+3, End: CurPtr, Pair); |
| 1119 | APFloatVal = APFloat(APFloat::PPCDoubleDouble(), APInt(128, Pair)); |
| 1120 | return lltok::APFloat; |
| 1121 | case 'H': |
| 1122 | APFloatVal = APFloat(APFloat::IEEEhalf(), |
| 1123 | APInt(16,HexIntToVal(Buffer: TokStart+3, End: CurPtr))); |
| 1124 | return lltok::APFloat; |
| 1125 | case 'R': |
| 1126 | // Brain floating point |
| 1127 | APFloatVal = APFloat(APFloat::BFloat(), |
| 1128 | APInt(16, HexIntToVal(Buffer: TokStart + 3, End: CurPtr))); |
| 1129 | return lltok::APFloat; |
| 1130 | } |
| 1131 | } |
| 1132 | |
| 1133 | /// Lex tokens for a label or a numeric constant, possibly starting with -. |
| 1134 | /// Label [-a-zA-Z$._0-9]+: |
| 1135 | /// NInteger -[0-9]+ |
| 1136 | /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? |
| 1137 | /// PInteger [0-9]+ |
| 1138 | /// HexFPConstant 0x[0-9A-Fa-f]+ |
| 1139 | /// HexFP80Constant 0xK[0-9A-Fa-f]+ |
| 1140 | /// HexFP128Constant 0xL[0-9A-Fa-f]+ |
| 1141 | /// HexPPC128Constant 0xM[0-9A-Fa-f]+ |
| 1142 | lltok::Kind LLLexer::LexDigitOrNegative() { |
| 1143 | // If the letter after the negative is not a number, this is probably a label. |
| 1144 | if (!isdigit(static_cast<unsigned char>(TokStart[0])) && |
| 1145 | !isdigit(static_cast<unsigned char>(CurPtr[0]))) { |
| 1146 | // Okay, this is not a number after the -, it's probably a label. |
| 1147 | if (const char *End = isLabelTail(CurPtr)) { |
| 1148 | StrVal.assign(first: TokStart, last: End-1); |
| 1149 | CurPtr = End; |
| 1150 | return lltok::LabelStr; |
| 1151 | } |
| 1152 | |
| 1153 | return lltok::Error; |
| 1154 | } |
| 1155 | |
| 1156 | // At this point, it is either a label, int or fp constant. |
| 1157 | |
| 1158 | // Skip digits, we have at least one. |
| 1159 | for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr) |
| 1160 | /*empty*/; |
| 1161 | |
| 1162 | // Check if this is a fully-numeric label: |
| 1163 | if (isdigit(TokStart[0]) && CurPtr[0] == ':') { |
| 1164 | uint64_t Val = atoull(Buffer: TokStart, End: CurPtr); |
| 1165 | ++CurPtr; // Skip the colon. |
| 1166 | if ((unsigned)Val != Val) |
| 1167 | LexError(Msg: "invalid value number (too large)" ); |
| 1168 | UIntVal = unsigned(Val); |
| 1169 | return lltok::LabelID; |
| 1170 | } |
| 1171 | |
| 1172 | // Check to see if this really is a string label, e.g. "-1:". |
| 1173 | if (isLabelChar(C: CurPtr[0]) || CurPtr[0] == ':') { |
| 1174 | if (const char *End = isLabelTail(CurPtr)) { |
| 1175 | StrVal.assign(first: TokStart, last: End-1); |
| 1176 | CurPtr = End; |
| 1177 | return lltok::LabelStr; |
| 1178 | } |
| 1179 | } |
| 1180 | |
| 1181 | // If the next character is a '.', then it is a fp value, otherwise its |
| 1182 | // integer. |
| 1183 | if (CurPtr[0] != '.') { |
| 1184 | if (TokStart[0] == '0' && TokStart[1] == 'x') |
| 1185 | return Lex0x(); |
| 1186 | APSIntVal = APSInt(StringRef(TokStart, CurPtr - TokStart)); |
| 1187 | return lltok::APSInt; |
| 1188 | } |
| 1189 | |
| 1190 | ++CurPtr; |
| 1191 | |
| 1192 | // Skip over [0-9]*([eE][-+]?[0-9]+)? |
| 1193 | while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr; |
| 1194 | |
| 1195 | if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { |
| 1196 | if (isdigit(static_cast<unsigned char>(CurPtr[1])) || |
| 1197 | ((CurPtr[1] == '-' || CurPtr[1] == '+') && |
| 1198 | isdigit(static_cast<unsigned char>(CurPtr[2])))) { |
| 1199 | CurPtr += 2; |
| 1200 | while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr; |
| 1201 | } |
| 1202 | } |
| 1203 | |
| 1204 | APFloatVal = APFloat(APFloat::IEEEdouble(), |
| 1205 | StringRef(TokStart, CurPtr - TokStart)); |
| 1206 | return lltok::APFloat; |
| 1207 | } |
| 1208 | |
| 1209 | /// Lex a floating point constant starting with +. |
| 1210 | /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? |
| 1211 | lltok::Kind LLLexer::LexPositive() { |
| 1212 | // If the letter after the negative is a number, this is probably not a |
| 1213 | // label. |
| 1214 | if (!isdigit(static_cast<unsigned char>(CurPtr[0]))) |
| 1215 | return lltok::Error; |
| 1216 | |
| 1217 | // Skip digits. |
| 1218 | for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr) |
| 1219 | /*empty*/; |
| 1220 | |
| 1221 | // At this point, we need a '.'. |
| 1222 | if (CurPtr[0] != '.') { |
| 1223 | CurPtr = TokStart+1; |
| 1224 | return lltok::Error; |
| 1225 | } |
| 1226 | |
| 1227 | ++CurPtr; |
| 1228 | |
| 1229 | // Skip over [0-9]*([eE][-+]?[0-9]+)? |
| 1230 | while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr; |
| 1231 | |
| 1232 | if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { |
| 1233 | if (isdigit(static_cast<unsigned char>(CurPtr[1])) || |
| 1234 | ((CurPtr[1] == '-' || CurPtr[1] == '+') && |
| 1235 | isdigit(static_cast<unsigned char>(CurPtr[2])))) { |
| 1236 | CurPtr += 2; |
| 1237 | while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr; |
| 1238 | } |
| 1239 | } |
| 1240 | |
| 1241 | APFloatVal = APFloat(APFloat::IEEEdouble(), |
| 1242 | StringRef(TokStart, CurPtr - TokStart)); |
| 1243 | return lltok::APFloat; |
| 1244 | } |
| 1245 | |