//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This class implements the lexer for assembly files.
//
//===----------------------------------------------------------------------===//
| 12 | |
| 13 | #include "llvm/MC/MCParser/AsmLexer.h" |
| 14 | #include "llvm/ADT/APInt.h" |
| 15 | #include "llvm/ADT/ArrayRef.h" |
| 16 | #include "llvm/ADT/StringExtras.h" |
| 17 | #include "llvm/ADT/StringRef.h" |
| 18 | #include "llvm/MC/MCAsmInfo.h" |
| 19 | #include "llvm/Support/Compiler.h" |
| 20 | #include "llvm/Support/SMLoc.h" |
| 21 | #include "llvm/Support/SaveAndRestore.h" |
| 22 | #include "llvm/Support/raw_ostream.h" |
| 23 | #include <cassert> |
| 24 | #include <cctype> |
| 25 | #include <cstdio> |
| 26 | #include <cstring> |
| 27 | #include <string> |
| 28 | |
| 29 | using namespace llvm; |
| 30 | |
| 31 | SMLoc AsmToken::getLoc() const { return SMLoc::getFromPointer(Ptr: Str.data()); } |
| 32 | |
| 33 | SMLoc AsmToken::getEndLoc() const { |
| 34 | return SMLoc::getFromPointer(Ptr: Str.data() + Str.size()); |
| 35 | } |
| 36 | |
| 37 | SMRange AsmToken::getLocRange() const { return SMRange(getLoc(), getEndLoc()); } |
| 38 | |
| 39 | void AsmToken::dump(raw_ostream &OS) const { |
| 40 | switch (Kind) { |
| 41 | case AsmToken::Error: |
| 42 | OS << "error" ; |
| 43 | break; |
| 44 | case AsmToken::Identifier: |
| 45 | OS << "identifier: " << getString(); |
| 46 | break; |
| 47 | case AsmToken::Integer: |
| 48 | OS << "int: " << getString(); |
| 49 | break; |
| 50 | case AsmToken::Real: |
| 51 | OS << "real: " << getString(); |
| 52 | break; |
| 53 | case AsmToken::String: |
| 54 | OS << "string: " << getString(); |
| 55 | break; |
| 56 | |
| 57 | // clang-format off |
| 58 | case AsmToken::Amp: OS << "Amp" ; break; |
| 59 | case AsmToken::AmpAmp: OS << "AmpAmp" ; break; |
| 60 | case AsmToken::At: OS << "At" ; break; |
| 61 | case AsmToken::BackSlash: OS << "BackSlash" ; break; |
| 62 | case AsmToken::BigNum: OS << "BigNum" ; break; |
| 63 | case AsmToken::Caret: OS << "Caret" ; break; |
| 64 | case AsmToken::Colon: OS << "Colon" ; break; |
| 65 | case AsmToken::Comma: OS << "Comma" ; break; |
| 66 | case AsmToken::Comment: OS << "Comment" ; break; |
| 67 | case AsmToken::Dollar: OS << "Dollar" ; break; |
| 68 | case AsmToken::Dot: OS << "Dot" ; break; |
| 69 | case AsmToken::EndOfStatement: OS << "EndOfStatement" ; break; |
| 70 | case AsmToken::Eof: OS << "Eof" ; break; |
| 71 | case AsmToken::Equal: OS << "Equal" ; break; |
| 72 | case AsmToken::EqualEqual: OS << "EqualEqual" ; break; |
| 73 | case AsmToken::Exclaim: OS << "Exclaim" ; break; |
| 74 | case AsmToken::ExclaimEqual: OS << "ExclaimEqual" ; break; |
| 75 | case AsmToken::Greater: OS << "Greater" ; break; |
| 76 | case AsmToken::GreaterEqual: OS << "GreaterEqual" ; break; |
| 77 | case AsmToken::GreaterGreater: OS << "GreaterGreater" ; break; |
| 78 | case AsmToken::Hash: OS << "Hash" ; break; |
| 79 | case AsmToken::HashDirective: OS << "HashDirective" ; break; |
| 80 | case AsmToken::LBrac: OS << "LBrac" ; break; |
| 81 | case AsmToken::LCurly: OS << "LCurly" ; break; |
| 82 | case AsmToken::LParen: OS << "LParen" ; break; |
| 83 | case AsmToken::Less: OS << "Less" ; break; |
| 84 | case AsmToken::LessEqual: OS << "LessEqual" ; break; |
| 85 | case AsmToken::LessGreater: OS << "LessGreater" ; break; |
| 86 | case AsmToken::LessLess: OS << "LessLess" ; break; |
| 87 | case AsmToken::Minus: OS << "Minus" ; break; |
| 88 | case AsmToken::MinusGreater: OS << "MinusGreater" ; break; |
| 89 | case AsmToken::Percent: OS << "Percent" ; break; |
| 90 | case AsmToken::Pipe: OS << "Pipe" ; break; |
| 91 | case AsmToken::PipePipe: OS << "PipePipe" ; break; |
| 92 | case AsmToken::Plus: OS << "Plus" ; break; |
| 93 | case AsmToken::Question: OS << "Question" ; break; |
| 94 | case AsmToken::RBrac: OS << "RBrac" ; break; |
| 95 | case AsmToken::RCurly: OS << "RCurly" ; break; |
| 96 | case AsmToken::RParen: OS << "RParen" ; break; |
| 97 | case AsmToken::Slash: OS << "Slash" ; break; |
| 98 | case AsmToken::Space: OS << "Space" ; break; |
| 99 | case AsmToken::Star: OS << "Star" ; break; |
| 100 | case AsmToken::Tilde: OS << "Tilde" ; break; |
| 101 | // clang-format on |
| 102 | } |
| 103 | |
| 104 | // Print the token string. |
| 105 | OS << " (\"" ; |
| 106 | OS.write_escaped(Str: getString()); |
| 107 | OS << "\")" ; |
| 108 | } |
| 109 | |
| 110 | AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { |
| 111 | // For COFF targets, this is true, while for ELF targets, it should be false. |
| 112 | // Currently, @specifier parsing depends on '@' being included in the token. |
| 113 | AllowAtInIdentifier = !StringRef(MAI.getCommentString()).starts_with(Prefix: "@" ) && |
| 114 | MAI.useAtForSpecifier(); |
| 115 | LexMotorolaIntegers = MAI.shouldUseMotorolaIntegers(); |
| 116 | |
| 117 | CurTok.emplace_back(Args: AsmToken::Space, Args: StringRef()); |
| 118 | } |
| 119 | |
| 120 | void AsmLexer::setBuffer(StringRef Buf, const char *ptr, |
| 121 | bool EndStatementAtEOF) { |
| 122 | // Buffer must be NULL-terminated. NULL terminator must reside at `Buf.end()`. |
| 123 | // It must be safe to dereference `Buf.end()`. |
| 124 | assert(*Buf.end() == '\0' && |
| 125 | "Buffer provided to AsmLexer lacks null terminator." ); |
| 126 | |
| 127 | CurBuf = Buf; |
| 128 | |
| 129 | if (ptr) |
| 130 | CurPtr = ptr; |
| 131 | else |
| 132 | CurPtr = CurBuf.begin(); |
| 133 | |
| 134 | TokStart = nullptr; |
| 135 | this->EndStatementAtEOF = EndStatementAtEOF; |
| 136 | } |
| 137 | |
| 138 | /// ReturnError - Set the error to the specified string at the specified |
| 139 | /// location. This is defined to always return AsmToken::Error. |
| 140 | AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { |
| 141 | SetError(errLoc: SMLoc::getFromPointer(Ptr: Loc), err: Msg); |
| 142 | |
| 143 | return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc)); |
| 144 | } |
| 145 | |
| 146 | int AsmLexer::getNextChar() { |
| 147 | if (CurPtr == CurBuf.end()) |
| 148 | return EOF; |
| 149 | return (unsigned char)*CurPtr++; |
| 150 | } |
| 151 | |
| 152 | int AsmLexer::peekNextChar() { |
| 153 | if (CurPtr == CurBuf.end()) |
| 154 | return EOF; |
| 155 | return (unsigned char)*CurPtr; |
| 156 | } |
| 157 | |
| 158 | /// The leading integral digit sequence and dot should have already been |
| 159 | /// consumed, some or all of the fractional digit sequence *can* have been |
| 160 | /// consumed. |
| 161 | AsmToken AsmLexer::LexFloatLiteral() { |
| 162 | // Skip the fractional digit sequence. |
| 163 | while (isDigit(C: *CurPtr)) |
| 164 | ++CurPtr; |
| 165 | |
| 166 | if (*CurPtr == '-' || *CurPtr == '+') |
| 167 | return ReturnError(Loc: CurPtr, Msg: "invalid sign in float literal" ); |
| 168 | |
| 169 | // Check for exponent |
| 170 | if ((*CurPtr == 'e' || *CurPtr == 'E')) { |
| 171 | ++CurPtr; |
| 172 | |
| 173 | if (*CurPtr == '-' || *CurPtr == '+') |
| 174 | ++CurPtr; |
| 175 | |
| 176 | while (isDigit(C: *CurPtr)) |
| 177 | ++CurPtr; |
| 178 | } |
| 179 | |
| 180 | return AsmToken(AsmToken::Real, |
| 181 | StringRef(TokStart, CurPtr - TokStart)); |
| 182 | } |
| 183 | |
| 184 | /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+ |
| 185 | /// while making sure there are enough actual digits around for the constant to |
| 186 | /// be valid. |
| 187 | /// |
| 188 | /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed |
| 189 | /// before we get here. |
| 190 | AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { |
| 191 | assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') && |
| 192 | "unexpected parse state in floating hex" ); |
| 193 | bool NoFracDigits = true; |
| 194 | |
| 195 | // Skip the fractional part if there is one |
| 196 | if (*CurPtr == '.') { |
| 197 | ++CurPtr; |
| 198 | |
| 199 | const char *FracStart = CurPtr; |
| 200 | while (isHexDigit(C: *CurPtr)) |
| 201 | ++CurPtr; |
| 202 | |
| 203 | NoFracDigits = CurPtr == FracStart; |
| 204 | } |
| 205 | |
| 206 | if (NoIntDigits && NoFracDigits) |
| 207 | return ReturnError(Loc: TokStart, Msg: "invalid hexadecimal floating-point constant: " |
| 208 | "expected at least one significand digit" ); |
| 209 | |
| 210 | // Make sure we do have some kind of proper exponent part |
| 211 | if (*CurPtr != 'p' && *CurPtr != 'P') |
| 212 | return ReturnError(Loc: TokStart, Msg: "invalid hexadecimal floating-point constant: " |
| 213 | "expected exponent part 'p'" ); |
| 214 | ++CurPtr; |
| 215 | |
| 216 | if (*CurPtr == '+' || *CurPtr == '-') |
| 217 | ++CurPtr; |
| 218 | |
| 219 | // N.b. exponent digits are *not* hex |
| 220 | const char *ExpStart = CurPtr; |
| 221 | while (isDigit(C: *CurPtr)) |
| 222 | ++CurPtr; |
| 223 | |
| 224 | if (CurPtr == ExpStart) |
| 225 | return ReturnError(Loc: TokStart, Msg: "invalid hexadecimal floating-point constant: " |
| 226 | "expected at least one exponent digit" ); |
| 227 | |
| 228 | return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); |
| 229 | } |
| 230 | |
| 231 | /// LexIdentifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]* |
| 232 | static bool isIdentifierChar(char C, bool AllowAt, bool AllowHash) { |
| 233 | return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '?' || |
| 234 | (AllowAt && C == '@') || (AllowHash && C == '#'); |
| 235 | } |
| 236 | |
| 237 | AsmToken AsmLexer::LexIdentifier() { |
| 238 | // Check for floating point literals. |
| 239 | if (CurPtr[-1] == '.' && isDigit(C: *CurPtr)) { |
| 240 | // Disambiguate a .1243foo identifier from a floating literal. |
| 241 | while (isDigit(C: *CurPtr)) |
| 242 | ++CurPtr; |
| 243 | |
| 244 | if (!isIdentifierChar(C: *CurPtr, AllowAt: AllowAtInIdentifier, |
| 245 | AllowHash: AllowHashInIdentifier) || |
| 246 | *CurPtr == 'e' || *CurPtr == 'E') |
| 247 | return LexFloatLiteral(); |
| 248 | } |
| 249 | |
| 250 | while (isIdentifierChar(C: *CurPtr, AllowAt: AllowAtInIdentifier, AllowHash: AllowHashInIdentifier)) |
| 251 | ++CurPtr; |
| 252 | |
| 253 | // Handle . as a special case. |
| 254 | if (CurPtr == TokStart+1 && TokStart[0] == '.') |
| 255 | return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); |
| 256 | |
| 257 | return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); |
| 258 | } |
| 259 | |
| 260 | /// LexSlash: Slash: / |
| 261 | /// C-Style Comment: /* ... */ |
| 262 | /// C-style Comment: // ... |
| 263 | AsmToken AsmLexer::LexSlash() { |
| 264 | if (!MAI.shouldAllowAdditionalComments()) { |
| 265 | IsAtStartOfStatement = false; |
| 266 | return AsmToken(AsmToken::Slash, StringRef(TokStart, 1)); |
| 267 | } |
| 268 | |
| 269 | switch (*CurPtr) { |
| 270 | case '*': |
| 271 | IsAtStartOfStatement = false; |
| 272 | break; // C style comment. |
| 273 | case '/': |
| 274 | ++CurPtr; |
| 275 | return LexLineComment(); |
| 276 | default: |
| 277 | IsAtStartOfStatement = false; |
| 278 | return AsmToken(AsmToken::Slash, StringRef(TokStart, 1)); |
| 279 | } |
| 280 | |
| 281 | // C Style comment. |
| 282 | ++CurPtr; // skip the star. |
| 283 | const char * = CurPtr; |
| 284 | while (CurPtr != CurBuf.end()) { |
| 285 | switch (*CurPtr++) { |
| 286 | case '*': |
| 287 | // End of the comment? |
| 288 | if (*CurPtr != '/') |
| 289 | break; |
| 290 | // If we have a CommentConsumer, notify it about the comment. |
| 291 | if (CommentConsumer) { |
| 292 | CommentConsumer->HandleComment( |
| 293 | Loc: SMLoc::getFromPointer(Ptr: CommentTextStart), |
| 294 | CommentText: StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart)); |
| 295 | } |
| 296 | ++CurPtr; // End the */. |
| 297 | return AsmToken(AsmToken::Comment, |
| 298 | StringRef(TokStart, CurPtr - TokStart)); |
| 299 | } |
| 300 | } |
| 301 | return ReturnError(Loc: TokStart, Msg: "unterminated comment" ); |
| 302 | } |
| 303 | |
| 304 | /// LexLineComment: Comment: #[^\n]* |
| 305 | /// : //[^\n]* |
| 306 | AsmToken AsmLexer::() { |
| 307 | // Mark This as an end of statement with a body of the |
| 308 | // comment. While it would be nicer to leave this two tokens, |
| 309 | // backwards compatability with TargetParsers makes keeping this in this form |
| 310 | // better. |
| 311 | const char * = CurPtr; |
| 312 | int CurChar = getNextChar(); |
| 313 | while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) |
| 314 | CurChar = getNextChar(); |
| 315 | const char *NewlinePtr = CurPtr; |
| 316 | if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n') |
| 317 | ++CurPtr; |
| 318 | |
| 319 | // If we have a CommentConsumer, notify it about the comment. |
| 320 | if (CommentConsumer) { |
| 321 | CommentConsumer->HandleComment( |
| 322 | Loc: SMLoc::getFromPointer(Ptr: CommentTextStart), |
| 323 | CommentText: StringRef(CommentTextStart, NewlinePtr - 1 - CommentTextStart)); |
| 324 | } |
| 325 | |
| 326 | IsAtStartOfLine = true; |
| 327 | // This is a whole line comment. leave newline |
| 328 | if (IsAtStartOfStatement) |
| 329 | return AsmToken(AsmToken::EndOfStatement, |
| 330 | StringRef(TokStart, CurPtr - TokStart)); |
| 331 | IsAtStartOfStatement = true; |
| 332 | |
| 333 | return AsmToken(AsmToken::EndOfStatement, |
| 334 | StringRef(TokStart, CurPtr - 1 - TokStart)); |
| 335 | } |
| 336 | |
/// Advance \p CurPtr past an optional integer type suffix.
///
/// Skips case-insensitive ULL, UL, U, L and LL suffixes: at most one 'U'
/// followed by at most two 'L's. \p CurPtr is left unchanged when no suffix
/// is present.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  if (*CurPtr == 'U' || *CurPtr == 'u')
    ++CurPtr;
  for (int Count = 0; Count < 2 && (*CurPtr == 'L' || *CurPtr == 'l'); ++Count)
    ++CurPtr;
}
| 346 | |
| 347 | // Look ahead to search for first non-hex digit, if it's [hH], then we treat the |
| 348 | // integer as a hexadecimal, possibly with leading zeroes. |
| 349 | static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, |
| 350 | bool LexHex) { |
| 351 | const char *FirstNonDec = nullptr; |
| 352 | const char *LookAhead = CurPtr; |
| 353 | while (true) { |
| 354 | if (isDigit(C: *LookAhead)) { |
| 355 | ++LookAhead; |
| 356 | } else { |
| 357 | if (!FirstNonDec) |
| 358 | FirstNonDec = LookAhead; |
| 359 | |
| 360 | // Keep going if we are looking for a 'h' suffix. |
| 361 | if (LexHex && isHexDigit(C: *LookAhead)) |
| 362 | ++LookAhead; |
| 363 | else |
| 364 | break; |
| 365 | } |
| 366 | } |
| 367 | bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H'); |
| 368 | CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec; |
| 369 | if (isHex) |
| 370 | return 16; |
| 371 | return DefaultRadix; |
| 372 | } |
| 373 | |
| 374 | static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) { |
| 375 | while (hexDigitValue(C: *CurPtr) < DefaultRadix) { |
| 376 | ++CurPtr; |
| 377 | } |
| 378 | return CurPtr; |
| 379 | } |
| 380 | |
| 381 | static AsmToken intToken(StringRef Ref, APInt &Value) { |
| 382 | if (Value.isIntN(N: 64)) |
| 383 | return AsmToken(AsmToken::Integer, Ref, Value); |
| 384 | return AsmToken(AsmToken::BigNum, Ref, Value); |
| 385 | } |
| 386 | |
/// Return a human-readable name for \p Radix for use in diagnostics:
/// "binary", "octal", "decimal" or "hexadecimal" for 2, 8, 10 and 16, and
/// "base-N" for any other value.
static std::string radixName(unsigned Radix) {
  switch (Radix) {
  case 2:
    return "binary";
  case 8:
    return "octal";
  case 10:
    return "decimal";
  case 16:
    return "hexadecimal";
  default:
    return "base-" + std::to_string(Radix);
  }
}
| 401 | |
| 402 | /// LexDigit: First character is [0-9]. |
| 403 | /// Local Label: [0-9][:] |
| 404 | /// Forward/Backward Label: [0-9][fb] |
| 405 | /// Binary integer: 0b[01]+ |
| 406 | /// Octal integer: 0[0-7]+ |
| 407 | /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] |
| 408 | /// Decimal integer: [1-9][0-9]* |
| 409 | AsmToken AsmLexer::LexDigit() { |
| 410 | // MASM-flavor binary integer: [01]+[yY] (if DefaultRadix < 16, [bByY]) |
| 411 | // MASM-flavor octal integer: [0-7]+[oOqQ] |
| 412 | // MASM-flavor decimal integer: [0-9]+[tT] (if DefaultRadix < 16, [dDtT]) |
| 413 | // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH] |
| 414 | if (LexMasmIntegers && isdigit(CurPtr[-1])) { |
| 415 | const char *FirstNonBinary = |
| 416 | (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr; |
| 417 | const char *FirstNonDecimal = |
| 418 | (CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr; |
| 419 | const char *OldCurPtr = CurPtr; |
| 420 | while (isHexDigit(C: *CurPtr)) { |
| 421 | switch (*CurPtr) { |
| 422 | default: |
| 423 | if (!FirstNonDecimal) { |
| 424 | FirstNonDecimal = CurPtr; |
| 425 | } |
| 426 | [[fallthrough]]; |
| 427 | case '9': |
| 428 | case '8': |
| 429 | case '7': |
| 430 | case '6': |
| 431 | case '5': |
| 432 | case '4': |
| 433 | case '3': |
| 434 | case '2': |
| 435 | if (!FirstNonBinary) { |
| 436 | FirstNonBinary = CurPtr; |
| 437 | } |
| 438 | break; |
| 439 | case '1': |
| 440 | case '0': |
| 441 | break; |
| 442 | } |
| 443 | ++CurPtr; |
| 444 | } |
| 445 | if (*CurPtr == '.') { |
| 446 | // MASM float literals (other than hex floats) always contain a ".", and |
| 447 | // are always written in decimal. |
| 448 | ++CurPtr; |
| 449 | return LexFloatLiteral(); |
| 450 | } |
| 451 | |
| 452 | if (LexMasmHexFloats && (*CurPtr == 'r' || *CurPtr == 'R')) { |
| 453 | ++CurPtr; |
| 454 | return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); |
| 455 | } |
| 456 | |
| 457 | unsigned Radix = 0; |
| 458 | if (*CurPtr == 'h' || *CurPtr == 'H') { |
| 459 | // hexadecimal number |
| 460 | ++CurPtr; |
| 461 | Radix = 16; |
| 462 | } else if (*CurPtr == 't' || *CurPtr == 'T') { |
| 463 | // decimal number |
| 464 | ++CurPtr; |
| 465 | Radix = 10; |
| 466 | } else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' || |
| 467 | *CurPtr == 'Q') { |
| 468 | // octal number |
| 469 | ++CurPtr; |
| 470 | Radix = 8; |
| 471 | } else if (*CurPtr == 'y' || *CurPtr == 'Y') { |
| 472 | // binary number |
| 473 | ++CurPtr; |
| 474 | Radix = 2; |
| 475 | } else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr && |
| 476 | DefaultRadix < 14 && |
| 477 | (*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) { |
| 478 | Radix = 10; |
| 479 | } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr && |
| 480 | DefaultRadix < 12 && |
| 481 | (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) { |
| 482 | Radix = 2; |
| 483 | } |
| 484 | |
| 485 | if (Radix) { |
| 486 | StringRef Result(TokStart, CurPtr - TokStart); |
| 487 | APInt Value(128, 0, true); |
| 488 | |
| 489 | if (Result.drop_back().getAsInteger(Radix, Result&: Value)) |
| 490 | return ReturnError(Loc: TokStart, Msg: "invalid " + radixName(Radix) + " number" ); |
| 491 | |
| 492 | // MSVC accepts and ignores type suffices on integer literals. |
| 493 | SkipIgnoredIntegerSuffix(CurPtr); |
| 494 | |
| 495 | return intToken(Ref: Result, Value); |
| 496 | } |
| 497 | |
| 498 | // default-radix integers, or floating point numbers, fall through |
| 499 | CurPtr = OldCurPtr; |
| 500 | } |
| 501 | |
| 502 | // MASM default-radix integers: [0-9a-fA-F]+ |
| 503 | // (All other integer literals have a radix specifier.) |
| 504 | if (LexMasmIntegers && UseMasmDefaultRadix) { |
| 505 | CurPtr = findLastDigit(CurPtr, DefaultRadix: 16); |
| 506 | StringRef Result(TokStart, CurPtr - TokStart); |
| 507 | |
| 508 | APInt Value(128, 0, true); |
| 509 | if (Result.getAsInteger(Radix: DefaultRadix, Result&: Value)) { |
| 510 | return ReturnError(Loc: TokStart, |
| 511 | Msg: "invalid " + radixName(Radix: DefaultRadix) + " number" ); |
| 512 | } |
| 513 | |
| 514 | return intToken(Ref: Result, Value); |
| 515 | } |
| 516 | |
| 517 | // Motorola hex integers: $[0-9a-fA-F]+ |
| 518 | if (LexMotorolaIntegers && CurPtr[-1] == '$') { |
| 519 | const char *NumStart = CurPtr; |
| 520 | while (isHexDigit(C: CurPtr[0])) |
| 521 | ++CurPtr; |
| 522 | |
| 523 | APInt Result(128, 0); |
| 524 | if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(Radix: 16, Result)) |
| 525 | return ReturnError(Loc: TokStart, Msg: "invalid hexadecimal number" ); |
| 526 | |
| 527 | return intToken(Ref: StringRef(TokStart, CurPtr - TokStart), Value&: Result); |
| 528 | } |
| 529 | |
| 530 | // Motorola binary integers: %[01]+ |
| 531 | if (LexMotorolaIntegers && CurPtr[-1] == '%') { |
| 532 | const char *NumStart = CurPtr; |
| 533 | while (*CurPtr == '0' || *CurPtr == '1') |
| 534 | ++CurPtr; |
| 535 | |
| 536 | APInt Result(128, 0); |
| 537 | if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(Radix: 2, Result)) |
| 538 | return ReturnError(Loc: TokStart, Msg: "invalid binary number" ); |
| 539 | |
| 540 | return intToken(Ref: StringRef(TokStart, CurPtr - TokStart), Value&: Result); |
| 541 | } |
| 542 | |
| 543 | // Decimal integer: [1-9][0-9]* |
| 544 | // HLASM-flavour decimal integer: [0-9][0-9]* |
| 545 | // FIXME: Later on, support for fb for HLASM has to be added in |
| 546 | // as they probably would be needed for asm goto |
| 547 | if (LexHLASMIntegers || CurPtr[-1] != '0' || CurPtr[0] == '.') { |
| 548 | unsigned Radix = doHexLookAhead(CurPtr, DefaultRadix: 10, LexHex: LexMasmIntegers); |
| 549 | |
| 550 | if (!LexHLASMIntegers) { |
| 551 | bool IsHex = Radix == 16; |
| 552 | // Check for floating point literals. |
| 553 | if (!IsHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) { |
| 554 | if (*CurPtr == '.') |
| 555 | ++CurPtr; |
| 556 | return LexFloatLiteral(); |
| 557 | } |
| 558 | } |
| 559 | |
| 560 | StringRef Result(TokStart, CurPtr - TokStart); |
| 561 | |
| 562 | APInt Value(128, 0, true); |
| 563 | if (Result.getAsInteger(Radix, Result&: Value)) |
| 564 | return ReturnError(Loc: TokStart, Msg: "invalid " + radixName(Radix) + " number" ); |
| 565 | |
| 566 | if (!LexHLASMIntegers) |
| 567 | // The darwin/x86 (and x86-64) assembler accepts and ignores type |
| 568 | // suffices on integer literals. |
| 569 | SkipIgnoredIntegerSuffix(CurPtr); |
| 570 | |
| 571 | return intToken(Ref: Result, Value); |
| 572 | } |
| 573 | |
| 574 | if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) { |
| 575 | ++CurPtr; |
| 576 | // See if we actually have "0b" as part of something like "jmp 0b\n" |
| 577 | if (!isDigit(C: CurPtr[0])) { |
| 578 | --CurPtr; |
| 579 | StringRef Result(TokStart, CurPtr - TokStart); |
| 580 | return AsmToken(AsmToken::Integer, Result, 0); |
| 581 | } |
| 582 | const char *NumStart = CurPtr; |
| 583 | while (CurPtr[0] == '0' || CurPtr[0] == '1') |
| 584 | ++CurPtr; |
| 585 | |
| 586 | // Requires at least one binary digit. |
| 587 | if (CurPtr == NumStart) |
| 588 | return ReturnError(Loc: TokStart, Msg: "invalid binary number" ); |
| 589 | |
| 590 | StringRef Result(TokStart, CurPtr - TokStart); |
| 591 | |
| 592 | APInt Value(128, 0, true); |
| 593 | if (Result.substr(Start: 2).getAsInteger(Radix: 2, Result&: Value)) |
| 594 | return ReturnError(Loc: TokStart, Msg: "invalid binary number" ); |
| 595 | |
| 596 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
| 597 | // suffixes on integer literals. |
| 598 | SkipIgnoredIntegerSuffix(CurPtr); |
| 599 | |
| 600 | return intToken(Ref: Result, Value); |
| 601 | } |
| 602 | |
| 603 | if ((*CurPtr == 'x') || (*CurPtr == 'X')) { |
| 604 | ++CurPtr; |
| 605 | const char *NumStart = CurPtr; |
| 606 | while (isHexDigit(C: CurPtr[0])) |
| 607 | ++CurPtr; |
| 608 | |
| 609 | // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be |
| 610 | // diagnosed by LexHexFloatLiteral). |
| 611 | if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P') |
| 612 | return LexHexFloatLiteral(NoIntDigits: NumStart == CurPtr); |
| 613 | |
| 614 | // Otherwise requires at least one hex digit. |
| 615 | if (CurPtr == NumStart) |
| 616 | return ReturnError(Loc: CurPtr-2, Msg: "invalid hexadecimal number" ); |
| 617 | |
| 618 | APInt Result(128, 0); |
| 619 | if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(Radix: 0, Result)) |
| 620 | return ReturnError(Loc: TokStart, Msg: "invalid hexadecimal number" ); |
| 621 | |
| 622 | // Consume the optional [hH]. |
| 623 | if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H')) |
| 624 | ++CurPtr; |
| 625 | |
| 626 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
| 627 | // suffixes on integer literals. |
| 628 | SkipIgnoredIntegerSuffix(CurPtr); |
| 629 | |
| 630 | return intToken(Ref: StringRef(TokStart, CurPtr - TokStart), Value&: Result); |
| 631 | } |
| 632 | |
| 633 | // Either octal or hexadecimal. |
| 634 | APInt Value(128, 0, true); |
| 635 | unsigned Radix = doHexLookAhead(CurPtr, DefaultRadix: 8, LexHex: LexMasmIntegers); |
| 636 | StringRef Result(TokStart, CurPtr - TokStart); |
| 637 | if (Result.getAsInteger(Radix, Result&: Value)) |
| 638 | return ReturnError(Loc: TokStart, Msg: "invalid " + radixName(Radix) + " number" ); |
| 639 | |
| 640 | // Consume the [hH]. |
| 641 | if (Radix == 16) |
| 642 | ++CurPtr; |
| 643 | |
| 644 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
| 645 | // suffixes on integer literals. |
| 646 | SkipIgnoredIntegerSuffix(CurPtr); |
| 647 | |
| 648 | return intToken(Ref: Result, Value); |
| 649 | } |
| 650 | |
| 651 | /// LexSingleQuote: Integer: 'b' |
| 652 | AsmToken AsmLexer::LexSingleQuote() { |
| 653 | int CurChar = getNextChar(); |
| 654 | |
| 655 | if (LexHLASMStrings) |
| 656 | return ReturnError(Loc: TokStart, Msg: "invalid usage of character literals" ); |
| 657 | |
| 658 | if (LexMasmStrings) { |
| 659 | while (CurChar != EOF) { |
| 660 | if (CurChar != '\'') { |
| 661 | CurChar = getNextChar(); |
| 662 | } else if (peekNextChar() == '\'') { |
| 663 | // In MASM single-quote strings, doubled single-quotes mean an escaped |
| 664 | // single quote, so should be lexed in. |
| 665 | (void)getNextChar(); |
| 666 | CurChar = getNextChar(); |
| 667 | } else { |
| 668 | break; |
| 669 | } |
| 670 | } |
| 671 | if (CurChar == EOF) |
| 672 | return ReturnError(Loc: TokStart, Msg: "unterminated string constant" ); |
| 673 | return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); |
| 674 | } |
| 675 | |
| 676 | if (CurChar == '\\') |
| 677 | CurChar = getNextChar(); |
| 678 | |
| 679 | if (CurChar == EOF) |
| 680 | return ReturnError(Loc: TokStart, Msg: "unterminated single quote" ); |
| 681 | |
| 682 | CurChar = getNextChar(); |
| 683 | |
| 684 | if (CurChar != '\'') |
| 685 | return ReturnError(Loc: TokStart, Msg: "single quote way too long" ); |
| 686 | |
| 687 | // The idea here being that 'c' is basically just an integral |
| 688 | // constant. |
| 689 | StringRef Res = StringRef(TokStart,CurPtr - TokStart); |
| 690 | long long Value; |
| 691 | |
| 692 | if (Res.starts_with(Prefix: "\'\\" )) { |
| 693 | char theChar = Res[2]; |
| 694 | switch (theChar) { |
| 695 | default: Value = theChar; break; |
| 696 | case '\'': Value = '\''; break; |
| 697 | case 't': Value = '\t'; break; |
| 698 | case 'n': Value = '\n'; break; |
| 699 | case 'b': Value = '\b'; break; |
| 700 | case 'f': Value = '\f'; break; |
| 701 | case 'r': Value = '\r'; break; |
| 702 | } |
| 703 | } else |
| 704 | Value = TokStart[1]; |
| 705 | |
| 706 | return AsmToken(AsmToken::Integer, Res, Value); |
| 707 | } |
| 708 | |
| 709 | /// LexQuote: String: "..." |
| 710 | AsmToken AsmLexer::LexQuote() { |
| 711 | int CurChar = getNextChar(); |
| 712 | if (LexHLASMStrings) |
| 713 | return ReturnError(Loc: TokStart, Msg: "invalid usage of string literals" ); |
| 714 | |
| 715 | if (LexMasmStrings) { |
| 716 | while (CurChar != EOF) { |
| 717 | if (CurChar != '"') { |
| 718 | CurChar = getNextChar(); |
| 719 | } else if (peekNextChar() == '"') { |
| 720 | // In MASM double-quoted strings, doubled double-quotes mean an escaped |
| 721 | // double quote, so should be lexed in. |
| 722 | (void)getNextChar(); |
| 723 | CurChar = getNextChar(); |
| 724 | } else { |
| 725 | break; |
| 726 | } |
| 727 | } |
| 728 | if (CurChar == EOF) |
| 729 | return ReturnError(Loc: TokStart, Msg: "unterminated string constant" ); |
| 730 | return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); |
| 731 | } |
| 732 | |
| 733 | while (CurChar != '"') { |
| 734 | if (CurChar == '\\') { |
| 735 | // Allow \", etc. |
| 736 | CurChar = getNextChar(); |
| 737 | } |
| 738 | |
| 739 | if (CurChar == EOF) |
| 740 | return ReturnError(Loc: TokStart, Msg: "unterminated string constant" ); |
| 741 | |
| 742 | CurChar = getNextChar(); |
| 743 | } |
| 744 | |
| 745 | return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); |
| 746 | } |
| 747 | |
| 748 | StringRef AsmLexer::LexUntilEndOfStatement() { |
| 749 | TokStart = CurPtr; |
| 750 | |
| 751 | while (!isAtStartOfComment(Ptr: CurPtr) && // Start of line comment. |
| 752 | !isAtStatementSeparator(Ptr: CurPtr) && // End of statement marker. |
| 753 | *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { |
| 754 | ++CurPtr; |
| 755 | } |
| 756 | return StringRef(TokStart, CurPtr-TokStart); |
| 757 | } |
| 758 | |
| 759 | StringRef AsmLexer::LexUntilEndOfLine() { |
| 760 | TokStart = CurPtr; |
| 761 | |
| 762 | while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { |
| 763 | ++CurPtr; |
| 764 | } |
| 765 | return StringRef(TokStart, CurPtr-TokStart); |
| 766 | } |
| 767 | |
| 768 | size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf, |
| 769 | bool ShouldSkipSpace) { |
| 770 | SaveAndRestore SavedTokenStart(TokStart); |
| 771 | SaveAndRestore SavedCurPtr(CurPtr); |
| 772 | SaveAndRestore SavedAtStartOfLine(IsAtStartOfLine); |
| 773 | SaveAndRestore SavedAtStartOfStatement(IsAtStartOfStatement); |
| 774 | SaveAndRestore SavedSkipSpace(SkipSpace, ShouldSkipSpace); |
| 775 | SaveAndRestore SavedIsPeeking(IsPeeking, true); |
| 776 | std::string SavedErr = getErr(); |
| 777 | SMLoc SavedErrLoc = getErrLoc(); |
| 778 | |
| 779 | size_t ReadCount; |
| 780 | for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) { |
| 781 | AsmToken Token = LexToken(); |
| 782 | |
| 783 | Buf[ReadCount] = Token; |
| 784 | |
| 785 | if (Token.is(K: AsmToken::Eof)) { |
| 786 | ReadCount++; |
| 787 | break; |
| 788 | } |
| 789 | } |
| 790 | |
| 791 | SetError(errLoc: SavedErrLoc, err: SavedErr); |
| 792 | return ReadCount; |
| 793 | } |
| 794 | |
| 795 | bool AsmLexer::(const char *Ptr) { |
| 796 | if (MAI.isHLASM() && !IsAtStartOfStatement) |
| 797 | return false; |
| 798 | |
| 799 | StringRef = MAI.getCommentString(); |
| 800 | |
| 801 | if (CommentString.size() == 1) |
| 802 | return CommentString[0] == Ptr[0]; |
| 803 | |
| 804 | // Allow # preprocessor comments also be counted as comments for "##" cases |
| 805 | if (CommentString[1] == '#') |
| 806 | return CommentString[0] == Ptr[0]; |
| 807 | |
| 808 | return strncmp(s1: Ptr, s2: CommentString.data(), n: CommentString.size()) == 0; |
| 809 | } |
| 810 | |
| 811 | bool AsmLexer::isAtStatementSeparator(const char *Ptr) { |
| 812 | return strncmp(s1: Ptr, s2: MAI.getSeparatorString(), |
| 813 | n: strlen(s: MAI.getSeparatorString())) == 0; |
| 814 | } |
| 815 | |
| 816 | AsmToken AsmLexer::LexToken() { |
| 817 | TokStart = CurPtr; |
| 818 | // This always consumes at least one character. |
| 819 | int CurChar = getNextChar(); |
| 820 | |
| 821 | if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) { |
| 822 | // If this starts with a '#', this may be a cpp |
| 823 | // hash directive and otherwise a line comment. |
| 824 | AsmToken TokenBuf[2]; |
| 825 | MutableArrayRef<AsmToken> Buf(TokenBuf, 2); |
| 826 | size_t num = peekTokens(Buf, ShouldSkipSpace: true); |
| 827 | // There cannot be a space preceding this |
| 828 | if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(K: AsmToken::Integer) && |
| 829 | TokenBuf[1].is(K: AsmToken::String)) { |
| 830 | CurPtr = TokStart; // reset curPtr; |
| 831 | StringRef s = LexUntilEndOfLine(); |
| 832 | UnLex(Token: TokenBuf[1]); |
| 833 | UnLex(Token: TokenBuf[0]); |
| 834 | return AsmToken(AsmToken::HashDirective, s); |
| 835 | } |
| 836 | |
| 837 | if (MAI.shouldAllowAdditionalComments()) |
| 838 | return LexLineComment(); |
| 839 | } |
| 840 | |
| 841 | if (isAtStartOfComment(Ptr: TokStart)) { |
| 842 | StringRef = MAI.getCommentString(); |
| 843 | // For multi-char comment strings, advance CurPtr only if we matched the |
| 844 | // full string. This stops us from accidentally eating the newline if the |
| 845 | // current line ends in a single comment char. |
| 846 | if (CommentString.size() > 1 && |
| 847 | StringRef(TokStart, CommentString.size()) == CommentString) { |
| 848 | CurPtr += CommentString.size() - 1; |
| 849 | } |
| 850 | return LexLineComment(); |
| 851 | } |
| 852 | |
| 853 | if (isAtStatementSeparator(Ptr: TokStart)) { |
| 854 | CurPtr += strlen(s: MAI.getSeparatorString()) - 1; |
| 855 | IsAtStartOfLine = true; |
| 856 | IsAtStartOfStatement = true; |
| 857 | return AsmToken(AsmToken::EndOfStatement, |
| 858 | StringRef(TokStart, strlen(s: MAI.getSeparatorString()))); |
| 859 | } |
| 860 | |
| 861 | // If we're missing a newline at EOF, make sure we still get an |
| 862 | // EndOfStatement token before the Eof token. |
| 863 | if (CurChar == EOF && !IsAtStartOfStatement && EndStatementAtEOF) { |
| 864 | IsAtStartOfLine = true; |
| 865 | IsAtStartOfStatement = true; |
| 866 | return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0)); |
| 867 | } |
| 868 | IsAtStartOfLine = false; |
| 869 | bool OldIsAtStartOfStatement = IsAtStartOfStatement; |
| 870 | IsAtStartOfStatement = false; |
| 871 | switch (CurChar) { |
| 872 | default: |
| 873 | // Handle identifier: [a-zA-Z_.$@#?][a-zA-Z0-9_.$@#?]* |
| 874 | // Whether or not the lexer accepts '$', '@', '#' and '?' at the start of |
| 875 | // an identifier is target-dependent. These characters are handled in the |
| 876 | // respective switch cases. |
| 877 | if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') |
| 878 | return LexIdentifier(); |
| 879 | |
| 880 | // Unknown character, emit an error. |
| 881 | return ReturnError(Loc: TokStart, Msg: "invalid character in input" ); |
| 882 | case EOF: |
| 883 | if (EndStatementAtEOF) { |
| 884 | IsAtStartOfLine = true; |
| 885 | IsAtStartOfStatement = true; |
| 886 | } |
| 887 | return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); |
| 888 | case 0: |
| 889 | case ' ': |
| 890 | case '\t': |
| 891 | IsAtStartOfStatement = OldIsAtStartOfStatement; |
| 892 | while (*CurPtr == ' ' || *CurPtr == '\t') |
| 893 | CurPtr++; |
| 894 | if (SkipSpace) |
| 895 | return LexToken(); // Ignore whitespace. |
| 896 | else |
| 897 | return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart)); |
| 898 | case '\r': { |
| 899 | IsAtStartOfLine = true; |
| 900 | IsAtStartOfStatement = true; |
| 901 | // If this is a CR followed by LF, treat that as one token. |
| 902 | if (CurPtr != CurBuf.end() && *CurPtr == '\n') |
| 903 | ++CurPtr; |
| 904 | return AsmToken(AsmToken::EndOfStatement, |
| 905 | StringRef(TokStart, CurPtr - TokStart)); |
| 906 | } |
| 907 | case '\n': |
| 908 | IsAtStartOfLine = true; |
| 909 | IsAtStartOfStatement = true; |
| 910 | return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); |
| 911 | case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); |
| 912 | case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); |
| 913 | case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); |
| 914 | case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); |
| 915 | case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); |
| 916 | case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); |
| 917 | case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); |
| 918 | case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); |
| 919 | case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); |
| 920 | case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); |
| 921 | case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); |
| 922 | case '$': { |
| 923 | if (LexMotorolaIntegers && isHexDigit(C: *CurPtr)) |
| 924 | return LexDigit(); |
| 925 | if (MAI.doesAllowDollarAtStartOfIdentifier()) |
| 926 | return LexIdentifier(); |
| 927 | return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); |
| 928 | } |
| 929 | case '@': |
| 930 | if (MAI.doesAllowAtAtStartOfIdentifier()) |
| 931 | return LexIdentifier(); |
| 932 | return AsmToken(AsmToken::At, StringRef(TokStart, 1)); |
| 933 | case '#': |
| 934 | if (MAI.isHLASM()) |
| 935 | return LexIdentifier(); |
| 936 | return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); |
| 937 | case '?': |
| 938 | if (MAI.doesAllowQuestionAtStartOfIdentifier()) |
| 939 | return LexIdentifier(); |
| 940 | return AsmToken(AsmToken::Question, StringRef(TokStart, 1)); |
| 941 | case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1)); |
| 942 | case '=': |
| 943 | if (*CurPtr == '=') { |
| 944 | ++CurPtr; |
| 945 | return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); |
| 946 | } |
| 947 | return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); |
| 948 | case '-': |
| 949 | if (*CurPtr == '>') { |
| 950 | ++CurPtr; |
| 951 | return AsmToken(AsmToken::MinusGreater, StringRef(TokStart, 2)); |
| 952 | } |
| 953 | return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); |
| 954 | case '|': |
| 955 | if (*CurPtr == '|') { |
| 956 | ++CurPtr; |
| 957 | return AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); |
| 958 | } |
| 959 | return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); |
| 960 | case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); |
| 961 | case '&': |
| 962 | if (*CurPtr == '&') { |
| 963 | ++CurPtr; |
| 964 | return AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); |
| 965 | } |
| 966 | return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); |
| 967 | case '!': |
| 968 | if (*CurPtr == '=') { |
| 969 | ++CurPtr; |
| 970 | return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); |
| 971 | } |
| 972 | return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); |
| 973 | case '%': |
| 974 | if (LexMotorolaIntegers && (*CurPtr == '0' || *CurPtr == '1')) { |
| 975 | return LexDigit(); |
| 976 | } |
| 977 | return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); |
| 978 | case '/': |
| 979 | IsAtStartOfStatement = OldIsAtStartOfStatement; |
| 980 | return LexSlash(); |
| 981 | case '\'': return LexSingleQuote(); |
| 982 | case '"': return LexQuote(); |
| 983 | case '0': case '1': case '2': case '3': case '4': |
| 984 | case '5': case '6': case '7': case '8': case '9': |
| 985 | return LexDigit(); |
| 986 | case '<': |
| 987 | switch (*CurPtr) { |
| 988 | case '<': |
| 989 | ++CurPtr; |
| 990 | return AsmToken(AsmToken::LessLess, StringRef(TokStart, 2)); |
| 991 | case '=': |
| 992 | ++CurPtr; |
| 993 | return AsmToken(AsmToken::LessEqual, StringRef(TokStart, 2)); |
| 994 | case '>': |
| 995 | ++CurPtr; |
| 996 | return AsmToken(AsmToken::LessGreater, StringRef(TokStart, 2)); |
| 997 | default: |
| 998 | return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); |
| 999 | } |
| 1000 | case '>': |
| 1001 | switch (*CurPtr) { |
| 1002 | case '>': |
| 1003 | ++CurPtr; |
| 1004 | return AsmToken(AsmToken::GreaterGreater, StringRef(TokStart, 2)); |
| 1005 | case '=': |
| 1006 | ++CurPtr; |
| 1007 | return AsmToken(AsmToken::GreaterEqual, StringRef(TokStart, 2)); |
| 1008 | default: |
| 1009 | return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); |
| 1010 | } |
| 1011 | |
| 1012 | // TODO: Quoted identifiers (objc methods etc) |
| 1013 | // local labels: [0-9][:] |
| 1014 | // Forward/backward labels: [0-9][fb] |
| 1015 | // Integers, fp constants, character constants. |
| 1016 | } |
| 1017 | } |
| 1018 | |