1//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implement the Lexer for .ll files.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/AsmParser/LLLexer.h"
14#include "llvm/ADT/APInt.h"
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/StringExtras.h"
17#include "llvm/ADT/Twine.h"
18#include "llvm/IR/DerivedTypes.h"
19#include "llvm/IR/Instruction.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/SourceMgr.h"
22#include <cassert>
23#include <cctype>
24#include <cstdio>
25
26using namespace llvm;
27
28// Both the lexer and parser can issue error messages. If the lexer issues a
29// lexer error, since we do not terminate execution immediately, usually that
30// is followed by the parser issuing a parser error. However, the error issued
31// by the lexer is more relevant in that case as opposed to potentially more
32// generic parser error. So instead of always recording the last error message
33// use the `Priority` to establish a priority, with Lexer > Parser > None. We
34// record the issued message only if the message has same or higher priority
35// than the existing one. This prevents lexer errors from being overwritten by
36// parser errors.
37void LLLexer::Error(LocTy ErrorLoc, const Twine &Msg,
38 LLLexer::ErrorPriority Priority) {
39 if (Priority < ErrorInfo.Priority)
40 return;
41 ErrorInfo.Error = SM.GetMessage(Loc: ErrorLoc, Kind: SourceMgr::DK_Error, Msg);
42 ErrorInfo.Priority = Priority;
43}
44
45void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {
46 SM.PrintMessage(Loc: WarningLoc, Kind: SourceMgr::DK_Warning, Msg);
47}
48
49//===----------------------------------------------------------------------===//
50// Helper functions.
51//===----------------------------------------------------------------------===//
52
53// atoull - Convert an ascii string of decimal digits into the unsigned long
54// long representation... this does not have to do input error checking,
55// because we know that the input will be matched by a suitable regex...
56//
57uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
58 uint64_t Result = 0;
59 for (; Buffer != End; Buffer++) {
60 uint64_t OldRes = Result;
61 Result *= 10;
62 Result += *Buffer-'0';
63 if (Result < OldRes) { // overflow detected.
64 LexError(Msg: "constant bigger than 64 bits detected");
65 return 0;
66 }
67 }
68 return Result;
69}
70
71uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
72 uint64_t Result = 0;
73 for (; Buffer != End; ++Buffer) {
74 uint64_t OldRes = Result;
75 Result *= 16;
76 Result += hexDigitValue(C: *Buffer);
77
78 if (Result < OldRes) { // overflow detected.
79 LexError(Msg: "constant bigger than 64 bits detected");
80 return 0;
81 }
82 }
83 return Result;
84}
85
86void LLLexer::HexToIntPair(const char *Buffer, const char *End,
87 uint64_t Pair[2]) {
88 Pair[0] = 0;
89 if (End - Buffer >= 16) {
90 for (int i = 0; i < 16; i++, Buffer++) {
91 assert(Buffer != End);
92 Pair[0] *= 16;
93 Pair[0] += hexDigitValue(C: *Buffer);
94 }
95 }
96 Pair[1] = 0;
97 for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
98 Pair[1] *= 16;
99 Pair[1] += hexDigitValue(C: *Buffer);
100 }
101 if (Buffer != End)
102 LexError(Msg: "constant bigger than 128 bits detected");
103}
104
105/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
106/// { low64, high16 } as usual for an APInt.
107void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
108 uint64_t Pair[2]) {
109 Pair[1] = 0;
110 for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
111 assert(Buffer != End);
112 Pair[1] *= 16;
113 Pair[1] += hexDigitValue(C: *Buffer);
114 }
115 Pair[0] = 0;
116 for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
117 Pair[0] *= 16;
118 Pair[0] += hexDigitValue(C: *Buffer);
119 }
120 if (Buffer != End)
121 LexError(Msg: "constant bigger than 128 bits detected");
122}
123
124// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
125// appropriate character.
126static void UnEscapeLexed(std::string &Str) {
127 if (Str.empty()) return;
128
129 char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
130 char *BOut = Buffer;
131 for (char *BIn = Buffer; BIn != EndBuffer; ) {
132 if (BIn[0] == '\\') {
133 if (BIn < EndBuffer-1 && BIn[1] == '\\') {
134 *BOut++ = '\\'; // Two \ becomes one
135 BIn += 2;
136 } else if (BIn < EndBuffer-2 &&
137 isxdigit(static_cast<unsigned char>(BIn[1])) &&
138 isxdigit(static_cast<unsigned char>(BIn[2]))) {
139 *BOut = hexDigitValue(C: BIn[1]) * 16 + hexDigitValue(C: BIn[2]);
140 BIn += 3; // Skip over handled chars
141 ++BOut;
142 } else {
143 *BOut++ = *BIn++;
144 }
145 } else {
146 *BOut++ = *BIn++;
147 }
148 }
149 Str.resize(n: BOut-Buffer);
150}
151
/// isLabelChar - Return true for [-a-zA-Z$._0-9].
static bool isLabelChar(char C) {
  switch (C) {
  case '-':
  case '$':
  case '.':
  case '_':
    return true;
  default:
    return isalnum(static_cast<unsigned char>(C)) != 0;
  }
}
157
158/// isLabelTail - Return true if this pointer points to a valid end of a label.
159static const char *isLabelTail(const char *CurPtr) {
160 while (true) {
161 if (CurPtr[0] == ':') return CurPtr+1;
162 if (!isLabelChar(C: CurPtr[0])) return nullptr;
163 ++CurPtr;
164 }
165}
166
167//===----------------------------------------------------------------------===//
168// Lexer definition.
169//===----------------------------------------------------------------------===//
170
171LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
172 LLVMContext &C)
173 : CurBuf(StartBuf), ErrorInfo(Err), SM(SM), Context(C) {
174 CurPtr = CurBuf.begin();
175}
176
177int LLLexer::getNextChar() {
178 char CurChar = *CurPtr++;
179 switch (CurChar) {
180 default: return (unsigned char)CurChar;
181 case 0:
182 // A nul character in the stream is either the end of the current buffer or
183 // a random nul in the file. Disambiguate that here.
184 if (CurPtr-1 != CurBuf.end())
185 return 0; // Just whitespace.
186
187 // Otherwise, return end of file.
188 --CurPtr; // Another call to lex will return EOF again.
189 return EOF;
190 }
191}
192
193lltok::Kind LLLexer::LexToken() {
194 // Set token end to next location, since the end is exclusive.
195 PrevTokEnd = CurPtr;
196 while (true) {
197 TokStart = CurPtr;
198
199 int CurChar = getNextChar();
200 switch (CurChar) {
201 default:
202 // Handle letters: [a-zA-Z_]
203 if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')
204 return LexIdentifier();
205 return lltok::Error;
206 case EOF: return lltok::Eof;
207 case 0:
208 case ' ':
209 case '\t':
210 case '\n':
211 case '\r':
212 // Ignore whitespace.
213 continue;
214 case '+': return LexPositive();
215 case '@': return LexAt();
216 case '$': return LexDollar();
217 case '%': return LexPercent();
218 case '"': return LexQuote();
219 case '.':
220 if (const char *Ptr = isLabelTail(CurPtr)) {
221 CurPtr = Ptr;
222 StrVal.assign(first: TokStart, last: CurPtr-1);
223 return lltok::LabelStr;
224 }
225 if (CurPtr[0] == '.' && CurPtr[1] == '.') {
226 CurPtr += 2;
227 return lltok::dotdotdot;
228 }
229 return lltok::Error;
230 case ';':
231 SkipLineComment();
232 continue;
233 case '!': return LexExclaim();
234 case '^':
235 return LexCaret();
236 case ':':
237 return lltok::colon;
238 case '#': return LexHash();
239 case '0': case '1': case '2': case '3': case '4':
240 case '5': case '6': case '7': case '8': case '9':
241 case '-':
242 return LexDigitOrNegative();
243 case '=': return lltok::equal;
244 case '[': return lltok::lsquare;
245 case ']': return lltok::rsquare;
246 case '{': return lltok::lbrace;
247 case '}': return lltok::rbrace;
248 case '<': return lltok::less;
249 case '>': return lltok::greater;
250 case '(': return lltok::lparen;
251 case ')': return lltok::rparen;
252 case ',': return lltok::comma;
253 case '*': return lltok::star;
254 case '|': return lltok::bar;
255 case '/':
256 if (getNextChar() != '*')
257 return lltok::Error;
258 if (SkipCComment())
259 return lltok::Error;
260 continue;
261 }
262 }
263}
264
265void LLLexer::SkipLineComment() {
266 while (true) {
267 if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
268 return;
269 }
270}
271
272/// This skips C-style /**/ comments. Returns true if there
273/// was an error.
274bool LLLexer::SkipCComment() {
275 while (true) {
276 int CurChar = getNextChar();
277 switch (CurChar) {
278 case EOF:
279 LexError(Msg: "unterminated comment");
280 return true;
281 case '*':
282 // End of the comment?
283 CurChar = getNextChar();
284 if (CurChar == '/')
285 return false;
286 if (CurChar == EOF) {
287 LexError(Msg: "unterminated comment");
288 return true;
289 }
290 }
291 }
292}
293
294/// Lex all tokens that start with an @ character.
295/// GlobalVar @\"[^\"]*\"
296/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*
297/// GlobalVarID @[0-9]+
298lltok::Kind LLLexer::LexAt() {
299 return LexVar(Var: lltok::GlobalVar, VarID: lltok::GlobalID);
300}
301
302lltok::Kind LLLexer::LexDollar() {
303 if (const char *Ptr = isLabelTail(CurPtr: TokStart)) {
304 CurPtr = Ptr;
305 StrVal.assign(first: TokStart, last: CurPtr - 1);
306 return lltok::LabelStr;
307 }
308
309 // Handle DollarStringConstant: $\"[^\"]*\"
310 if (CurPtr[0] == '"') {
311 ++CurPtr;
312
313 while (true) {
314 int CurChar = getNextChar();
315
316 if (CurChar == EOF) {
317 LexError(Msg: "end of file in COMDAT variable name");
318 return lltok::Error;
319 }
320 if (CurChar == '"') {
321 StrVal.assign(first: TokStart + 2, last: CurPtr - 1);
322 UnEscapeLexed(Str&: StrVal);
323 if (StringRef(StrVal).contains(C: 0)) {
324 LexError(Msg: "NUL character is not allowed in names");
325 return lltok::Error;
326 }
327 return lltok::ComdatVar;
328 }
329 }
330 }
331
332 // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]*
333 if (ReadVarName())
334 return lltok::ComdatVar;
335
336 return lltok::Error;
337}
338
339/// ReadString - Read a string until the closing quote.
340lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
341 const char *Start = CurPtr;
342 while (true) {
343 int CurChar = getNextChar();
344
345 if (CurChar == EOF) {
346 LexError(Msg: "end of file in string constant");
347 return lltok::Error;
348 }
349 if (CurChar == '"') {
350 StrVal.assign(first: Start, last: CurPtr-1);
351 UnEscapeLexed(Str&: StrVal);
352 return kind;
353 }
354 }
355}
356
357/// ReadVarName - Read the rest of a token containing a variable name.
358bool LLLexer::ReadVarName() {
359 const char *NameStart = CurPtr;
360 if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
361 CurPtr[0] == '-' || CurPtr[0] == '$' ||
362 CurPtr[0] == '.' || CurPtr[0] == '_') {
363 ++CurPtr;
364 while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
365 CurPtr[0] == '-' || CurPtr[0] == '$' ||
366 CurPtr[0] == '.' || CurPtr[0] == '_')
367 ++CurPtr;
368
369 StrVal.assign(first: NameStart, last: CurPtr);
370 return true;
371 }
372 return false;
373}
374
375// Lex an ID: [0-9]+. On success, the ID is stored in UIntVal and Token is
376// returned, otherwise the Error token is returned.
377lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
378 if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
379 return lltok::Error;
380
381 for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
382 /*empty*/;
383
384 uint64_t Val = atoull(Buffer: TokStart + 1, End: CurPtr);
385 if ((unsigned)Val != Val)
386 LexError(Msg: "invalid value number (too large)");
387 UIntVal = unsigned(Val);
388 return Token;
389}
390
391lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
392 // Handle StringConstant: \"[^\"]*\"
393 if (CurPtr[0] == '"') {
394 ++CurPtr;
395
396 while (true) {
397 int CurChar = getNextChar();
398
399 if (CurChar == EOF) {
400 LexError(Msg: "end of file in global variable name");
401 return lltok::Error;
402 }
403 if (CurChar == '"') {
404 StrVal.assign(first: TokStart+2, last: CurPtr-1);
405 UnEscapeLexed(Str&: StrVal);
406 if (StringRef(StrVal).contains(C: 0)) {
407 LexError(Msg: "NUL character is not allowed in names");
408 return lltok::Error;
409 }
410 return Var;
411 }
412 }
413 }
414
415 // Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]*
416 if (ReadVarName())
417 return Var;
418
419 // Handle VarID: [0-9]+
420 return LexUIntID(Token: VarID);
421}
422
423/// Lex all tokens that start with a % character.
424/// LocalVar ::= %\"[^\"]*\"
425/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
426/// LocalVarID ::= %[0-9]+
427lltok::Kind LLLexer::LexPercent() {
428 return LexVar(Var: lltok::LocalVar, VarID: lltok::LocalVarID);
429}
430
431/// Lex all tokens that start with a " character.
432/// QuoteLabel "[^"]+":
433/// StringConstant "[^"]*"
434lltok::Kind LLLexer::LexQuote() {
435 lltok::Kind kind = ReadString(kind: lltok::StringConstant);
436 if (kind == lltok::Error || kind == lltok::Eof)
437 return kind;
438
439 if (CurPtr[0] == ':') {
440 ++CurPtr;
441 if (StringRef(StrVal).contains(C: 0)) {
442 LexError(Msg: "NUL character is not allowed in names");
443 kind = lltok::Error;
444 } else {
445 kind = lltok::LabelStr;
446 }
447 }
448
449 return kind;
450}
451
452/// Lex all tokens that start with a ! character.
453/// !foo
454/// !
455lltok::Kind LLLexer::LexExclaim() {
456 // Lex a metadata name as a MetadataVar.
457 if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
458 CurPtr[0] == '-' || CurPtr[0] == '$' ||
459 CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
460 ++CurPtr;
461 while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
462 CurPtr[0] == '-' || CurPtr[0] == '$' ||
463 CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
464 ++CurPtr;
465
466 StrVal.assign(first: TokStart+1, last: CurPtr); // Skip !
467 UnEscapeLexed(Str&: StrVal);
468 return lltok::MetadataVar;
469 }
470 return lltok::exclaim;
471}
472
473/// Lex all tokens that start with a ^ character.
474/// SummaryID ::= ^[0-9]+
475lltok::Kind LLLexer::LexCaret() {
476 // Handle SummaryID: ^[0-9]+
477 return LexUIntID(Token: lltok::SummaryID);
478}
479
480/// Lex all tokens that start with a # character.
481/// AttrGrpID ::= #[0-9]+
482/// Hash ::= #
483lltok::Kind LLLexer::LexHash() {
484 // Handle AttrGrpID: #[0-9]+
485 if (isdigit(static_cast<unsigned char>(CurPtr[0])))
486 return LexUIntID(Token: lltok::AttrGrpID);
487 return lltok::hash;
488}
489
490/// Lex a label, integer or byte types, keyword, or hexadecimal integer
491/// constant.
492/// Label [-a-zA-Z$._0-9]+:
493/// ByteType b[0-9]+
494/// IntegerType i[0-9]+
495/// Keyword sdiv, float, ...
496/// HexIntConstant [us]0x[0-9A-Fa-f]+
497/// HexFloatConstant f0x[0-9A-Fa-f]+
498lltok::Kind LLLexer::LexIdentifier() {
499 const char *StartChar = CurPtr;
500 const char IntOrByteIdentifier = CurPtr[-1];
501 const char *IntOrByteEnd =
502 (IntOrByteIdentifier == 'i' || IntOrByteIdentifier == 'b') ? nullptr
503 : StartChar;
504 const char *KeywordEnd = nullptr;
505
506 for (; isLabelChar(C: *CurPtr); ++CurPtr) {
507 // If we decide this is a byte or an integer, remember the end of the
508 // sequence.
509 if (!IntOrByteEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
510 IntOrByteEnd = CurPtr;
511 if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) &&
512 *CurPtr != '_')
513 KeywordEnd = CurPtr;
514 }
515
516 // If we stopped due to a colon, unless we were directed to ignore it,
517 // this really is a label.
518 if (!IgnoreColonInIdentifiers && *CurPtr == ':') {
519 StrVal.assign(first: StartChar-1, last: CurPtr++);
520 return lltok::LabelStr;
521 }
522
523 // Otherwise, this wasn't a label. If this was valid as a byte or an integer
524 // type, return it.
525 if (!IntOrByteEnd)
526 IntOrByteEnd = CurPtr;
527 if (IntOrByteEnd != StartChar) {
528 CurPtr = IntOrByteEnd;
529 uint64_t NumBits = atoull(Buffer: StartChar, End: CurPtr);
530 if (NumBits < IntegerType::MIN_INT_BITS ||
531 NumBits > IntegerType::MAX_INT_BITS) {
532 LexError(Msg: "bitwidth for integer or byte type out of range");
533 return lltok::Error;
534 }
535 if (IntOrByteIdentifier == 'i')
536 TyVal = IntegerType::get(C&: Context, NumBits);
537 else
538 TyVal = ByteType::get(C&: Context, NumBits);
539
540 return lltok::Type;
541 }
542
543 // Otherwise, this was a letter sequence. See which keyword this is.
544 if (!KeywordEnd) KeywordEnd = CurPtr;
545 CurPtr = KeywordEnd;
546 --StartChar;
547 StringRef Keyword(StartChar, CurPtr - StartChar);
548
549#define KEYWORD(STR) \
550 do { \
551 if (Keyword == #STR) \
552 return lltok::kw_##STR; \
553 } while (false)
554
555 KEYWORD(true); KEYWORD(false);
556 KEYWORD(declare); KEYWORD(define);
557 KEYWORD(global); KEYWORD(constant);
558 KEYWORD(br);
559
560 KEYWORD(dso_local);
561 KEYWORD(dso_preemptable);
562
563 KEYWORD(private);
564 KEYWORD(internal);
565 KEYWORD(available_externally);
566 KEYWORD(linkonce);
567 KEYWORD(linkonce_odr);
568 KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg".
569 KEYWORD(weak_odr);
570 KEYWORD(appending);
571 KEYWORD(dllimport);
572 KEYWORD(dllexport);
573 KEYWORD(common);
574 KEYWORD(default);
575 KEYWORD(hidden);
576 KEYWORD(protected);
577 KEYWORD(unnamed_addr);
578 KEYWORD(local_unnamed_addr);
579 KEYWORD(externally_initialized);
580 KEYWORD(extern_weak);
581 KEYWORD(external);
582 KEYWORD(thread_local);
583 KEYWORD(localdynamic);
584 KEYWORD(initialexec);
585 KEYWORD(localexec);
586 KEYWORD(zeroinitializer);
587 KEYWORD(undef);
588 KEYWORD(null);
589 KEYWORD(none);
590 KEYWORD(poison);
591 KEYWORD(to);
592 KEYWORD(caller);
593 KEYWORD(within);
594 KEYWORD(from);
595 KEYWORD(tail);
596 KEYWORD(musttail);
597 KEYWORD(notail);
598 KEYWORD(target);
599 KEYWORD(triple);
600 KEYWORD(source_filename);
601 KEYWORD(unwind);
602 KEYWORD(datalayout);
603 KEYWORD(volatile);
604 KEYWORD(elementwise);
605 KEYWORD(atomic);
606 KEYWORD(unordered);
607 KEYWORD(monotonic);
608 KEYWORD(acquire);
609 KEYWORD(release);
610 KEYWORD(acq_rel);
611 KEYWORD(seq_cst);
612 KEYWORD(syncscope);
613
614 KEYWORD(nnan);
615 KEYWORD(ninf);
616 KEYWORD(nsz);
617 KEYWORD(arcp);
618 KEYWORD(contract);
619 KEYWORD(reassoc);
620 KEYWORD(afn);
621 KEYWORD(fast);
622 KEYWORD(nuw);
623 KEYWORD(nsw);
624 KEYWORD(nusw);
625 KEYWORD(exact);
626 KEYWORD(disjoint);
627 KEYWORD(inbounds);
628 KEYWORD(nneg);
629 KEYWORD(samesign);
630 KEYWORD(inrange);
631 KEYWORD(addrspace);
632 KEYWORD(section);
633 KEYWORD(partition);
634 KEYWORD(code_model);
635 KEYWORD(alias);
636 KEYWORD(ifunc);
637 KEYWORD(module);
638 KEYWORD(asm);
639 KEYWORD(sideeffect);
640 KEYWORD(inteldialect);
641 KEYWORD(gc);
642 KEYWORD(prefix);
643 KEYWORD(prologue);
644 KEYWORD(prefalign);
645
646 KEYWORD(no_sanitize_address);
647 KEYWORD(no_sanitize_hwaddress);
648 KEYWORD(sanitize_address_dyninit);
649
650 KEYWORD(ccc);
651 KEYWORD(fastcc);
652 KEYWORD(coldcc);
653 KEYWORD(cfguard_checkcc);
654 KEYWORD(x86_stdcallcc);
655 KEYWORD(x86_fastcallcc);
656 KEYWORD(x86_thiscallcc);
657 KEYWORD(x86_vectorcallcc);
658 KEYWORD(arm_apcscc);
659 KEYWORD(arm_aapcscc);
660 KEYWORD(arm_aapcs_vfpcc);
661 KEYWORD(aarch64_vector_pcs);
662 KEYWORD(aarch64_sve_vector_pcs);
663 KEYWORD(aarch64_sme_preservemost_from_x0);
664 KEYWORD(aarch64_sme_preservemost_from_x1);
665 KEYWORD(aarch64_sme_preservemost_from_x2);
666 KEYWORD(msp430_intrcc);
667 KEYWORD(avr_intrcc);
668 KEYWORD(avr_signalcc);
669 KEYWORD(ptx_kernel);
670 KEYWORD(ptx_device);
671 KEYWORD(spir_kernel);
672 KEYWORD(spir_func);
673 KEYWORD(intel_ocl_bicc);
674 KEYWORD(x86_64_sysvcc);
675 KEYWORD(win64cc);
676 KEYWORD(x86_regcallcc);
677 KEYWORD(swiftcc);
678 KEYWORD(swifttailcc);
679 KEYWORD(anyregcc);
680 KEYWORD(preserve_mostcc);
681 KEYWORD(preserve_allcc);
682 KEYWORD(preserve_nonecc);
683 KEYWORD(ghccc);
684 KEYWORD(x86_intrcc);
685 KEYWORD(hhvmcc);
686 KEYWORD(hhvm_ccc);
687 KEYWORD(cxx_fast_tlscc);
688 KEYWORD(amdgpu_vs);
689 KEYWORD(amdgpu_ls);
690 KEYWORD(amdgpu_hs);
691 KEYWORD(amdgpu_es);
692 KEYWORD(amdgpu_gs);
693 KEYWORD(amdgpu_ps);
694 KEYWORD(amdgpu_cs);
695 KEYWORD(amdgpu_cs_chain);
696 KEYWORD(amdgpu_cs_chain_preserve);
697 KEYWORD(amdgpu_kernel);
698 KEYWORD(amdgpu_gfx);
699 KEYWORD(amdgpu_gfx_whole_wave);
700 KEYWORD(tailcc);
701 KEYWORD(m68k_rtdcc);
702 KEYWORD(graalcc);
703 KEYWORD(riscv_vector_cc);
704 KEYWORD(riscv_vls_cc);
705 KEYWORD(cheriot_compartmentcallcc);
706 KEYWORD(cheriot_compartmentcalleecc);
707 KEYWORD(cheriot_librarycallcc);
708
709 KEYWORD(cc);
710 KEYWORD(c);
711
712 KEYWORD(attributes);
713 KEYWORD(sync);
714 KEYWORD(async);
715
716#define GET_ATTR_NAMES
717#define ATTRIBUTE_ENUM(ENUM_NAME, DISPLAY_NAME) \
718 KEYWORD(DISPLAY_NAME);
719#include "llvm/IR/Attributes.inc"
720
721 KEYWORD(read);
722 KEYWORD(write);
723 KEYWORD(readwrite);
724 KEYWORD(argmem);
725 KEYWORD(target_mem0);
726 KEYWORD(target_mem1);
727 KEYWORD(target_mem);
728 KEYWORD(inaccessiblemem);
729 KEYWORD(errnomem);
730 KEYWORD(argmemonly);
731 KEYWORD(inaccessiblememonly);
732 KEYWORD(inaccessiblemem_or_argmemonly);
733 KEYWORD(nocapture);
734 KEYWORD(address_is_null);
735 KEYWORD(address);
736 KEYWORD(provenance);
737 KEYWORD(read_provenance);
738
739 // denormal_fpenv attribute
740 KEYWORD(ieee);
741 KEYWORD(preservesign);
742 KEYWORD(positivezero);
743 KEYWORD(dynamic);
744
745 // nofpclass attribute
746 KEYWORD(all);
747 KEYWORD(nan);
748 KEYWORD(snan);
749 KEYWORD(qnan);
750 KEYWORD(inf);
751 // ninf already a keyword
752 KEYWORD(pinf);
753 KEYWORD(norm);
754 KEYWORD(nnorm);
755 KEYWORD(pnorm);
756 // sub already a keyword
757 KEYWORD(nsub);
758 KEYWORD(psub);
759 KEYWORD(zero);
760 KEYWORD(nzero);
761 KEYWORD(pzero);
762
763 KEYWORD(type);
764 KEYWORD(opaque);
765
766 KEYWORD(comdat);
767
768 // Comdat types
769 KEYWORD(any);
770 KEYWORD(exactmatch);
771 KEYWORD(largest);
772 KEYWORD(nodeduplicate);
773 KEYWORD(samesize);
774
775 KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
776 KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
777 KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
778 KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
779
780 KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
781 KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin);
782 KEYWORD(fmaximum);
783 KEYWORD(fminimum);
784 KEYWORD(fmaximumnum);
785 KEYWORD(fminimumnum);
786 KEYWORD(uinc_wrap);
787 KEYWORD(udec_wrap);
788 KEYWORD(usub_cond);
789 KEYWORD(usub_sat);
790
791 KEYWORD(splat);
792 KEYWORD(vscale);
793 KEYWORD(x);
794 KEYWORD(blockaddress);
795 KEYWORD(dso_local_equivalent);
796 KEYWORD(no_cfi);
797 KEYWORD(ptrauth);
798
799 // Metadata types.
800 KEYWORD(distinct);
801
802 // Use-list order directives.
803 KEYWORD(uselistorder);
804
805 KEYWORD(personality);
806 KEYWORD(cleanup);
807 KEYWORD(catch);
808 KEYWORD(filter);
809
810 // Summary index keywords.
811 KEYWORD(path);
812 KEYWORD(hash);
813 KEYWORD(gv);
814 KEYWORD(guid);
815 KEYWORD(name);
816 KEYWORD(summaries);
817 KEYWORD(flags);
818 KEYWORD(blockcount);
819 KEYWORD(linkage);
820 KEYWORD(visibility);
821 KEYWORD(notEligibleToImport);
822 KEYWORD(live);
823 KEYWORD(dsoLocal);
824 KEYWORD(canAutoHide);
825 KEYWORD(importType);
826 KEYWORD(definition);
827 KEYWORD(declaration);
828 KEYWORD(noRenameOnPromotion);
829 KEYWORD(function);
830 KEYWORD(insts);
831 KEYWORD(funcFlags);
832 KEYWORD(readNone);
833 KEYWORD(readOnly);
834 KEYWORD(noRecurse);
835 KEYWORD(returnDoesNotAlias);
836 KEYWORD(noInline);
837 KEYWORD(alwaysInline);
838 KEYWORD(noUnwind);
839 KEYWORD(mayThrow);
840 KEYWORD(hasUnknownCall);
841 KEYWORD(mustBeUnreachable);
842 KEYWORD(calls);
843 KEYWORD(callee);
844 KEYWORD(params);
845 KEYWORD(param);
846 KEYWORD(hotness);
847 KEYWORD(unknown);
848 KEYWORD(critical);
849 // Deprecated, keep in order to support old files.
850 KEYWORD(relbf);
851 KEYWORD(variable);
852 KEYWORD(vTableFuncs);
853 KEYWORD(virtFunc);
854 KEYWORD(aliasee);
855 KEYWORD(refs);
856 KEYWORD(typeIdInfo);
857 KEYWORD(typeTests);
858 KEYWORD(typeTestAssumeVCalls);
859 KEYWORD(typeCheckedLoadVCalls);
860 KEYWORD(typeTestAssumeConstVCalls);
861 KEYWORD(typeCheckedLoadConstVCalls);
862 KEYWORD(vFuncId);
863 KEYWORD(offset);
864 KEYWORD(args);
865 KEYWORD(typeid);
866 KEYWORD(typeidCompatibleVTable);
867 KEYWORD(summary);
868 KEYWORD(typeTestRes);
869 KEYWORD(kind);
870 KEYWORD(unsat);
871 KEYWORD(byteArray);
872 KEYWORD(inline);
873 KEYWORD(single);
874 KEYWORD(allOnes);
875 KEYWORD(sizeM1BitWidth);
876 KEYWORD(alignLog2);
877 KEYWORD(sizeM1);
878 KEYWORD(bitMask);
879 KEYWORD(inlineBits);
880 KEYWORD(vcall_visibility);
881 KEYWORD(wpdResolutions);
882 KEYWORD(wpdRes);
883 KEYWORD(indir);
884 KEYWORD(singleImpl);
885 KEYWORD(branchFunnel);
886 KEYWORD(singleImplName);
887 KEYWORD(resByArg);
888 KEYWORD(byArg);
889 KEYWORD(uniformRetVal);
890 KEYWORD(uniqueRetVal);
891 KEYWORD(virtualConstProp);
892 KEYWORD(info);
893 KEYWORD(byte);
894 KEYWORD(bit);
895 KEYWORD(varFlags);
896 KEYWORD(callsites);
897 KEYWORD(clones);
898 KEYWORD(stackIds);
899 KEYWORD(allocs);
900 KEYWORD(versions);
901 KEYWORD(memProf);
902 KEYWORD(notcold);
903
904#undef KEYWORD
905
906 // Keywords for types.
907#define TYPEKEYWORD(STR, LLVMTY) \
908 do { \
909 if (Keyword == STR) { \
910 TyVal = LLVMTY; \
911 return lltok::Type; \
912 } \
913 } while (false)
914
915 TYPEKEYWORD("void", Type::getVoidTy(Context));
916 TYPEKEYWORD("half", Type::getHalfTy(Context));
917 TYPEKEYWORD("bfloat", Type::getBFloatTy(Context));
918 TYPEKEYWORD("float", Type::getFloatTy(Context));
919 TYPEKEYWORD("double", Type::getDoubleTy(Context));
920 TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context));
921 TYPEKEYWORD("fp128", Type::getFP128Ty(Context));
922 TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
923 TYPEKEYWORD("label", Type::getLabelTy(Context));
924 TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
925 TYPEKEYWORD("x86_amx", Type::getX86_AMXTy(Context));
926 TYPEKEYWORD("token", Type::getTokenTy(Context));
927 TYPEKEYWORD("ptr", PointerType::getUnqual(Context));
928
929#undef TYPEKEYWORD
930
931 // Keywords for instructions.
932#define INSTKEYWORD(STR, Enum) \
933 do { \
934 if (Keyword == #STR) { \
935 UIntVal = Instruction::Enum; \
936 return lltok::kw_##STR; \
937 } \
938 } while (false)
939
940 INSTKEYWORD(fneg, FNeg);
941
942 INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd);
943 INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub);
944 INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul);
945 INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv);
946 INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem);
947 INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr);
948 INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor);
949 INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp);
950
951 INSTKEYWORD(phi, PHI);
952 INSTKEYWORD(call, Call);
953 INSTKEYWORD(trunc, Trunc);
954 INSTKEYWORD(zext, ZExt);
955 INSTKEYWORD(sext, SExt);
956 INSTKEYWORD(fptrunc, FPTrunc);
957 INSTKEYWORD(fpext, FPExt);
958 INSTKEYWORD(uitofp, UIToFP);
959 INSTKEYWORD(sitofp, SIToFP);
960 INSTKEYWORD(fptoui, FPToUI);
961 INSTKEYWORD(fptosi, FPToSI);
962 INSTKEYWORD(inttoptr, IntToPtr);
963 INSTKEYWORD(ptrtoaddr, PtrToAddr);
964 INSTKEYWORD(ptrtoint, PtrToInt);
965 INSTKEYWORD(bitcast, BitCast);
966 INSTKEYWORD(addrspacecast, AddrSpaceCast);
967 INSTKEYWORD(select, Select);
968 INSTKEYWORD(va_arg, VAArg);
969 INSTKEYWORD(ret, Ret);
970 INSTKEYWORD(switch, Switch);
971 INSTKEYWORD(indirectbr, IndirectBr);
972 INSTKEYWORD(invoke, Invoke);
973 INSTKEYWORD(resume, Resume);
974 INSTKEYWORD(unreachable, Unreachable);
975 INSTKEYWORD(callbr, CallBr);
976
977 INSTKEYWORD(alloca, Alloca);
978 INSTKEYWORD(load, Load);
979 INSTKEYWORD(store, Store);
980 INSTKEYWORD(cmpxchg, AtomicCmpXchg);
981 INSTKEYWORD(atomicrmw, AtomicRMW);
982 INSTKEYWORD(fence, Fence);
983 INSTKEYWORD(getelementptr, GetElementPtr);
984
985 INSTKEYWORD(extractelement, ExtractElement);
986 INSTKEYWORD(insertelement, InsertElement);
987 INSTKEYWORD(shufflevector, ShuffleVector);
988 INSTKEYWORD(extractvalue, ExtractValue);
989 INSTKEYWORD(insertvalue, InsertValue);
990 INSTKEYWORD(landingpad, LandingPad);
991 INSTKEYWORD(cleanupret, CleanupRet);
992 INSTKEYWORD(catchret, CatchRet);
993 INSTKEYWORD(catchswitch, CatchSwitch);
994 INSTKEYWORD(catchpad, CatchPad);
995 INSTKEYWORD(cleanuppad, CleanupPad);
996
997 INSTKEYWORD(freeze, Freeze);
998
999#undef INSTKEYWORD
1000
1001#define DWKEYWORD(TYPE, TOKEN) \
1002 do { \
1003 if (Keyword.starts_with("DW_" #TYPE "_")) { \
1004 StrVal.assign(Keyword.begin(), Keyword.end()); \
1005 return lltok::TOKEN; \
1006 } \
1007 } while (false)
1008
1009 DWKEYWORD(TAG, DwarfTag);
1010 DWKEYWORD(ATE, DwarfAttEncoding);
1011 DWKEYWORD(VIRTUALITY, DwarfVirtuality);
1012 DWKEYWORD(LLVM_LANG_DIALECT, DwarfLangDialect);
1013 DWKEYWORD(LANG, DwarfLang);
1014 DWKEYWORD(LNAME, DwarfSourceLangName);
1015 DWKEYWORD(CC, DwarfCC);
1016 DWKEYWORD(OP, DwarfOp);
1017 DWKEYWORD(MACINFO, DwarfMacinfo);
1018 DWKEYWORD(APPLE_ENUM_KIND, DwarfEnumKind);
1019
1020#undef DWKEYWORD
1021
1022// Keywords for debug record types.
1023#define DBGRECORDTYPEKEYWORD(STR) \
1024 do { \
1025 if (Keyword == "dbg_" #STR) { \
1026 StrVal = #STR; \
1027 return lltok::DbgRecordType; \
1028 } \
1029 } while (false)
1030
1031 DBGRECORDTYPEKEYWORD(value);
1032 DBGRECORDTYPEKEYWORD(declare);
1033 DBGRECORDTYPEKEYWORD(assign);
1034 DBGRECORDTYPEKEYWORD(label);
1035 DBGRECORDTYPEKEYWORD(declare_value);
1036#undef DBGRECORDTYPEKEYWORD
1037
1038 if (Keyword.starts_with(Prefix: "DIFlag")) {
1039 StrVal.assign(first: Keyword.begin(), last: Keyword.end());
1040 return lltok::DIFlag;
1041 }
1042
1043 if (Keyword.starts_with(Prefix: "DISPFlag")) {
1044 StrVal.assign(first: Keyword.begin(), last: Keyword.end());
1045 return lltok::DISPFlag;
1046 }
1047
1048 if (Keyword.starts_with(Prefix: "CSK_")) {
1049 StrVal.assign(first: Keyword.begin(), last: Keyword.end());
1050 return lltok::ChecksumKind;
1051 }
1052
1053 if (Keyword == "NoDebug" || Keyword == "FullDebug" ||
1054 Keyword == "LineTablesOnly" || Keyword == "DebugDirectivesOnly") {
1055 StrVal.assign(first: Keyword.begin(), last: Keyword.end());
1056 return lltok::EmissionKind;
1057 }
1058
1059 if (Keyword == "GNU" || Keyword == "Apple" || Keyword == "None" ||
1060 Keyword == "Default") {
1061 StrVal.assign(first: Keyword.begin(), last: Keyword.end());
1062 return lltok::NameTableKind;
1063 }
1064
1065 if (Keyword == "Binary" || Keyword == "Decimal" || Keyword == "Rational") {
1066 StrVal.assign(first: Keyword.begin(), last: Keyword.end());
1067 return lltok::FixedPointKind;
1068 }
1069
1070 // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
1071 // the CFE to avoid forcing it to deal with 64-bit numbers. Also check for
1072 // f0x[0-9A-Fa-f]+, which is the floating-point hexadecimal literal constant.
1073 if ((TokStart[0] == 'u' || TokStart[0] == 's' || TokStart[0] == 'f') &&
1074 TokStart[1] == '0' && TokStart[2] == 'x' &&
1075 isxdigit(static_cast<unsigned char>(TokStart[3]))) {
1076 bool IsFloatConst = TokStart[0] == 'f';
1077 size_t Len = CurPtr - TokStart - 3;
1078 uint32_t Bits = Len * 4;
1079 StringRef HexStr(TokStart + 3, Len);
1080 if (!all_of(Range&: HexStr, P: isxdigit)) {
1081 // Bad token, return it as an error.
1082 CurPtr = TokStart + 3;
1083 return lltok::Error;
1084 }
1085 APInt Tmp(Bits, HexStr, 16);
1086 uint32_t ActiveBits = Tmp.getActiveBits();
1087 if (!IsFloatConst && ActiveBits > 0 && ActiveBits < Bits)
1088 Tmp = Tmp.trunc(width: ActiveBits);
1089 APSIntVal = APSInt(Tmp, TokStart[0] != 's');
1090 return IsFloatConst ? lltok::FloatHexLiteral : lltok::APSInt;
1091 }
1092
1093 // If this is "cc1234", return this as just "cc".
1094 if (TokStart[0] == 'c' && TokStart[1] == 'c') {
1095 CurPtr = TokStart+2;
1096 return lltok::kw_cc;
1097 }
1098
1099 // Finally, if this isn't known, return an error.
1100 CurPtr = TokStart+1;
1101 return lltok::Error;
1102}
1103
1104/// Lex all tokens that start with a 0x prefix, knowing they match and are not
1105/// labels.
1106/// HexFPLiteral [-+]?0x[0-9A-Fa-f]+.[0-9A-Fa-f]*[pP][-+]?[0-9]+
1107/// HexFPConstant 0x[0-9A-Fa-f]+
1108/// HexFP80Constant 0xK[0-9A-Fa-f]+
1109/// HexFP128Constant 0xL[0-9A-Fa-f]+
1110/// HexPPC128Constant 0xM[0-9A-Fa-f]+
1111/// HexHalfConstant 0xH[0-9A-Fa-f]+
1112/// HexBFloatConstant 0xR[0-9A-Fa-f]+
1113lltok::Kind LLLexer::Lex0x() {
1114 CurPtr = TokStart + 2;
1115
1116 char Kind;
1117 if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' ||
1118 CurPtr[0] == 'R') {
1119 Kind = *CurPtr++;
1120 } else {
1121 Kind = 'J';
1122 }
1123
1124 if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
1125 // Bad token, return it as an error.
1126 CurPtr = TokStart+1;
1127 return lltok::Error;
1128 }
1129
1130 while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
1131 ++CurPtr;
1132
1133 if (*CurPtr == '.') {
1134 // HexFPLiteral, following C's %a syntax
1135 return LexFloatStr();
1136 }
1137
1138 if (Kind == 'J') {
1139 // HexFPConstant - Floating point constant represented in IEEE format as a
1140 // hexadecimal number for when exponential notation is not precise enough.
1141 // Half, BFloat, Float, and double only.
1142 APFloatVal = APFloat(APFloat::IEEEdouble(),
1143 APInt(64, HexIntToVal(Buffer: TokStart + 2, End: CurPtr)));
1144 return lltok::APFloat;
1145 }
1146
1147 uint64_t Pair[2];
1148 switch (Kind) {
1149 default:
1150 llvm_unreachable("Unknown kind!");
1151 case 'K':
1152 // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
1153 FP80HexToIntPair(Buffer: TokStart + 3, End: CurPtr, Pair);
1154 APSIntVal = APInt(80, Pair);
1155 return lltok::FloatHexLiteral;
1156 case 'L':
1157 // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
1158 HexToIntPair(Buffer: TokStart + 3, End: CurPtr, Pair);
1159 APSIntVal = APInt(128, Pair);
1160 return lltok::FloatHexLiteral;
1161 case 'M':
1162 // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
1163 HexToIntPair(Buffer: TokStart + 3, End: CurPtr, Pair);
1164 APSIntVal = APInt(128, Pair);
1165 return lltok::FloatHexLiteral;
1166 case 'H': {
1167 uint64_t Val = HexIntToVal(Buffer: TokStart + 3, End: CurPtr);
1168 if (!llvm::isUInt<16>(x: Val)) {
1169 LexError(Msg: "hexadecimal constant too large for half (16-bit)");
1170 return lltok::Error;
1171 }
1172 APSIntVal = APInt(16, Val);
1173 return lltok::FloatHexLiteral;
1174 }
1175 case 'R': {
1176 // Brain floating point
1177 uint64_t Val = HexIntToVal(Buffer: TokStart + 3, End: CurPtr);
1178 if (!llvm::isUInt<16>(x: Val)) {
1179 LexError(Msg: "hexadecimal constant too large for bfloat (16-bit)");
1180 return lltok::Error;
1181 }
1182 APSIntVal = APInt(16, Val);
1183 return lltok::FloatHexLiteral;
1184 }
1185 }
1186}
1187
1188/// Lex tokens for a label or a numeric constant, possibly starting with -.
1189/// Label [-a-zA-Z$._0-9]+:
1190/// NInteger -[0-9]+
1191/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
1192/// PInteger [0-9]+
1193/// HexFPLiteral [-+]?0x[0-9A-Fa-f]+.[0-9A-Fa-f]*[pP][-+]?[0-9]+
1194/// HexFPConstant 0x[0-9A-Fa-f]+
1195/// HexFP80Constant 0xK[0-9A-Fa-f]+
1196/// HexFP128Constant 0xL[0-9A-Fa-f]+
1197/// HexPPC128Constant 0xM[0-9A-Fa-f]+
1198lltok::Kind LLLexer::LexDigitOrNegative() {
1199 // If the letter after the negative is not a number, this is probably a label.
1200 if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
1201 !isdigit(static_cast<unsigned char>(CurPtr[0]))) {
1202 // Okay, this is not a number after the -, it's probably a label.
1203 if (const char *End = isLabelTail(CurPtr)) {
1204 StrVal.assign(first: TokStart, last: End-1);
1205 CurPtr = End;
1206 return lltok::LabelStr;
1207 }
1208
1209 // It might be a -inf, -nan, etc. Check if it's a float string (which will
1210 // also handle error conditions there).
1211 return LexFloatStr();
1212 }
1213
1214 // At this point, it is either a label, int or fp constant.
1215
1216 // Skip digits, we have at least one.
1217 for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
1218 /*empty*/;
1219
1220 // Check if this is a fully-numeric label:
1221 if (isdigit(TokStart[0]) && CurPtr[0] == ':') {
1222 uint64_t Val = atoull(Buffer: TokStart, End: CurPtr);
1223 ++CurPtr; // Skip the colon.
1224 if ((unsigned)Val != Val)
1225 LexError(Msg: "invalid value number (too large)");
1226 UIntVal = unsigned(Val);
1227 return lltok::LabelID;
1228 }
1229
1230 // Check to see if this really is a string label, e.g. "-1:".
1231 if (isLabelChar(C: CurPtr[0]) || CurPtr[0] == ':') {
1232 if (const char *End = isLabelTail(CurPtr)) {
1233 StrVal.assign(first: TokStart, last: End-1);
1234 CurPtr = End;
1235 return lltok::LabelStr;
1236 }
1237 }
1238
1239 // If the next character is a '.', then it is a fp value, otherwise its
1240 // integer.
1241 if (CurPtr[0] != '.') {
1242 if (TokStart[0] == '0' && TokStart[1] == 'x')
1243 return Lex0x();
1244 if (TokStart[0] == '-' && TokStart[1] == '0' && TokStart[2] == 'x')
1245 return LexFloatStr();
1246
1247 APSIntVal = APSInt(StringRef(TokStart, CurPtr - TokStart));
1248 return lltok::APSInt;
1249 }
1250
1251 ++CurPtr;
1252
1253 // Skip over [0-9]*([eE][-+]?[0-9]+)?
1254 while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1255
1256 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
1257 if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
1258 ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
1259 isdigit(static_cast<unsigned char>(CurPtr[2])))) {
1260 CurPtr += 2;
1261 while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1262 }
1263 }
1264
1265 StrVal.assign(s: TokStart, n: CurPtr - TokStart);
1266 return lltok::FloatLiteral;
1267}
1268
1269/// Lex a floating point constant starting with +.
1270/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
1271/// HexFPLiteral [-+]?0x[0-9A-Fa-f]+.[0-9A-Fa-f]*[pP][-+]?[0-9]+
1272/// HexFPSpecial [-+](inf|qnan|s?nan\(0x[0-9A-Fa-f]+\))
1273lltok::Kind LLLexer::LexPositive() {
1274 // If it's not numeric, check for special floating-point values.
1275 if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
1276 return LexFloatStr();
1277
1278 // Skip digits.
1279 for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
1280 /*empty*/;
1281
1282 // If the first non-digit is an x, check if it's a hex FP literal. LexFloatStr
1283 // will reanalyze TokStr..CurPtr to make sure that it's 0x and not 413x.
1284 if (CurPtr[0] == 'x')
1285 return LexFloatStr();
1286
1287 // At this point, we need a '.'.
1288 if (CurPtr[0] != '.') {
1289 CurPtr = TokStart + 1;
1290 return lltok::Error;
1291 }
1292
1293 ++CurPtr;
1294
1295 // Skip over [0-9]*([eE][-+]?[0-9]+)?
1296 while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1297
1298 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
1299 if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
1300 ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
1301 isdigit(static_cast<unsigned char>(CurPtr[2])))) {
1302 CurPtr += 2;
1303 while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1304 }
1305 }
1306
1307 StrVal.assign(s: TokStart, n: CurPtr - TokStart);
1308 return lltok::FloatLiteral;
1309}
1310
1311/// Lex all tokens that start with a + or - that could be a float literal.
1312/// HexFPLiteral [-+]?0x[0-9A-Fa-f]+.[0-9A-Fa-f]*[pP][-+]?[0-9]+
1313/// HexFPSpecial [-+](inf|qnan|s?nan\(0x[0-9A-Fa-f]+\))
1314lltok::Kind LLLexer::LexFloatStr() {
1315 // At the point we enter this function, we may have seen a few characters
1316 // already, but how many differs based on the entry point. Rewind to the
1317 // beginning just in case.
1318 CurPtr = TokStart;
1319
1320 // Check for optional sign.
1321 if (*CurPtr == '-' || *CurPtr == '+')
1322 ++CurPtr;
1323
1324 if (*CurPtr != '0') {
1325 // Check for keywords.
1326 const char *LabelStart = CurPtr;
1327 while (isLabelChar(C: *CurPtr))
1328 ++CurPtr;
1329 StringRef Label(LabelStart, CurPtr - LabelStart);
1330
1331 // Basic special values.
1332 if (Label == "inf") {
1333 // Copy from the beginning, to include the sign.
1334 StrVal.assign(s: TokStart, n: CurPtr - TokStart);
1335 return lltok::FloatLiteral;
1336 }
1337
1338 // APFloat::convertFromString doesn't support qnan, so translate it to a
1339 // nan payload string it does support.
1340 if (Label == "qnan") {
1341 StrVal = *TokStart == '-' ? "-nan(0)" : "nan(0)";
1342 return lltok::FloatLiteral;
1343 }
1344
1345 // NaN with payload.
1346 if ((Label == "nan" || Label == "snan") && *CurPtr == '(') {
1347 const char *Payload = ++CurPtr;
1348 while (*CurPtr && *CurPtr != ')')
1349 ++CurPtr;
1350
1351 // If no close parenthesis, it's a bad token, return it as an error.
1352 if (*CurPtr++ != ')') {
1353 CurPtr = TokStart + 1;
1354 LexError(Msg: "unclosed nan literal");
1355 return lltok::Error;
1356 }
1357
1358 StringRef PayloadStr(Payload, CurPtr - Payload);
1359 APInt Val;
1360 if (PayloadStr.consume_front(Prefix: "0x") && PayloadStr.getAsInteger(Radix: 16, Result&: Val)) {
1361 StrVal.assign(s: TokStart, n: CurPtr - TokStart);
1362 // Drop the leading + from the string, as APFloat::convertFromString
1363 // doesn't support leading + sign.
1364 if (StrVal[0] == '+')
1365 StrVal.erase(pos: 0, n: 1);
1366 return lltok::FloatLiteral;
1367 }
1368 }
1369
1370 // Bad token, return it as an error.
1371 LexError(Msg: "bad payload format for nan literal");
1372 CurPtr = TokStart + 1;
1373 return lltok::Error;
1374 }
1375 ++CurPtr;
1376
1377 if (*CurPtr++ != 'x') {
1378 // Bad token, return it as an error.
1379 CurPtr = TokStart + 1;
1380 return lltok::Error;
1381 }
1382
1383 if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
1384 // Bad token, return it as an error.
1385 CurPtr = TokStart + 1;
1386 return lltok::Error;
1387 }
1388
1389 while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
1390 ++CurPtr;
1391
1392 if (*CurPtr != '.') {
1393 // Bad token, return it as an error.
1394 CurPtr = TokStart + 1;
1395 return lltok::Error;
1396 }
1397
1398 ++CurPtr; // Eat the .
1399 while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
1400 ++CurPtr;
1401
1402 if (*CurPtr != 'p' && *CurPtr != 'P') {
1403 // Bad token, return it as an error.
1404 CurPtr = TokStart + 1;
1405 return lltok::Error;
1406 }
1407
1408 ++CurPtr;
1409 if (*CurPtr == '+' || *CurPtr == '-')
1410 ++CurPtr;
1411 while (isdigit(static_cast<unsigned char>(CurPtr[0])))
1412 ++CurPtr;
1413
1414 StrVal.assign(s: TokStart, n: CurPtr - TokStart);
1415 return lltok::FloatLiteral;
1416}
1417