1 | //===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Implement the Lexer for .ll files. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/AsmParser/LLLexer.h" |
14 | #include "llvm/ADT/APInt.h" |
15 | #include "llvm/ADT/STLExtras.h" |
16 | #include "llvm/ADT/StringExtras.h" |
17 | #include "llvm/ADT/Twine.h" |
18 | #include "llvm/IR/DerivedTypes.h" |
19 | #include "llvm/IR/Instruction.h" |
20 | #include "llvm/Support/ErrorHandling.h" |
21 | #include "llvm/Support/SourceMgr.h" |
22 | #include <cassert> |
23 | #include <cctype> |
24 | #include <cstdio> |
25 | |
26 | using namespace llvm; |
27 | |
28 | bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const { |
29 | ErrorInfo = SM.GetMessage(Loc: ErrorLoc, Kind: SourceMgr::DK_Error, Msg); |
30 | return true; |
31 | } |
32 | |
33 | void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const { |
34 | SM.PrintMessage(Loc: WarningLoc, Kind: SourceMgr::DK_Warning, Msg); |
35 | } |
36 | |
37 | //===----------------------------------------------------------------------===// |
38 | // Helper functions. |
39 | //===----------------------------------------------------------------------===// |
40 | |
41 | // atoull - Convert an ascii string of decimal digits into the unsigned long |
42 | // long representation... this does not have to do input error checking, |
43 | // because we know that the input will be matched by a suitable regex... |
44 | // |
45 | uint64_t LLLexer::atoull(const char *Buffer, const char *End) { |
46 | uint64_t Result = 0; |
47 | for (; Buffer != End; Buffer++) { |
48 | uint64_t OldRes = Result; |
49 | Result *= 10; |
50 | Result += *Buffer-'0'; |
51 | if (Result < OldRes) { // Uh, oh, overflow detected!!! |
52 | Error(Msg: "constant bigger than 64 bits detected!" ); |
53 | return 0; |
54 | } |
55 | } |
56 | return Result; |
57 | } |
58 | |
59 | uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { |
60 | uint64_t Result = 0; |
61 | for (; Buffer != End; ++Buffer) { |
62 | uint64_t OldRes = Result; |
63 | Result *= 16; |
64 | Result += hexDigitValue(C: *Buffer); |
65 | |
66 | if (Result < OldRes) { // Uh, oh, overflow detected!!! |
67 | Error(Msg: "constant bigger than 64 bits detected!" ); |
68 | return 0; |
69 | } |
70 | } |
71 | return Result; |
72 | } |
73 | |
74 | void LLLexer::HexToIntPair(const char *Buffer, const char *End, |
75 | uint64_t Pair[2]) { |
76 | Pair[0] = 0; |
77 | if (End - Buffer >= 16) { |
78 | for (int i = 0; i < 16; i++, Buffer++) { |
79 | assert(Buffer != End); |
80 | Pair[0] *= 16; |
81 | Pair[0] += hexDigitValue(C: *Buffer); |
82 | } |
83 | } |
84 | Pair[1] = 0; |
85 | for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) { |
86 | Pair[1] *= 16; |
87 | Pair[1] += hexDigitValue(C: *Buffer); |
88 | } |
89 | if (Buffer != End) |
90 | Error(Msg: "constant bigger than 128 bits detected!" ); |
91 | } |
92 | |
93 | /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into |
94 | /// { low64, high16 } as usual for an APInt. |
95 | void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, |
96 | uint64_t Pair[2]) { |
97 | Pair[1] = 0; |
98 | for (int i=0; i<4 && Buffer != End; i++, Buffer++) { |
99 | assert(Buffer != End); |
100 | Pair[1] *= 16; |
101 | Pair[1] += hexDigitValue(C: *Buffer); |
102 | } |
103 | Pair[0] = 0; |
104 | for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) { |
105 | Pair[0] *= 16; |
106 | Pair[0] += hexDigitValue(C: *Buffer); |
107 | } |
108 | if (Buffer != End) |
109 | Error(Msg: "constant bigger than 128 bits detected!" ); |
110 | } |
111 | |
112 | // UnEscapeLexed - Run through the specified buffer and change \xx codes to the |
113 | // appropriate character. |
114 | static void UnEscapeLexed(std::string &Str) { |
115 | if (Str.empty()) return; |
116 | |
117 | char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size(); |
118 | char *BOut = Buffer; |
119 | for (char *BIn = Buffer; BIn != EndBuffer; ) { |
120 | if (BIn[0] == '\\') { |
121 | if (BIn < EndBuffer-1 && BIn[1] == '\\') { |
122 | *BOut++ = '\\'; // Two \ becomes one |
123 | BIn += 2; |
124 | } else if (BIn < EndBuffer-2 && |
125 | isxdigit(static_cast<unsigned char>(BIn[1])) && |
126 | isxdigit(static_cast<unsigned char>(BIn[2]))) { |
127 | *BOut = hexDigitValue(C: BIn[1]) * 16 + hexDigitValue(C: BIn[2]); |
128 | BIn += 3; // Skip over handled chars |
129 | ++BOut; |
130 | } else { |
131 | *BOut++ = *BIn++; |
132 | } |
133 | } else { |
134 | *BOut++ = *BIn++; |
135 | } |
136 | } |
137 | Str.resize(n: BOut-Buffer); |
138 | } |
139 | |
/// isLabelChar - Return true if C may appear in a label, i.e. it matches
/// [-a-zA-Z$._0-9].
static bool isLabelChar(char C) {
  switch (C) {
  case '-':
  case '$':
  case '.':
  case '_':
    return true;
  default:
    // Cast first: <cctype> functions require an unsigned-char value.
    return isalnum(static_cast<unsigned char>(C));
  }
}
145 | |
146 | /// isLabelTail - Return true if this pointer points to a valid end of a label. |
147 | static const char *isLabelTail(const char *CurPtr) { |
148 | while (true) { |
149 | if (CurPtr[0] == ':') return CurPtr+1; |
150 | if (!isLabelChar(C: CurPtr[0])) return nullptr; |
151 | ++CurPtr; |
152 | } |
153 | } |
154 | |
155 | //===----------------------------------------------------------------------===// |
156 | // Lexer definition. |
157 | //===----------------------------------------------------------------------===// |
158 | |
159 | LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err, |
160 | LLVMContext &C) |
161 | : CurBuf(StartBuf), ErrorInfo(Err), SM(SM), Context(C) { |
162 | CurPtr = CurBuf.begin(); |
163 | } |
164 | |
165 | int LLLexer::getNextChar() { |
166 | char CurChar = *CurPtr++; |
167 | switch (CurChar) { |
168 | default: return (unsigned char)CurChar; |
169 | case 0: |
170 | // A nul character in the stream is either the end of the current buffer or |
171 | // a random nul in the file. Disambiguate that here. |
172 | if (CurPtr-1 != CurBuf.end()) |
173 | return 0; // Just whitespace. |
174 | |
175 | // Otherwise, return end of file. |
176 | --CurPtr; // Another call to lex will return EOF again. |
177 | return EOF; |
178 | } |
179 | } |
180 | |
181 | lltok::Kind LLLexer::LexToken() { |
182 | while (true) { |
183 | TokStart = CurPtr; |
184 | |
185 | int CurChar = getNextChar(); |
186 | switch (CurChar) { |
187 | default: |
188 | // Handle letters: [a-zA-Z_] |
189 | if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_') |
190 | return LexIdentifier(); |
191 | |
192 | return lltok::Error; |
193 | case EOF: return lltok::Eof; |
194 | case 0: |
195 | case ' ': |
196 | case '\t': |
197 | case '\n': |
198 | case '\r': |
199 | // Ignore whitespace. |
200 | continue; |
201 | case '+': return LexPositive(); |
202 | case '@': return LexAt(); |
203 | case '$': return LexDollar(); |
204 | case '%': return LexPercent(); |
205 | case '"': return LexQuote(); |
206 | case '.': |
207 | if (const char *Ptr = isLabelTail(CurPtr)) { |
208 | CurPtr = Ptr; |
209 | StrVal.assign(first: TokStart, last: CurPtr-1); |
210 | return lltok::LabelStr; |
211 | } |
212 | if (CurPtr[0] == '.' && CurPtr[1] == '.') { |
213 | CurPtr += 2; |
214 | return lltok::dotdotdot; |
215 | } |
216 | return lltok::Error; |
217 | case ';': |
218 | SkipLineComment(); |
219 | continue; |
220 | case '!': return LexExclaim(); |
221 | case '^': |
222 | return LexCaret(); |
223 | case ':': |
224 | return lltok::colon; |
225 | case '#': return LexHash(); |
226 | case '0': case '1': case '2': case '3': case '4': |
227 | case '5': case '6': case '7': case '8': case '9': |
228 | case '-': |
229 | return LexDigitOrNegative(); |
230 | case '=': return lltok::equal; |
231 | case '[': return lltok::lsquare; |
232 | case ']': return lltok::rsquare; |
233 | case '{': return lltok::lbrace; |
234 | case '}': return lltok::rbrace; |
235 | case '<': return lltok::less; |
236 | case '>': return lltok::greater; |
237 | case '(': return lltok::lparen; |
238 | case ')': return lltok::rparen; |
239 | case ',': return lltok::comma; |
240 | case '*': return lltok::star; |
241 | case '|': return lltok::bar; |
242 | } |
243 | } |
244 | } |
245 | |
246 | void LLLexer::() { |
247 | while (true) { |
248 | if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF) |
249 | return; |
250 | } |
251 | } |
252 | |
253 | /// Lex all tokens that start with an @ character. |
254 | /// GlobalVar @\"[^\"]*\" |
255 | /// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]* |
256 | /// GlobalVarID @[0-9]+ |
257 | lltok::Kind LLLexer::LexAt() { |
258 | return LexVar(Var: lltok::GlobalVar, VarID: lltok::GlobalID); |
259 | } |
260 | |
261 | lltok::Kind LLLexer::LexDollar() { |
262 | if (const char *Ptr = isLabelTail(CurPtr: TokStart)) { |
263 | CurPtr = Ptr; |
264 | StrVal.assign(first: TokStart, last: CurPtr - 1); |
265 | return lltok::LabelStr; |
266 | } |
267 | |
268 | // Handle DollarStringConstant: $\"[^\"]*\" |
269 | if (CurPtr[0] == '"') { |
270 | ++CurPtr; |
271 | |
272 | while (true) { |
273 | int CurChar = getNextChar(); |
274 | |
275 | if (CurChar == EOF) { |
276 | Error(Msg: "end of file in COMDAT variable name" ); |
277 | return lltok::Error; |
278 | } |
279 | if (CurChar == '"') { |
280 | StrVal.assign(first: TokStart + 2, last: CurPtr - 1); |
281 | UnEscapeLexed(Str&: StrVal); |
282 | if (StringRef(StrVal).contains(C: 0)) { |
283 | Error(Msg: "Null bytes are not allowed in names" ); |
284 | return lltok::Error; |
285 | } |
286 | return lltok::ComdatVar; |
287 | } |
288 | } |
289 | } |
290 | |
291 | // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]* |
292 | if (ReadVarName()) |
293 | return lltok::ComdatVar; |
294 | |
295 | return lltok::Error; |
296 | } |
297 | |
298 | /// ReadString - Read a string until the closing quote. |
299 | lltok::Kind LLLexer::ReadString(lltok::Kind kind) { |
300 | const char *Start = CurPtr; |
301 | while (true) { |
302 | int CurChar = getNextChar(); |
303 | |
304 | if (CurChar == EOF) { |
305 | Error(Msg: "end of file in string constant" ); |
306 | return lltok::Error; |
307 | } |
308 | if (CurChar == '"') { |
309 | StrVal.assign(first: Start, last: CurPtr-1); |
310 | UnEscapeLexed(Str&: StrVal); |
311 | return kind; |
312 | } |
313 | } |
314 | } |
315 | |
316 | /// ReadVarName - Read the rest of a token containing a variable name. |
317 | bool LLLexer::ReadVarName() { |
318 | const char *NameStart = CurPtr; |
319 | if (isalpha(static_cast<unsigned char>(CurPtr[0])) || |
320 | CurPtr[0] == '-' || CurPtr[0] == '$' || |
321 | CurPtr[0] == '.' || CurPtr[0] == '_') { |
322 | ++CurPtr; |
323 | while (isalnum(static_cast<unsigned char>(CurPtr[0])) || |
324 | CurPtr[0] == '-' || CurPtr[0] == '$' || |
325 | CurPtr[0] == '.' || CurPtr[0] == '_') |
326 | ++CurPtr; |
327 | |
328 | StrVal.assign(first: NameStart, last: CurPtr); |
329 | return true; |
330 | } |
331 | return false; |
332 | } |
333 | |
334 | // Lex an ID: [0-9]+. On success, the ID is stored in UIntVal and Token is |
335 | // returned, otherwise the Error token is returned. |
336 | lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) { |
337 | if (!isdigit(static_cast<unsigned char>(CurPtr[0]))) |
338 | return lltok::Error; |
339 | |
340 | for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr) |
341 | /*empty*/; |
342 | |
343 | uint64_t Val = atoull(Buffer: TokStart + 1, End: CurPtr); |
344 | if ((unsigned)Val != Val) |
345 | Error(Msg: "invalid value number (too large)!" ); |
346 | UIntVal = unsigned(Val); |
347 | return Token; |
348 | } |
349 | |
350 | lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) { |
351 | // Handle StringConstant: \"[^\"]*\" |
352 | if (CurPtr[0] == '"') { |
353 | ++CurPtr; |
354 | |
355 | while (true) { |
356 | int CurChar = getNextChar(); |
357 | |
358 | if (CurChar == EOF) { |
359 | Error(Msg: "end of file in global variable name" ); |
360 | return lltok::Error; |
361 | } |
362 | if (CurChar == '"') { |
363 | StrVal.assign(first: TokStart+2, last: CurPtr-1); |
364 | UnEscapeLexed(Str&: StrVal); |
365 | if (StringRef(StrVal).contains(C: 0)) { |
366 | Error(Msg: "Null bytes are not allowed in names" ); |
367 | return lltok::Error; |
368 | } |
369 | return Var; |
370 | } |
371 | } |
372 | } |
373 | |
374 | // Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]* |
375 | if (ReadVarName()) |
376 | return Var; |
377 | |
378 | // Handle VarID: [0-9]+ |
379 | return LexUIntID(Token: VarID); |
380 | } |
381 | |
382 | /// Lex all tokens that start with a % character. |
383 | /// LocalVar ::= %\"[^\"]*\" |
384 | /// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]* |
385 | /// LocalVarID ::= %[0-9]+ |
386 | lltok::Kind LLLexer::LexPercent() { |
387 | return LexVar(Var: lltok::LocalVar, VarID: lltok::LocalVarID); |
388 | } |
389 | |
390 | /// Lex all tokens that start with a " character. |
391 | /// QuoteLabel "[^"]+": |
392 | /// StringConstant "[^"]*" |
393 | lltok::Kind LLLexer::LexQuote() { |
394 | lltok::Kind kind = ReadString(kind: lltok::StringConstant); |
395 | if (kind == lltok::Error || kind == lltok::Eof) |
396 | return kind; |
397 | |
398 | if (CurPtr[0] == ':') { |
399 | ++CurPtr; |
400 | if (StringRef(StrVal).contains(C: 0)) { |
401 | Error(Msg: "Null bytes are not allowed in names" ); |
402 | kind = lltok::Error; |
403 | } else { |
404 | kind = lltok::LabelStr; |
405 | } |
406 | } |
407 | |
408 | return kind; |
409 | } |
410 | |
411 | /// Lex all tokens that start with a ! character. |
412 | /// !foo |
413 | /// ! |
414 | lltok::Kind LLLexer::LexExclaim() { |
415 | // Lex a metadata name as a MetadataVar. |
416 | if (isalpha(static_cast<unsigned char>(CurPtr[0])) || |
417 | CurPtr[0] == '-' || CurPtr[0] == '$' || |
418 | CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') { |
419 | ++CurPtr; |
420 | while (isalnum(static_cast<unsigned char>(CurPtr[0])) || |
421 | CurPtr[0] == '-' || CurPtr[0] == '$' || |
422 | CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') |
423 | ++CurPtr; |
424 | |
425 | StrVal.assign(first: TokStart+1, last: CurPtr); // Skip ! |
426 | UnEscapeLexed(Str&: StrVal); |
427 | return lltok::MetadataVar; |
428 | } |
429 | return lltok::exclaim; |
430 | } |
431 | |
432 | /// Lex all tokens that start with a ^ character. |
433 | /// SummaryID ::= ^[0-9]+ |
434 | lltok::Kind LLLexer::LexCaret() { |
435 | // Handle SummaryID: ^[0-9]+ |
436 | return LexUIntID(Token: lltok::SummaryID); |
437 | } |
438 | |
439 | /// Lex all tokens that start with a # character. |
440 | /// AttrGrpID ::= #[0-9]+ |
441 | /// Hash ::= # |
442 | lltok::Kind LLLexer::LexHash() { |
443 | // Handle AttrGrpID: #[0-9]+ |
444 | if (isdigit(static_cast<unsigned char>(CurPtr[0]))) |
445 | return LexUIntID(Token: lltok::AttrGrpID); |
446 | return lltok::hash; |
447 | } |
448 | |
449 | /// Lex a label, integer type, keyword, or hexadecimal integer constant. |
450 | /// Label [-a-zA-Z$._0-9]+: |
451 | /// IntegerType i[0-9]+ |
452 | /// Keyword sdiv, float, ... |
453 | /// HexIntConstant [us]0x[0-9A-Fa-f]+ |
454 | lltok::Kind LLLexer::LexIdentifier() { |
455 | const char *StartChar = CurPtr; |
456 | const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar; |
457 | const char *KeywordEnd = nullptr; |
458 | |
459 | for (; isLabelChar(C: *CurPtr); ++CurPtr) { |
460 | // If we decide this is an integer, remember the end of the sequence. |
461 | if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr))) |
462 | IntEnd = CurPtr; |
463 | if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) && |
464 | *CurPtr != '_') |
465 | KeywordEnd = CurPtr; |
466 | } |
467 | |
468 | // If we stopped due to a colon, unless we were directed to ignore it, |
469 | // this really is a label. |
470 | if (!IgnoreColonInIdentifiers && *CurPtr == ':') { |
471 | StrVal.assign(first: StartChar-1, last: CurPtr++); |
472 | return lltok::LabelStr; |
473 | } |
474 | |
475 | // Otherwise, this wasn't a label. If this was valid as an integer type, |
476 | // return it. |
477 | if (!IntEnd) IntEnd = CurPtr; |
478 | if (IntEnd != StartChar) { |
479 | CurPtr = IntEnd; |
480 | uint64_t NumBits = atoull(Buffer: StartChar, End: CurPtr); |
481 | if (NumBits < IntegerType::MIN_INT_BITS || |
482 | NumBits > IntegerType::MAX_INT_BITS) { |
483 | Error(Msg: "bitwidth for integer type out of range!" ); |
484 | return lltok::Error; |
485 | } |
486 | TyVal = IntegerType::get(C&: Context, NumBits); |
487 | return lltok::Type; |
488 | } |
489 | |
490 | // Otherwise, this was a letter sequence. See which keyword this is. |
491 | if (!KeywordEnd) KeywordEnd = CurPtr; |
492 | CurPtr = KeywordEnd; |
493 | --StartChar; |
494 | StringRef Keyword(StartChar, CurPtr - StartChar); |
495 | |
496 | #define KEYWORD(STR) \ |
497 | do { \ |
498 | if (Keyword == #STR) \ |
499 | return lltok::kw_##STR; \ |
500 | } while (false) |
501 | |
502 | KEYWORD(true); KEYWORD(false); |
503 | KEYWORD(declare); KEYWORD(define); |
504 | KEYWORD(global); KEYWORD(constant); |
505 | |
506 | KEYWORD(dso_local); |
507 | KEYWORD(dso_preemptable); |
508 | |
509 | KEYWORD(private); |
510 | KEYWORD(internal); |
511 | KEYWORD(available_externally); |
512 | KEYWORD(linkonce); |
513 | KEYWORD(linkonce_odr); |
514 | KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg". |
515 | KEYWORD(weak_odr); |
516 | KEYWORD(appending); |
517 | KEYWORD(dllimport); |
518 | KEYWORD(dllexport); |
519 | KEYWORD(common); |
520 | KEYWORD(default); |
521 | KEYWORD(hidden); |
522 | KEYWORD(protected); |
523 | KEYWORD(unnamed_addr); |
524 | KEYWORD(local_unnamed_addr); |
525 | KEYWORD(externally_initialized); |
526 | KEYWORD(extern_weak); |
527 | KEYWORD(external); |
528 | KEYWORD(thread_local); |
529 | KEYWORD(localdynamic); |
530 | KEYWORD(initialexec); |
531 | KEYWORD(localexec); |
532 | KEYWORD(zeroinitializer); |
533 | KEYWORD(undef); |
534 | KEYWORD(null); |
535 | KEYWORD(none); |
536 | KEYWORD(poison); |
537 | KEYWORD(to); |
538 | KEYWORD(caller); |
539 | KEYWORD(within); |
540 | KEYWORD(from); |
541 | KEYWORD(tail); |
542 | KEYWORD(musttail); |
543 | KEYWORD(notail); |
544 | KEYWORD(target); |
545 | KEYWORD(triple); |
546 | KEYWORD(source_filename); |
547 | KEYWORD(unwind); |
548 | KEYWORD(datalayout); |
549 | KEYWORD(volatile); |
550 | KEYWORD(atomic); |
551 | KEYWORD(unordered); |
552 | KEYWORD(monotonic); |
553 | KEYWORD(acquire); |
554 | KEYWORD(release); |
555 | KEYWORD(acq_rel); |
556 | KEYWORD(seq_cst); |
557 | KEYWORD(syncscope); |
558 | |
559 | KEYWORD(nnan); |
560 | KEYWORD(ninf); |
561 | KEYWORD(nsz); |
562 | KEYWORD(arcp); |
563 | KEYWORD(contract); |
564 | KEYWORD(reassoc); |
565 | KEYWORD(afn); |
566 | KEYWORD(fast); |
567 | KEYWORD(nuw); |
568 | KEYWORD(nsw); |
569 | KEYWORD(nusw); |
570 | KEYWORD(exact); |
571 | KEYWORD(disjoint); |
572 | KEYWORD(inbounds); |
573 | KEYWORD(nneg); |
574 | KEYWORD(inrange); |
575 | KEYWORD(addrspace); |
576 | KEYWORD(section); |
577 | KEYWORD(partition); |
578 | KEYWORD(code_model); |
579 | KEYWORD(alias); |
580 | KEYWORD(ifunc); |
581 | KEYWORD(module); |
582 | KEYWORD(asm); |
583 | KEYWORD(sideeffect); |
584 | KEYWORD(inteldialect); |
585 | KEYWORD(gc); |
586 | KEYWORD(prefix); |
587 | KEYWORD(prologue); |
588 | |
589 | KEYWORD(no_sanitize_address); |
590 | KEYWORD(no_sanitize_hwaddress); |
591 | KEYWORD(sanitize_address_dyninit); |
592 | |
593 | KEYWORD(ccc); |
594 | KEYWORD(fastcc); |
595 | KEYWORD(coldcc); |
596 | KEYWORD(cfguard_checkcc); |
597 | KEYWORD(x86_stdcallcc); |
598 | KEYWORD(x86_fastcallcc); |
599 | KEYWORD(x86_thiscallcc); |
600 | KEYWORD(x86_vectorcallcc); |
601 | KEYWORD(arm_apcscc); |
602 | KEYWORD(arm_aapcscc); |
603 | KEYWORD(arm_aapcs_vfpcc); |
604 | KEYWORD(aarch64_vector_pcs); |
605 | KEYWORD(aarch64_sve_vector_pcs); |
606 | KEYWORD(aarch64_sme_preservemost_from_x0); |
607 | KEYWORD(aarch64_sme_preservemost_from_x1); |
608 | KEYWORD(aarch64_sme_preservemost_from_x2); |
609 | KEYWORD(msp430_intrcc); |
610 | KEYWORD(avr_intrcc); |
611 | KEYWORD(avr_signalcc); |
612 | KEYWORD(ptx_kernel); |
613 | KEYWORD(ptx_device); |
614 | KEYWORD(spir_kernel); |
615 | KEYWORD(spir_func); |
616 | KEYWORD(intel_ocl_bicc); |
617 | KEYWORD(x86_64_sysvcc); |
618 | KEYWORD(win64cc); |
619 | KEYWORD(x86_regcallcc); |
620 | KEYWORD(swiftcc); |
621 | KEYWORD(swifttailcc); |
622 | KEYWORD(anyregcc); |
623 | KEYWORD(preserve_mostcc); |
624 | KEYWORD(preserve_allcc); |
625 | KEYWORD(preserve_nonecc); |
626 | KEYWORD(ghccc); |
627 | KEYWORD(x86_intrcc); |
628 | KEYWORD(hhvmcc); |
629 | KEYWORD(hhvm_ccc); |
630 | KEYWORD(cxx_fast_tlscc); |
631 | KEYWORD(amdgpu_vs); |
632 | KEYWORD(amdgpu_ls); |
633 | KEYWORD(amdgpu_hs); |
634 | KEYWORD(amdgpu_es); |
635 | KEYWORD(amdgpu_gs); |
636 | KEYWORD(amdgpu_ps); |
637 | KEYWORD(amdgpu_cs); |
638 | KEYWORD(amdgpu_cs_chain); |
639 | KEYWORD(amdgpu_cs_chain_preserve); |
640 | KEYWORD(amdgpu_kernel); |
641 | KEYWORD(amdgpu_gfx); |
642 | KEYWORD(tailcc); |
643 | KEYWORD(m68k_rtdcc); |
644 | KEYWORD(graalcc); |
645 | KEYWORD(riscv_vector_cc); |
646 | |
647 | KEYWORD(cc); |
648 | KEYWORD(c); |
649 | |
650 | KEYWORD(attributes); |
651 | KEYWORD(sync); |
652 | KEYWORD(async); |
653 | |
654 | #define GET_ATTR_NAMES |
655 | #define ATTRIBUTE_ENUM(ENUM_NAME, DISPLAY_NAME) \ |
656 | KEYWORD(DISPLAY_NAME); |
657 | #include "llvm/IR/Attributes.inc" |
658 | |
659 | KEYWORD(read); |
660 | KEYWORD(write); |
661 | KEYWORD(readwrite); |
662 | KEYWORD(argmem); |
663 | KEYWORD(inaccessiblemem); |
664 | KEYWORD(argmemonly); |
665 | KEYWORD(inaccessiblememonly); |
666 | KEYWORD(inaccessiblemem_or_argmemonly); |
667 | |
668 | // nofpclass attribute |
669 | KEYWORD(all); |
670 | KEYWORD(nan); |
671 | KEYWORD(snan); |
672 | KEYWORD(qnan); |
673 | KEYWORD(inf); |
674 | // ninf already a keyword |
675 | KEYWORD(pinf); |
676 | KEYWORD(norm); |
677 | KEYWORD(nnorm); |
678 | KEYWORD(pnorm); |
679 | // sub already a keyword |
680 | KEYWORD(nsub); |
681 | KEYWORD(psub); |
682 | KEYWORD(zero); |
683 | KEYWORD(nzero); |
684 | KEYWORD(pzero); |
685 | |
686 | KEYWORD(type); |
687 | KEYWORD(opaque); |
688 | |
689 | KEYWORD(comdat); |
690 | |
691 | // Comdat types |
692 | KEYWORD(any); |
693 | KEYWORD(exactmatch); |
694 | KEYWORD(largest); |
695 | KEYWORD(nodeduplicate); |
696 | KEYWORD(samesize); |
697 | |
698 | KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); |
699 | KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); |
700 | KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole); |
701 | KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une); |
702 | |
703 | KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax); |
704 | KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin); |
705 | KEYWORD(uinc_wrap); |
706 | KEYWORD(udec_wrap); |
707 | |
708 | KEYWORD(splat); |
709 | KEYWORD(vscale); |
710 | KEYWORD(x); |
711 | KEYWORD(blockaddress); |
712 | KEYWORD(dso_local_equivalent); |
713 | KEYWORD(no_cfi); |
714 | KEYWORD(ptrauth); |
715 | |
716 | // Metadata types. |
717 | KEYWORD(distinct); |
718 | |
719 | // Use-list order directives. |
720 | KEYWORD(uselistorder); |
721 | KEYWORD(uselistorder_bb); |
722 | |
723 | KEYWORD(personality); |
724 | KEYWORD(cleanup); |
725 | KEYWORD(catch); |
726 | KEYWORD(filter); |
727 | |
728 | // Summary index keywords. |
729 | KEYWORD(path); |
730 | KEYWORD(hash); |
731 | KEYWORD(gv); |
732 | KEYWORD(guid); |
733 | KEYWORD(name); |
734 | KEYWORD(summaries); |
735 | KEYWORD(flags); |
736 | KEYWORD(blockcount); |
737 | KEYWORD(linkage); |
738 | KEYWORD(visibility); |
739 | KEYWORD(notEligibleToImport); |
740 | KEYWORD(live); |
741 | KEYWORD(dsoLocal); |
742 | KEYWORD(canAutoHide); |
743 | KEYWORD(importType); |
744 | KEYWORD(definition); |
745 | KEYWORD(declaration); |
746 | KEYWORD(function); |
747 | KEYWORD(insts); |
748 | KEYWORD(funcFlags); |
749 | KEYWORD(readNone); |
750 | KEYWORD(readOnly); |
751 | KEYWORD(noRecurse); |
752 | KEYWORD(returnDoesNotAlias); |
753 | KEYWORD(noInline); |
754 | KEYWORD(alwaysInline); |
755 | KEYWORD(noUnwind); |
756 | KEYWORD(mayThrow); |
757 | KEYWORD(hasUnknownCall); |
758 | KEYWORD(mustBeUnreachable); |
759 | KEYWORD(calls); |
760 | KEYWORD(callee); |
761 | KEYWORD(params); |
762 | KEYWORD(param); |
763 | KEYWORD(hotness); |
764 | KEYWORD(unknown); |
765 | KEYWORD(critical); |
766 | KEYWORD(relbf); |
767 | KEYWORD(variable); |
768 | KEYWORD(vTableFuncs); |
769 | KEYWORD(virtFunc); |
770 | KEYWORD(aliasee); |
771 | KEYWORD(refs); |
772 | KEYWORD(typeIdInfo); |
773 | KEYWORD(typeTests); |
774 | KEYWORD(typeTestAssumeVCalls); |
775 | KEYWORD(typeCheckedLoadVCalls); |
776 | KEYWORD(typeTestAssumeConstVCalls); |
777 | KEYWORD(typeCheckedLoadConstVCalls); |
778 | KEYWORD(vFuncId); |
779 | KEYWORD(offset); |
780 | KEYWORD(args); |
781 | KEYWORD(typeid); |
782 | KEYWORD(typeidCompatibleVTable); |
783 | KEYWORD(summary); |
784 | KEYWORD(typeTestRes); |
785 | KEYWORD(kind); |
786 | KEYWORD(unsat); |
787 | KEYWORD(byteArray); |
788 | KEYWORD(inline); |
789 | KEYWORD(single); |
790 | KEYWORD(allOnes); |
791 | KEYWORD(sizeM1BitWidth); |
792 | KEYWORD(alignLog2); |
793 | KEYWORD(sizeM1); |
794 | KEYWORD(bitMask); |
795 | KEYWORD(inlineBits); |
796 | KEYWORD(vcall_visibility); |
797 | KEYWORD(wpdResolutions); |
798 | KEYWORD(wpdRes); |
799 | KEYWORD(indir); |
800 | KEYWORD(singleImpl); |
801 | KEYWORD(branchFunnel); |
802 | KEYWORD(singleImplName); |
803 | KEYWORD(resByArg); |
804 | KEYWORD(byArg); |
805 | KEYWORD(uniformRetVal); |
806 | KEYWORD(uniqueRetVal); |
807 | KEYWORD(virtualConstProp); |
808 | KEYWORD(info); |
809 | KEYWORD(byte); |
810 | KEYWORD(bit); |
811 | KEYWORD(varFlags); |
812 | KEYWORD(callsites); |
813 | KEYWORD(clones); |
814 | KEYWORD(stackIds); |
815 | KEYWORD(allocs); |
816 | KEYWORD(versions); |
817 | KEYWORD(memProf); |
818 | KEYWORD(notcold); |
819 | |
820 | #undef KEYWORD |
821 | |
822 | // Keywords for types. |
823 | #define TYPEKEYWORD(STR, LLVMTY) \ |
824 | do { \ |
825 | if (Keyword == STR) { \ |
826 | TyVal = LLVMTY; \ |
827 | return lltok::Type; \ |
828 | } \ |
829 | } while (false) |
830 | |
831 | TYPEKEYWORD("void" , Type::getVoidTy(Context)); |
832 | TYPEKEYWORD("half" , Type::getHalfTy(Context)); |
833 | TYPEKEYWORD("bfloat" , Type::getBFloatTy(Context)); |
834 | TYPEKEYWORD("float" , Type::getFloatTy(Context)); |
835 | TYPEKEYWORD("double" , Type::getDoubleTy(Context)); |
836 | TYPEKEYWORD("x86_fp80" , Type::getX86_FP80Ty(Context)); |
837 | TYPEKEYWORD("fp128" , Type::getFP128Ty(Context)); |
838 | TYPEKEYWORD("ppc_fp128" , Type::getPPC_FP128Ty(Context)); |
839 | TYPEKEYWORD("label" , Type::getLabelTy(Context)); |
840 | TYPEKEYWORD("metadata" , Type::getMetadataTy(Context)); |
841 | TYPEKEYWORD("x86_mmx" , Type::getX86_MMXTy(Context)); |
842 | TYPEKEYWORD("x86_amx" , Type::getX86_AMXTy(Context)); |
843 | TYPEKEYWORD("token" , Type::getTokenTy(Context)); |
844 | TYPEKEYWORD("ptr" , PointerType::getUnqual(Context)); |
845 | |
846 | #undef TYPEKEYWORD |
847 | |
848 | // Keywords for instructions. |
849 | #define INSTKEYWORD(STR, Enum) \ |
850 | do { \ |
851 | if (Keyword == #STR) { \ |
852 | UIntVal = Instruction::Enum; \ |
853 | return lltok::kw_##STR; \ |
854 | } \ |
855 | } while (false) |
856 | |
857 | INSTKEYWORD(fneg, FNeg); |
858 | |
859 | INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd); |
860 | INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub); |
861 | INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul); |
862 | INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv); |
863 | INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem); |
864 | INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr); |
865 | INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor); |
866 | INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp); |
867 | |
868 | INSTKEYWORD(phi, PHI); |
869 | INSTKEYWORD(call, Call); |
870 | INSTKEYWORD(trunc, Trunc); |
871 | INSTKEYWORD(zext, ZExt); |
872 | INSTKEYWORD(sext, SExt); |
873 | INSTKEYWORD(fptrunc, FPTrunc); |
874 | INSTKEYWORD(fpext, FPExt); |
875 | INSTKEYWORD(uitofp, UIToFP); |
876 | INSTKEYWORD(sitofp, SIToFP); |
877 | INSTKEYWORD(fptoui, FPToUI); |
878 | INSTKEYWORD(fptosi, FPToSI); |
879 | INSTKEYWORD(inttoptr, IntToPtr); |
880 | INSTKEYWORD(ptrtoint, PtrToInt); |
881 | INSTKEYWORD(bitcast, BitCast); |
882 | INSTKEYWORD(addrspacecast, AddrSpaceCast); |
883 | INSTKEYWORD(select, Select); |
884 | INSTKEYWORD(va_arg, VAArg); |
885 | INSTKEYWORD(ret, Ret); |
886 | INSTKEYWORD(br, Br); |
887 | INSTKEYWORD(switch, Switch); |
888 | INSTKEYWORD(indirectbr, IndirectBr); |
889 | INSTKEYWORD(invoke, Invoke); |
890 | INSTKEYWORD(resume, Resume); |
891 | INSTKEYWORD(unreachable, Unreachable); |
892 | INSTKEYWORD(callbr, CallBr); |
893 | |
894 | INSTKEYWORD(alloca, Alloca); |
895 | INSTKEYWORD(load, Load); |
896 | INSTKEYWORD(store, Store); |
897 | INSTKEYWORD(cmpxchg, AtomicCmpXchg); |
898 | INSTKEYWORD(atomicrmw, AtomicRMW); |
899 | INSTKEYWORD(fence, Fence); |
900 | INSTKEYWORD(getelementptr, GetElementPtr); |
901 | |
902 | INSTKEYWORD(extractelement, ExtractElement); |
903 | INSTKEYWORD(insertelement, InsertElement); |
904 | INSTKEYWORD(shufflevector, ShuffleVector); |
905 | INSTKEYWORD(extractvalue, ExtractValue); |
906 | INSTKEYWORD(insertvalue, InsertValue); |
907 | INSTKEYWORD(landingpad, LandingPad); |
908 | INSTKEYWORD(cleanupret, CleanupRet); |
909 | INSTKEYWORD(catchret, CatchRet); |
910 | INSTKEYWORD(catchswitch, CatchSwitch); |
911 | INSTKEYWORD(catchpad, CatchPad); |
912 | INSTKEYWORD(cleanuppad, CleanupPad); |
913 | |
914 | INSTKEYWORD(freeze, Freeze); |
915 | |
916 | #undef INSTKEYWORD |
917 | |
918 | #define DWKEYWORD(TYPE, TOKEN) \ |
919 | do { \ |
920 | if (Keyword.starts_with("DW_" #TYPE "_")) { \ |
921 | StrVal.assign(Keyword.begin(), Keyword.end()); \ |
922 | return lltok::TOKEN; \ |
923 | } \ |
924 | } while (false) |
925 | |
926 | DWKEYWORD(TAG, DwarfTag); |
927 | DWKEYWORD(ATE, DwarfAttEncoding); |
928 | DWKEYWORD(VIRTUALITY, DwarfVirtuality); |
929 | DWKEYWORD(LANG, DwarfLang); |
930 | DWKEYWORD(CC, DwarfCC); |
931 | DWKEYWORD(OP, DwarfOp); |
932 | DWKEYWORD(MACINFO, DwarfMacinfo); |
933 | |
934 | #undef DWKEYWORD |
935 | |
936 | // Keywords for debug record types. |
937 | #define DBGRECORDTYPEKEYWORD(STR) \ |
938 | do { \ |
939 | if (Keyword == "dbg_" #STR) { \ |
940 | StrVal = #STR; \ |
941 | return lltok::DbgRecordType; \ |
942 | } \ |
943 | } while (false) |
944 | |
945 | DBGRECORDTYPEKEYWORD(value); |
946 | DBGRECORDTYPEKEYWORD(declare); |
947 | DBGRECORDTYPEKEYWORD(assign); |
948 | DBGRECORDTYPEKEYWORD(label); |
949 | #undef DBGRECORDTYPEKEYWORD |
950 | |
951 | if (Keyword.starts_with(Prefix: "DIFlag" )) { |
952 | StrVal.assign(first: Keyword.begin(), last: Keyword.end()); |
953 | return lltok::DIFlag; |
954 | } |
955 | |
956 | if (Keyword.starts_with(Prefix: "DISPFlag" )) { |
957 | StrVal.assign(first: Keyword.begin(), last: Keyword.end()); |
958 | return lltok::DISPFlag; |
959 | } |
960 | |
961 | if (Keyword.starts_with(Prefix: "CSK_" )) { |
962 | StrVal.assign(first: Keyword.begin(), last: Keyword.end()); |
963 | return lltok::ChecksumKind; |
964 | } |
965 | |
966 | if (Keyword == "NoDebug" || Keyword == "FullDebug" || |
967 | Keyword == "LineTablesOnly" || Keyword == "DebugDirectivesOnly" ) { |
968 | StrVal.assign(first: Keyword.begin(), last: Keyword.end()); |
969 | return lltok::EmissionKind; |
970 | } |
971 | |
972 | if (Keyword == "GNU" || Keyword == "Apple" || Keyword == "None" || |
973 | Keyword == "Default" ) { |
974 | StrVal.assign(first: Keyword.begin(), last: Keyword.end()); |
975 | return lltok::NameTableKind; |
976 | } |
977 | |
978 | // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by |
979 | // the CFE to avoid forcing it to deal with 64-bit numbers. |
980 | if ((TokStart[0] == 'u' || TokStart[0] == 's') && |
981 | TokStart[1] == '0' && TokStart[2] == 'x' && |
982 | isxdigit(static_cast<unsigned char>(TokStart[3]))) { |
983 | int len = CurPtr-TokStart-3; |
984 | uint32_t bits = len * 4; |
985 | StringRef HexStr(TokStart + 3, len); |
986 | if (!all_of(Range&: HexStr, P: isxdigit)) { |
987 | // Bad token, return it as an error. |
988 | CurPtr = TokStart+3; |
989 | return lltok::Error; |
990 | } |
991 | APInt Tmp(bits, HexStr, 16); |
992 | uint32_t activeBits = Tmp.getActiveBits(); |
993 | if (activeBits > 0 && activeBits < bits) |
994 | Tmp = Tmp.trunc(width: activeBits); |
995 | APSIntVal = APSInt(Tmp, TokStart[0] == 'u'); |
996 | return lltok::APSInt; |
997 | } |
998 | |
999 | // If this is "cc1234", return this as just "cc". |
1000 | if (TokStart[0] == 'c' && TokStart[1] == 'c') { |
1001 | CurPtr = TokStart+2; |
1002 | return lltok::kw_cc; |
1003 | } |
1004 | |
1005 | // Finally, if this isn't known, return an error. |
1006 | CurPtr = TokStart+1; |
1007 | return lltok::Error; |
1008 | } |
1009 | |
1010 | /// Lex all tokens that start with a 0x prefix, knowing they match and are not |
1011 | /// labels. |
1012 | /// HexFPConstant 0x[0-9A-Fa-f]+ |
1013 | /// HexFP80Constant 0xK[0-9A-Fa-f]+ |
1014 | /// HexFP128Constant 0xL[0-9A-Fa-f]+ |
1015 | /// HexPPC128Constant 0xM[0-9A-Fa-f]+ |
1016 | /// HexHalfConstant 0xH[0-9A-Fa-f]+ |
1017 | /// HexBFloatConstant 0xR[0-9A-Fa-f]+ |
1018 | lltok::Kind LLLexer::Lex0x() { |
1019 | CurPtr = TokStart + 2; |
1020 | |
1021 | char Kind; |
1022 | if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' || |
1023 | CurPtr[0] == 'R') { |
1024 | Kind = *CurPtr++; |
1025 | } else { |
1026 | Kind = 'J'; |
1027 | } |
1028 | |
1029 | if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) { |
1030 | // Bad token, return it as an error. |
1031 | CurPtr = TokStart+1; |
1032 | return lltok::Error; |
1033 | } |
1034 | |
1035 | while (isxdigit(static_cast<unsigned char>(CurPtr[0]))) |
1036 | ++CurPtr; |
1037 | |
1038 | if (Kind == 'J') { |
1039 | // HexFPConstant - Floating point constant represented in IEEE format as a |
1040 | // hexadecimal number for when exponential notation is not precise enough. |
1041 | // Half, BFloat, Float, and double only. |
1042 | APFloatVal = APFloat(APFloat::IEEEdouble(), |
1043 | APInt(64, HexIntToVal(Buffer: TokStart + 2, End: CurPtr))); |
1044 | return lltok::APFloat; |
1045 | } |
1046 | |
1047 | uint64_t Pair[2]; |
1048 | switch (Kind) { |
1049 | default: llvm_unreachable("Unknown kind!" ); |
1050 | case 'K': |
1051 | // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) |
1052 | FP80HexToIntPair(Buffer: TokStart+3, End: CurPtr, Pair); |
1053 | APFloatVal = APFloat(APFloat::x87DoubleExtended(), APInt(80, Pair)); |
1054 | return lltok::APFloat; |
1055 | case 'L': |
1056 | // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) |
1057 | HexToIntPair(Buffer: TokStart+3, End: CurPtr, Pair); |
1058 | APFloatVal = APFloat(APFloat::IEEEquad(), APInt(128, Pair)); |
1059 | return lltok::APFloat; |
1060 | case 'M': |
1061 | // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) |
1062 | HexToIntPair(Buffer: TokStart+3, End: CurPtr, Pair); |
1063 | APFloatVal = APFloat(APFloat::PPCDoubleDouble(), APInt(128, Pair)); |
1064 | return lltok::APFloat; |
1065 | case 'H': |
1066 | APFloatVal = APFloat(APFloat::IEEEhalf(), |
1067 | APInt(16,HexIntToVal(Buffer: TokStart+3, End: CurPtr))); |
1068 | return lltok::APFloat; |
1069 | case 'R': |
1070 | // Brain floating point |
1071 | APFloatVal = APFloat(APFloat::BFloat(), |
1072 | APInt(16, HexIntToVal(Buffer: TokStart + 3, End: CurPtr))); |
1073 | return lltok::APFloat; |
1074 | } |
1075 | } |
1076 | |
1077 | /// Lex tokens for a label or a numeric constant, possibly starting with -. |
1078 | /// Label [-a-zA-Z$._0-9]+: |
1079 | /// NInteger -[0-9]+ |
1080 | /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? |
1081 | /// PInteger [0-9]+ |
1082 | /// HexFPConstant 0x[0-9A-Fa-f]+ |
1083 | /// HexFP80Constant 0xK[0-9A-Fa-f]+ |
1084 | /// HexFP128Constant 0xL[0-9A-Fa-f]+ |
1085 | /// HexPPC128Constant 0xM[0-9A-Fa-f]+ |
1086 | lltok::Kind LLLexer::LexDigitOrNegative() { |
1087 | // If the letter after the negative is not a number, this is probably a label. |
1088 | if (!isdigit(static_cast<unsigned char>(TokStart[0])) && |
1089 | !isdigit(static_cast<unsigned char>(CurPtr[0]))) { |
1090 | // Okay, this is not a number after the -, it's probably a label. |
1091 | if (const char *End = isLabelTail(CurPtr)) { |
1092 | StrVal.assign(first: TokStart, last: End-1); |
1093 | CurPtr = End; |
1094 | return lltok::LabelStr; |
1095 | } |
1096 | |
1097 | return lltok::Error; |
1098 | } |
1099 | |
1100 | // At this point, it is either a label, int or fp constant. |
1101 | |
1102 | // Skip digits, we have at least one. |
1103 | for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr) |
1104 | /*empty*/; |
1105 | |
1106 | // Check if this is a fully-numeric label: |
1107 | if (isdigit(TokStart[0]) && CurPtr[0] == ':') { |
1108 | uint64_t Val = atoull(Buffer: TokStart, End: CurPtr); |
1109 | ++CurPtr; // Skip the colon. |
1110 | if ((unsigned)Val != Val) |
1111 | Error(Msg: "invalid value number (too large)!" ); |
1112 | UIntVal = unsigned(Val); |
1113 | return lltok::LabelID; |
1114 | } |
1115 | |
1116 | // Check to see if this really is a string label, e.g. "-1:". |
1117 | if (isLabelChar(C: CurPtr[0]) || CurPtr[0] == ':') { |
1118 | if (const char *End = isLabelTail(CurPtr)) { |
1119 | StrVal.assign(first: TokStart, last: End-1); |
1120 | CurPtr = End; |
1121 | return lltok::LabelStr; |
1122 | } |
1123 | } |
1124 | |
1125 | // If the next character is a '.', then it is a fp value, otherwise its |
1126 | // integer. |
1127 | if (CurPtr[0] != '.') { |
1128 | if (TokStart[0] == '0' && TokStart[1] == 'x') |
1129 | return Lex0x(); |
1130 | APSIntVal = APSInt(StringRef(TokStart, CurPtr - TokStart)); |
1131 | return lltok::APSInt; |
1132 | } |
1133 | |
1134 | ++CurPtr; |
1135 | |
1136 | // Skip over [0-9]*([eE][-+]?[0-9]+)? |
1137 | while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr; |
1138 | |
1139 | if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { |
1140 | if (isdigit(static_cast<unsigned char>(CurPtr[1])) || |
1141 | ((CurPtr[1] == '-' || CurPtr[1] == '+') && |
1142 | isdigit(static_cast<unsigned char>(CurPtr[2])))) { |
1143 | CurPtr += 2; |
1144 | while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr; |
1145 | } |
1146 | } |
1147 | |
1148 | APFloatVal = APFloat(APFloat::IEEEdouble(), |
1149 | StringRef(TokStart, CurPtr - TokStart)); |
1150 | return lltok::APFloat; |
1151 | } |
1152 | |
1153 | /// Lex a floating point constant starting with +. |
1154 | /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? |
1155 | lltok::Kind LLLexer::LexPositive() { |
1156 | // If the letter after the negative is a number, this is probably not a |
1157 | // label. |
1158 | if (!isdigit(static_cast<unsigned char>(CurPtr[0]))) |
1159 | return lltok::Error; |
1160 | |
1161 | // Skip digits. |
1162 | for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr) |
1163 | /*empty*/; |
1164 | |
1165 | // At this point, we need a '.'. |
1166 | if (CurPtr[0] != '.') { |
1167 | CurPtr = TokStart+1; |
1168 | return lltok::Error; |
1169 | } |
1170 | |
1171 | ++CurPtr; |
1172 | |
1173 | // Skip over [0-9]*([eE][-+]?[0-9]+)? |
1174 | while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr; |
1175 | |
1176 | if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { |
1177 | if (isdigit(static_cast<unsigned char>(CurPtr[1])) || |
1178 | ((CurPtr[1] == '-' || CurPtr[1] == '+') && |
1179 | isdigit(static_cast<unsigned char>(CurPtr[2])))) { |
1180 | CurPtr += 2; |
1181 | while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr; |
1182 | } |
1183 | } |
1184 | |
1185 | APFloatVal = APFloat(APFloat::IEEEdouble(), |
1186 | StringRef(TokStart, CurPtr - TokStart)); |
1187 | return lltok::APFloat; |
1188 | } |
1189 | |