1 | //===- TGLexer.cpp - Lexer for TableGen -----------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Implement the Lexer for TableGen. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "TGLexer.h" |
14 | #include "llvm/ADT/ArrayRef.h" |
15 | #include "llvm/ADT/StringSwitch.h" |
16 | #include "llvm/ADT/Twine.h" |
17 | #include "llvm/Config/config.h" // for strtoull()/strtoll() define |
18 | #include "llvm/Support/Compiler.h" |
19 | #include "llvm/Support/MemoryBuffer.h" |
20 | #include "llvm/Support/SourceMgr.h" |
21 | #include "llvm/TableGen/Error.h" |
22 | #include <algorithm> |
23 | #include <cctype> |
24 | #include <cerrno> |
25 | #include <cstdint> |
26 | #include <cstdio> |
27 | #include <cstdlib> |
28 | #include <cstring> |
29 | |
30 | using namespace llvm; |
31 | |
32 | namespace { |
33 | // A list of supported preprocessing directives with their |
34 | // internal token kinds and names. |
35 | struct { |
36 | tgtok::TokKind Kind; |
37 | const char *Word; |
38 | } PreprocessorDirs[] = { |
39 | { .Kind: tgtok::Ifdef, .Word: "ifdef" }, |
40 | { .Kind: tgtok::Ifndef, .Word: "ifndef" }, |
41 | { .Kind: tgtok::Else, .Word: "else" }, |
42 | { .Kind: tgtok::Endif, .Word: "endif" }, |
43 | { .Kind: tgtok::Define, .Word: "define" } |
44 | }; |
45 | } // end anonymous namespace |
46 | |
47 | TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) { |
48 | CurBuffer = SrcMgr.getMainFileID(); |
49 | CurBuf = SrcMgr.getMemoryBuffer(i: CurBuffer)->getBuffer(); |
50 | CurPtr = CurBuf.begin(); |
51 | TokStart = nullptr; |
52 | |
53 | // Pretend that we enter the "top-level" include file. |
54 | PrepIncludeStack.push_back( |
55 | x: std::make_unique<std::vector<PreprocessorControlDesc>>()); |
56 | |
57 | // Put all macros defined in the command line into the DefinedMacros set. |
58 | for (const std::string &MacroName : Macros) |
59 | DefinedMacros.insert(key: MacroName); |
60 | } |
61 | |
62 | SMLoc TGLexer::getLoc() const { |
63 | return SMLoc::getFromPointer(Ptr: TokStart); |
64 | } |
65 | |
66 | SMRange TGLexer::getLocRange() const { |
67 | return {getLoc(), SMLoc::getFromPointer(Ptr: CurPtr)}; |
68 | } |
69 | |
70 | /// ReturnError - Set the error to the specified string at the specified |
71 | /// location. This is defined to always return tgtok::Error. |
72 | tgtok::TokKind TGLexer::ReturnError(SMLoc Loc, const Twine &Msg) { |
73 | PrintError(ErrorLoc: Loc, Msg); |
74 | return tgtok::Error; |
75 | } |
76 | |
77 | tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) { |
78 | return ReturnError(Loc: SMLoc::getFromPointer(Ptr: Loc), Msg); |
79 | } |
80 | |
81 | bool TGLexer::processEOF() { |
82 | SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(i: CurBuffer); |
83 | if (ParentIncludeLoc != SMLoc()) { |
84 | // If prepExitInclude() detects a problem with the preprocessing |
85 | // control stack, it will return false. Pretend that we reached |
86 | // the final EOF and stop lexing more tokens by returning false |
87 | // to LexToken(). |
88 | if (!prepExitInclude(IncludeStackMustBeEmpty: false)) |
89 | return false; |
90 | |
91 | CurBuffer = SrcMgr.FindBufferContainingLoc(Loc: ParentIncludeLoc); |
92 | CurBuf = SrcMgr.getMemoryBuffer(i: CurBuffer)->getBuffer(); |
93 | CurPtr = ParentIncludeLoc.getPointer(); |
94 | // Make sure TokStart points into the parent file's buffer. |
95 | // LexToken() assigns to it before calling getNextChar(), |
96 | // so it is pointing into the included file now. |
97 | TokStart = CurPtr; |
98 | return true; |
99 | } |
100 | |
101 | // Pretend that we exit the "top-level" include file. |
102 | // Note that in case of an error (e.g. control stack imbalance) |
103 | // the routine will issue a fatal error. |
104 | prepExitInclude(IncludeStackMustBeEmpty: true); |
105 | return false; |
106 | } |
107 | |
108 | int TGLexer::getNextChar() { |
109 | char CurChar = *CurPtr++; |
110 | switch (CurChar) { |
111 | default: |
112 | return (unsigned char)CurChar; |
113 | |
114 | case 0: { |
115 | // A NUL character in the stream is either the end of the current buffer or |
116 | // a spurious NUL in the file. Disambiguate that here. |
117 | if (CurPtr - 1 == CurBuf.end()) { |
118 | --CurPtr; // Arrange for another call to return EOF again. |
119 | return EOF; |
120 | } |
121 | PrintError(ErrorLoc: getLoc(), |
122 | Msg: "NUL character is invalid in source; treated as space" ); |
123 | return ' '; |
124 | } |
125 | |
126 | case '\n': |
127 | case '\r': |
128 | // Handle the newline character by ignoring it and incrementing the line |
129 | // count. However, be careful about 'dos style' files with \n\r in them. |
130 | // Only treat a \n\r or \r\n as a single line. |
131 | if ((*CurPtr == '\n' || (*CurPtr == '\r')) && |
132 | *CurPtr != CurChar) |
133 | ++CurPtr; // Eat the two char newline sequence. |
134 | return '\n'; |
135 | } |
136 | } |
137 | |
138 | int TGLexer::peekNextChar(int Index) const { |
139 | return *(CurPtr + Index); |
140 | } |
141 | |
142 | tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) { |
143 | TokStart = CurPtr; |
144 | // This always consumes at least one character. |
145 | int CurChar = getNextChar(); |
146 | |
147 | switch (CurChar) { |
148 | default: |
149 | // Handle letters: [a-zA-Z_] |
150 | if (isalpha(CurChar) || CurChar == '_') |
151 | return LexIdentifier(); |
152 | |
153 | // Unknown character, emit an error. |
154 | return ReturnError(Loc: TokStart, Msg: "Unexpected character" ); |
155 | case EOF: |
156 | // Lex next token, if we just left an include file. |
157 | // Note that leaving an include file means that the next |
158 | // symbol is located at the end of the 'include "..."' |
159 | // construct, so LexToken() is called with default |
160 | // false parameter. |
161 | if (processEOF()) |
162 | return LexToken(); |
163 | |
164 | // Return EOF denoting the end of lexing. |
165 | return tgtok::Eof; |
166 | |
167 | case ':': return tgtok::colon; |
168 | case ';': return tgtok::semi; |
169 | case ',': return tgtok::comma; |
170 | case '<': return tgtok::less; |
171 | case '>': return tgtok::greater; |
172 | case ']': return tgtok::r_square; |
173 | case '{': return tgtok::l_brace; |
174 | case '}': return tgtok::r_brace; |
175 | case '(': return tgtok::l_paren; |
176 | case ')': return tgtok::r_paren; |
177 | case '=': return tgtok::equal; |
178 | case '?': return tgtok::question; |
179 | case '#': |
180 | if (FileOrLineStart) { |
181 | tgtok::TokKind Kind = prepIsDirective(); |
182 | if (Kind != tgtok::Error) |
183 | return lexPreprocessor(Kind); |
184 | } |
185 | |
186 | return tgtok::paste; |
187 | |
188 | // The period is a separate case so we can recognize the "..." |
189 | // range punctuator. |
190 | case '.': |
191 | if (peekNextChar(Index: 0) == '.') { |
192 | ++CurPtr; // Eat second dot. |
193 | if (peekNextChar(Index: 0) == '.') { |
194 | ++CurPtr; // Eat third dot. |
195 | return tgtok::dotdotdot; |
196 | } |
197 | return ReturnError(Loc: TokStart, Msg: "Invalid '..' punctuation" ); |
198 | } |
199 | return tgtok::dot; |
200 | |
201 | case '\r': |
202 | PrintFatalError(Msg: "getNextChar() must never return '\r'" ); |
203 | return tgtok::Error; |
204 | |
205 | case ' ': |
206 | case '\t': |
207 | // Ignore whitespace. |
208 | return LexToken(FileOrLineStart); |
209 | case '\n': |
210 | // Ignore whitespace, and identify the new line. |
211 | return LexToken(FileOrLineStart: true); |
212 | case '/': |
213 | // If this is the start of a // comment, skip until the end of the line or |
214 | // the end of the buffer. |
215 | if (*CurPtr == '/') |
216 | SkipBCPLComment(); |
217 | else if (*CurPtr == '*') { |
218 | if (SkipCComment()) |
219 | return tgtok::Error; |
220 | } else // Otherwise, this is an error. |
221 | return ReturnError(Loc: TokStart, Msg: "Unexpected character" ); |
222 | return LexToken(FileOrLineStart); |
223 | case '-': case '+': |
224 | case '0': case '1': case '2': case '3': case '4': case '5': case '6': |
225 | case '7': case '8': case '9': { |
226 | int NextChar = 0; |
227 | if (isdigit(CurChar)) { |
228 | // Allow identifiers to start with a number if it is followed by |
229 | // an identifier. This can happen with paste operations like |
230 | // foo#8i. |
231 | int i = 0; |
232 | do { |
233 | NextChar = peekNextChar(Index: i++); |
234 | } while (isdigit(NextChar)); |
235 | |
236 | if (NextChar == 'x' || NextChar == 'b') { |
237 | // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most |
238 | // likely a number. |
239 | int NextNextChar = peekNextChar(Index: i); |
240 | switch (NextNextChar) { |
241 | default: |
242 | break; |
243 | case '0': case '1': |
244 | if (NextChar == 'b') |
245 | return LexNumber(); |
246 | [[fallthrough]]; |
247 | case '2': case '3': case '4': case '5': |
248 | case '6': case '7': case '8': case '9': |
249 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': |
250 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': |
251 | if (NextChar == 'x') |
252 | return LexNumber(); |
253 | break; |
254 | } |
255 | } |
256 | } |
257 | |
258 | if (isalpha(NextChar) || NextChar == '_') |
259 | return LexIdentifier(); |
260 | |
261 | return LexNumber(); |
262 | } |
263 | case '"': return LexString(); |
264 | case '$': return LexVarName(); |
265 | case '[': return LexBracket(); |
266 | case '!': return LexExclaim(); |
267 | } |
268 | } |
269 | |
270 | /// LexString - Lex "[^"]*" |
271 | tgtok::TokKind TGLexer::LexString() { |
272 | const char *StrStart = CurPtr; |
273 | |
274 | CurStrVal = "" ; |
275 | |
276 | while (*CurPtr != '"') { |
277 | // If we hit the end of the buffer, report an error. |
278 | if (*CurPtr == 0 && CurPtr == CurBuf.end()) |
279 | return ReturnError(Loc: StrStart, Msg: "End of file in string literal" ); |
280 | |
281 | if (*CurPtr == '\n' || *CurPtr == '\r') |
282 | return ReturnError(Loc: StrStart, Msg: "End of line in string literal" ); |
283 | |
284 | if (*CurPtr != '\\') { |
285 | CurStrVal += *CurPtr++; |
286 | continue; |
287 | } |
288 | |
289 | ++CurPtr; |
290 | |
291 | switch (*CurPtr) { |
292 | case '\\': case '\'': case '"': |
293 | // These turn into their literal character. |
294 | CurStrVal += *CurPtr++; |
295 | break; |
296 | case 't': |
297 | CurStrVal += '\t'; |
298 | ++CurPtr; |
299 | break; |
300 | case 'n': |
301 | CurStrVal += '\n'; |
302 | ++CurPtr; |
303 | break; |
304 | |
305 | case '\n': |
306 | case '\r': |
307 | return ReturnError(Loc: CurPtr, Msg: "escaped newlines not supported in tblgen" ); |
308 | |
309 | // If we hit the end of the buffer, report an error. |
310 | case '\0': |
311 | if (CurPtr == CurBuf.end()) |
312 | return ReturnError(Loc: StrStart, Msg: "End of file in string literal" ); |
313 | [[fallthrough]]; |
314 | default: |
315 | return ReturnError(Loc: CurPtr, Msg: "invalid escape in string literal" ); |
316 | } |
317 | } |
318 | |
319 | ++CurPtr; |
320 | return tgtok::StrVal; |
321 | } |
322 | |
323 | tgtok::TokKind TGLexer::LexVarName() { |
324 | if (!isalpha(CurPtr[0]) && CurPtr[0] != '_') |
325 | return ReturnError(Loc: TokStart, Msg: "Invalid variable name" ); |
326 | |
327 | // Otherwise, we're ok, consume the rest of the characters. |
328 | const char *VarNameStart = CurPtr++; |
329 | |
330 | while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') |
331 | ++CurPtr; |
332 | |
333 | CurStrVal.assign(first: VarNameStart, last: CurPtr); |
334 | return tgtok::VarName; |
335 | } |
336 | |
337 | tgtok::TokKind TGLexer::LexIdentifier() { |
338 | // The first letter is [a-zA-Z_]. |
339 | const char *IdentStart = TokStart; |
340 | |
341 | // Match the rest of the identifier regex: [0-9a-zA-Z_]* |
342 | while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') |
343 | ++CurPtr; |
344 | |
345 | // Check to see if this identifier is a reserved keyword. |
346 | StringRef Str(IdentStart, CurPtr-IdentStart); |
347 | |
348 | tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str) |
349 | .Case(S: "int" , Value: tgtok::Int) |
350 | .Case(S: "bit" , Value: tgtok::Bit) |
351 | .Case(S: "bits" , Value: tgtok::Bits) |
352 | .Case(S: "string" , Value: tgtok::String) |
353 | .Case(S: "list" , Value: tgtok::List) |
354 | .Case(S: "code" , Value: tgtok::Code) |
355 | .Case(S: "dag" , Value: tgtok::Dag) |
356 | .Case(S: "class" , Value: tgtok::Class) |
357 | .Case(S: "def" , Value: tgtok::Def) |
358 | .Case(S: "true" , Value: tgtok::TrueVal) |
359 | .Case(S: "false" , Value: tgtok::FalseVal) |
360 | .Case(S: "foreach" , Value: tgtok::Foreach) |
361 | .Case(S: "defm" , Value: tgtok::Defm) |
362 | .Case(S: "defset" , Value: tgtok::Defset) |
363 | .Case(S: "deftype" , Value: tgtok::Deftype) |
364 | .Case(S: "multiclass" , Value: tgtok::MultiClass) |
365 | .Case(S: "field" , Value: tgtok::Field) |
366 | .Case(S: "let" , Value: tgtok::Let) |
367 | .Case(S: "in" , Value: tgtok::In) |
368 | .Case(S: "defvar" , Value: tgtok::Defvar) |
369 | .Case(S: "include" , Value: tgtok::Include) |
370 | .Case(S: "if" , Value: tgtok::If) |
371 | .Case(S: "then" , Value: tgtok::Then) |
372 | .Case(S: "else" , Value: tgtok::ElseKW) |
373 | .Case(S: "assert" , Value: tgtok::Assert) |
374 | .Case(S: "dump" , Value: tgtok::Dump) |
375 | .Default(Value: tgtok::Id); |
376 | |
377 | // A couple of tokens require special processing. |
378 | switch (Kind) { |
379 | case tgtok::Include: |
380 | if (LexInclude()) return tgtok::Error; |
381 | return Lex(); |
382 | case tgtok::Id: |
383 | CurStrVal.assign(first: Str.begin(), last: Str.end()); |
384 | break; |
385 | default: |
386 | break; |
387 | } |
388 | |
389 | return Kind; |
390 | } |
391 | |
392 | /// LexInclude - We just read the "include" token. Get the string token that |
393 | /// comes next and enter the include. |
394 | bool TGLexer::LexInclude() { |
395 | // The token after the include must be a string. |
396 | tgtok::TokKind Tok = LexToken(); |
397 | if (Tok == tgtok::Error) return true; |
398 | if (Tok != tgtok::StrVal) { |
399 | PrintError(ErrorLoc: getLoc(), Msg: "Expected filename after include" ); |
400 | return true; |
401 | } |
402 | |
403 | // Get the string. |
404 | std::string Filename = CurStrVal; |
405 | std::string IncludedFile; |
406 | |
407 | CurBuffer = SrcMgr.AddIncludeFile(Filename, IncludeLoc: SMLoc::getFromPointer(Ptr: CurPtr), |
408 | IncludedFile); |
409 | if (!CurBuffer) { |
410 | PrintError(ErrorLoc: getLoc(), Msg: "Could not find include file '" + Filename + "'" ); |
411 | return true; |
412 | } |
413 | |
414 | Dependencies.insert(x: IncludedFile); |
415 | // Save the line number and lex buffer of the includer. |
416 | CurBuf = SrcMgr.getMemoryBuffer(i: CurBuffer)->getBuffer(); |
417 | CurPtr = CurBuf.begin(); |
418 | |
419 | PrepIncludeStack.push_back( |
420 | x: std::make_unique<std::vector<PreprocessorControlDesc>>()); |
421 | return false; |
422 | } |
423 | |
424 | /// SkipBCPLComment - Skip over the comment by finding the next CR or LF. |
425 | /// Or we may end up at the end of the buffer. |
426 | void TGLexer::() { |
427 | ++CurPtr; // skip the second slash. |
428 | auto EOLPos = CurBuf.find_first_of(Chars: "\r\n" , From: CurPtr - CurBuf.data()); |
429 | CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos; |
430 | } |
431 | |
432 | /// SkipCComment - This skips C-style /**/ comments. The only difference from C |
433 | /// is that we allow nesting. |
434 | bool TGLexer::() { |
435 | ++CurPtr; // skip the star. |
436 | unsigned = 1; |
437 | |
438 | while (true) { |
439 | int CurChar = getNextChar(); |
440 | switch (CurChar) { |
441 | case EOF: |
442 | PrintError(Loc: TokStart, Msg: "Unterminated comment!" ); |
443 | return true; |
444 | case '*': |
445 | // End of the comment? |
446 | if (CurPtr[0] != '/') break; |
447 | |
448 | ++CurPtr; // End the */. |
449 | if (--CommentDepth == 0) |
450 | return false; |
451 | break; |
452 | case '/': |
453 | // Start of a nested comment? |
454 | if (CurPtr[0] != '*') break; |
455 | ++CurPtr; |
456 | ++CommentDepth; |
457 | break; |
458 | } |
459 | } |
460 | } |
461 | |
462 | /// LexNumber - Lex: |
463 | /// [-+]?[0-9]+ |
464 | /// 0x[0-9a-fA-F]+ |
465 | /// 0b[01]+ |
466 | tgtok::TokKind TGLexer::LexNumber() { |
467 | unsigned Base = 0; |
468 | const char *NumStart; |
469 | |
470 | // Check if it's a hex or a binary value. |
471 | if (CurPtr[-1] == '0') { |
472 | NumStart = CurPtr + 1; |
473 | if (CurPtr[0] == 'x') { |
474 | Base = 16; |
475 | do |
476 | ++CurPtr; |
477 | while (isxdigit(CurPtr[0])); |
478 | } else if (CurPtr[0] == 'b') { |
479 | Base = 2; |
480 | do |
481 | ++CurPtr; |
482 | while (CurPtr[0] == '0' || CurPtr[0] == '1'); |
483 | } |
484 | } |
485 | |
486 | // For a hex or binary value, we always convert it to an unsigned value. |
487 | bool IsMinus = false; |
488 | |
489 | // Check if it's a decimal value. |
490 | if (Base == 0) { |
491 | // Check for a sign without a digit. |
492 | if (!isdigit(CurPtr[0])) { |
493 | if (CurPtr[-1] == '-') |
494 | return tgtok::minus; |
495 | else if (CurPtr[-1] == '+') |
496 | return tgtok::plus; |
497 | } |
498 | |
499 | Base = 10; |
500 | NumStart = TokStart; |
501 | IsMinus = CurPtr[-1] == '-'; |
502 | |
503 | while (isdigit(CurPtr[0])) |
504 | ++CurPtr; |
505 | } |
506 | |
507 | // Requires at least one digit. |
508 | if (CurPtr == NumStart) |
509 | return ReturnError(Loc: TokStart, Msg: "Invalid number" ); |
510 | |
511 | errno = 0; |
512 | if (IsMinus) |
513 | CurIntVal = strtoll(nptr: NumStart, endptr: nullptr, base: Base); |
514 | else |
515 | CurIntVal = strtoull(nptr: NumStart, endptr: nullptr, base: Base); |
516 | |
517 | if (errno == EINVAL) |
518 | return ReturnError(Loc: TokStart, Msg: "Invalid number" ); |
519 | if (errno == ERANGE) |
520 | return ReturnError(Loc: TokStart, Msg: "Number out of range" ); |
521 | |
522 | return Base == 2 ? tgtok::BinaryIntVal : tgtok::IntVal; |
523 | } |
524 | |
525 | /// LexBracket - We just read '['. If this is a code block, return it, |
526 | /// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]' |
527 | tgtok::TokKind TGLexer::LexBracket() { |
528 | if (CurPtr[0] != '{') |
529 | return tgtok::l_square; |
530 | ++CurPtr; |
531 | const char *CodeStart = CurPtr; |
532 | while (true) { |
533 | int Char = getNextChar(); |
534 | if (Char == EOF) break; |
535 | |
536 | if (Char != '}') continue; |
537 | |
538 | Char = getNextChar(); |
539 | if (Char == EOF) break; |
540 | if (Char == ']') { |
541 | CurStrVal.assign(first: CodeStart, last: CurPtr-2); |
542 | return tgtok::CodeFragment; |
543 | } |
544 | } |
545 | |
546 | return ReturnError(Loc: CodeStart - 2, Msg: "Unterminated code block" ); |
547 | } |
548 | |
549 | /// LexExclaim - Lex '!' and '![a-zA-Z]+'. |
550 | tgtok::TokKind TGLexer::LexExclaim() { |
551 | if (!isalpha(*CurPtr)) |
552 | return ReturnError(Loc: CurPtr - 1, Msg: "Invalid \"!operator\"" ); |
553 | |
554 | const char *Start = CurPtr++; |
555 | while (isalpha(*CurPtr)) |
556 | ++CurPtr; |
557 | |
558 | // Check to see which operator this is. |
559 | tgtok::TokKind Kind = |
560 | StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start)) |
561 | .Case(S: "eq" , Value: tgtok::XEq) |
562 | .Case(S: "ne" , Value: tgtok::XNe) |
563 | .Case(S: "le" , Value: tgtok::XLe) |
564 | .Case(S: "lt" , Value: tgtok::XLt) |
565 | .Case(S: "ge" , Value: tgtok::XGe) |
566 | .Case(S: "gt" , Value: tgtok::XGt) |
567 | .Case(S: "if" , Value: tgtok::XIf) |
568 | .Case(S: "cond" , Value: tgtok::XCond) |
569 | .Case(S: "isa" , Value: tgtok::XIsA) |
570 | .Case(S: "head" , Value: tgtok::XHead) |
571 | .Case(S: "tail" , Value: tgtok::XTail) |
572 | .Case(S: "size" , Value: tgtok::XSize) |
573 | .Case(S: "con" , Value: tgtok::XConcat) |
574 | .Case(S: "dag" , Value: tgtok::XDag) |
575 | .Case(S: "add" , Value: tgtok::XADD) |
576 | .Case(S: "sub" , Value: tgtok::XSUB) |
577 | .Case(S: "mul" , Value: tgtok::XMUL) |
578 | .Case(S: "div" , Value: tgtok::XDIV) |
579 | .Case(S: "not" , Value: tgtok::XNOT) |
580 | .Case(S: "logtwo" , Value: tgtok::XLOG2) |
581 | .Case(S: "and" , Value: tgtok::XAND) |
582 | .Case(S: "or" , Value: tgtok::XOR) |
583 | .Case(S: "xor" , Value: tgtok::XXOR) |
584 | .Case(S: "shl" , Value: tgtok::XSHL) |
585 | .Case(S: "sra" , Value: tgtok::XSRA) |
586 | .Case(S: "srl" , Value: tgtok::XSRL) |
587 | .Case(S: "cast" , Value: tgtok::XCast) |
588 | .Case(S: "empty" , Value: tgtok::XEmpty) |
589 | .Case(S: "subst" , Value: tgtok::XSubst) |
590 | .Case(S: "foldl" , Value: tgtok::XFoldl) |
591 | .Case(S: "foreach" , Value: tgtok::XForEach) |
592 | .Case(S: "filter" , Value: tgtok::XFilter) |
593 | .Case(S: "listconcat" , Value: tgtok::XListConcat) |
594 | .Case(S: "listsplat" , Value: tgtok::XListSplat) |
595 | .Case(S: "listremove" , Value: tgtok::XListRemove) |
596 | .Case(S: "range" , Value: tgtok::XRange) |
597 | .Case(S: "strconcat" , Value: tgtok::XStrConcat) |
598 | .Case(S: "interleave" , Value: tgtok::XInterleave) |
599 | .Case(S: "substr" , Value: tgtok::XSubstr) |
600 | .Case(S: "find" , Value: tgtok::XFind) |
601 | .Cases(S0: "setdagop" , S1: "setop" , Value: tgtok::XSetDagOp) // !setop is deprecated. |
602 | .Cases(S0: "getdagop" , S1: "getop" , Value: tgtok::XGetDagOp) // !getop is deprecated. |
603 | .Case(S: "getdagarg" , Value: tgtok::XGetDagArg) |
604 | .Case(S: "getdagname" , Value: tgtok::XGetDagName) |
605 | .Case(S: "setdagarg" , Value: tgtok::XSetDagArg) |
606 | .Case(S: "setdagname" , Value: tgtok::XSetDagName) |
607 | .Case(S: "exists" , Value: tgtok::XExists) |
608 | .Case(S: "tolower" , Value: tgtok::XToLower) |
609 | .Case(S: "toupper" , Value: tgtok::XToUpper) |
610 | .Case(S: "repr" , Value: tgtok::XRepr) |
611 | .Default(Value: tgtok::Error); |
612 | |
613 | return Kind != tgtok::Error ? Kind : ReturnError(Loc: Start-1, Msg: "Unknown operator" ); |
614 | } |
615 | |
616 | bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) { |
617 | // Report an error, if preprocessor control stack for the current |
618 | // file is not empty. |
619 | if (!PrepIncludeStack.back()->empty()) { |
620 | prepReportPreprocessorStackError(); |
621 | |
622 | return false; |
623 | } |
624 | |
625 | // Pop the preprocessing controls from the include stack. |
626 | if (PrepIncludeStack.empty()) { |
627 | PrintFatalError(Msg: "Preprocessor include stack is empty" ); |
628 | } |
629 | |
630 | PrepIncludeStack.pop_back(); |
631 | |
632 | if (IncludeStackMustBeEmpty) { |
633 | if (!PrepIncludeStack.empty()) |
634 | PrintFatalError(Msg: "Preprocessor include stack is not empty" ); |
635 | } else { |
636 | if (PrepIncludeStack.empty()) |
637 | PrintFatalError(Msg: "Preprocessor include stack is empty" ); |
638 | } |
639 | |
640 | return true; |
641 | } |
642 | |
643 | tgtok::TokKind TGLexer::prepIsDirective() const { |
644 | for (const auto &PD : PreprocessorDirs) { |
645 | int NextChar = *CurPtr; |
646 | bool Match = true; |
647 | unsigned I = 0; |
648 | for (; I < strlen(s: PD.Word); ++I) { |
649 | if (NextChar != PD.Word[I]) { |
650 | Match = false; |
651 | break; |
652 | } |
653 | |
654 | NextChar = peekNextChar(Index: I + 1); |
655 | } |
656 | |
657 | // Check for whitespace after the directive. If there is no whitespace, |
658 | // then we do not recognize it as a preprocessing directive. |
659 | if (Match) { |
660 | tgtok::TokKind Kind = PD.Kind; |
661 | |
662 | // New line and EOF may follow only #else/#endif. It will be reported |
663 | // as an error for #ifdef/#define after the call to prepLexMacroName(). |
664 | if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF || |
665 | NextChar == '\n' || |
666 | // It looks like TableGen does not support '\r' as the actual |
667 | // carriage return, e.g. getNextChar() treats a single '\r' |
668 | // as '\n'. So we do the same here. |
669 | NextChar == '\r') |
670 | return Kind; |
671 | |
672 | // Allow comments after some directives, e.g.: |
673 | // #else// OR #else/**/ |
674 | // #endif// OR #endif/**/ |
675 | // |
676 | // Note that we do allow comments after #ifdef/#define here, e.g. |
677 | // #ifdef/**/ AND #ifdef// |
678 | // #define/**/ AND #define// |
679 | // |
680 | // These cases will be reported as incorrect after calling |
681 | // prepLexMacroName(). We could have supported C-style comments |
682 | // after #ifdef/#define, but this would complicate the code |
683 | // for little benefit. |
684 | if (NextChar == '/') { |
685 | NextChar = peekNextChar(Index: I + 1); |
686 | |
687 | if (NextChar == '*' || NextChar == '/') |
688 | return Kind; |
689 | |
690 | // Pretend that we do not recognize the directive. |
691 | } |
692 | } |
693 | } |
694 | |
695 | return tgtok::Error; |
696 | } |
697 | |
698 | bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) { |
699 | TokStart = CurPtr; |
700 | |
701 | for (const auto &PD : PreprocessorDirs) |
702 | if (PD.Kind == Kind) { |
703 | // Advance CurPtr to the end of the preprocessing word. |
704 | CurPtr += strlen(s: PD.Word); |
705 | return true; |
706 | } |
707 | |
708 | PrintFatalError(Msg: "Unsupported preprocessing token in " |
709 | "prepEatPreprocessorDirective()" ); |
710 | return false; |
711 | } |
712 | |
713 | tgtok::TokKind TGLexer::lexPreprocessor( |
714 | tgtok::TokKind Kind, bool ReturnNextLiveToken) { |
715 | |
716 | // We must be looking at a preprocessing directive. Eat it! |
717 | if (!prepEatPreprocessorDirective(Kind)) |
718 | PrintFatalError(Msg: "lexPreprocessor() called for unknown " |
719 | "preprocessor directive" ); |
720 | |
721 | if (Kind == tgtok::Ifdef || Kind == tgtok::Ifndef) { |
722 | StringRef MacroName = prepLexMacroName(); |
723 | StringRef IfTokName = Kind == tgtok::Ifdef ? "#ifdef" : "#ifndef" ; |
724 | if (MacroName.empty()) |
725 | return ReturnError(Loc: TokStart, Msg: "Expected macro name after " + IfTokName); |
726 | |
727 | bool MacroIsDefined = DefinedMacros.count(Key: MacroName) != 0; |
728 | |
729 | // Canonicalize ifndef's MacroIsDefined to its ifdef equivalent. |
730 | if (Kind == tgtok::Ifndef) |
731 | MacroIsDefined = !MacroIsDefined; |
732 | |
733 | // Regardless of whether we are processing tokens or not, |
734 | // we put the #ifdef control on stack. |
735 | // Note that MacroIsDefined has been canonicalized against ifdef. |
736 | PrepIncludeStack.back()->push_back( |
737 | x: {.Kind: tgtok::Ifdef, .IsDefined: MacroIsDefined, .SrcPos: SMLoc::getFromPointer(Ptr: TokStart)}); |
738 | |
739 | if (!prepSkipDirectiveEnd()) |
740 | return ReturnError(Loc: CurPtr, Msg: "Only comments are supported after " + |
741 | IfTokName + " NAME" ); |
742 | |
743 | // If we were not processing tokens before this #ifdef, |
744 | // then just return back to the lines skipping code. |
745 | if (!ReturnNextLiveToken) |
746 | return Kind; |
747 | |
748 | // If we were processing tokens before this #ifdef, |
749 | // and the macro is defined, then just return the next token. |
750 | if (MacroIsDefined) |
751 | return LexToken(); |
752 | |
753 | // We were processing tokens before this #ifdef, and the macro |
754 | // is not defined, so we have to start skipping the lines. |
755 | // If the skipping is successful, it will return the token following |
756 | // either #else or #endif corresponding to this #ifdef. |
757 | if (prepSkipRegion(MustNeverBeFalse: ReturnNextLiveToken)) |
758 | return LexToken(); |
759 | |
760 | return tgtok::Error; |
761 | } else if (Kind == tgtok::Else) { |
762 | // Check if this #else is correct before calling prepSkipDirectiveEnd(), |
763 | // which will move CurPtr away from the beginning of #else. |
764 | if (PrepIncludeStack.back()->empty()) |
765 | return ReturnError(Loc: TokStart, Msg: "#else without #ifdef or #ifndef" ); |
766 | |
767 | PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back()->back(); |
768 | |
769 | if (IfdefEntry.Kind != tgtok::Ifdef) { |
770 | PrintError(Loc: TokStart, Msg: "double #else" ); |
771 | return ReturnError(Loc: IfdefEntry.SrcPos, Msg: "Previous #else is here" ); |
772 | } |
773 | |
774 | // Replace the corresponding #ifdef's control with its negation |
775 | // on the control stack. |
776 | PrepIncludeStack.back()->pop_back(); |
777 | PrepIncludeStack.back()->push_back( |
778 | x: {.Kind: Kind, .IsDefined: !IfdefEntry.IsDefined, .SrcPos: SMLoc::getFromPointer(Ptr: TokStart)}); |
779 | |
780 | if (!prepSkipDirectiveEnd()) |
781 | return ReturnError(Loc: CurPtr, Msg: "Only comments are supported after #else" ); |
782 | |
783 | // If we were processing tokens before this #else, |
784 | // we have to start skipping lines until the matching #endif. |
785 | if (ReturnNextLiveToken) { |
786 | if (prepSkipRegion(MustNeverBeFalse: ReturnNextLiveToken)) |
787 | return LexToken(); |
788 | |
789 | return tgtok::Error; |
790 | } |
791 | |
792 | // Return to the lines skipping code. |
793 | return Kind; |
794 | } else if (Kind == tgtok::Endif) { |
795 | // Check if this #endif is correct before calling prepSkipDirectiveEnd(), |
796 | // which will move CurPtr away from the beginning of #endif. |
797 | if (PrepIncludeStack.back()->empty()) |
798 | return ReturnError(Loc: TokStart, Msg: "#endif without #ifdef" ); |
799 | |
800 | auto &IfdefOrElseEntry = PrepIncludeStack.back()->back(); |
801 | |
802 | if (IfdefOrElseEntry.Kind != tgtok::Ifdef && |
803 | IfdefOrElseEntry.Kind != tgtok::Else) { |
804 | PrintFatalError(Msg: "Invalid preprocessor control on the stack" ); |
805 | return tgtok::Error; |
806 | } |
807 | |
808 | if (!prepSkipDirectiveEnd()) |
809 | return ReturnError(Loc: CurPtr, Msg: "Only comments are supported after #endif" ); |
810 | |
811 | PrepIncludeStack.back()->pop_back(); |
812 | |
813 | // If we were processing tokens before this #endif, then |
814 | // we should continue it. |
815 | if (ReturnNextLiveToken) { |
816 | return LexToken(); |
817 | } |
818 | |
819 | // Return to the lines skipping code. |
820 | return Kind; |
821 | } else if (Kind == tgtok::Define) { |
822 | StringRef MacroName = prepLexMacroName(); |
823 | if (MacroName.empty()) |
824 | return ReturnError(Loc: TokStart, Msg: "Expected macro name after #define" ); |
825 | |
826 | if (!DefinedMacros.insert(key: MacroName).second) |
827 | PrintWarning(WarningLoc: getLoc(), |
828 | Msg: "Duplicate definition of macro: " + Twine(MacroName)); |
829 | |
830 | if (!prepSkipDirectiveEnd()) |
831 | return ReturnError(Loc: CurPtr, |
832 | Msg: "Only comments are supported after #define NAME" ); |
833 | |
834 | if (!ReturnNextLiveToken) { |
835 | PrintFatalError(Msg: "#define must be ignored during the lines skipping" ); |
836 | return tgtok::Error; |
837 | } |
838 | |
839 | return LexToken(); |
840 | } |
841 | |
842 | PrintFatalError(Msg: "Preprocessing directive is not supported" ); |
843 | return tgtok::Error; |
844 | } |
845 | |
846 | bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) { |
847 | if (!MustNeverBeFalse) |
848 | PrintFatalError(Msg: "Invalid recursion." ); |
849 | |
850 | do { |
851 | // Skip all symbols to the line end. |
852 | while (*CurPtr != '\n') |
853 | ++CurPtr; |
854 | |
855 | // Find the first non-whitespace symbol in the next line(s). |
856 | if (!prepSkipLineBegin()) |
857 | return false; |
858 | |
859 | // If the first non-blank/comment symbol on the line is '#', |
860 | // it may be a start of preprocessing directive. |
861 | // |
862 | // If it is not '#' just go to the next line. |
863 | if (*CurPtr == '#') |
864 | ++CurPtr; |
865 | else |
866 | continue; |
867 | |
868 | tgtok::TokKind Kind = prepIsDirective(); |
869 | |
870 | // If we did not find a preprocessing directive or it is #define, |
871 | // then just skip to the next line. We do not have to do anything |
872 | // for #define in the line-skipping mode. |
873 | if (Kind == tgtok::Error || Kind == tgtok::Define) |
874 | continue; |
875 | |
876 | tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, ReturnNextLiveToken: false); |
877 | |
878 | // If lexPreprocessor() encountered an error during lexing this |
879 | // preprocessor idiom, then return false to the calling lexPreprocessor(). |
880 | // This will force tgtok::Error to be returned to the tokens processing. |
881 | if (ProcessedKind == tgtok::Error) |
882 | return false; |
883 | |
884 | if (Kind != ProcessedKind) |
885 | PrintFatalError(Msg: "prepIsDirective() and lexPreprocessor() " |
886 | "returned different token kinds" ); |
887 | |
888 | // If this preprocessing directive enables tokens processing, |
889 | // then return to the lexPreprocessor() and get to the next token. |
890 | // We can move from line-skipping mode to processing tokens only |
891 | // due to #else or #endif. |
892 | if (prepIsProcessingEnabled()) { |
893 | if (Kind != tgtok::Else && Kind != tgtok::Endif) { |
894 | PrintFatalError(Msg: "Tokens processing was enabled by an unexpected " |
895 | "preprocessing directive" ); |
896 | return false; |
897 | } |
898 | |
899 | return true; |
900 | } |
901 | } while (CurPtr != CurBuf.end()); |
902 | |
903 | // We have reached the end of the file, but never left the lines-skipping |
904 | // mode. This means there is no matching #endif. |
905 | prepReportPreprocessorStackError(); |
906 | return false; |
907 | } |
908 | |
909 | StringRef TGLexer::prepLexMacroName() { |
910 | // Skip whitespaces between the preprocessing directive and the macro name. |
911 | while (*CurPtr == ' ' || *CurPtr == '\t') |
912 | ++CurPtr; |
913 | |
914 | TokStart = CurPtr; |
915 | // Macro names start with [a-zA-Z_]. |
916 | if (*CurPtr != '_' && !isalpha(*CurPtr)) |
917 | return "" ; |
918 | |
919 | // Match the rest of the identifier regex: [0-9a-zA-Z_]* |
920 | while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') |
921 | ++CurPtr; |
922 | |
923 | return StringRef(TokStart, CurPtr - TokStart); |
924 | } |
925 | |
926 | bool TGLexer::prepSkipLineBegin() { |
927 | while (CurPtr != CurBuf.end()) { |
928 | switch (*CurPtr) { |
929 | case ' ': |
930 | case '\t': |
931 | case '\n': |
932 | case '\r': |
933 | break; |
934 | |
935 | case '/': { |
936 | int NextChar = peekNextChar(Index: 1); |
937 | if (NextChar == '*') { |
938 | // Skip C-style comment. |
939 | // Note that we do not care about skipping the C++-style comments. |
940 | // If the line contains "//", it may not contain any processable |
941 | // preprocessing directive. Just return CurPtr pointing to |
942 | // the first '/' in this case. We also do not care about |
943 | // incorrect symbols after the first '/' - we are in lines-skipping |
944 | // mode, so incorrect code is allowed to some extent. |
945 | |
946 | // Set TokStart to the beginning of the comment to enable proper |
947 | // diagnostic printing in case of error in SkipCComment(). |
948 | TokStart = CurPtr; |
949 | |
950 | // CurPtr must point to '*' before call to SkipCComment(). |
951 | ++CurPtr; |
952 | if (SkipCComment()) |
953 | return false; |
954 | } else { |
955 | // CurPtr points to the non-whitespace '/'. |
956 | return true; |
957 | } |
958 | |
959 | // We must not increment CurPtr after the comment was lexed. |
960 | continue; |
961 | } |
962 | |
963 | default: |
964 | return true; |
965 | } |
966 | |
967 | ++CurPtr; |
968 | } |
969 | |
970 | // We have reached the end of the file. Return to the lines skipping |
971 | // code, and allow it to handle the EOF as needed. |
972 | return true; |
973 | } |
974 | |
975 | bool TGLexer::prepSkipDirectiveEnd() { |
976 | while (CurPtr != CurBuf.end()) { |
977 | switch (*CurPtr) { |
978 | case ' ': |
979 | case '\t': |
980 | break; |
981 | |
982 | case '\n': |
983 | case '\r': |
984 | return true; |
985 | |
986 | case '/': { |
987 | int NextChar = peekNextChar(Index: 1); |
988 | if (NextChar == '/') { |
989 | // Skip C++-style comment. |
990 | // We may just return true now, but let's skip to the line/buffer end |
991 | // to simplify the method specification. |
992 | ++CurPtr; |
993 | SkipBCPLComment(); |
994 | } else if (NextChar == '*') { |
995 | // When we are skipping C-style comment at the end of a preprocessing |
996 | // directive, we can skip several lines. If any meaningful TD token |
997 | // follows the end of the C-style comment on the same line, it will |
998 | // be considered as an invalid usage of TD token. |
999 | // For example, we want to forbid usages like this one: |
1000 | // #define MACRO class Class {} |
1001 | // But with C-style comments we also disallow the following: |
1002 | // #define MACRO /* This macro is used |
1003 | // to ... */ class Class {} |
1004 | // One can argue that this should be allowed, but it does not seem |
1005 | // to be worth of the complication. Moreover, this matches |
1006 | // the C preprocessor behavior. |
1007 | |
1008 | // Set TokStart to the beginning of the comment to enable proper |
1009 | // diagnostic printer in case of error in SkipCComment(). |
1010 | TokStart = CurPtr; |
1011 | ++CurPtr; |
1012 | if (SkipCComment()) |
1013 | return false; |
1014 | } else { |
1015 | TokStart = CurPtr; |
1016 | PrintError(Loc: CurPtr, Msg: "Unexpected character" ); |
1017 | return false; |
1018 | } |
1019 | |
1020 | // We must not increment CurPtr after the comment was lexed. |
1021 | continue; |
1022 | } |
1023 | |
1024 | default: |
1025 | // Do not allow any non-whitespaces after the directive. |
1026 | TokStart = CurPtr; |
1027 | return false; |
1028 | } |
1029 | |
1030 | ++CurPtr; |
1031 | } |
1032 | |
1033 | return true; |
1034 | } |
1035 | |
1036 | bool TGLexer::prepIsProcessingEnabled() { |
1037 | for (const PreprocessorControlDesc &I : |
1038 | llvm::reverse(C&: *PrepIncludeStack.back())) |
1039 | if (!I.IsDefined) |
1040 | return false; |
1041 | |
1042 | return true; |
1043 | } |
1044 | |
1045 | void TGLexer::prepReportPreprocessorStackError() { |
1046 | if (PrepIncludeStack.back()->empty()) |
1047 | PrintFatalError(Msg: "prepReportPreprocessorStackError() called with " |
1048 | "empty control stack" ); |
1049 | |
1050 | auto &PrepControl = PrepIncludeStack.back()->back(); |
1051 | PrintError(Loc: CurBuf.end(), Msg: "Reached EOF without matching #endif" ); |
1052 | PrintError(ErrorLoc: PrepControl.SrcPos, Msg: "The latest preprocessor control is here" ); |
1053 | |
1054 | TokStart = CurPtr; |
1055 | } |
1056 | |