1//===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Defines the clang::Preprocessor interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15#define LLVM_CLANG_LEX_PREPROCESSOR_H
16
17#include "clang/Basic/Diagnostic.h"
18#include "clang/Basic/DiagnosticIDs.h"
19#include "clang/Basic/IdentifierTable.h"
20#include "clang/Basic/LLVM.h"
21#include "clang/Basic/LangOptions.h"
22#include "clang/Basic/Module.h"
23#include "clang/Basic/SourceLocation.h"
24#include "clang/Basic/SourceManager.h"
25#include "clang/Basic/TokenKinds.h"
26#include "clang/Lex/HeaderSearch.h"
27#include "clang/Lex/Lexer.h"
28#include "clang/Lex/MacroInfo.h"
29#include "clang/Lex/ModuleLoader.h"
30#include "clang/Lex/ModuleMap.h"
31#include "clang/Lex/PPCallbacks.h"
32#include "clang/Lex/PPEmbedParameters.h"
33#include "clang/Lex/Token.h"
34#include "clang/Lex/TokenLexer.h"
35#include "llvm/ADT/APSInt.h"
36#include "llvm/ADT/ArrayRef.h"
37#include "llvm/ADT/DenseMap.h"
38#include "llvm/ADT/FoldingSet.h"
39#include "llvm/ADT/FunctionExtras.h"
40#include "llvm/ADT/PointerUnion.h"
41#include "llvm/ADT/STLExtras.h"
42#include "llvm/ADT/SmallPtrSet.h"
43#include "llvm/ADT/SmallVector.h"
44#include "llvm/ADT/StringRef.h"
45#include "llvm/ADT/TinyPtrVector.h"
46#include "llvm/ADT/iterator_range.h"
47#include "llvm/Support/Allocator.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/Registry.h"
50#include <cassert>
51#include <cstddef>
52#include <cstdint>
53#include <map>
54#include <memory>
55#include <optional>
56#include <string>
57#include <utility>
58#include <vector>
59
60namespace llvm {
61
62template<unsigned InternalLen> class SmallString;
63
64} // namespace llvm
65
66namespace clang {
67
68class CodeCompletionHandler;
69class CommentHandler;
70class DirectoryEntry;
71class EmptylineHandler;
72class ExternalPreprocessorSource;
73class FileEntry;
74class FileManager;
75class HeaderSearch;
76class MacroArgs;
77class PragmaHandler;
78class PragmaNamespace;
79class PreprocessingRecord;
80class PreprocessorLexer;
81class PreprocessorOptions;
82class ScratchBuffer;
83class TargetInfo;
84
85namespace Builtin {
86class Context;
87}
88
89/// Stores token information for comparing actual tokens with
90/// predefined values. Only handles simple tokens and identifiers.
91class TokenValue {
92 tok::TokenKind Kind;
93 IdentifierInfo *II;
94
95public:
96 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
97 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
98 assert(Kind != tok::identifier &&
99 "Identifiers should be created by TokenValue(IdentifierInfo *)");
100 assert(!tok::isLiteral(Kind) && "Literals are not supported.");
101 assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
102 }
103
104 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
105
106 bool operator==(const Token &Tok) const {
107 return Tok.getKind() == Kind &&
108 (!II || II == Tok.getIdentifierInfo());
109 }
110};
111
/// Context in which a macro name is used.
enum MacroUse {
  /// Any use other than in a \#define or \#undef directive.
  MU_Other = 0,

  /// The macro name is the one specified in a \#define directive.
  MU_Define = 1,

  /// The macro name is the one specified in an \#undef directive.
  MU_Undef = 2,
};
123
/// Result codes for an embed query. The non-negative values correspond to the
/// __STDC_EMBED_* macros named on each enumerator.
enum class EmbedResult {
  /// Parsing error occurred.
  Invalid = -1,
  /// Corresponds to __STDC_EMBED_NOT_FOUND__
  NotFound = 0,
  /// Corresponds to __STDC_EMBED_FOUND__
  Found = 1,
  /// Corresponds to __STDC_EMBED_EMPTY__
  Empty = 2,
};
130
131/// Engages in a tight little dance with the lexer to efficiently
132/// preprocess tokens.
133///
134/// Lexers know only about tokens within a single source file, and don't
135/// know anything about preprocessor-level issues like the \#include stack,
136/// token expansion, etc.
137class Preprocessor {
138 friend class VAOptDefinitionContext;
139 friend class VariadicMacroScopeGuard;
140
141 llvm::unique_function<void(const clang::Token &)> OnToken;
142 std::shared_ptr<PreprocessorOptions> PPOpts;
143 DiagnosticsEngine *Diags;
144 const LangOptions &LangOpts;
145 const TargetInfo *Target = nullptr;
146 const TargetInfo *AuxTarget = nullptr;
147 FileManager &FileMgr;
148 SourceManager &SourceMgr;
149 std::unique_ptr<ScratchBuffer> ScratchBuf;
150 HeaderSearch &HeaderInfo;
151 ModuleLoader &TheModuleLoader;
152
153 /// External source of macros.
154 ExternalPreprocessorSource *ExternalSource;
155
156 /// A BumpPtrAllocator object used to quickly allocate and release
157 /// objects internal to the Preprocessor.
158 llvm::BumpPtrAllocator BP;
159
  /// Identifiers for builtin macros and other builtins.
  ///
  /// The trailing comment on each declaration gives the source spelling the
  /// member is named after.
  IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
  IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
  IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
  IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
  IdentifierInfo *Ident__FILE_NAME__; // __FILE_NAME__
  IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
  IdentifierInfo *Ident__COUNTER__; // __COUNTER__
  IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
  IdentifierInfo *Ident__identifier; // __identifier
  IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
  IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__
  IdentifierInfo *Ident__has_feature; // __has_feature
  IdentifierInfo *Ident__has_extension; // __has_extension
  IdentifierInfo *Ident__has_builtin; // __has_builtin
  IdentifierInfo *Ident__has_constexpr_builtin; // __has_constexpr_builtin
  IdentifierInfo *Ident__has_attribute; // __has_attribute
  IdentifierInfo *Ident__has_embed; // __has_embed
  IdentifierInfo *Ident__has_include; // __has_include
  IdentifierInfo *Ident__has_include_next; // __has_include_next
  IdentifierInfo *Ident__has_warning; // __has_warning
  IdentifierInfo *Ident__is_identifier; // __is_identifier
  IdentifierInfo *Ident__building_module; // __building_module
  IdentifierInfo *Ident__MODULE__; // __MODULE__
  IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute
  IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute
  IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute
  IdentifierInfo *Ident__is_target_arch; // __is_target_arch
  IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor
  IdentifierInfo *Ident__is_target_os; // __is_target_os
  IdentifierInfo *Ident__is_target_environment; // __is_target_environment
  IdentifierInfo *Ident__is_target_variant_os; // __is_target_variant_os
  IdentifierInfo *Ident__is_target_variant_environment; // __is_target_variant_environment
  IdentifierInfo *Ident__FLT_EVAL_METHOD__; // __FLT_EVAL_METHOD__
194
195 // Weak, only valid (and set) while InMacroArgs is true.
196 Token* ArgMacro;
197
198 SourceLocation DATELoc, TIMELoc;
199
200 // FEM_UnsetOnCommandLine means that an explicit evaluation method was
201 // not specified on the command line. The target is queried to set the
202 // default evaluation method.
203 LangOptions::FPEvalMethodKind CurrentFPEvalMethod =
204 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
205
206 // The most recent pragma location where the floating point evaluation
207 // method was modified. This is used to determine whether the
  // 'pragma clang fp eval_method' was used within the current scope.
209 SourceLocation LastFPEvalPragmaLocation;
210
211 LangOptions::FPEvalMethodKind TUFPEvalMethod =
212 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
213
214 // Next __COUNTER__ value, starts at 0.
215 unsigned CounterValue = 0;
216
217 enum {
218 /// Maximum depth of \#includes.
219 MaxAllowedIncludeStackDepth = 200
220 };
221
222 // State that is set before the preprocessor begins.
223 bool KeepComments : 1;
224 bool KeepMacroComments : 1;
225 bool SuppressIncludeNotFoundError : 1;
226
227 // State that changes while the preprocessor runs:
228 bool InMacroArgs : 1; // True if parsing fn macro invocation args.
229
230 /// Whether the preprocessor owns the header search object.
231 bool OwnsHeaderSearch : 1;
232
233 /// True if macro expansion is disabled.
234 bool DisableMacroExpansion : 1;
235
236 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
237 /// when parsing preprocessor directives.
238 bool MacroExpansionInDirectivesOverride : 1;
239
240 class ResetMacroExpansionHelper;
241
242 /// Whether we have already loaded macros from the external source.
243 mutable bool ReadMacrosFromExternalSource : 1;
244
245 /// True if pragmas are enabled.
246 bool PragmasEnabled : 1;
247
248 /// True if the current build action is a preprocessing action.
249 bool PreprocessedOutput : 1;
250
251 /// True if we are currently preprocessing a #if or #elif directive
252 bool ParsingIfOrElifDirective;
253
254 /// True if we are pre-expanding macro arguments.
255 bool InMacroArgPreExpansion;
256
257 /// Mapping/lookup information for all identifiers in
258 /// the program, including program keywords.
259 mutable IdentifierTable Identifiers;
260
261 /// This table contains all the selectors in the program.
262 ///
263 /// Unlike IdentifierTable above, this table *isn't* populated by the
264 /// preprocessor. It is declared/expanded here because its role/lifetime is
265 /// conceptually similar to the IdentifierTable. In addition, the current
  /// control flow (in clang::ParseAST()) makes it convenient to put here.
267 ///
268 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
269 /// the lifetime of the preprocessor.
270 SelectorTable Selectors;
271
272 /// Information about builtins.
273 std::unique_ptr<Builtin::Context> BuiltinInfo;
274
275 /// Tracks all of the pragmas that the client registered
276 /// with this preprocessor.
277 std::unique_ptr<PragmaNamespace> PragmaHandlers;
278
  /// Pragma handlers of the original source are stored here during the
  /// parsing of a model file.
281 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
282
283 /// Tracks all of the comment handlers that the client registered
284 /// with this preprocessor.
285 std::vector<CommentHandler *> CommentHandlers;
286
287 /// Empty line handler.
288 EmptylineHandler *Emptyline = nullptr;
289
290 /// True to avoid tearing down the lexer etc on EOF
291 bool IncrementalProcessing = false;
292
293public:
294 /// The kind of translation unit we are processing.
295 const TranslationUnitKind TUKind;
296
297 /// Returns a pointer into the given file's buffer that's guaranteed
298 /// to be between tokens. The returned pointer is always before \p Start.
  /// The maximum distance between the returned pointer and \p Start is
300 /// limited by a constant value, but also an implementation detail.
301 /// If no such check point exists, \c nullptr is returned.
302 const char *getCheckPoint(FileID FID, const char *Start) const;
303
304private:
305 /// The code-completion handler.
306 CodeCompletionHandler *CodeComplete = nullptr;
307
308 /// The file that we're performing code-completion for, if any.
309 const FileEntry *CodeCompletionFile = nullptr;
310
311 /// The offset in file for the code-completion point.
312 unsigned CodeCompletionOffset = 0;
313
314 /// The location for the code-completion point. This gets instantiated
315 /// when the CodeCompletionFile gets \#include'ed for preprocessing.
316 SourceLocation CodeCompletionLoc;
317
318 /// The start location for the file of the code-completion point.
319 ///
320 /// This gets instantiated when the CodeCompletionFile gets \#include'ed
321 /// for preprocessing.
322 SourceLocation CodeCompletionFileLoc;
323
324 /// The source location of the \c import contextual keyword we just
325 /// lexed, if any.
326 SourceLocation ModuleImportLoc;
327
328 /// The import path for named module that we're currently processing.
329 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> NamedModuleImportPath;
330
331 llvm::DenseMap<FileID, SmallVector<const char *>> CheckPoints;
332 unsigned CheckPointCounter = 0;
333
334 /// Whether the import is an `@import` or a standard c++ modules import.
335 bool IsAtImport = false;
336
337 /// Whether the last token we lexed was an '@'.
338 bool LastTokenWasAt = false;
339
340 /// A position within a C++20 import-seq.
341 class StdCXXImportSeq {
342 public:
343 enum State : int {
344 // Positive values represent a number of unclosed brackets.
345 AtTopLevel = 0,
346 AfterTopLevelTokenSeq = -1,
347 AfterExport = -2,
348 AfterImportSeq = -3,
349 };
350
351 StdCXXImportSeq(State S) : S(S) {}
352
353 /// Saw any kind of open bracket.
354 void handleOpenBracket() {
355 S = static_cast<State>(std::max<int>(a: S, b: 0) + 1);
356 }
357 /// Saw any kind of close bracket other than '}'.
358 void handleCloseBracket() {
359 S = static_cast<State>(std::max<int>(a: S, b: 1) - 1);
360 }
361 /// Saw a close brace.
362 void handleCloseBrace() {
363 handleCloseBracket();
364 if (S == AtTopLevel && !AfterHeaderName)
365 S = AfterTopLevelTokenSeq;
366 }
367 /// Saw a semicolon.
368 void handleSemi() {
369 if (atTopLevel()) {
370 S = AfterTopLevelTokenSeq;
371 AfterHeaderName = false;
372 }
373 }
374
375 /// Saw an 'export' identifier.
376 void handleExport() {
377 if (S == AfterTopLevelTokenSeq)
378 S = AfterExport;
379 else if (S <= 0)
380 S = AtTopLevel;
381 }
382 /// Saw an 'import' identifier.
383 void handleImport() {
384 if (S == AfterTopLevelTokenSeq || S == AfterExport)
385 S = AfterImportSeq;
386 else if (S <= 0)
387 S = AtTopLevel;
388 }
389
390 /// Saw a 'header-name' token; do not recognize any more 'import' tokens
391 /// until we reach a top-level semicolon.
392 void handleHeaderName() {
393 if (S == AfterImportSeq)
394 AfterHeaderName = true;
395 handleMisc();
396 }
397
398 /// Saw any other token.
399 void handleMisc() {
400 if (S <= 0)
401 S = AtTopLevel;
402 }
403
404 bool atTopLevel() { return S <= 0; }
405 bool afterImportSeq() { return S == AfterImportSeq; }
406 bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; }
407
408 private:
409 State S;
410 /// Whether we're in the pp-import-suffix following the header-name in a
411 /// pp-import. If so, a close-brace is not sufficient to end the
412 /// top-level-token-seq of an import-seq.
413 bool AfterHeaderName = false;
414 };
415
416 /// Our current position within a C++20 import-seq.
417 StdCXXImportSeq StdCXXImportSeqState = StdCXXImportSeq::AfterTopLevelTokenSeq;
418
419 /// Track whether we are in a Global Module Fragment
420 class TrackGMF {
421 public:
422 enum GMFState : int {
423 GMFActive = 1,
424 MaybeGMF = 0,
425 BeforeGMFIntroducer = -1,
426 GMFAbsentOrEnded = -2,
427 };
428
429 TrackGMF(GMFState S) : S(S) {}
430
431 /// Saw a semicolon.
432 void handleSemi() {
433 // If it is immediately after the first instance of the module keyword,
434 // then that introduces the GMF.
435 if (S == MaybeGMF)
436 S = GMFActive;
437 }
438
439 /// Saw an 'export' identifier.
440 void handleExport() {
441 // The presence of an 'export' keyword always ends or excludes a GMF.
442 S = GMFAbsentOrEnded;
443 }
444
445 /// Saw an 'import' identifier.
446 void handleImport(bool AfterTopLevelTokenSeq) {
447 // If we see this before any 'module' kw, then we have no GMF.
448 if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
449 S = GMFAbsentOrEnded;
450 }
451
452 /// Saw a 'module' identifier.
453 void handleModule(bool AfterTopLevelTokenSeq) {
454 // This was the first module identifier and not preceded by any token
455 // that would exclude a GMF. It could begin a GMF, but only if directly
456 // followed by a semicolon.
457 if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
458 S = MaybeGMF;
459 else
460 S = GMFAbsentOrEnded;
461 }
462
463 /// Saw any other token.
464 void handleMisc() {
465 // We saw something other than ; after the 'module' kw, so not a GMF.
466 if (S == MaybeGMF)
467 S = GMFAbsentOrEnded;
468 }
469
470 bool inGMF() { return S == GMFActive; }
471
472 private:
473 /// Track the transitions into and out of a Global Module Fragment,
474 /// if one is present.
475 GMFState S;
476 };
477
478 TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
479
480 /// Track the status of the c++20 module decl.
481 ///
482 /// module-declaration:
483 /// 'export'[opt] 'module' module-name module-partition[opt]
484 /// attribute-specifier-seq[opt] ';'
485 ///
486 /// module-name:
487 /// module-name-qualifier[opt] identifier
488 ///
489 /// module-partition:
490 /// ':' module-name-qualifier[opt] identifier
491 ///
492 /// module-name-qualifier:
493 /// identifier '.'
494 /// module-name-qualifier identifier '.'
495 ///
496 /// Transition state:
497 ///
498 /// NotAModuleDecl --- export ---> FoundExport
499 /// NotAModuleDecl --- module ---> ImplementationCandidate
500 /// FoundExport --- module ---> InterfaceCandidate
501 /// ImplementationCandidate --- Identifier ---> ImplementationCandidate
502 /// ImplementationCandidate --- period ---> ImplementationCandidate
503 /// ImplementationCandidate --- colon ---> ImplementationCandidate
504 /// InterfaceCandidate --- Identifier ---> InterfaceCandidate
505 /// InterfaceCandidate --- period ---> InterfaceCandidate
506 /// InterfaceCandidate --- colon ---> InterfaceCandidate
507 /// ImplementationCandidate --- Semi ---> NamedModuleImplementation
508 /// NamedModuleInterface --- Semi ---> NamedModuleInterface
509 /// NamedModuleImplementation --- Anything ---> NamedModuleImplementation
510 /// NamedModuleInterface --- Anything ---> NamedModuleInterface
511 ///
512 /// FIXME: We haven't handle attribute-specifier-seq here. It may not be bad
513 /// soon since we don't support any module attributes yet.
514 class ModuleDeclSeq {
515 enum ModuleDeclState : int {
516 NotAModuleDecl,
517 FoundExport,
518 InterfaceCandidate,
519 ImplementationCandidate,
520 NamedModuleInterface,
521 NamedModuleImplementation,
522 };
523
524 public:
525 ModuleDeclSeq() = default;
526
527 void handleExport() {
528 if (State == NotAModuleDecl)
529 State = FoundExport;
530 else if (!isNamedModule())
531 reset();
532 }
533
534 void handleModule() {
535 if (State == FoundExport)
536 State = InterfaceCandidate;
537 else if (State == NotAModuleDecl)
538 State = ImplementationCandidate;
539 else if (!isNamedModule())
540 reset();
541 }
542
543 void handleIdentifier(IdentifierInfo *Identifier) {
544 if (isModuleCandidate() && Identifier)
545 Name += Identifier->getName().str();
546 else if (!isNamedModule())
547 reset();
548 }
549
550 void handleColon() {
551 if (isModuleCandidate())
552 Name += ":";
553 else if (!isNamedModule())
554 reset();
555 }
556
557 void handlePeriod() {
558 if (isModuleCandidate())
559 Name += ".";
560 else if (!isNamedModule())
561 reset();
562 }
563
564 void handleSemi() {
565 if (!Name.empty() && isModuleCandidate()) {
566 if (State == InterfaceCandidate)
567 State = NamedModuleInterface;
568 else if (State == ImplementationCandidate)
569 State = NamedModuleImplementation;
570 else
571 llvm_unreachable("Unimaged ModuleDeclState.");
572 } else if (!isNamedModule())
573 reset();
574 }
575
576 void handleMisc() {
577 if (!isNamedModule())
578 reset();
579 }
580
581 bool isModuleCandidate() const {
582 return State == InterfaceCandidate || State == ImplementationCandidate;
583 }
584
585 bool isNamedModule() const {
586 return State == NamedModuleInterface ||
587 State == NamedModuleImplementation;
588 }
589
590 bool isNamedInterface() const { return State == NamedModuleInterface; }
591
592 bool isImplementationUnit() const {
593 return State == NamedModuleImplementation && !getName().contains(C: ':');
594 }
595
596 StringRef getName() const {
597 assert(isNamedModule() && "Can't get name from a non named module");
598 return Name;
599 }
600
601 StringRef getPrimaryName() const {
602 assert(isNamedModule() && "Can't get name from a non named module");
603 return getName().split(Separator: ':').first;
604 }
605
606 void reset() {
607 Name.clear();
608 State = NotAModuleDecl;
609 }
610
611 private:
612 ModuleDeclState State = NotAModuleDecl;
613 std::string Name;
614 };
615
616 ModuleDeclSeq ModuleDeclState;
617
618 /// Whether the module import expects an identifier next. Otherwise,
619 /// it expects a '.' or ';'.
620 bool ModuleImportExpectsIdentifier = false;
621
622 /// The identifier and source location of the currently-active
623 /// \#pragma clang arc_cf_code_audited begin.
624 std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
625
626 /// The source location of the currently-active
627 /// \#pragma clang assume_nonnull begin.
628 SourceLocation PragmaAssumeNonNullLoc;
629
630 /// Set only for preambles which end with an active
631 /// \#pragma clang assume_nonnull begin.
632 ///
633 /// When the preamble is loaded into the main file,
634 /// `PragmaAssumeNonNullLoc` will be set to this to
635 /// replay the unterminated assume_nonnull.
636 SourceLocation PreambleRecordedPragmaAssumeNonNullLoc;
637
638 /// True if we hit the code-completion point.
639 bool CodeCompletionReached = false;
640
641 /// The code completion token containing the information
642 /// on the stem that is to be code completed.
643 IdentifierInfo *CodeCompletionII = nullptr;
644
645 /// Range for the code completion token.
646 SourceRange CodeCompletionTokenRange;
647
648 /// The directory that the main file should be considered to occupy,
649 /// if it does not correspond to a real file (as happens when building a
650 /// module).
651 OptionalDirectoryEntryRef MainFileDir;
652
653 /// The number of bytes that we will initially skip when entering the
654 /// main file, along with a flag that indicates whether skipping this number
655 /// of bytes will place the lexer at the start of a line.
656 ///
657 /// This is used when loading a precompiled preamble.
658 std::pair<int, bool> SkipMainFilePreamble;
659
660 /// Whether we hit an error due to reaching max allowed include depth. Allows
661 /// to avoid hitting the same error over and over again.
662 bool HasReachedMaxIncludeDepth = false;
663
664 /// The number of currently-active calls to Lex.
665 ///
666 /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
667 /// require asking for multiple additional tokens. This counter makes it
668 /// possible for Lex to detect whether it's producing a token for the end
669 /// of phase 4 of translation or for some other situation.
670 unsigned LexLevel = 0;
671
672 /// The number of (LexLevel 0) preprocessor tokens.
673 unsigned TokenCount = 0;
674
675 /// Preprocess every token regardless of LexLevel.
676 bool PreprocessToken = false;
677
678 /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
679 /// warning, or zero for unlimited.
680 unsigned MaxTokens = 0;
681 SourceLocation MaxTokensOverrideLoc;
682
683public:
684 struct PreambleSkipInfo {
685 SourceLocation HashTokenLoc;
686 SourceLocation IfTokenLoc;
687 bool FoundNonSkipPortion;
688 bool FoundElse;
689 SourceLocation ElseLoc;
690
691 PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
692 bool FoundNonSkipPortion, bool FoundElse,
693 SourceLocation ElseLoc)
694 : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
695 FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
696 ElseLoc(ElseLoc) {}
697 };
698
699 using IncludedFilesSet = llvm::DenseSet<const FileEntry *>;
700
701private:
702 friend class ASTReader;
703 friend class MacroArgs;
704
705 class PreambleConditionalStackStore {
706 enum State {
707 Off = 0,
708 Recording = 1,
709 Replaying = 2,
710 };
711
712 public:
713 PreambleConditionalStackStore() = default;
714
715 void startRecording() { ConditionalStackState = Recording; }
716 void startReplaying() { ConditionalStackState = Replaying; }
717 bool isRecording() const { return ConditionalStackState == Recording; }
718 bool isReplaying() const { return ConditionalStackState == Replaying; }
719
720 ArrayRef<PPConditionalInfo> getStack() const {
721 return ConditionalStack;
722 }
723
724 void doneReplaying() {
725 ConditionalStack.clear();
726 ConditionalStackState = Off;
727 }
728
729 void setStack(ArrayRef<PPConditionalInfo> s) {
730 if (!isRecording() && !isReplaying())
731 return;
732 ConditionalStack.clear();
733 ConditionalStack.append(in_start: s.begin(), in_end: s.end());
734 }
735
736 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
737
738 bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); }
739
740 void clearSkipInfo() { SkipInfo.reset(); }
741
742 std::optional<PreambleSkipInfo> SkipInfo;
743
744 private:
745 SmallVector<PPConditionalInfo, 4> ConditionalStack;
746 State ConditionalStackState = Off;
747 } PreambleConditionalStack;
748
749 /// The current top of the stack that we're lexing from if
750 /// not expanding a macro and we are lexing directly from source code.
751 ///
752 /// Only one of CurLexer, or CurTokenLexer will be non-null.
753 std::unique_ptr<Lexer> CurLexer;
754
755 /// The current top of the stack that we're lexing from
756 /// if not expanding a macro.
757 ///
758 /// This is an alias for CurLexer.
759 PreprocessorLexer *CurPPLexer = nullptr;
760
761 /// Used to find the current FileEntry, if CurLexer is non-null
762 /// and if applicable.
763 ///
764 /// This allows us to implement \#include_next and find directory-specific
765 /// properties.
766 ConstSearchDirIterator CurDirLookup = nullptr;
767
768 /// The current macro we are expanding, if we are expanding a macro.
769 ///
770 /// One of CurLexer and CurTokenLexer must be null.
771 std::unique_ptr<TokenLexer> CurTokenLexer;
772
773 /// The kind of lexer we're currently working with.
774 typedef bool (*LexerCallback)(Preprocessor &, Token &);
775 LexerCallback CurLexerCallback = &CLK_Lexer;
776
777 /// If the current lexer is for a submodule that is being built, this
778 /// is that submodule.
779 Module *CurLexerSubmodule = nullptr;
780
781 /// Keeps track of the stack of files currently
782 /// \#included, and macros currently being expanded from, not counting
783 /// CurLexer/CurTokenLexer.
784 struct IncludeStackInfo {
785 LexerCallback CurLexerCallback;
786 Module *TheSubmodule;
787 std::unique_ptr<Lexer> TheLexer;
788 PreprocessorLexer *ThePPLexer;
789 std::unique_ptr<TokenLexer> TheTokenLexer;
790 ConstSearchDirIterator TheDirLookup;
791
792 // The following constructors are completely useless copies of the default
793 // versions, only needed to pacify MSVC.
794 IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule,
795 std::unique_ptr<Lexer> &&TheLexer,
796 PreprocessorLexer *ThePPLexer,
797 std::unique_ptr<TokenLexer> &&TheTokenLexer,
798 ConstSearchDirIterator TheDirLookup)
799 : CurLexerCallback(std::move(CurLexerCallback)),
800 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
801 ThePPLexer(std::move(ThePPLexer)),
802 TheTokenLexer(std::move(TheTokenLexer)),
803 TheDirLookup(std::move(TheDirLookup)) {}
804 };
805 std::vector<IncludeStackInfo> IncludeMacroStack;
806
807 /// Actions invoked when some preprocessor activity is
808 /// encountered (e.g. a file is \#included, etc).
809 std::unique_ptr<PPCallbacks> Callbacks;
810
811 struct MacroExpandsInfo {
812 Token Tok;
813 MacroDefinition MD;
814 SourceRange Range;
815
816 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
817 : Tok(Tok), MD(MD), Range(Range) {}
818 };
819 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
820
821 /// Information about a name that has been used to define a module macro.
822 struct ModuleMacroInfo {
823 /// The most recent macro directive for this identifier.
824 MacroDirective *MD;
825
826 /// The active module macros for this identifier.
827 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
828
829 /// The generation number at which we last updated ActiveModuleMacros.
830 /// \see Preprocessor::VisibleModules.
831 unsigned ActiveModuleMacrosGeneration = 0;
832
833 /// Whether this macro name is ambiguous.
834 bool IsAmbiguous = false;
835
836 /// The module macros that are overridden by this macro.
837 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
838
839 ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
840 };
841
842 /// The state of a macro for an identifier.
843 class MacroState {
844 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
845
846 ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
847 const IdentifierInfo *II) const {
848 if (II->isOutOfDate())
849 PP.updateOutOfDateIdentifier(II: *II);
850 // FIXME: Find a spare bit on IdentifierInfo and store a
851 // HasModuleMacros flag.
852 if (!II->hasMacroDefinition() ||
853 (!PP.getLangOpts().Modules &&
854 !PP.getLangOpts().ModulesLocalVisibility) ||
855 !PP.CurSubmoduleState->VisibleModules.getGeneration())
856 return nullptr;
857
858 auto *Info = State.dyn_cast<ModuleMacroInfo*>();
859 if (!Info) {
860 Info = new (PP.getPreprocessorAllocator())
861 ModuleMacroInfo(State.get<MacroDirective *>());
862 State = Info;
863 }
864
865 if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
866 Info->ActiveModuleMacrosGeneration)
867 PP.updateModuleMacroInfo(II, Info&: *Info);
868 return Info;
869 }
870
871 public:
872 MacroState() : MacroState(nullptr) {}
873 MacroState(MacroDirective *MD) : State(MD) {}
874
875 MacroState(MacroState &&O) noexcept : State(O.State) {
876 O.State = (MacroDirective *)nullptr;
877 }
878
879 MacroState &operator=(MacroState &&O) noexcept {
880 auto S = O.State;
881 O.State = (MacroDirective *)nullptr;
882 State = S;
883 return *this;
884 }
885
886 ~MacroState() {
887 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
888 Info->~ModuleMacroInfo();
889 }
890
891 MacroDirective *getLatest() const {
892 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
893 return Info->MD;
894 return State.get<MacroDirective*>();
895 }
896
897 void setLatest(MacroDirective *MD) {
898 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
899 Info->MD = MD;
900 else
901 State = MD;
902 }
903
904 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
905 auto *Info = getModuleInfo(PP, II);
906 return Info ? Info->IsAmbiguous : false;
907 }
908
909 ArrayRef<ModuleMacro *>
910 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
911 if (auto *Info = getModuleInfo(PP, II))
912 return Info->ActiveModuleMacros;
913 return std::nullopt;
914 }
915
916 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
917 SourceManager &SourceMgr) const {
918 // FIXME: Incorporate module macros into the result of this.
919 if (auto *Latest = getLatest())
920 return Latest->findDirectiveAtLoc(L: Loc, SM: SourceMgr);
921 return {};
922 }
923
924 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
925 if (auto *Info = getModuleInfo(PP, II)) {
926 Info->OverriddenMacros.insert(I: Info->OverriddenMacros.end(),
927 From: Info->ActiveModuleMacros.begin(),
928 To: Info->ActiveModuleMacros.end());
929 Info->ActiveModuleMacros.clear();
930 Info->IsAmbiguous = false;
931 }
932 }
933
934 ArrayRef<ModuleMacro*> getOverriddenMacros() const {
935 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
936 return Info->OverriddenMacros;
937 return std::nullopt;
938 }
939
940 void setOverriddenMacros(Preprocessor &PP,
941 ArrayRef<ModuleMacro *> Overrides) {
942 auto *Info = State.dyn_cast<ModuleMacroInfo*>();
943 if (!Info) {
944 if (Overrides.empty())
945 return;
946 Info = new (PP.getPreprocessorAllocator())
947 ModuleMacroInfo(State.get<MacroDirective *>());
948 State = Info;
949 }
950 Info->OverriddenMacros.clear();
951 Info->OverriddenMacros.insert(I: Info->OverriddenMacros.end(),
952 From: Overrides.begin(), To: Overrides.end());
953 Info->ActiveModuleMacrosGeneration = 0;
954 }
955 };
956
957 /// For each IdentifierInfo that was associated with a macro, we
958 /// keep a mapping to the history of all macro definitions and #undefs in
959 /// the reverse order (the latest one is in the head of the list).
960 ///
961 /// This mapping lives within the \p CurSubmoduleState.
962 using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
963
964 struct SubmoduleState;
965
966 /// Information about a submodule that we're currently building.
967 struct BuildingSubmoduleInfo {
968 /// The module that we are building.
969 Module *M;
970
971 /// The location at which the module was included.
972 SourceLocation ImportLoc;
973
974 /// Whether we entered this submodule via a pragma.
975 bool IsPragma;
976
977 /// The previous SubmoduleState.
978 SubmoduleState *OuterSubmoduleState;
979
980 /// The number of pending module macro names when we started building this.
981 unsigned OuterPendingModuleMacroNames;
982
983 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
984 SubmoduleState *OuterSubmoduleState,
985 unsigned OuterPendingModuleMacroNames)
986 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
987 OuterSubmoduleState(OuterSubmoduleState),
988 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
989 };
990 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
991
992 /// Information about a submodule's preprocessor state.
993 struct SubmoduleState {
994 /// The macros for the submodule.
995 MacroMap Macros;
996
997 /// The set of modules that are visible within the submodule.
998 VisibleModuleSet VisibleModules;
999
1000 // FIXME: CounterValue?
1001 // FIXME: PragmaPushMacroInfo?
1002 };
1003 std::map<Module *, SubmoduleState> Submodules;
1004
1005 /// The preprocessor state for preprocessing outside of any submodule.
1006 SubmoduleState NullSubmoduleState;
1007
1008 /// The current submodule state. Will be \p NullSubmoduleState if we're not
1009 /// in a submodule.
1010 SubmoduleState *CurSubmoduleState;
1011
1012 /// The files that have been included.
1013 IncludedFilesSet IncludedFiles;
1014
1015 /// The set of top-level modules that affected preprocessing, but were not
1016 /// imported.
1017 llvm::SmallSetVector<Module *, 2> AffectingClangModules;
1018
1019 /// The set of known macros exported from modules.
1020 llvm::FoldingSet<ModuleMacro> ModuleMacros;
1021
1022 /// The names of potential module macros that we've not yet processed.
1023 llvm::SmallVector<IdentifierInfo *, 32> PendingModuleMacroNames;
1024
1025 /// The list of module macros, for each identifier, that are not overridden by
1026 /// any other module macro.
1027 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
1028 LeafModuleMacros;
1029
1030 /// Macros that we want to warn because they are not used at the end
1031 /// of the translation unit.
1032 ///
1033 /// We store just their SourceLocations instead of
1034 /// something like MacroInfo*. The benefit of this is that when we are
1035 /// deserializing from PCH, we don't need to deserialize identifier & macros
1036 /// just so that we can report that they are unused, we just warn using
1037 /// the SourceLocations of this set (that will be filled by the ASTReader).
1038 using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>;
1039 WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
1040
1041 /// This is a pair of an optional message and source location used for pragmas
1042 /// that annotate macros like pragma clang restrict_expansion and pragma clang
1043 /// deprecated. This pair stores the optional message and the location of the
1044 /// annotation pragma for use producing diagnostics and notes.
1045 using MsgLocationPair = std::pair<std::string, SourceLocation>;
1046
1047 struct MacroAnnotationInfo {
1048 SourceLocation Location;
1049 std::string Message;
1050 };
1051
1052 struct MacroAnnotations {
1053 std::optional<MacroAnnotationInfo> DeprecationInfo;
1054 std::optional<MacroAnnotationInfo> RestrictExpansionInfo;
1055 std::optional<SourceLocation> FinalAnnotationLoc;
1056
1057 static MacroAnnotations makeDeprecation(SourceLocation Loc,
1058 std::string Msg) {
1059 return MacroAnnotations{.DeprecationInfo: MacroAnnotationInfo{.Location: Loc, .Message: std::move(Msg)},
1060 .RestrictExpansionInfo: std::nullopt, .FinalAnnotationLoc: std::nullopt};
1061 }
1062
1063 static MacroAnnotations makeRestrictExpansion(SourceLocation Loc,
1064 std::string Msg) {
1065 return MacroAnnotations{
1066 .DeprecationInfo: std::nullopt, .RestrictExpansionInfo: MacroAnnotationInfo{.Location: Loc, .Message: std::move(Msg)}, .FinalAnnotationLoc: std::nullopt};
1067 }
1068
1069 static MacroAnnotations makeFinal(SourceLocation Loc) {
1070 return MacroAnnotations{.DeprecationInfo: std::nullopt, .RestrictExpansionInfo: std::nullopt, .FinalAnnotationLoc: Loc};
1071 }
1072 };
1073
1074 /// Warning information for macro annotations.
1075 llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos;
1076
1077 /// A "freelist" of MacroArg objects that can be
1078 /// reused for quick allocation.
1079 MacroArgs *MacroArgCache = nullptr;
1080
1081 /// For each IdentifierInfo used in a \#pragma push_macro directive,
1082 /// we keep a MacroInfo stack used to restore the previous macro value.
1083 llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
1084 PragmaPushMacroInfo;
1085
1086 // Various statistics we track for performance analysis.
1087 unsigned NumDirectives = 0;
1088 unsigned NumDefined = 0;
1089 unsigned NumUndefined = 0;
1090 unsigned NumPragma = 0;
1091 unsigned NumIf = 0;
1092 unsigned NumElse = 0;
1093 unsigned NumEndif = 0;
1094 unsigned NumEnteredSourceFiles = 0;
1095 unsigned MaxIncludeStackDepth = 0;
1096 unsigned NumMacroExpanded = 0;
1097 unsigned NumFnMacroExpanded = 0;
1098 unsigned NumBuiltinMacroExpanded = 0;
1099 unsigned NumFastMacroExpanded = 0;
1100 unsigned NumTokenPaste = 0;
1101 unsigned NumFastTokenPaste = 0;
1102 unsigned NumSkipped = 0;
1103
1104 /// The predefined macros that preprocessor should use from the
1105 /// command line etc.
1106 std::string Predefines;
1107
1108 /// The file ID for the preprocessor predefines.
1109 FileID PredefinesFileID;
1110
1111 /// The file ID for the PCH through header.
1112 FileID PCHThroughHeaderFileID;
1113
1114 /// Whether tokens are being skipped until a #pragma hdrstop is seen.
1115 bool SkippingUntilPragmaHdrStop = false;
1116
1117 /// Whether tokens are being skipped until the through header is seen.
1118 bool SkippingUntilPCHThroughHeader = false;
1119
1120 /// \{
1121 /// Cache of macro expanders to reduce malloc traffic.
1122 enum { TokenLexerCacheSize = 8 };
1123 unsigned NumCachedTokenLexers;
1124 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
1125 /// \}
1126
1127 /// Keeps macro expanded tokens for TokenLexers.
1128 //
1129 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is
1130 /// going to lex in the cache and when it finishes the tokens are removed
1131 /// from the end of the cache.
1132 SmallVector<Token, 16> MacroExpandedTokens;
1133 std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
1134
1135 /// A record of the macro definitions and expansions that
1136 /// occurred during preprocessing.
1137 ///
1138 /// This is an optional side structure that can be enabled with
1139 /// \c createPreprocessingRecord() prior to preprocessing.
1140 PreprocessingRecord *Record = nullptr;
1141
1142 /// Cached tokens state.
1143 using CachedTokensTy = SmallVector<Token, 1>;
1144
1145 /// Cached tokens are stored here when we do backtracking or
1146 /// lookahead. They are "lexed" by the CachingLex() method.
1147 CachedTokensTy CachedTokens;
1148
1149 /// The position of the cached token that CachingLex() should
1150 /// "lex" next.
1151 ///
1152 /// If it points beyond the CachedTokens vector, it means that a normal
1153 /// Lex() should be invoked.
1154 CachedTokensTy::size_type CachedLexPos = 0;
1155
1156 /// Stack of backtrack positions, allowing nested backtracks.
1157 ///
1158 /// The EnableBacktrackAtThisPos() method pushes a position to
1159 /// indicate where CachedLexPos should be set when the BackTrack() method is
1160 /// invoked (at which point the last position is popped).
1161 std::vector<CachedTokensTy::size_type> BacktrackPositions;
1162
1163 /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running.
1164 /// This is used to guard against calling this function recursively.
1165 ///
1166 /// See comments at the use-site for more context about why it is needed.
1167 bool SkippingExcludedConditionalBlock = false;
1168
1169 /// Keeps track of skipped range mappings that were recorded while skipping
1170 /// excluded conditional directives. It maps the source buffer pointer at
1171 /// the beginning of a skipped block, to the number of bytes that should be
1172 /// skipped.
1173 llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges;
1174
1175 void updateOutOfDateIdentifier(const IdentifierInfo &II) const;
1176
1177public:
1178 Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
1179 DiagnosticsEngine &diags, const LangOptions &LangOpts,
1180 SourceManager &SM, HeaderSearch &Headers,
1181 ModuleLoader &TheModuleLoader,
1182 IdentifierInfoLookup *IILookup = nullptr,
1183 bool OwnsHeaderSearch = false,
1184 TranslationUnitKind TUKind = TU_Complete);
1185
1186 ~Preprocessor();
1187
1188 /// Initialize the preprocessor using information about the target.
1189 ///
1190 /// \param Target is owned by the caller and must remain valid for the
1191 /// lifetime of the preprocessor.
1192 /// \param AuxTarget is owned by the caller and must remain valid for
1193 /// the lifetime of the preprocessor.
1194 void Initialize(const TargetInfo &Target,
1195 const TargetInfo *AuxTarget = nullptr);
1196
1197 /// Initialize the preprocessor to parse a model file
1198 ///
1199 /// To parse model files the preprocessor of the original source is reused to
1200 /// preserver the identifier table. However to avoid some duplicate
1201 /// information in the preprocessor some cleanup is needed before it is used
1202 /// to parse model files. This method does that cleanup.
1203 void InitializeForModelFile();
1204
1205 /// Cleanup after model file parsing
1206 void FinalizeForModelFile();
1207
1208 /// Retrieve the preprocessor options used to initialize this
1209 /// preprocessor.
1210 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
1211
1212 DiagnosticsEngine &getDiagnostics() const { return *Diags; }
1213 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
1214
1215 const LangOptions &getLangOpts() const { return LangOpts; }
1216 const TargetInfo &getTargetInfo() const { return *Target; }
1217 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
1218 FileManager &getFileManager() const { return FileMgr; }
1219 SourceManager &getSourceManager() const { return SourceMgr; }
1220 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
1221
1222 IdentifierTable &getIdentifierTable() { return Identifiers; }
1223 const IdentifierTable &getIdentifierTable() const { return Identifiers; }
1224 SelectorTable &getSelectorTable() { return Selectors; }
1225 Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
1226 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
1227
1228 void setExternalSource(ExternalPreprocessorSource *Source) {
1229 ExternalSource = Source;
1230 }
1231
1232 ExternalPreprocessorSource *getExternalSource() const {
1233 return ExternalSource;
1234 }
1235
1236 /// Retrieve the module loader associated with this preprocessor.
1237 ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
1238
1239 bool hadModuleLoaderFatalFailure() const {
1240 return TheModuleLoader.HadFatalFailure;
1241 }
1242
1243 /// Retrieve the number of Directives that have been processed by the
1244 /// Preprocessor.
1245 unsigned getNumDirectives() const {
1246 return NumDirectives;
1247 }
1248
1249 /// True if we are currently preprocessing a #if or #elif directive
1250 bool isParsingIfOrElifDirective() const {
1251 return ParsingIfOrElifDirective;
1252 }
1253
1254 /// Control whether the preprocessor retains comments in output.
1255 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
1256 this->KeepComments = KeepComments | KeepMacroComments;
1257 this->KeepMacroComments = KeepMacroComments;
1258 }
1259
1260 bool getCommentRetentionState() const { return KeepComments; }
1261
1262 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
1263 bool getPragmasEnabled() const { return PragmasEnabled; }
1264
1265 void SetSuppressIncludeNotFoundError(bool Suppress) {
1266 SuppressIncludeNotFoundError = Suppress;
1267 }
1268
1269 bool GetSuppressIncludeNotFoundError() {
1270 return SuppressIncludeNotFoundError;
1271 }
1272
1273 /// Sets whether the preprocessor is responsible for producing output or if
1274 /// it is producing tokens to be consumed by Parse and Sema.
1275 void setPreprocessedOutput(bool IsPreprocessedOutput) {
1276 PreprocessedOutput = IsPreprocessedOutput;
1277 }
1278
1279 /// Returns true if the preprocessor is responsible for generating output,
1280 /// false if it is producing tokens to be consumed by Parse and Sema.
1281 bool isPreprocessedOutput() const { return PreprocessedOutput; }
1282
1283 /// Return true if we are lexing directly from the specified lexer.
1284 bool isCurrentLexer(const PreprocessorLexer *L) const {
1285 return CurPPLexer == L;
1286 }
1287
1288 /// Return the current lexer being lexed from.
1289 ///
1290 /// Note that this ignores any potentially active macro expansions and _Pragma
1291 /// expansions going on at the time.
1292 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
1293
1294 /// Return the current file lexer being lexed from.
1295 ///
1296 /// Note that this ignores any potentially active macro expansions and _Pragma
1297 /// expansions going on at the time.
1298 PreprocessorLexer *getCurrentFileLexer() const;
1299
1300 /// Return the submodule owning the file being lexed. This may not be
1301 /// the current module if we have changed modules since entering the file.
1302 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
1303
1304 /// Returns the FileID for the preprocessor predefines.
1305 FileID getPredefinesFileID() const { return PredefinesFileID; }
1306
1307 /// \{
1308 /// Accessors for preprocessor callbacks.
1309 ///
1310 /// Note that this class takes ownership of any PPCallbacks object given to
1311 /// it.
1312 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
1313 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
1314 if (Callbacks)
1315 C = std::make_unique<PPChainedCallbacks>(args: std::move(C),
1316 args: std::move(Callbacks));
1317 Callbacks = std::move(C);
1318 }
1319 /// \}
1320
1321 /// Get the number of tokens processed so far.
1322 unsigned getTokenCount() const { return TokenCount; }
1323
1324 /// Get the max number of tokens before issuing a -Wmax-tokens warning.
1325 unsigned getMaxTokens() const { return MaxTokens; }
1326
1327 void overrideMaxTokens(unsigned Value, SourceLocation Loc) {
1328 MaxTokens = Value;
1329 MaxTokensOverrideLoc = Loc;
1330 };
1331
1332 SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
1333
1334 /// Register a function that would be called on each token in the final
1335 /// expanded token stream.
1336 /// This also reports annotation tokens produced by the parser.
1337 void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
1338 OnToken = std::move(F);
1339 }
1340
1341 void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; }
1342
1343 bool isMacroDefined(StringRef Id) {
1344 return isMacroDefined(II: &Identifiers.get(Name: Id));
1345 }
1346 bool isMacroDefined(const IdentifierInfo *II) {
1347 return II->hasMacroDefinition() &&
1348 (!getLangOpts().Modules || (bool)getMacroDefinition(II));
1349 }
1350
1351 /// Determine whether II is defined as a macro within the module M,
1352 /// if that is a module that we've already preprocessed. Does not check for
1353 /// macros imported into M.
1354 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
1355 if (!II->hasMacroDefinition())
1356 return false;
1357 auto I = Submodules.find(x: M);
1358 if (I == Submodules.end())
1359 return false;
1360 auto J = I->second.Macros.find(Val: II);
1361 if (J == I->second.Macros.end())
1362 return false;
1363 auto *MD = J->second.getLatest();
1364 return MD && MD->isDefined();
1365 }
1366
1367 MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
1368 if (!II->hasMacroDefinition())
1369 return {};
1370
1371 MacroState &S = CurSubmoduleState->Macros[II];
1372 auto *MD = S.getLatest();
1373 while (isa_and_nonnull<VisibilityMacroDirective>(Val: MD))
1374 MD = MD->getPrevious();
1375 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(Val: MD),
1376 S.getActiveModuleMacros(PP&: *this, II),
1377 S.isAmbiguous(PP&: *this, II));
1378 }
1379
1380 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
1381 SourceLocation Loc) {
1382 if (!II->hadMacroDefinition())
1383 return {};
1384
1385 MacroState &S = CurSubmoduleState->Macros[II];
1386 MacroDirective::DefInfo DI;
1387 if (auto *MD = S.getLatest())
1388 DI = MD->findDirectiveAtLoc(L: Loc, SM: getSourceManager());
1389 // FIXME: Compute the set of active module macros at the specified location.
1390 return MacroDefinition(DI.getDirective(),
1391 S.getActiveModuleMacros(PP&: *this, II),
1392 S.isAmbiguous(PP&: *this, II));
1393 }
1394
1395 /// Given an identifier, return its latest non-imported MacroDirective
1396 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
1397 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
1398 if (!II->hasMacroDefinition())
1399 return nullptr;
1400
1401 auto *MD = getLocalMacroDirectiveHistory(II);
1402 if (!MD || MD->getDefinition().isUndefined())
1403 return nullptr;
1404
1405 return MD;
1406 }
1407
1408 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
1409 return const_cast<Preprocessor*>(this)->getMacroInfo(II);
1410 }
1411
1412 MacroInfo *getMacroInfo(const IdentifierInfo *II) {
1413 if (!II->hasMacroDefinition())
1414 return nullptr;
1415 if (auto MD = getMacroDefinition(II))
1416 return MD.getMacroInfo();
1417 return nullptr;
1418 }
1419
1420 /// Given an identifier, return the latest non-imported macro
1421 /// directive for that identifier.
1422 ///
1423 /// One can iterate over all previous macro directives from the most recent
1424 /// one.
1425 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
1426
1427 /// Add a directive to the macro directive history for this identifier.
1428 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
1429 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
1430 SourceLocation Loc) {
1431 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1432 appendMacroDirective(II, MD);
1433 return MD;
1434 }
1435 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
1436 MacroInfo *MI) {
1437 return appendDefMacroDirective(II, MI, Loc: MI->getDefinitionLoc());
1438 }
1439
1440 /// Set a MacroDirective that was loaded from a PCH file.
1441 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
1442 MacroDirective *MD);
1443
1444 /// Register an exported macro for a module and identifier.
1445 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II,
1446 MacroInfo *Macro,
1447 ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
1448 ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II);
1449
1450 /// Get the list of leaf (non-overridden) module macros for a name.
1451 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
1452 if (II->isOutOfDate())
1453 updateOutOfDateIdentifier(II: *II);
1454 auto I = LeafModuleMacros.find(Val: II);
1455 if (I != LeafModuleMacros.end())
1456 return I->second;
1457 return std::nullopt;
1458 }
1459
1460 /// Get the list of submodules that we're currently building.
1461 ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const {
1462 return BuildingSubmoduleStack;
1463 }
1464
1465 /// \{
1466 /// Iterators for the macro history table. Currently defined macros have
1467 /// IdentifierInfo::hasMacroDefinition() set and an empty
1468 /// MacroInfo::getUndefLoc() at the head of the list.
1469 using macro_iterator = MacroMap::const_iterator;
1470
1471 macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
1472 macro_iterator macro_end(bool IncludeExternalMacros = true) const;
1473
1474 llvm::iterator_range<macro_iterator>
1475 macros(bool IncludeExternalMacros = true) const {
1476 macro_iterator begin = macro_begin(IncludeExternalMacros);
1477 macro_iterator end = macro_end(IncludeExternalMacros);
1478 return llvm::make_range(x: begin, y: end);
1479 }
1480
1481 /// \}
1482
1483 /// Mark the given clang module as affecting the current clang module or translation unit.
1484 void markClangModuleAsAffecting(Module *M) {
1485 assert(M->isModuleMapModule());
1486 if (!BuildingSubmoduleStack.empty()) {
1487 if (M != BuildingSubmoduleStack.back().M)
1488 BuildingSubmoduleStack.back().M->AffectingClangModules.insert(X: M);
1489 } else {
1490 AffectingClangModules.insert(X: M);
1491 }
1492 }
1493
1494 /// Get the set of top-level clang modules that affected preprocessing, but were not
1495 /// imported.
1496 const llvm::SmallSetVector<Module *, 2> &getAffectingClangModules() const {
1497 return AffectingClangModules;
1498 }
1499
1500 /// Mark the file as included.
1501 /// Returns true if this is the first time the file was included.
1502 bool markIncluded(FileEntryRef File) {
1503 HeaderInfo.getFileInfo(FE: File);
1504 return IncludedFiles.insert(V: File).second;
1505 }
1506
1507 /// Return true if this header has already been included.
1508 bool alreadyIncluded(FileEntryRef File) const {
1509 HeaderInfo.getFileInfo(FE: File);
1510 return IncludedFiles.count(V: File);
1511 }
1512
1513 /// Get the set of included files.
1514 IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
1515 const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
1516
1517 /// Return the name of the macro defined before \p Loc that has
1518 /// spelling \p Tokens. If there are multiple macros with same spelling,
1519 /// return the last one defined.
1520 StringRef getLastMacroWithSpelling(SourceLocation Loc,
1521 ArrayRef<TokenValue> Tokens) const;
1522
1523 /// Get the predefines for this processor.
1524 /// Used by some third-party tools to inspect and add predefines (see
1525 /// https://github.com/llvm/llvm-project/issues/57483).
1526 const std::string &getPredefines() const { return Predefines; }
1527
1528 /// Set the predefines for this Preprocessor.
1529 ///
1530 /// These predefines are automatically injected when parsing the main file.
1531 void setPredefines(std::string P) { Predefines = std::move(P); }
1532
1533 /// Return information about the specified preprocessor
1534 /// identifier token.
1535 IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1536 return &Identifiers.get(Name);
1537 }
1538
1539 /// Add the specified pragma handler to this preprocessor.
1540 ///
1541 /// If \p Namespace is non-null, then it is a token required to exist on the
1542 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
1543 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1544 void AddPragmaHandler(PragmaHandler *Handler) {
1545 AddPragmaHandler(Namespace: StringRef(), Handler);
1546 }
1547
1548 /// Remove the specific pragma handler from this preprocessor.
1549 ///
1550 /// If \p Namespace is non-null, then it should be the namespace that
1551 /// \p Handler was added to. It is an error to remove a handler that
1552 /// has not been registered.
1553 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1554 void RemovePragmaHandler(PragmaHandler *Handler) {
1555 RemovePragmaHandler(Namespace: StringRef(), Handler);
1556 }
1557
1558 /// Install empty handlers for all pragmas (making them ignored).
1559 void IgnorePragmas();
1560
1561 /// Set empty line handler.
1562 void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; }
1563
1564 EmptylineHandler *getEmptylineHandler() const { return Emptyline; }
1565
1566 /// Add the specified comment handler to the preprocessor.
1567 void addCommentHandler(CommentHandler *Handler);
1568
1569 /// Remove the specified comment handler.
1570 ///
1571 /// It is an error to remove a handler that has not been registered.
1572 void removeCommentHandler(CommentHandler *Handler);
1573
1574 /// Set the code completion handler to the given object.
1575 void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
1576 CodeComplete = &Handler;
1577 }
1578
1579 /// Retrieve the current code-completion handler.
1580 CodeCompletionHandler *getCodeCompletionHandler() const {
1581 return CodeComplete;
1582 }
1583
1584 /// Clear out the code completion handler.
1585 void clearCodeCompletionHandler() {
1586 CodeComplete = nullptr;
1587 }
1588
1589 /// Hook used by the lexer to invoke the "included file" code
1590 /// completion point.
1591 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);
1592
1593 /// Hook used by the lexer to invoke the "natural language" code
1594 /// completion point.
1595 void CodeCompleteNaturalLanguage();
1596
1597 /// Set the code completion token for filtering purposes.
1598 void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
1599 CodeCompletionII = Filter;
1600 }
1601
1602 /// Set the code completion token range for detecting replacement range later
1603 /// on.
1604 void setCodeCompletionTokenRange(const SourceLocation Start,
1605 const SourceLocation End) {
1606 CodeCompletionTokenRange = {Start, End};
1607 }
1608 SourceRange getCodeCompletionTokenRange() const {
1609 return CodeCompletionTokenRange;
1610 }
1611
1612 /// Get the code completion token for filtering purposes.
1613 StringRef getCodeCompletionFilter() {
1614 if (CodeCompletionII)
1615 return CodeCompletionII->getName();
1616 return {};
1617 }
1618
1619 /// Retrieve the preprocessing record, or NULL if there is no
1620 /// preprocessing record.
1621 PreprocessingRecord *getPreprocessingRecord() const { return Record; }
1622
1623 /// Create a new preprocessing record, which will keep track of
1624 /// all macro expansions, macro definitions, etc.
1625 void createPreprocessingRecord();
1626
1627 /// Returns true if the FileEntry is the PCH through header.
1628 bool isPCHThroughHeader(const FileEntry *FE);
1629
1630 /// True if creating a PCH with a through header.
1631 bool creatingPCHWithThroughHeader();
1632
1633 /// True if using a PCH with a through header.
1634 bool usingPCHWithThroughHeader();
1635
1636 /// True if creating a PCH with a #pragma hdrstop.
1637 bool creatingPCHWithPragmaHdrStop();
1638
1639 /// True if using a PCH with a #pragma hdrstop.
1640 bool usingPCHWithPragmaHdrStop();
1641
1642 /// Skip tokens until after the #include of the through header or
1643 /// until after a #pragma hdrstop.
1644 void SkipTokensWhileUsingPCH();
1645
1646 /// Process directives while skipping until the through header or
1647 /// #pragma hdrstop is found.
1648 void HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1649 SourceLocation HashLoc);
1650
1651 /// Enter the specified FileID as the main source file,
1652 /// which implicitly adds the builtin defines etc.
1653 void EnterMainSourceFile();
1654
1655 /// Inform the preprocessor callbacks that processing is complete.
1656 void EndSourceFile();
1657
1658 /// Add a source file to the top of the include stack and
1659 /// start lexing tokens from it instead of the current buffer.
1660 ///
1661 /// Emits a diagnostic, doesn't enter the file, and returns true on error.
1662 bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir,
1663 SourceLocation Loc, bool IsFirstIncludeOfFile = true);
1664
1665 /// Add a Macro to the top of the include stack and start lexing
1666 /// tokens from it instead of the current buffer.
1667 ///
1668 /// \param Args specifies the tokens input to a function-like macro.
1669 /// \param ILEnd specifies the location of the ')' for a function-like macro
1670 /// or the identifier for an object-like macro.
1671 void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
1672 MacroArgs *Args);
1673
1674private:
1675 /// Add a "macro" context to the top of the include stack,
1676 /// which will cause the lexer to start returning the specified tokens.
1677 ///
1678 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1679 /// will not be subject to further macro expansion. Otherwise, these tokens
1680 /// will be re-macro-expanded when/if expansion is enabled.
1681 ///
1682 /// If \p OwnsTokens is false, this method assumes that the specified stream
1683 /// of tokens has a permanent owner somewhere, so they do not need to be
1684 /// copied. If it is true, it assumes the array of tokens is allocated with
1685 /// \c new[] and the Preprocessor will delete[] it.
1686 ///
1687 /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag
1688 /// set, see the flag documentation for details.
1689 void EnterTokenStream(const Token *Toks, unsigned NumToks,
1690 bool DisableMacroExpansion, bool OwnsTokens,
1691 bool IsReinject);
1692
1693public:
1694 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1695 bool DisableMacroExpansion, bool IsReinject) {
1696 EnterTokenStream(Toks: Toks.release(), NumToks, DisableMacroExpansion, OwnsTokens: true,
1697 IsReinject);
1698 }
1699
1700 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion,
1701 bool IsReinject) {
1702 EnterTokenStream(Toks: Toks.data(), NumToks: Toks.size(), DisableMacroExpansion, OwnsTokens: false,
1703 IsReinject);
1704 }
1705
1706 /// Pop the current lexer/macro exp off the top of the lexer stack.
1707 ///
1708 /// This should only be used in situations where the current state of the
1709 /// top-of-stack lexer is known.
1710 void RemoveTopOfLexerStack();
1711
1712 /// From the point that this method is called, and until
1713 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1714 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
1715 /// make the Preprocessor re-lex the same tokens.
1716 ///
1717 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
1718 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
1719 /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
1720 ///
1721 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
1722 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
1723 /// tokens will continue indefinitely.
1724 ///
1725 void EnableBacktrackAtThisPos();
1726
1727 /// Disable the last EnableBacktrackAtThisPos call.
1728 void CommitBacktrackedTokens();
1729
1730 /// Make Preprocessor re-lex the tokens that were lexed since
1731 /// EnableBacktrackAtThisPos() was previously called.
1732 void Backtrack();
1733
1734 /// True if EnableBacktrackAtThisPos() was called and
1735 /// caching of tokens is on.
1736 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1737
1738 /// Lex the next token for this preprocessor.
1739 void Lex(Token &Result);
1740
1741 /// Lex all tokens for this preprocessor until (and excluding) end of file.
1742 void LexTokensUntilEOF(std::vector<Token> *Tokens = nullptr);
1743
1744 /// Lex a token, forming a header-name token if possible.
1745 bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
1746
  /// Lex the parameters for an \#embed directive.
  ///
  /// \returns the lexed parameters, or std::nullopt on error.
  std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
                                                             bool ForHasEmbed);
1750
  // NOTE(review): undocumented here; presumably lexes the tokens that follow
  // a module import into \p Result -- confirm against the definition.
  bool LexAfterModuleImport(Token &Result);
  // NOTE(review): undocumented here; presumably gathers the tokens trailing a
  // pp-import into \p Toks -- confirm against the definition.
  void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
1753
  // NOTE(review): undocumented here; presumably marks module \p M as visible
  // starting at location \p Loc -- confirm against the definition.
  void makeModuleVisible(Module *M, SourceLocation Loc);
1755
1756 SourceLocation getModuleImportLoc(Module *M) const {
1757 return CurSubmoduleState->VisibleModules.getImportLoc(M);
1758 }
1759
1760 /// Lex a string literal, which may be the concatenation of multiple
1761 /// string literals and may even come from macro expansion.
1762 /// \returns true on success, false if a error diagnostic has been generated.
1763 bool LexStringLiteral(Token &Result, std::string &String,
1764 const char *DiagnosticTag, bool AllowMacroExpansion) {
1765 if (AllowMacroExpansion)
1766 Lex(Result);
1767 else
1768 LexUnexpandedToken(Result);
1769 return FinishLexStringLiteral(Result, String, DiagnosticTag,
1770 AllowMacroExpansion);
1771 }
1772
  /// Complete the lexing of a string literal where the first token has
  /// already been lexed into \p Result (see LexStringLiteral).
  bool FinishLexStringLiteral(Token &Result, std::string &String,
                              const char *DiagnosticTag,
                              bool AllowMacroExpansion);
1778
1779 /// Lex a token. If it's a comment, keep lexing until we get
1780 /// something not a comment.
1781 ///
1782 /// This is useful in -E -C mode where comments would foul up preprocessor
1783 /// directive handling.
1784 void LexNonComment(Token &Result) {
1785 do
1786 Lex(Result);
1787 while (Result.getKind() == tok::comment);
1788 }
1789
1790 /// Just like Lex, but disables macro expansion of identifier tokens.
1791 void LexUnexpandedToken(Token &Result) {
1792 // Disable macro expansion.
1793 bool OldVal = DisableMacroExpansion;
1794 DisableMacroExpansion = true;
1795 // Lex the token.
1796 Lex(Result);
1797
1798 // Reenable it.
1799 DisableMacroExpansion = OldVal;
1800 }
1801
1802 /// Like LexNonComment, but this disables macro expansion of
1803 /// identifier tokens.
1804 void LexUnexpandedNonComment(Token &Result) {
1805 do
1806 LexUnexpandedToken(Result);
1807 while (Result.getKind() == tok::comment);
1808 }
1809
  /// Parses a simple integer literal to get its numeric value. Floating
  /// point literals and user defined literals are rejected. Used primarily to
  /// handle pragmas that accept integer arguments.
  ///
  /// \param Tok The token holding the literal.
  /// \param Value Receives the numeric value.
  /// NOTE(review): the polarity of the bool result is not stated here --
  /// confirm against the definition.
  bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1814
1815 /// Disables macro expansion everywhere except for preprocessor directives.
1816 void SetMacroExpansionOnlyInDirectives() {
1817 DisableMacroExpansion = true;
1818 MacroExpansionInDirectivesOverride = true;
1819 }
1820
1821 /// Peeks ahead N tokens and returns that token without consuming any
1822 /// tokens.
1823 ///
1824 /// LookAhead(0) returns the next token that would be returned by Lex(),
1825 /// LookAhead(1) returns the token after it, etc. This returns normal
1826 /// tokens after phase 5. As such, it is equivalent to using
1827 /// 'Lex', not 'LexUnexpandedToken'.
1828 const Token &LookAhead(unsigned N) {
1829 assert(LexLevel == 0 && "cannot use lookahead while lexing");
1830 if (CachedLexPos + N < CachedTokens.size())
1831 return CachedTokens[CachedLexPos+N];
1832 else
1833 return PeekAhead(N: N+1);
1834 }
1835
1836 /// When backtracking is enabled and tokens are cached,
1837 /// this allows to revert a specific number of tokens.
1838 ///
1839 /// Note that the number of tokens being reverted should be up to the last
1840 /// backtrack position, not more.
1841 void RevertCachedTokens(unsigned N) {
1842 assert(isBacktrackEnabled() &&
1843 "Should only be called when tokens are cached for backtracking");
1844 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
1845 && "Should revert tokens up to the last backtrack position, not more");
1846 assert(signed(CachedLexPos) - signed(N) >= 0 &&
1847 "Corrupted backtrack positions ?");
1848 CachedLexPos -= N;
1849 }
1850
1851 /// Enters a token in the token stream to be lexed next.
1852 ///
1853 /// If BackTrack() is called afterwards, the token will remain at the
1854 /// insertion point.
1855 /// If \p IsReinject is true, resulting token will have Token::IsReinjected
1856 /// flag set. See the flag documentation for details.
1857 void EnterToken(const Token &Tok, bool IsReinject) {
1858 if (LexLevel) {
1859 // It's not correct in general to enter caching lex mode while in the
1860 // middle of a nested lexing action.
1861 auto TokCopy = std::make_unique<Token[]>(num: 1);
1862 TokCopy[0] = Tok;
1863 EnterTokenStream(Toks: std::move(TokCopy), NumToks: 1, DisableMacroExpansion: true, IsReinject);
1864 } else {
1865 EnterCachingLexMode();
1866 assert(IsReinject && "new tokens in the middle of cached stream");
1867 CachedTokens.insert(I: CachedTokens.begin()+CachedLexPos, Elt: Tok);
1868 }
1869 }
1870
1871 /// We notify the Preprocessor that if it is caching tokens (because
1872 /// backtrack is enabled) it should replace the most recent cached tokens
1873 /// with the given annotation token. This function has no effect if
1874 /// backtracking is not enabled.
1875 ///
1876 /// Note that the use of this function is just for optimization, so that the
1877 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is
1878 /// invoked.
1879 void AnnotateCachedTokens(const Token &Tok) {
1880 assert(Tok.isAnnotation() && "Expected annotation token");
1881 if (CachedLexPos != 0 && isBacktrackEnabled())
1882 AnnotatePreviousCachedTokens(Tok);
1883 }
1884
1885 /// Get the location of the last cached token, suitable for setting the end
1886 /// location of an annotation token.
1887 SourceLocation getLastCachedTokenLocation() const {
1888 assert(CachedLexPos != 0);
1889 return CachedTokens[CachedLexPos-1].getLastLoc();
1890 }
1891
  /// Whether \p Tok is the most recent token (the one at `CachedLexPos - 1`)
  /// in CachedTokens.
  bool IsPreviousCachedToken(const Token &Tok) const;
1895
  /// Replace the token at `CachedLexPos - 1` in CachedTokens by the tokens
  /// in \p NewToks.
  ///
  /// Useful when a token needs to be split into smaller ones and the most
  /// recent token in CachedTokens must be updated to reflect that.
  void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
1902
1903 /// Replace the last token with an annotation token.
1904 ///
1905 /// Like AnnotateCachedTokens(), this routine replaces an
1906 /// already-parsed (and resolved) token with an annotation
1907 /// token. However, this routine only replaces the last token with
1908 /// the annotation token; it does not affect any other cached
1909 /// tokens. This function has no effect if backtracking is not
1910 /// enabled.
1911 void ReplaceLastTokenWithAnnotation(const Token &Tok) {
1912 assert(Tok.isAnnotation() && "Expected annotation token");
1913 if (CachedLexPos != 0 && isBacktrackEnabled())
1914 CachedTokens[CachedLexPos-1] = Tok;
1915 }
1916
  /// Enter an annotation token into the token stream.
  ///
  /// \param Range Source range for the annotation token.
  /// \param Kind Token kind of the annotation token.
  /// \param AnnotationVal Opaque value carried by the annotation token.
  void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
                            void *AnnotationVal);
1920
1921 /// Determine whether it's possible for a future call to Lex to produce an
1922 /// annotation token created by a previous call to EnterAnnotationToken.
1923 bool mightHavePendingAnnotationTokens() {
1924 return CurLexerCallback != CLK_Lexer;
1925 }
1926
1927 /// Update the current token to represent the provided
1928 /// identifier, in order to cache an action performed by typo correction.
1929 void TypoCorrectToken(const Token &Tok) {
1930 assert(Tok.getIdentifierInfo() && "Expected identifier token");
1931 if (CachedLexPos != 0 && isBacktrackEnabled())
1932 CachedTokens[CachedLexPos-1] = Tok;
1933 }
1934
  /// Recompute the current lexer kind based on the CurLexer and
  /// CurTokenLexer pointers.
  void recomputeCurLexerKind();
1938
1939 /// Returns true if incremental processing is enabled
1940 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1941
1942 /// Enables the incremental processing
1943 void enableIncrementalProcessing(bool value = true) {
1944 IncrementalProcessing = value;
1945 }
1946
  /// Specify the point at which code-completion will be performed.
  ///
  /// \param File the file in which code completion should occur. If
  /// this file is included multiple times, code-completion will
  /// perform completion the first time it is included. If NULL, this
  /// function clears out the code-completion point.
  /// NOTE(review): \p File is passed as a FileEntryRef, so the "If NULL"
  /// clause above looks stale -- confirm and update.
  ///
  /// \param Line the line at which code completion should occur
  /// (1-based).
  ///
  /// \param Column the column at which code completion should occur
  /// (1-based).
  ///
  /// \returns true if an error occurred, false otherwise.
  bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line,
                              unsigned Column);
1963
1964 /// Determine if we are performing code completion.
1965 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
1966
1967 /// Returns the location of the code-completion point.
1968 ///
1969 /// Returns an invalid location if code-completion is not enabled or the file
1970 /// containing the code-completion point has not been lexed yet.
1971 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
1972
1973 /// Returns the start location of the file of code-completion point.
1974 ///
1975 /// Returns an invalid location if code-completion is not enabled or the file
1976 /// containing the code-completion point has not been lexed yet.
1977 SourceLocation getCodeCompletionFileLoc() const {
1978 return CodeCompletionFileLoc;
1979 }
1980
1981 /// Returns true if code-completion is enabled and we have hit the
1982 /// code-completion point.
1983 bool isCodeCompletionReached() const { return CodeCompletionReached; }
1984
1985 /// Note that we hit the code-completion point.
1986 void setCodeCompletionReached() {
1987 assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
1988 CodeCompletionReached = true;
1989 // Silence any diagnostics that occur after we hit the code-completion.
1990 getDiagnostics().setSuppressAllDiagnostics(true);
1991 }
1992
1993 /// The location of the currently-active \#pragma clang
1994 /// arc_cf_code_audited begin.
1995 ///
1996 /// Returns an invalid location if there is no such pragma active.
1997 std::pair<IdentifierInfo *, SourceLocation>
1998 getPragmaARCCFCodeAuditedInfo() const {
1999 return PragmaARCCFCodeAuditedInfo;
2000 }
2001
2002 /// Set the location of the currently-active \#pragma clang
2003 /// arc_cf_code_audited begin. An invalid location ends the pragma.
2004 void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident,
2005 SourceLocation Loc) {
2006 PragmaARCCFCodeAuditedInfo = {Ident, Loc};
2007 }
2008
2009 /// The location of the currently-active \#pragma clang
2010 /// assume_nonnull begin.
2011 ///
2012 /// Returns an invalid location if there is no such pragma active.
2013 SourceLocation getPragmaAssumeNonNullLoc() const {
2014 return PragmaAssumeNonNullLoc;
2015 }
2016
2017 /// Set the location of the currently-active \#pragma clang
2018 /// assume_nonnull begin. An invalid location ends the pragma.
2019 void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
2020 PragmaAssumeNonNullLoc = Loc;
2021 }
2022
2023 /// Get the location of the recorded unterminated \#pragma clang
2024 /// assume_nonnull begin in the preamble, if one exists.
2025 ///
2026 /// Returns an invalid location if the premable did not end with
2027 /// such a pragma active or if there is no recorded preamble.
2028 SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const {
2029 return PreambleRecordedPragmaAssumeNonNullLoc;
2030 }
2031
2032 /// Record the location of the unterminated \#pragma clang
2033 /// assume_nonnull begin in the preamble.
2034 void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc) {
2035 PreambleRecordedPragmaAssumeNonNullLoc = Loc;
2036 }
2037
2038 /// Set the directory in which the main file should be considered
2039 /// to have been found, if it is not a real file.
2040 void setMainFileDir(DirectoryEntryRef Dir) { MainFileDir = Dir; }
2041
2042 /// Instruct the preprocessor to skip part of the main source file.
2043 ///
2044 /// \param Bytes The number of bytes in the preamble to skip.
2045 ///
2046 /// \param StartOfLine Whether skipping these bytes puts the lexer at the
2047 /// start of a line.
2048 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
2049 SkipMainFilePreamble.first = Bytes;
2050 SkipMainFilePreamble.second = StartOfLine;
2051 }
2052
2053 /// Forwarding function for diagnostics. This emits a diagnostic at
2054 /// the specified Token's location, translating the token's start
2055 /// position in the current buffer into a SourcePosition object for rendering.
2056 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
2057 return Diags->Report(Loc, DiagID);
2058 }
2059
2060 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
2061 return Diags->Report(Loc: Tok.getLocation(), DiagID);
2062 }
2063
2064 /// Return the 'spelling' of the token at the given
2065 /// location; does not go up to the spelling location or down to the
2066 /// expansion location.
2067 ///
2068 /// \param buffer A buffer which will be used only if the token requires
2069 /// "cleaning", e.g. if it contains trigraphs or escaped newlines
2070 /// \param invalid If non-null, will be set \c true if an error occurs.
2071 StringRef getSpelling(SourceLocation loc,
2072 SmallVectorImpl<char> &buffer,
2073 bool *invalid = nullptr) const {
2074 return Lexer::getSpelling(loc, buffer, SM: SourceMgr, options: LangOpts, invalid);
2075 }
2076
2077 /// Return the 'spelling' of the Tok token.
2078 ///
2079 /// The spelling of a token is the characters used to represent the token in
2080 /// the source file after trigraph expansion and escaped-newline folding. In
2081 /// particular, this wants to get the true, uncanonicalized, spelling of
2082 /// things like digraphs, UCNs, etc.
2083 ///
2084 /// \param Invalid If non-null, will be set \c true if an error occurs.
2085 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
2086 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
2087 }
2088
2089 /// Get the spelling of a token into a preallocated buffer, instead
2090 /// of as an std::string.
2091 ///
2092 /// The caller is required to allocate enough space for the token, which is
2093 /// guaranteed to be at least Tok.getLength() bytes long. The length of the
2094 /// actual result is returned.
2095 ///
2096 /// Note that this method may do two possible things: it may either fill in
2097 /// the buffer specified with characters, or it may *change the input pointer*
2098 /// to point to a constant buffer with the data already in it (avoiding a
2099 /// copy). The caller is not allowed to modify the returned buffer pointer
2100 /// if an internal buffer is returned.
2101 unsigned getSpelling(const Token &Tok, const char *&Buffer,
2102 bool *Invalid = nullptr) const {
2103 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
2104 }
2105
  /// Get the spelling of a token into a SmallVector.
  ///
  /// Note that the returned StringRef may not point to the
  /// supplied buffer if a copy can be avoided.
  ///
  /// \param Invalid Optional error flag; defaults to nullptr.
  StringRef getSpelling(const Token &Tok,
                        SmallVectorImpl<char> &Buffer,
                        bool *Invalid = nullptr) const;
2113
2114 /// Relex the token at the specified location.
2115 /// \returns true if there was a failure, false on success.
2116 bool getRawToken(SourceLocation Loc, Token &Result,
2117 bool IgnoreWhiteSpace = false) {
2118 return Lexer::getRawToken(Loc, Result, SM: SourceMgr, LangOpts, IgnoreWhiteSpace);
2119 }
2120
2121 /// Given a Token \p Tok that is a numeric constant with length 1,
2122 /// return the character.
2123 char
2124 getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
2125 bool *Invalid = nullptr) const {
2126 assert((Tok.is(tok::numeric_constant) || Tok.is(tok::binary_data)) &&
2127 Tok.getLength() == 1 && "Called on unsupported token");
2128 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
2129
2130 // If the token is carrying a literal data pointer, just use it.
2131 if (const char *D = Tok.getLiteralData())
2132 return (Tok.getKind() == tok::binary_data) ? *D : *D - '0';
2133
2134 assert(Tok.is(tok::numeric_constant) && "binary data with no data");
2135 // Otherwise, fall back on getCharacterData, which is slower, but always
2136 // works.
2137 return *SourceMgr.getCharacterData(SL: Tok.getLocation(), Invalid) - '0';
2138 }
2139
2140 /// Retrieve the name of the immediate macro expansion.
2141 ///
2142 /// This routine starts from a source location, and finds the name of the
2143 /// macro responsible for its immediate expansion. It looks through any
2144 /// intervening macro argument expansions to compute this. It returns a
2145 /// StringRef that refers to the SourceManager-owned buffer of the source
2146 /// where that macro name is spelled. Thus, the result shouldn't out-live
2147 /// the SourceManager.
2148 StringRef getImmediateMacroName(SourceLocation Loc) {
2149 return Lexer::getImmediateMacroName(Loc, SM: SourceMgr, LangOpts: getLangOpts());
2150 }
2151
  /// Plop the specified string into a scratch buffer and set the
  /// specified token's location and length to it.
  ///
  /// If specified, the source locations provide the expansion point of the
  /// token; both default to an empty SourceLocation.
  void CreateString(StringRef Str, Token &Tok,
                    SourceLocation ExpansionLocStart = SourceLocation(),
                    SourceLocation ExpansionLocEnd = SourceLocation());
2160
  /// Split the first \p Length characters out of the token starting at
  /// \p TokLoc and return a location pointing to the split token. Re-lexing
  /// from the split token will return the split token rather than the
  /// original.
  SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
2165
2166 /// Computes the source location just past the end of the
2167 /// token at this source location.
2168 ///
2169 /// This routine can be used to produce a source location that
2170 /// points just past the end of the token referenced by \p Loc, and
2171 /// is generally used when a diagnostic needs to point just after a
2172 /// token where it expected something different that it received. If
2173 /// the returned source location would not be meaningful (e.g., if
2174 /// it points into a macro), this routine returns an invalid
2175 /// source location.
2176 ///
2177 /// \param Offset an offset from the end of the token, where the source
2178 /// location should refer to. The default offset (0) produces a source
2179 /// location pointing just past the end of the token; an offset of 1 produces
2180 /// a source location pointing to the last character in the token, etc.
2181 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
2182 return Lexer::getLocForEndOfToken(Loc, Offset, SM: SourceMgr, LangOpts);
2183 }
2184
2185 /// Returns true if the given MacroID location points at the first
2186 /// token of the macro expansion.
2187 ///
2188 /// \param MacroBegin If non-null and function returns true, it is set to
2189 /// begin location of the macro.
2190 bool isAtStartOfMacroExpansion(SourceLocation loc,
2191 SourceLocation *MacroBegin = nullptr) const {
2192 return Lexer::isAtStartOfMacroExpansion(loc, SM: SourceMgr, LangOpts,
2193 MacroBegin);
2194 }
2195
2196 /// Returns true if the given MacroID location points at the last
2197 /// token of the macro expansion.
2198 ///
2199 /// \param MacroEnd If non-null and function returns true, it is set to
2200 /// end location of the macro.
2201 bool isAtEndOfMacroExpansion(SourceLocation loc,
2202 SourceLocation *MacroEnd = nullptr) const {
2203 return Lexer::isAtEndOfMacroExpansion(loc, SM: SourceMgr, LangOpts, MacroEnd);
2204 }
2205
  /// Print the token to stderr, used for debugging.
  void DumpToken(const Token &Tok, bool DumpFlags = false) const;
  // NOTE(review): the three helpers below are undocumented here; presumably
  // they are debugging dumps like DumpToken -- confirm against the
  // definitions.
  void DumpLocation(SourceLocation Loc) const;
  void DumpMacro(const MacroInfo &MI) const;
  void dumpMacroInfo(const IdentifierInfo *II);
2211
2212 /// Given a location that specifies the start of a
2213 /// token, return a new location that specifies a character within the token.
2214 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
2215 unsigned Char) const {
2216 return Lexer::AdvanceToTokenCharacter(TokStart, Characters: Char, SM: SourceMgr, LangOpts);
2217 }
2218
2219 /// Increment the counters for the number of token paste operations
2220 /// performed.
2221 ///
2222 /// If fast was specified, this is a 'fast paste' case we handled.
2223 void IncrementPasteCounter(bool isFast) {
2224 if (isFast)
2225 ++NumFastTokenPaste;
2226 else
2227 ++NumTokenPaste;
2228 }
2229
  // NOTE(review): undocumented here; presumably prints preprocessor
  // statistics -- confirm against the definition.
  void PrintStats();
2231
  // NOTE(review): undocumented here; presumably reports the memory used by
  // this preprocessor -- confirm units and scope against the definition.
  size_t getTotalMemory() const;
2233
  /// When the macro expander pastes together a comment (/##/) in Microsoft
  /// mode, this method handles updating the current state, returning the
  /// token on the next source line in \p Tok.
  void HandleMicrosoftCommentPaste(Token &Tok);
2238
2239 //===--------------------------------------------------------------------===//
2240 // Preprocessor callback methods. These are invoked by a lexer as various
2241 // directives and events are found.
2242
  /// Given a tok::raw_identifier token, look up the
  /// identifier information for the token and install it into the token,
  /// updating the token kind accordingly.
  ///
  /// NOTE(review): presumably the returned pointer is the IdentifierInfo that
  /// was installed -- confirm against the definition.
  IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
2247
2248private:
  /// Per-identifier unsigned value associated with poisoning.
  /// NOTE(review): presumably the diagnostic ID passed to SetPoisonReason()
  /// -- confirm against the definition.
  llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
2250
2251public:
  /// Specifies the reason for poisoning an identifier.
  ///
  /// If that identifier is accessed while poisoned, then this reason will be
  /// used instead of the default "poisoned" diagnostic.
  ///
  /// \param II The identifier being poisoned.
  /// \param DiagID The diagnostic to use as the reason.
  void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
2257
  /// Display the reason for a poisoned identifier.
  void HandlePoisonedIdentifier(Token & Identifier);
2260
2261 void MaybeHandlePoisonedIdentifier(Token & Identifier) {
2262 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
2263 if(II->isPoisoned()) {
2264 HandlePoisonedIdentifier(Identifier);
2265 }
2266 }
2267 }
2268
2269private:
  /// Identifiers used for SEH handling in Borland. These are only
  /// allowed in particular circumstances; the three groups below correspond
  /// to the contexts named in the comments.
  // __except block
  IdentifierInfo *Ident__exception_code,
                 *Ident___exception_code,
                 *Ident_GetExceptionCode;
  // __except filter expression
  IdentifierInfo *Ident__exception_info,
                 *Ident___exception_info,
                 *Ident_GetExceptionInfo;
  // __finally
  IdentifierInfo *Ident__abnormal_termination,
                 *Ident___abnormal_termination,
                 *Ident_AbnormalTermination;
2284
  // NOTE(review): both helpers are undocumented here. Presumably the first
  // yields the end position of the current lexer's buffer and the second
  // diagnoses headers missing from the umbrella directory of \p Mod --
  // confirm against the definitions.
  const char *getCurLexerEndPos();
  void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
2287
2288public:
  // NOTE(review): presumably poisons (or, with \p Poison false, un-poisons)
  // the Borland SEH identifiers declared above -- confirm against the
  // definition.
  void PoisonSEHIdentifiers(bool Poison = true); // Borland
2290
  /// Callback invoked when the lexer reads an identifier and has
  /// filled in the token's IdentifierInfo member.
  ///
  /// This callback potentially macro expands it or turns it into a named
  /// token (like 'for').
  ///
  /// \returns true if we actually computed a token, false if we need to
  /// lex again.
  bool HandleIdentifier(Token &Identifier);
2300
  /// Callback invoked when the lexer hits the end of the current file.
  ///
  /// This either stores the EOF token in \p Result and returns true, or
  /// pops a level off the include stack and returns false, at which point the
  /// client should call lex again.
  bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
2307
  /// Callback invoked when the current TokenLexer hits the end of its
  /// token stream.
  ///
  /// NOTE(review): the meaning of the bool result is not stated here;
  /// presumably it mirrors HandleEndOfFile() -- confirm against the
  /// definition.
  bool HandleEndOfTokenLexer(Token &Result);
2311
  /// Callback invoked when the lexer sees a # token at the start of a
  /// line.
  ///
  /// This consumes the directive, modifies the lexer/preprocessor state, and
  /// advances the lexer(s) so that the next token read is the correct one.
  void HandleDirective(Token &Result);
2318
  /// Ensure that the next token is a tok::eod token.
  ///
  /// If not, emit a diagnostic and consume up until the eod.
  ///
  /// \param DirType Name of the directive being checked.
  /// NOTE(review): presumably used in the emitted diagnostic -- confirm.
  /// \param EnableMacros If true, macros that expand to zero tokens are
  /// considered ok.
  ///
  /// \return The location of the end of the directive (the terminating
  /// newline).
  SourceLocation CheckEndOfDirective(const char *DirType,
                                     bool EnableMacros = false);
2329
2330 /// Read and discard all tokens remaining on the current line until
2331 /// the tok::eod token is found. Returns the range of the skipped tokens.
2332 SourceRange DiscardUntilEndOfDirective() {
2333 Token Tmp;
2334 return DiscardUntilEndOfDirective(Tok&: Tmp);
2335 }
2336
  /// Same as the parameterless DiscardUntilEndOfDirective() except that the
  /// token that was found is retained in \p Tok.
  SourceRange DiscardUntilEndOfDirective(Token &Tok);
2339
2340 /// Returns true if the preprocessor has seen a use of
2341 /// __DATE__ or __TIME__ in the file so far.
2342 bool SawDateOrTime() const {
2343 return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
2344 }
2345 unsigned getCounterValue() const { return CounterValue; }
2346 void setCounterValue(unsigned V) { CounterValue = V; }
2347
2348 LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const {
2349 assert(CurrentFPEvalMethod != LangOptions::FEM_UnsetOnCommandLine &&
2350 "FPEvalMethod should be set either from command line or from the "
2351 "target info");
2352 return CurrentFPEvalMethod;
2353 }
2354
2355 LangOptions::FPEvalMethodKind getTUFPEvalMethod() const {
2356 return TUFPEvalMethod;
2357 }
2358
2359 SourceLocation getLastFPEvalPragmaLocation() const {
2360 return LastFPEvalPragmaLocation;
2361 }
2362
2363 void setCurrentFPEvalMethod(SourceLocation PragmaLoc,
2364 LangOptions::FPEvalMethodKind Val) {
2365 assert(Val != LangOptions::FEM_UnsetOnCommandLine &&
2366 "FPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2367 // This is the location of the '#pragma float_control" where the
2368 // execution state is modifed.
2369 LastFPEvalPragmaLocation = PragmaLoc;
2370 CurrentFPEvalMethod = Val;
2371 TUFPEvalMethod = Val;
2372 }
2373
2374 void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val) {
2375 assert(Val != LangOptions::FEM_UnsetOnCommandLine &&
2376 "TUPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2377 TUFPEvalMethod = Val;
2378 }
2379
  /// Retrieves the module that we're currently building, if any.
  Module *getCurrentModule();
2382
  /// Retrieves the module whose implementation we're currently compiling, if
  /// any.
  Module *getCurrentModuleImplementation();
2385
2386 /// If we are preprocessing a named module.
2387 bool isInNamedModule() const { return ModuleDeclState.isNamedModule(); }
2388
2389 /// If we are proprocessing a named interface unit.
2390 /// Note that a module implementation partition is not considered as an
2391 /// named interface unit here although it is importable
2392 /// to ease the parsing.
2393 bool isInNamedInterfaceUnit() const {
2394 return ModuleDeclState.isNamedInterface();
2395 }
2396
2397 /// Get the named module name we're preprocessing.
2398 /// Requires we're preprocessing a named module.
2399 StringRef getNamedModuleName() const { return ModuleDeclState.getName(); }
2400
2401 /// If we are implementing an implementation module unit.
2402 /// Note that the module implementation partition is not considered as an
2403 /// implementation unit.
2404 bool isInImplementationUnit() const {
2405 return ModuleDeclState.isImplementationUnit();
2406 }
2407
2408 /// If we're importing a standard C++20 Named Modules.
2409 bool isInImportingCXXNamedModules() const {
2410 // NamedModuleImportPath will be non-empty only if we're importing
2411 // Standard C++ named modules.
2412 return !NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules &&
2413 !IsAtImport;
2414 }
2415
  /// Allocate a new MacroInfo object with the provided SourceLocation.
  ///
  /// \param L The source location to give the new MacroInfo.
  MacroInfo *AllocateMacroInfo(SourceLocation L);
2418
  /// Turn the specified lexer token into a fully checked and spelled
  /// filename, e.g. as an operand of \#include.
  ///
  /// The caller is expected to provide a buffer that is large enough to hold
  /// the spelling of the filename, but is also expected to handle the case
  /// when this method decides to use a different buffer.
  ///
  /// \returns true if the input filename was in <>'s or false if it was
  /// in ""'s.
  bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
2429
  /// Given a "foo" or \<foo> reference, look up the indicated file.
  ///
  /// \param isAngled Whether the file reference is for system \#include's or
  /// not (i.e. using <> instead of "").
  /// \param SkipCache Defaults to false.
  /// \param OpenFile Defaults to true.
  /// \param CacheFailures Defaults to true.
  ///
  /// \returns the file found, or std::nullopt on failure.
  OptionalFileEntryRef
  LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
             ConstSearchDirIterator FromDir, const FileEntry *FromFile,
             ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath,
             SmallVectorImpl<char> *RelativePath,
             ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
             bool *IsFrameworkFound, bool SkipCache = false,
             bool OpenFile = true, bool CacheFailures = true);
2442
  /// Given a "Filename" or \<Filename> reference, look up the indicated embed
  /// resource.
  ///
  /// \param isAngled Whether the file reference is for system \#include's or
  /// not (i.e. using <> instead of "").
  /// \param OpenFile If true, the file looked up is opened for reading;
  /// otherwise it is only validated that the file exists.
  /// \param LookupFromFile If non-null, quoted filenames are looked up
  /// relative to this file.
  ///
  /// \returns the file found, or std::nullopt on failure.
  OptionalFileEntryRef
  LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
                  const FileEntry *LookupFromFile = nullptr);
2454
  /// Return true if we're in the top-level file, not in a \#include.
  bool isInPrimaryFile() const;
2457
  /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
  /// followed by EOD.
  ///
  /// \param Result Receives the switch value that was lexed.
  /// \returns true if the token is not a valid on-off-switch.
  bool LexOnOffSwitch(tok::OnOffSwitch &Result);
2461
  // NOTE(review): undocumented here. Presumably validates \p MacroNameTok as
  // a macro name, with \p isDefineUndef and \p ShadowFlag meaning the same as
  // the like-named parameters of ReadMacroName() -- confirm the parameters
  // and the polarity of the bool result against the definition.
  bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
                      bool *ShadowFlag = nullptr);
2464
  // NOTE(review): undocumented here. Presumably EnterSubmodule() begins
  // processing submodule \p M and LeaveSubmodule() ends the current one and
  // returns it -- confirm against the definitions.
  void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
  Module *LeaveSubmodule(bool ForPragma);
2467
2468private:
  // TokenLexer::ExpandFunctionArguments() is granted access to the private
  // members of this class.
  friend void TokenLexer::ExpandFunctionArguments();
2470
2471 void PushIncludeMacroStack() {
2472 assert(CurLexerCallback != CLK_CachingLexer &&
2473 "cannot push a caching lexer");
2474 IncludeMacroStack.emplace_back(args&: CurLexerCallback, args&: CurLexerSubmodule,
2475 args: std::move(CurLexer), args&: CurPPLexer,
2476 args: std::move(CurTokenLexer), args&: CurDirLookup);
2477 CurPPLexer = nullptr;
2478 }
2479
2480 void PopIncludeMacroStack() {
2481 CurLexer = std::move(IncludeMacroStack.back().TheLexer);
2482 CurPPLexer = IncludeMacroStack.back().ThePPLexer;
2483 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
2484 CurDirLookup = IncludeMacroStack.back().TheDirLookup;
2485 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
2486 CurLexerCallback = IncludeMacroStack.back().CurLexerCallback;
2487 IncludeMacroStack.pop_back();
2488 }
2489
  // NOTE(review): undocumented here; presumably carries start-of-line and
  // leading-space information over to \p Result -- confirm against the
  // definition.
  void PropagateLineStartLeadingSpaceInfo(Token &Result);
2491
  /// Determine whether we need to create module macros for \#defines in the
  /// current context.
  bool needModuleMacros() const;
2495
  /// Update the set of active module macros and ambiguity flag for a module
  /// macro name.
  ///
  /// \param II The macro name.
  /// \param Info The module macro information to update.
  void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
2499
2500 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
2501 SourceLocation Loc);
2502 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
2503 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
2504 bool isPublic);
2505
2506 /// Lex and validate a macro name, which occurs after a
2507 /// \#define or \#undef.
2508 ///
2509 /// \param MacroNameTok Token that represents the name defined or undefined.
2510 /// \param IsDefineUndef Kind if preprocessor directive.
2511 /// \param ShadowFlag Points to flag that is set if macro name shadows
2512 /// a keyword.
2513 ///
2514 /// This emits a diagnostic, sets the token kind to eod,
2515 /// and discards the rest of the macro line if the macro name is invalid.
2516 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
2517 bool *ShadowFlag = nullptr);
2518
2519 /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2520 /// entire line) of the macro's tokens and adds them to MacroInfo, and while
2521 /// doing so performs certain validity checks including (but not limited to):
2522 /// - # (stringization) is followed by a macro parameter
2523 /// \param MacroNameTok - Token that represents the macro name
2524 /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
2525 ///
2526 /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and
2527 /// returns a nullptr if an invalid sequence of tokens is encountered.
2528 MacroInfo *ReadOptionalMacroParameterListAndBody(
2529 const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
2530
2531 /// The ( starting an argument list of a macro definition has just been read.
2532 /// Lex the rest of the parameters and the closing ), updating \p MI with
2533 /// what we learn and saving in \p LastTok the last token read.
2534 /// Return true if an error occurs parsing the arg list.
2535 bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
2536
2537 /// Provide a suggestion for a typoed directive. If there is no typo, then
2538 /// just skip suggesting.
2539 ///
2540 /// \param Tok - Token that represents the directive
2541 /// \param Directive - String reference for the directive name
2542 void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const;
2543
2544 /// We just read a \#if or related directive and decided that the
2545 /// subsequent tokens are in the \#if'd out portion of the
2546 /// file. Lex the rest of the file, until we see an \#endif. If \p
2547 /// FoundNonSkipPortion is true, then we have already emitted code for part of
2548 /// this \#if directive, so \#else/\#elif blocks should never be entered. If
2549 /// \p FoundElse is false, then \#else directives are ok, if not, then we have
2550 /// already seen one so a \#else directive is a duplicate. When this returns,
2551 /// the caller can lex the first valid token.
2552 void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
2553 SourceLocation IfTokenLoc,
2554 bool FoundNonSkipPortion, bool FoundElse,
2555 SourceLocation ElseLoc = SourceLocation());
2556
2557 /// Information about the result for evaluating an expression for a
2558 /// preprocessor directive.
2559 struct DirectiveEvalResult {
2560 /// The integral value of the expression.
2561 std::optional<llvm::APSInt> Value;
2562
2563 /// Whether the expression was evaluated as true or not.
2564 bool Conditional;
2565
2566 /// True if the expression contained identifiers that were undefined.
2567 bool IncludedUndefinedIds;
2568
2569 /// The source range for the expression.
2570 SourceRange ExprRange;
2571 };
2572
2573 /// Evaluate an integer constant expression that may occur after a
2574 /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2575 ///
2576 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2577 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
2578 bool CheckForEoD = true);
2579
2580 /// Evaluate an integer constant expression that may occur after a
2581 /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2582 ///
2583 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2584 /// \p EvaluatedDefined will contain the result of whether "defined" appeared
2585 /// in the evaluated expression or not.
2586 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
2587 Token &Tok,
2588 bool &EvaluatedDefined,
2589 bool CheckForEoD = true);
2590
2591 /// Process a '__has_embed("path" [, ...])' expression.
2592 ///
2593 /// Returns predefined `__STDC_EMBED_*` macro values if
2594 /// successful.
2595 EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
2596
2597 /// Process a '__has_include("path")' expression.
2598 ///
2599 /// Returns true if successful.
2600 bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II);
2601
2602 /// Process '__has_include_next("path")' expression.
2603 ///
2604 /// Returns true if successful.
2605 bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II);
2606
2607 /// Get the directory and file from which to start \#include_next lookup.
2608 std::pair<ConstSearchDirIterator, const FileEntry *>
2609 getIncludeNextStart(const Token &IncludeNextTok) const;
2610
2611 /// Install the standard preprocessor pragmas:
2612 /// \#pragma GCC poison/system_header/dependency and \#pragma once.
2613 void RegisterBuiltinPragmas();
2614
2615 /// Register builtin macros such as __LINE__ with the identifier table.
2616 void RegisterBuiltinMacros();
2617
2618 /// If an identifier token is read that is to be expanded as a macro, handle
2619 /// it and return the next token as 'Tok'. If we lexed a token, return true;
2620 /// otherwise the caller should lex again.
2621 bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2622
2623 /// Cache macro expanded tokens for TokenLexers.
2624 //
2625 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is
2626 /// going to lex in the cache and when it finishes the tokens are removed
2627 /// from the end of the cache.
2628 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
2629 ArrayRef<Token> tokens);
2630
2631 void removeCachedMacroExpandedTokensOfLastLexer();
2632
2633 /// Determine whether the next preprocessor token to be
2634 /// lexed is a '('. If so, consume the token and return true, if not, this
2635 /// method should have no observable side-effect on the lexed tokens.
2636 bool isNextPPTokenLParen();
2637
2638 /// After reading "MACRO(", this method is invoked to read all of the formal
2639 /// arguments specified for the macro invocation. Returns null on error.
2640 MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
2641 SourceLocation &MacroEnd);
2642
2643 /// If an identifier token is read that is to be expanded
2644 /// as a builtin macro, handle it and return the next token as 'Tok'.
2645 void ExpandBuiltinMacro(Token &Tok);
2646
2647 /// Read a \c _Pragma directive, slice it up, process it, then
2648 /// return the first token after the directive.
2649 /// This assumes that the \c _Pragma token has just been read into \p Tok.
2650 void Handle_Pragma(Token &Tok);
2651
2652 /// Like Handle_Pragma except the pragma text is not enclosed within
2653 /// a string literal.
2654 void HandleMicrosoft__pragma(Token &Tok);
2655
2656 /// Add a lexer to the top of the include stack and
2657 /// start lexing tokens from it instead of the current buffer.
2658 void EnterSourceFileWithLexer(Lexer *TheLexer, ConstSearchDirIterator Dir);
2659
2660 /// Set the FileID for the preprocessor predefines.
2661 void setPredefinesFileID(FileID FID) {
2662 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
2663 PredefinesFileID = FID;
2664 }
2665
2666 /// Set the FileID for the PCH through header.
2667 void setPCHThroughHeaderFileID(FileID FID);
2668
2669 /// Returns true if we are lexing from a file and not a
2670 /// pragma or a macro.
2671 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2672 return L ? !L->isPragmaLexer() : P != nullptr;
2673 }
2674
2675 static bool IsFileLexer(const IncludeStackInfo& I) {
2676 return IsFileLexer(L: I.TheLexer.get(), P: I.ThePPLexer);
2677 }
2678
2679 bool IsFileLexer() const {
2680 return IsFileLexer(L: CurLexer.get(), P: CurPPLexer);
2681 }
2682
2683 //===--------------------------------------------------------------------===//
2684 // Caching stuff.
2685 void CachingLex(Token &Result);
2686
2687 bool InCachingLexMode() const {
2688 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2689 // that we are past EOF, not that we are in CachingLex mode.
2690 return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
2691 }
2692
// NOTE(review): both are undocumented here; the difference between the
// checked and "Unchecked" variants is only visible in their definitions.
void EnterCachingLexMode();
void EnterCachingLexModeUnchecked();
2695
2696 void ExitCachingLexMode() {
2697 if (InCachingLexMode())
2698 RemoveTopOfLexerStack();
2699 }
2700
2701 const Token &PeekAhead(unsigned N);
2702 void AnnotatePreviousCachedTokens(const Token &Tok);
2703
2704 //===--------------------------------------------------------------------===//
2705 /// Handle*Directive - implement the various preprocessor directives. These
2706 /// should side-effect the current preprocessor object so that the next call
2707 /// to Lex() will return the appropriate token next.
2708 void HandleLineDirective();
2709 void HandleDigitDirective(Token &Tok);
2710 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
2711 void HandleIdentSCCSDirective(Token &Tok);
2712 void HandleMacroPublicDirective(Token &Tok);
2713 void HandleMacroPrivateDirective();
2714
2715 /// An additional notification that can be produced by a header inclusion or
2716 /// import to tell the parser what happened.
2717 struct ImportAction {
2718 enum ActionKind {
2719 None,
2720 ModuleBegin,
2721 ModuleImport,
2722 HeaderUnitImport,
2723 SkippedModuleImport,
2724 Failure,
2725 } Kind;
2726 Module *ModuleForHeader = nullptr;
2727
2728 ImportAction(ActionKind AK, Module *Mod = nullptr)
2729 : Kind(AK), ModuleForHeader(Mod) {
2730 assert((AK == None || Mod || AK == Failure) &&
2731 "no module for module action");
2732 }
2733 };
2734
2735 OptionalFileEntryRef LookupHeaderIncludeOrImport(
2736 ConstSearchDirIterator *CurDir, StringRef &Filename,
2737 SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2738 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2739 bool &IsMapped, ConstSearchDirIterator LookupFrom,
2740 const FileEntry *LookupFromFile, StringRef &LookupFilename,
2741 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2742 ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
2743 // Binary data inclusion
2744 void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
2745 const FileEntry *LookupFromFile = nullptr);
2746 void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
2747 const LexEmbedParametersResult &Params,
2748 StringRef BinaryContents);
2749
2750 // File inclusion.
2751 void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
2752 ConstSearchDirIterator LookupFrom = nullptr,
2753 const FileEntry *LookupFromFile = nullptr);
2754 ImportAction
2755 HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
2756 Token &FilenameTok, SourceLocation EndLoc,
2757 ConstSearchDirIterator LookupFrom = nullptr,
2758 const FileEntry *LookupFromFile = nullptr);
2759 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
2760 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
2761 void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
2762 void HandleMicrosoftImportDirective(Token &Tok);
2763
2764public:
2765 /// Check that the given module is available, producing a diagnostic if not.
2766 /// \return \c true if the check failed (because the module is not available).
2767 /// \c false if the module appears to be usable.
2768 static bool checkModuleIsAvailable(const LangOptions &LangOpts,
2769 const TargetInfo &TargetInfo,
2770 const Module &M, DiagnosticsEngine &Diags);
2771
2772 // Module inclusion testing.
2773 /// Find the module that owns the source or header file that
2774 /// \p Loc points to. If the location is in a file that was included
2775 /// into a module, or is outside any module, returns nullptr.
2776 Module *getModuleForLocation(SourceLocation Loc, bool AllowTextual);
2777
2778 /// We want to produce a diagnostic at location IncLoc concerning an
2779 /// unreachable effect at location MLoc (eg, where a desired entity was
2780 /// declared or defined). Determine whether the right way to make MLoc
2781 /// reachable is by #include, and if so, what header should be included.
2782 ///
2783 /// This is not necessarily fast, and might load unexpected module maps, so
2784 /// should only be called by code that intends to produce an error.
2785 ///
2786 /// \param IncLoc The location at which the missing effect was detected.
2787 /// \param MLoc A location within an unimported module at which the desired
2788 /// effect occurred.
2789 /// \return A file that can be #included to provide the desired effect. Null
2790 /// if no such file could be determined or if a #include is not
2791 /// appropriate (eg, if a module should be imported instead).
2792 OptionalFileEntryRef getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
2793 SourceLocation MLoc);
2794
2795 bool isRecordingPreamble() const {
2796 return PreambleConditionalStack.isRecording();
2797 }
2798
2799 bool hasRecordedPreamble() const {
2800 return PreambleConditionalStack.hasRecordedPreamble();
2801 }
2802
2803 ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
2804 return PreambleConditionalStack.getStack();
2805 }
2806
2807 void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
2808 PreambleConditionalStack.setStack(s);
2809 }
2810
2811 void setReplayablePreambleConditionalStack(
2812 ArrayRef<PPConditionalInfo> s, std::optional<PreambleSkipInfo> SkipInfo) {
2813 PreambleConditionalStack.startReplaying();
2814 PreambleConditionalStack.setStack(s);
2815 PreambleConditionalStack.SkipInfo = SkipInfo;
2816 }
2817
2818 std::optional<PreambleSkipInfo> getPreambleSkipInfo() const {
2819 return PreambleConditionalStack.SkipInfo;
2820 }
2821
2822private:
/// Initialize the conditional stack from the preamble once the predefined
/// file has been processed.
void replayPreambleConditionalStack();
2826
2827 // Macro handling.
2828 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
2829 void HandleUndefDirective();
2830
2831 // Conditional Inclusion.
2832 void HandleIfdefDirective(Token &Result, const Token &HashToken,
2833 bool isIfndef, bool ReadAnyTokensBeforeDirective);
2834 void HandleIfDirective(Token &IfToken, const Token &HashToken,
2835 bool ReadAnyTokensBeforeDirective);
2836 void HandleEndifDirective(Token &EndifToken);
2837 void HandleElseDirective(Token &Result, const Token &HashToken);
2838 void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken,
2839 tok::PPKeywordKind Kind);
2840
2841 // Pragmas.
2842 void HandlePragmaDirective(PragmaIntroducer Introducer);
2843
2844public:
2845 void HandlePragmaOnce(Token &OnceTok);
2846 void HandlePragmaMark(Token &MarkTok);
2847 void HandlePragmaPoison();
2848 void HandlePragmaSystemHeader(Token &SysHeaderTok);
2849 void HandlePragmaDependency(Token &DependencyTok);
2850 void HandlePragmaPushMacro(Token &Tok);
2851 void HandlePragmaPopMacro(Token &Tok);
2852 void HandlePragmaIncludeAlias(Token &Tok);
2853 void HandlePragmaModuleBuild(Token &Tok);
2854 void HandlePragmaHdrstop(Token &Tok);
2855 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
2856
2857 // Return true and store the first token only if any CommentHandler
2858 // has inserted some tokens and getCommentRetentionState() is false.
2859 bool HandleComment(Token &result, SourceRange Comment);
2860
2861 /// A macro is used, update information about macros that need unused
2862 /// warnings.
2863 void markMacroAsUsed(MacroInfo *MI);
2864
2865 void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg,
2866 SourceLocation AnnotationLoc) {
2867 auto Annotations = AnnotationInfos.find(Val: II);
2868 if (Annotations == AnnotationInfos.end())
2869 AnnotationInfos.insert(KV: std::make_pair(
2870 x&: II,
2871 y: MacroAnnotations::makeDeprecation(Loc: AnnotationLoc, Msg: std::move(Msg))));
2872 else
2873 Annotations->second.DeprecationInfo =
2874 MacroAnnotationInfo{.Location: AnnotationLoc, .Message: std::move(Msg)};
2875 }
2876
2877 void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg,
2878 SourceLocation AnnotationLoc) {
2879 auto Annotations = AnnotationInfos.find(Val: II);
2880 if (Annotations == AnnotationInfos.end())
2881 AnnotationInfos.insert(
2882 KV: std::make_pair(x&: II, y: MacroAnnotations::makeRestrictExpansion(
2883 Loc: AnnotationLoc, Msg: std::move(Msg))));
2884 else
2885 Annotations->second.RestrictExpansionInfo =
2886 MacroAnnotationInfo{.Location: AnnotationLoc, .Message: std::move(Msg)};
2887 }
2888
2889 void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) {
2890 auto Annotations = AnnotationInfos.find(Val: II);
2891 if (Annotations == AnnotationInfos.end())
2892 AnnotationInfos.insert(
2893 KV: std::make_pair(x&: II, y: MacroAnnotations::makeFinal(Loc: AnnotationLoc)));
2894 else
2895 Annotations->second.FinalAnnotationLoc = AnnotationLoc;
2896 }
2897
2898 const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const {
2899 return AnnotationInfos.find(Val: II)->second;
2900 }
2901
2902 void emitMacroExpansionWarnings(const Token &Identifier,
2903 bool IsIfnDef = false) const {
2904 IdentifierInfo *Info = Identifier.getIdentifierInfo();
2905 if (Info->isDeprecatedMacro())
2906 emitMacroDeprecationWarning(Identifier);
2907
2908 if (Info->isRestrictExpansion() &&
2909 !SourceMgr.isInMainFile(Loc: Identifier.getLocation()))
2910 emitRestrictExpansionWarning(Identifier);
2911
2912 if (!IsIfnDef) {
2913 if (Info->getName() == "INFINITY" && getLangOpts().NoHonorInfs)
2914 emitRestrictInfNaNWarning(Identifier, DiagSelection: 0);
2915 if (Info->getName() == "NAN" && getLangOpts().NoHonorNaNs)
2916 emitRestrictInfNaNWarning(Identifier, DiagSelection: 1);
2917 }
2918 }
2919
2920 static void processPathForFileMacro(SmallVectorImpl<char> &Path,
2921 const LangOptions &LangOpts,
2922 const TargetInfo &TI);
2923
2924 static void processPathToFileName(SmallVectorImpl<char> &FileName,
2925 const PresumedLoc &PLoc,
2926 const LangOptions &LangOpts,
2927 const TargetInfo &TI);
2928
2929private:
2930 void emitMacroDeprecationWarning(const Token &Identifier) const;
2931 void emitRestrictExpansionWarning(const Token &Identifier) const;
2932 void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const;
2933 void emitRestrictInfNaNWarning(const Token &Identifier,
2934 unsigned DiagSelection) const;
2935
2936 /// This boolean state keeps track if the current scanned token (by this PP)
2937 /// is in an "-Wunsafe-buffer-usage" opt-out region. Assuming PP scans a
2938 /// translation unit in a linear order.
2939 bool InSafeBufferOptOutRegion = false;
2940
2941 /// Hold the start location of the current "-Wunsafe-buffer-usage" opt-out
2942 /// region if PP is currently in such a region. Hold undefined value
2943 /// otherwise.
2944 SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region.
2945
2946 using SafeBufferOptOutRegionsTy =
2947 SmallVector<std::pair<SourceLocation, SourceLocation>, 16>;
2948 // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
2949 // translation unit. Each region is represented by a pair of start and
2950 // end locations.
2951 SafeBufferOptOutRegionsTy SafeBufferOptOutMap;
2952
2953 // The "-Wunsafe-buffer-usage" opt-out regions in loaded ASTs. We use the
2954 // following structure to manage them by their ASTs.
2955 struct {
2956 // A map from unique IDs to region maps of loaded ASTs. The ID identifies a
2957 // loaded AST. See `SourceManager::getUniqueLoadedASTID`.
2958 llvm::DenseMap<FileID, SafeBufferOptOutRegionsTy> LoadedRegions;
2959
2960 // Returns a reference to the safe buffer opt-out regions of the loaded
2961 // AST where `Loc` belongs to. (Construct if absent)
2962 SafeBufferOptOutRegionsTy &
2963 findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) {
2964 return LoadedRegions[SrcMgr.getUniqueLoadedASTFileID(Loc)];
2965 }
2966
2967 // Returns a reference to the safe buffer opt-out regions of the loaded
2968 // AST where `Loc` belongs to. (This const function returns nullptr if
2969 // absent.)
2970 const SafeBufferOptOutRegionsTy *
2971 lookupLoadedOptOutMap(SourceLocation Loc,
2972 const SourceManager &SrcMgr) const {
2973 FileID FID = SrcMgr.getUniqueLoadedASTFileID(Loc);
2974 auto Iter = LoadedRegions.find(Val: FID);
2975
2976 if (Iter == LoadedRegions.end())
2977 return nullptr;
2978 return &Iter->getSecond();
2979 }
2980 } LoadedSafeBufferOptOutMap;
2981
2982public:
2983 /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
2984 /// region. This `Loc` must be a source location that has been pre-processed.
2985 bool isSafeBufferOptOut(const SourceManager&SourceMgr, const SourceLocation &Loc) const;
2986
2987 /// Alter the state of whether this PP currently is in a
2988 /// "-Wunsafe-buffer-usage" opt-out region.
2989 ///
2990 /// \param isEnter true if this PP is entering a region; otherwise, this PP
2991 /// is exiting a region
2992 /// \param Loc the location of the entry or exit of a
2993 /// region
2994 /// \return true iff it is INVALID to enter or exit a region, i.e.,
2995 /// attempt to enter a region before exiting a previous region, or exiting a
2996 /// region that PP is not currently in.
2997 bool enterOrExitSafeBufferOptOutRegion(bool isEnter,
2998 const SourceLocation &Loc);
2999
3000 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"
3001 /// opt-out region
3002 bool isPPInSafeBufferOptOutRegion();
3003
3004 /// \param StartLoc output argument. It will be set to the start location of
3005 /// the current "-Wunsafe-buffer-usage" opt-out region iff this function
3006 /// returns true.
3007 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"
3008 /// opt-out region
3009 bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
3010
3011 /// \return a sequence of SourceLocations representing ordered opt-out regions
3012 /// specified by
3013 /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
3014 SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const;
3015
3016 /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
3017 /// record of code `PP_UNSAFE_BUFFER_USAGE`.
3018 /// \return true iff the `Preprocessor` has been updated; false `Preprocessor`
3019 /// is same as itself before the call.
3020 bool setDeserializedSafeBufferOptOutMap(
3021 const SmallVectorImpl<SourceLocation> &SrcLocSeqs);
3022
3023private:
3024 /// Helper functions to forward lexing to the actual lexer. They all share the
3025 /// same signature.
3026 static bool CLK_Lexer(Preprocessor &P, Token &Result) {
3027 return P.CurLexer->Lex(Result);
3028 }
3029 static bool CLK_TokenLexer(Preprocessor &P, Token &Result) {
3030 return P.CurTokenLexer->Lex(Tok&: Result);
3031 }
3032 static bool CLK_CachingLexer(Preprocessor &P, Token &Result) {
3033 P.CachingLex(Result);
3034 return true;
3035 }
3036 static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) {
3037 return P.CurLexer->LexDependencyDirectiveToken(Result);
3038 }
3039 static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) {
3040 return P.LexAfterModuleImport(Result);
3041 }
3042};
3043
3044/// Abstract base class that describes a handler that will receive
3045/// source ranges for each of the comments encountered in the source file.
3046class CommentHandler {
3047public:
3048 virtual ~CommentHandler();
3049
3050 // The handler shall return true if it has pushed any tokens
3051 // to be read using e.g. EnterToken or EnterTokenStream.
3052 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
3053};
3054
3055/// Abstract base class that describes a handler that will receive
3056/// source ranges for empty lines encountered in the source file.
3057class EmptylineHandler {
3058public:
3059 virtual ~EmptylineHandler();
3060
3061 // The handler handles empty lines.
3062 virtual void HandleEmptyline(SourceRange Range) = 0;
3063};
3064
3065/// Helper class to shuttle information about #embed directives from the
3066/// preprocessor to the parser through an annotation token.
3067struct EmbedAnnotationData {
3068 StringRef BinaryData;
3069};
3070
3071/// Registry of pragma handlers added by plugins
3072using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
3073
3074} // namespace clang
3075
3076#endif // LLVM_CLANG_LEX_PREPROCESSOR_H
3077