1//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Preprocessor interface.
10//
11//===----------------------------------------------------------------------===//
12//
13// Options to support:
14// -H - Print the name of each header file used.
15// -d[DNI] - Dump various things.
16// -fworking-directory - #line's with preprocessor's working dir.
17// -fpreprocessed
18// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
19// -W*
20// -w
21//
22// Messages to emit:
23// "Multiple include guards may be useful for:\n"
24//
25//===----------------------------------------------------------------------===//
26
27#include "clang/Lex/Preprocessor.h"
28#include "clang/Basic/Builtins.h"
29#include "clang/Basic/FileManager.h"
30#include "clang/Basic/IdentifierTable.h"
31#include "clang/Basic/LLVM.h"
32#include "clang/Basic/LangOptions.h"
33#include "clang/Basic/Module.h"
34#include "clang/Basic/SourceLocation.h"
35#include "clang/Basic/SourceManager.h"
36#include "clang/Basic/TargetInfo.h"
37#include "clang/Lex/CodeCompletionHandler.h"
38#include "clang/Lex/DependencyDirectivesScanner.h"
39#include "clang/Lex/ExternalPreprocessorSource.h"
40#include "clang/Lex/HeaderSearch.h"
41#include "clang/Lex/LexDiagnostic.h"
42#include "clang/Lex/Lexer.h"
43#include "clang/Lex/LiteralSupport.h"
44#include "clang/Lex/MacroArgs.h"
45#include "clang/Lex/MacroInfo.h"
46#include "clang/Lex/ModuleLoader.h"
47#include "clang/Lex/NoTrivialPPDirectiveTracer.h"
48#include "clang/Lex/Pragma.h"
49#include "clang/Lex/PreprocessingRecord.h"
50#include "clang/Lex/PreprocessorLexer.h"
51#include "clang/Lex/PreprocessorOptions.h"
52#include "clang/Lex/ScratchBuffer.h"
53#include "clang/Lex/Token.h"
54#include "clang/Lex/TokenLexer.h"
55#include "llvm/ADT/APInt.h"
56#include "llvm/ADT/ArrayRef.h"
57#include "llvm/ADT/DenseMap.h"
58#include "llvm/ADT/STLExtras.h"
59#include "llvm/ADT/ScopeExit.h"
60#include "llvm/ADT/SmallVector.h"
61#include "llvm/ADT/StringRef.h"
62#include "llvm/Support/Capacity.h"
63#include "llvm/Support/ErrorHandling.h"
64#include "llvm/Support/MemoryBuffer.h"
65#include "llvm/Support/MemoryBufferRef.h"
66#include "llvm/Support/SaveAndRestore.h"
67#include "llvm/Support/raw_ostream.h"
68#include <algorithm>
69#include <cassert>
70#include <memory>
71#include <optional>
72#include <string>
73#include <utility>
74#include <vector>
75
76using namespace clang;
77
/// Lower bound, counted in tokens, on the spacing between two consecutive
/// check points.
static constexpr unsigned CheckPointStepSize = 1024;
80
81LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
82
83ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
84
85Preprocessor::Preprocessor(const PreprocessorOptions &PPOpts,
86 DiagnosticsEngine &diags, const LangOptions &opts,
87 SourceManager &SM, HeaderSearch &Headers,
88 ModuleLoader &TheModuleLoader,
89 IdentifierInfoLookup *IILookup, bool OwnsHeaders,
90 TranslationUnitKind TUKind)
91 : PPOpts(PPOpts), Diags(&diags), LangOpts(opts),
92 FileMgr(Headers.getFileMgr()), SourceMgr(SM),
93 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
94 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
95 // As the language options may have not been loaded yet (when
96 // deserializing an ASTUnit), adding keywords to the identifier table is
97 // deferred to Preprocessor::Initialize().
98 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
99 TUKind(TUKind), SkipMainFilePreamble(0, true),
100 CurSubmoduleState(&NullSubmoduleState) {
101 OwnsHeaderSearch = OwnsHeaders;
102
103 // Default to discarding comments.
104 KeepComments = false;
105 KeepMacroComments = false;
106 SuppressIncludeNotFoundError = false;
107
108 // Macro expansion is enabled.
109 DisableMacroExpansion = false;
110 MacroExpansionInDirectivesOverride = false;
111 InMacroArgs = false;
112 ArgMacro = nullptr;
113 InMacroArgPreExpansion = false;
114 NumCachedTokenLexers = 0;
115 PragmasEnabled = true;
116 ParsingIfOrElifDirective = false;
117 PreprocessedOutput = false;
118
119 // We haven't read anything from the external source.
120 ReadMacrosFromExternalSource = false;
121
122 LastTokenWasExportKeyword.reset();
123
124 BuiltinInfo = std::make_unique<Builtin::Context>();
125
126 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
127 // a macro. They get unpoisoned where it is allowed.
128 (Ident__VA_ARGS__ = getIdentifierInfo(Name: "__VA_ARGS__"))->setIsPoisoned();
129 SetPoisonReason(II: Ident__VA_ARGS__,DiagID: diag::ext_pp_bad_vaargs_use);
130 (Ident__VA_OPT__ = getIdentifierInfo(Name: "__VA_OPT__"))->setIsPoisoned();
131 SetPoisonReason(II: Ident__VA_OPT__,DiagID: diag::ext_pp_bad_vaopt_use);
132
133 // Initialize the pragma handlers.
134 RegisterBuiltinPragmas();
135
136 // Initialize builtin macros like __LINE__ and friends.
137 RegisterBuiltinMacros();
138
139 if(LangOpts.Borland) {
140 Ident__exception_info = getIdentifierInfo(Name: "_exception_info");
141 Ident___exception_info = getIdentifierInfo(Name: "__exception_info");
142 Ident_GetExceptionInfo = getIdentifierInfo(Name: "GetExceptionInformation");
143 Ident__exception_code = getIdentifierInfo(Name: "_exception_code");
144 Ident___exception_code = getIdentifierInfo(Name: "__exception_code");
145 Ident_GetExceptionCode = getIdentifierInfo(Name: "GetExceptionCode");
146 Ident__abnormal_termination = getIdentifierInfo(Name: "_abnormal_termination");
147 Ident___abnormal_termination = getIdentifierInfo(Name: "__abnormal_termination");
148 Ident_AbnormalTermination = getIdentifierInfo(Name: "AbnormalTermination");
149 } else {
150 Ident__exception_info = Ident__exception_code = nullptr;
151 Ident__abnormal_termination = Ident___exception_info = nullptr;
152 Ident___exception_code = Ident___abnormal_termination = nullptr;
153 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
154 Ident_AbnormalTermination = nullptr;
155 }
156
157 // Default incremental processing to -fincremental-extensions, clients can
158 // override with `enableIncrementalProcessing` if desired.
159 IncrementalProcessing = LangOpts.IncrementalExtensions;
160
161 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
162 if (usingPCHWithPragmaHdrStop())
163 SkippingUntilPragmaHdrStop = true;
164
165 // If using a PCH with a through header, start skipping tokens.
166 if (!this->PPOpts.PCHThroughHeader.empty() &&
167 !this->PPOpts.ImplicitPCHInclude.empty())
168 SkippingUntilPCHThroughHeader = true;
169
170 if (this->PPOpts.GeneratePreamble)
171 PreambleConditionalStack.startRecording();
172
173 MaxTokens = LangOpts.MaxTokens;
174}
175
176Preprocessor::~Preprocessor() {
177 assert(!isBacktrackEnabled() && "EnableBacktrack/Backtrack imbalance!");
178
179 IncludeMacroStack.clear();
180
181 // Free any cached macro expanders.
182 // This populates MacroArgCache, so all TokenLexers need to be destroyed
183 // before the code below that frees up the MacroArgCache list.
184 std::fill(first: TokenLexerCache, last: TokenLexerCache + NumCachedTokenLexers, value: nullptr);
185 CurTokenLexer.reset();
186
187 // Free any cached MacroArgs.
188 for (MacroArgs *ArgList = MacroArgCache; ArgList;)
189 ArgList = ArgList->deallocate();
190
191 // Delete the header search info, if we own it.
192 if (OwnsHeaderSearch)
193 delete &HeaderInfo;
194}
195
196void Preprocessor::Initialize(const TargetInfo &Target,
197 const TargetInfo *AuxTarget) {
198 assert((!this->Target || this->Target == &Target) &&
199 "Invalid override of target information");
200 this->Target = &Target;
201
202 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
203 "Invalid override of aux target information.");
204 this->AuxTarget = AuxTarget;
205
206 // Initialize information about built-ins.
207 BuiltinInfo->InitializeTarget(Target, AuxTarget);
208 HeaderInfo.setTarget(Target);
209
210 // Populate the identifier table with info about keywords for the current language.
211 Identifiers.AddKeywords(LangOpts);
212
213 // Initialize the __FTL_EVAL_METHOD__ macro to the TargetInfo.
214 setTUFPEvalMethod(getTargetInfo().getFPEvalMethod());
215
216 if (getLangOpts().getFPEvalMethod() == LangOptions::FEM_UnsetOnCommandLine)
217 // Use setting from TargetInfo.
218 setCurrentFPEvalMethod(PragmaLoc: SourceLocation(), Val: Target.getFPEvalMethod());
219 else
220 // Set initial value of __FLT_EVAL_METHOD__ from the command line.
221 setCurrentFPEvalMethod(PragmaLoc: SourceLocation(), Val: getLangOpts().getFPEvalMethod());
222}
223
224void Preprocessor::InitializeForModelFile() {
225 NumEnteredSourceFiles = 0;
226
227 // Reset pragmas
228 PragmaHandlersBackup = std::move(PragmaHandlers);
229 PragmaHandlers = std::make_unique<PragmaNamespace>(args: StringRef());
230 RegisterBuiltinPragmas();
231
232 // Reset PredefinesFileID
233 PredefinesFileID = FileID();
234}
235
236void Preprocessor::FinalizeForModelFile() {
237 NumEnteredSourceFiles = 1;
238
239 PragmaHandlers = std::move(PragmaHandlersBackup);
240}
241
242void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
243 llvm::errs() << tok::getTokenName(Kind: Tok.getKind());
244
245 if (!Tok.isAnnotation())
246 llvm::errs() << " '" << getSpelling(Tok) << "'";
247
248 if (!DumpFlags) return;
249
250 llvm::errs() << "\t";
251 if (Tok.isAtStartOfLine())
252 llvm::errs() << " [StartOfLine]";
253 if (Tok.hasLeadingSpace())
254 llvm::errs() << " [LeadingSpace]";
255 if (Tok.isExpandDisabled())
256 llvm::errs() << " [ExpandDisabled]";
257 if (Tok.needsCleaning()) {
258 const char *Start = SourceMgr.getCharacterData(SL: Tok.getLocation());
259 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
260 << "']";
261 }
262
263 llvm::errs() << "\tLoc=<";
264 DumpLocation(Loc: Tok.getLocation());
265 llvm::errs() << ">";
266}
267
268void Preprocessor::DumpLocation(SourceLocation Loc) const {
269 Loc.print(OS&: llvm::errs(), SM: SourceMgr);
270}
271
272void Preprocessor::DumpMacro(const MacroInfo &MI) const {
273 llvm::errs() << "MACRO: ";
274 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
275 DumpToken(Tok: MI.getReplacementToken(Tok: i));
276 llvm::errs() << " ";
277 }
278 llvm::errs() << "\n";
279}
280
281void Preprocessor::PrintStats() {
282 llvm::errs() << "\n*** Preprocessor Stats:\n";
283 llvm::errs() << NumDirectives << " directives found:\n";
284 llvm::errs() << " " << NumDefined << " #define.\n";
285 llvm::errs() << " " << NumUndefined << " #undef.\n";
286 llvm::errs() << " #include/#include_next/#import:\n";
287 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
288 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
289 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
290 llvm::errs() << " " << NumElse << " #else/#elif/#elifdef/#elifndef.\n";
291 llvm::errs() << " " << NumEndif << " #endif.\n";
292 llvm::errs() << " " << NumPragma << " #pragma.\n";
293 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
294
295 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
296 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
297 << NumFastMacroExpanded << " on the fast path.\n";
298 llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
299 << " token paste (##) operations performed, "
300 << NumFastTokenPaste << " on the fast path.\n";
301
302 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
303
304 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
305 llvm::errs() << "\n Macro Expanded Tokens: "
306 << llvm::capacity_in_bytes(X: MacroExpandedTokens);
307 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
308 // FIXME: List information for all submodules.
309 llvm::errs() << "\n Macros: "
310 << llvm::capacity_in_bytes(X: CurSubmoduleState->Macros);
311 llvm::errs() << "\n #pragma push_macro Info: "
312 << llvm::capacity_in_bytes(X: PragmaPushMacroInfo);
313 llvm::errs() << "\n Poison Reasons: "
314 << llvm::capacity_in_bytes(X: PoisonReasons);
315 llvm::errs() << "\n Comment Handlers: "
316 << llvm::capacity_in_bytes(x: CommentHandlers) << "\n";
317}
318
319Preprocessor::macro_iterator
320Preprocessor::macro_begin(bool IncludeExternalMacros) const {
321 if (IncludeExternalMacros && ExternalSource &&
322 !ReadMacrosFromExternalSource) {
323 ReadMacrosFromExternalSource = true;
324 ExternalSource->ReadDefinedMacros();
325 }
326
327 // Make sure we cover all macros in visible modules.
328 for (const ModuleMacro &Macro : ModuleMacros)
329 CurSubmoduleState->Macros.try_emplace(Key: Macro.II);
330
331 return CurSubmoduleState->Macros.begin();
332}
333
334size_t Preprocessor::getTotalMemory() const {
335 return BP.getTotalMemory()
336 + llvm::capacity_in_bytes(X: MacroExpandedTokens)
337 + Predefines.capacity() /* Predefines buffer. */
338 // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
339 // and ModuleMacros.
340 + llvm::capacity_in_bytes(X: CurSubmoduleState->Macros)
341 + llvm::capacity_in_bytes(X: PragmaPushMacroInfo)
342 + llvm::capacity_in_bytes(X: PoisonReasons)
343 + llvm::capacity_in_bytes(x: CommentHandlers);
344}
345
346Preprocessor::macro_iterator
347Preprocessor::macro_end(bool IncludeExternalMacros) const {
348 if (IncludeExternalMacros && ExternalSource &&
349 !ReadMacrosFromExternalSource) {
350 ReadMacrosFromExternalSource = true;
351 ExternalSource->ReadDefinedMacros();
352 }
353
354 return CurSubmoduleState->Macros.end();
355}
356
357/// Compares macro tokens with a specified token value sequence.
358static bool MacroDefinitionEquals(const MacroInfo *MI,
359 ArrayRef<TokenValue> Tokens) {
360 return Tokens.size() == MI->getNumTokens() &&
361 std::equal(first1: Tokens.begin(), last1: Tokens.end(), first2: MI->tokens_begin());
362}
363
364StringRef Preprocessor::getLastMacroWithSpelling(
365 SourceLocation Loc,
366 ArrayRef<TokenValue> Tokens) const {
367 SourceLocation BestLocation;
368 StringRef BestSpelling;
369 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
370 I != E; ++I) {
371 const MacroDirective::DefInfo
372 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
373 if (!Def || !Def.getMacroInfo())
374 continue;
375 if (!Def.getMacroInfo()->isObjectLike())
376 continue;
377 if (!MacroDefinitionEquals(MI: Def.getMacroInfo(), Tokens))
378 continue;
379 SourceLocation Location = Def.getLocation();
380 // Choose the macro defined latest.
381 if (BestLocation.isInvalid() ||
382 (Location.isValid() &&
383 SourceMgr.isBeforeInTranslationUnit(LHS: BestLocation, RHS: Location))) {
384 BestLocation = Location;
385 BestSpelling = I->first->getName();
386 }
387 }
388 return BestSpelling;
389}
390
391void Preprocessor::recomputeCurLexerKind() {
392 if (CurLexer)
393 CurLexerCallback = CurLexer->isDependencyDirectivesLexer()
394 ? CLK_DependencyDirectivesLexer
395 : CLK_Lexer;
396 else if (CurTokenLexer)
397 CurLexerCallback = CLK_TokenLexer;
398 else
399 CurLexerCallback = CLK_CachingLexer;
400}
401
402bool Preprocessor::SetCodeCompletionPoint(FileEntryRef File,
403 unsigned CompleteLine,
404 unsigned CompleteColumn) {
405 assert(CompleteLine && CompleteColumn && "Starts from 1:1");
406 assert(!CodeCompletionFile && "Already set");
407
408 // Load the actual file's contents.
409 std::optional<llvm::MemoryBufferRef> Buffer =
410 SourceMgr.getMemoryBufferForFileOrNone(File);
411 if (!Buffer)
412 return true;
413
414 // Find the byte position of the truncation point.
415 const char *Position = Buffer->getBufferStart();
416 for (unsigned Line = 1; Line < CompleteLine; ++Line) {
417 for (; *Position; ++Position) {
418 if (*Position != '\r' && *Position != '\n')
419 continue;
420
421 // Eat \r\n or \n\r as a single line.
422 if ((Position[1] == '\r' || Position[1] == '\n') &&
423 Position[0] != Position[1])
424 ++Position;
425 ++Position;
426 break;
427 }
428 }
429
430 Position += CompleteColumn - 1;
431
432 // If pointing inside the preamble, adjust the position at the beginning of
433 // the file after the preamble.
434 if (SkipMainFilePreamble.first &&
435 SourceMgr.getFileEntryForID(FID: SourceMgr.getMainFileID()) == File) {
436 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
437 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
438 }
439
440 if (Position > Buffer->getBufferEnd())
441 Position = Buffer->getBufferEnd();
442
443 CodeCompletionFile = File;
444 CodeCompletionOffset = Position - Buffer->getBufferStart();
445
446 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
447 Size: Buffer->getBufferSize() + 1, BufferName: Buffer->getBufferIdentifier());
448 char *NewBuf = NewBuffer->getBufferStart();
449 char *NewPos = std::copy(first: Buffer->getBufferStart(), last: Position, result: NewBuf);
450 *NewPos = '\0';
451 std::copy(first: Position, last: Buffer->getBufferEnd(), result: NewPos+1);
452 SourceMgr.overrideFileContents(SourceFile: File, Buffer: std::move(NewBuffer));
453
454 return false;
455}
456
457void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir,
458 bool IsAngled) {
459 setCodeCompletionReached();
460 if (CodeComplete)
461 CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
462}
463
464void Preprocessor::CodeCompleteNaturalLanguage() {
465 setCodeCompletionReached();
466 if (CodeComplete)
467 CodeComplete->CodeCompleteNaturalLanguage();
468}
469
470/// getSpelling - This method is used to get the spelling of a token into a
471/// SmallVector. Note that the returned StringRef may not point to the
472/// supplied buffer if a copy can be avoided.
473StringRef Preprocessor::getSpelling(const Token &Tok,
474 SmallVectorImpl<char> &Buffer,
475 bool *Invalid) const {
476 // NOTE: this has to be checked *before* testing for an IdentifierInfo.
477 if (Tok.isNot(K: tok::raw_identifier) && !Tok.hasUCN()) {
478 // Try the fast path.
479 if (const IdentifierInfo *II = Tok.getIdentifierInfo())
480 return II->getName();
481 }
482
483 // Resize the buffer if we need to copy into it.
484 if (Tok.needsCleaning())
485 Buffer.resize(N: Tok.getLength());
486
487 const char *Ptr = Buffer.data();
488 unsigned Len = getSpelling(Tok, Buffer&: Ptr, Invalid);
489 return StringRef(Ptr, Len);
490}
491
492/// CreateString - Plop the specified string into a scratch buffer and return a
493/// location for it. If specified, the source location provides a source
494/// location for the token.
495void Preprocessor::CreateString(StringRef Str, Token &Tok,
496 SourceLocation ExpansionLocStart,
497 SourceLocation ExpansionLocEnd) {
498 Tok.setLength(Str.size());
499
500 const char *DestPtr;
501 SourceLocation Loc = ScratchBuf->getToken(Buf: Str.data(), Len: Str.size(), DestPtr);
502
503 if (ExpansionLocStart.isValid())
504 Loc = SourceMgr.createExpansionLoc(SpellingLoc: Loc, ExpansionLocStart,
505 ExpansionLocEnd, Length: Str.size());
506 Tok.setLocation(Loc);
507
508 // If this is a raw identifier or a literal token, set the pointer data.
509 if (Tok.is(K: tok::raw_identifier))
510 Tok.setRawIdentifierData(DestPtr);
511 else if (Tok.isLiteral())
512 Tok.setLiteralData(DestPtr);
513}
514
515SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
516 auto &SM = getSourceManager();
517 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
518 FileIDAndOffset LocInfo = SM.getDecomposedLoc(Loc: SpellingLoc);
519 bool Invalid = false;
520 StringRef Buffer = SM.getBufferData(FID: LocInfo.first, Invalid: &Invalid);
521 if (Invalid)
522 return SourceLocation();
523
524 // FIXME: We could consider re-using spelling for tokens we see repeatedly.
525 const char *DestPtr;
526 SourceLocation Spelling =
527 ScratchBuf->getToken(Buf: Buffer.data() + LocInfo.second, Len: Length, DestPtr);
528 return SM.createTokenSplitLoc(SpellingLoc: Spelling, TokenStart: Loc, TokenEnd: Loc.getLocWithOffset(Offset: Length));
529}
530
531Module *Preprocessor::getCurrentModule() {
532 if (!getLangOpts().isCompilingModule())
533 return nullptr;
534
535 return getHeaderSearchInfo().lookupModule(ModuleName: getLangOpts().CurrentModule);
536}
537
538Module *Preprocessor::getCurrentModuleImplementation() {
539 if (!getLangOpts().isCompilingModuleImplementation())
540 return nullptr;
541
542 return getHeaderSearchInfo().lookupModule(ModuleName: getLangOpts().ModuleName);
543}
544
545//===----------------------------------------------------------------------===//
546// Preprocessor Initialization Methods
547//===----------------------------------------------------------------------===//
548
549/// EnterMainSourceFile - Enter the specified FileID as the main source file,
550/// which implicitly adds the builtin defines etc.
551void Preprocessor::EnterMainSourceFile() {
552 // We do not allow the preprocessor to reenter the main file. Doing so will
553 // cause FileID's to accumulate information from both runs (e.g. #line
554 // information) and predefined macros aren't guaranteed to be set properly.
555 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
556 FileID MainFileID = SourceMgr.getMainFileID();
557
558 // If MainFileID is loaded it means we loaded an AST file, no need to enter
559 // a main file.
560 if (!SourceMgr.isLoadedFileID(FID: MainFileID)) {
561 // Enter the main file source buffer.
562 EnterSourceFile(FID: MainFileID, Dir: nullptr, Loc: SourceLocation());
563
564 // If we've been asked to skip bytes in the main file (e.g., as part of a
565 // precompiled preamble), do so now.
566 if (SkipMainFilePreamble.first > 0)
567 CurLexer->SetByteOffset(Offset: SkipMainFilePreamble.first,
568 StartOfLine: SkipMainFilePreamble.second);
569
570 // Tell the header info that the main file was entered. If the file is later
571 // #imported, it won't be re-entered.
572 if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(FID: MainFileID))
573 markIncluded(File: *FE);
574
575 // Record the first PP token in the main file. This is used to generate
576 // better diagnostics for C++ modules.
577 //
578 // // This is a comment.
579 // #define FOO int // note: add 'module;' to the start of the file
580 // ^ FirstPPToken // to introduce a global module fragment.
581 //
582 // export module M; // error: module declaration must occur
583 // // at the start of the translation unit.
584 if (getLangOpts().CPlusPlusModules) {
585 std::optional<StringRef> Input =
586 getSourceManager().getBufferDataOrNone(FID: MainFileID);
587 if (!isPreprocessedModuleFile() && Input)
588 MainFileIsPreprocessedModuleFile =
589 clang::isPreprocessedModuleFile(Source: *Input);
590 auto Tracer = std::make_unique<NoTrivialPPDirectiveTracer>(args&: *this);
591 DirTracer = Tracer.get();
592 addPPCallbacks(C: std::move(Tracer));
593 std::optional<Token> FirstPPTok = CurLexer->peekNextPPToken();
594 if (FirstPPTok)
595 FirstPPTokenLoc = FirstPPTok->getLocation();
596 }
597 }
598
599 // Preprocess Predefines to populate the initial preprocessor state.
600 std::unique_ptr<llvm::MemoryBuffer> SB =
601 llvm::MemoryBuffer::getMemBufferCopy(InputData: Predefines, BufferName: "<built-in>");
602 assert(SB && "Cannot create predefined source buffer");
603 FileID FID = SourceMgr.createFileID(Buffer: std::move(SB));
604 assert(FID.isValid() && "Could not create FileID for predefines?");
605 setPredefinesFileID(FID);
606
607 // Start parsing the predefines.
608 EnterSourceFile(FID, Dir: nullptr, Loc: SourceLocation());
609
610 if (!PPOpts.PCHThroughHeader.empty()) {
611 // Lookup and save the FileID for the through header. If it isn't found
612 // in the search path, it's a fatal error.
613 OptionalFileEntryRef File = LookupFile(
614 FilenameLoc: SourceLocation(), Filename: PPOpts.PCHThroughHeader,
615 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr,
616 /*CurDir=*/nullptr, /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
617 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
618 /*IsFrameworkFound=*/nullptr);
619 if (!File) {
620 Diag(Loc: SourceLocation(), DiagID: diag::err_pp_through_header_not_found)
621 << PPOpts.PCHThroughHeader;
622 return;
623 }
624 setPCHThroughHeaderFileID(
625 SourceMgr.createFileID(SourceFile: *File, IncludePos: SourceLocation(), FileCharacter: SrcMgr::C_User));
626 }
627
628 // Skip tokens from the Predefines and if needed the main file.
629 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
630 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
631 SkipTokensWhileUsingPCH();
632}
633
634void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
635 assert(PCHThroughHeaderFileID.isInvalid() &&
636 "PCHThroughHeaderFileID already set!");
637 PCHThroughHeaderFileID = FID;
638}
639
640bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) {
641 assert(PCHThroughHeaderFileID.isValid() &&
642 "Invalid PCH through header FileID");
643 return FE == SourceMgr.getFileEntryForID(FID: PCHThroughHeaderFileID);
644}
645
646bool Preprocessor::creatingPCHWithThroughHeader() {
647 return TUKind == TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
648 PCHThroughHeaderFileID.isValid();
649}
650
651bool Preprocessor::usingPCHWithThroughHeader() {
652 return TUKind != TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
653 PCHThroughHeaderFileID.isValid();
654}
655
656bool Preprocessor::creatingPCHWithPragmaHdrStop() {
657 return TUKind == TU_Prefix && PPOpts.PCHWithHdrStop;
658}
659
660bool Preprocessor::usingPCHWithPragmaHdrStop() {
661 return TUKind != TU_Prefix && PPOpts.PCHWithHdrStop;
662}
663
664/// Skip tokens until after the #include of the through header or
665/// until after a #pragma hdrstop is seen. Tokens in the predefines file
666/// and the main file may be skipped. If the end of the predefines file
667/// is reached, skipping continues into the main file. If the end of the
668/// main file is reached, it's a fatal error.
669void Preprocessor::SkipTokensWhileUsingPCH() {
670 bool ReachedMainFileEOF = false;
671 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
672 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
673 Token Tok;
674 while (true) {
675 bool InPredefines =
676 (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
677 CurLexerCallback(*this, Tok);
678 if (Tok.is(K: tok::eof) && !InPredefines) {
679 ReachedMainFileEOF = true;
680 break;
681 }
682 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
683 break;
684 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
685 break;
686 }
687 if (ReachedMainFileEOF) {
688 if (UsingPCHThroughHeader)
689 Diag(Loc: SourceLocation(), DiagID: diag::err_pp_through_header_not_seen)
690 << PPOpts.PCHThroughHeader << 1;
691 else if (!PPOpts.PCHWithHdrStopCreate)
692 Diag(Loc: SourceLocation(), DiagID: diag::err_pp_pragma_hdrstop_not_seen);
693 }
694}
695
696void Preprocessor::replayPreambleConditionalStack() {
697 // Restore the conditional stack from the preamble, if there is one.
698 if (PreambleConditionalStack.isReplaying()) {
699 assert(CurPPLexer &&
700 "CurPPLexer is null when calling replayPreambleConditionalStack.");
701 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
702 PreambleConditionalStack.doneReplaying();
703 if (PreambleConditionalStack.reachedEOFWhileSkipping())
704 SkipExcludedConditionalBlock(
705 HashTokenLoc: PreambleConditionalStack.SkipInfo->HashTokenLoc,
706 IfTokenLoc: PreambleConditionalStack.SkipInfo->IfTokenLoc,
707 FoundNonSkipPortion: PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
708 FoundElse: PreambleConditionalStack.SkipInfo->FoundElse,
709 ElseLoc: PreambleConditionalStack.SkipInfo->ElseLoc);
710 }
711}
712
713void Preprocessor::EndSourceFile() {
714 // Notify the client that we reached the end of the source file.
715 if (Callbacks)
716 Callbacks->EndOfMainFile();
717}
718
719//===----------------------------------------------------------------------===//
720// Lexer Event Handling.
721//===----------------------------------------------------------------------===//
722
723/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
724/// identifier information for the token and install it into the token,
725/// updating the token kind accordingly.
726IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
727 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
728
729 // Look up this token, see if it is a macro, or if it is a language keyword.
730 IdentifierInfo *II;
731 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
732 // No cleaning needed, just use the characters from the lexed buffer.
733 II = getIdentifierInfo(Name: Identifier.getRawIdentifier());
734 } else {
735 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
736 SmallString<64> IdentifierBuffer;
737 StringRef CleanedStr = getSpelling(Tok: Identifier, Buffer&: IdentifierBuffer);
738
739 if (Identifier.hasUCN()) {
740 SmallString<64> UCNIdentifierBuffer;
741 expandUCNs(Buf&: UCNIdentifierBuffer, Input: CleanedStr);
742 II = getIdentifierInfo(Name: UCNIdentifierBuffer);
743 } else {
744 II = getIdentifierInfo(Name: CleanedStr);
745 }
746 }
747
748 // Update the token info (identifier info and appropriate token kind).
749 // FIXME: the raw_identifier may contain leading whitespace which is removed
750 // from the cleaned identifier token. The SourceLocation should be updated to
751 // refer to the non-whitespace character. For instance, the text "\\\nB" (a
752 // line continuation before 'B') is parsed as a single tok::raw_identifier and
753 // is cleaned to tok::identifier "B". After cleaning the token's length is
754 // still 3 and the SourceLocation refers to the location of the backslash.
755 Identifier.setIdentifierInfo(II);
756 Identifier.setKind(II->getTokenID());
757
758 return II;
759}
760
761void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
762 PoisonReasons[II] = DiagID;
763}
764
765void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
766 assert(Ident__exception_code && Ident__exception_info);
767 assert(Ident___exception_code && Ident___exception_info);
768 Ident__exception_code->setIsPoisoned(Poison);
769 Ident___exception_code->setIsPoisoned(Poison);
770 Ident_GetExceptionCode->setIsPoisoned(Poison);
771 Ident__exception_info->setIsPoisoned(Poison);
772 Ident___exception_info->setIsPoisoned(Poison);
773 Ident_GetExceptionInfo->setIsPoisoned(Poison);
774 Ident__abnormal_termination->setIsPoisoned(Poison);
775 Ident___abnormal_termination->setIsPoisoned(Poison);
776 Ident_AbnormalTermination->setIsPoisoned(Poison);
777}
778
779void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
780 assert(Identifier.getIdentifierInfo() &&
781 "Can't handle identifiers without identifier info!");
782 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
783 PoisonReasons.find(Val: Identifier.getIdentifierInfo());
784 if(it == PoisonReasons.end())
785 Diag(Tok: Identifier, DiagID: diag::err_pp_used_poisoned_id);
786 else
787 Diag(Tok: Identifier,DiagID: it->second) << Identifier.getIdentifierInfo();
788}
789
790void Preprocessor::updateOutOfDateIdentifier(const IdentifierInfo &II) const {
791 assert(II.isOutOfDate() && "not out of date");
792 assert(getExternalSource() &&
793 "getExternalSource() should not return nullptr");
794 getExternalSource()->updateOutOfDateIdentifier(II);
795}
796
797/// HandleIdentifier - This callback is invoked when the lexer reads an
798/// identifier. This callback looks up the identifier in the map and/or
799/// potentially macro expands it or turns it into a named token (like 'for').
800///
801/// Note that callers of this method are guarded by checking the
802/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
803/// IdentifierInfo methods that compute these properties will need to change to
804/// match.
805bool Preprocessor::HandleIdentifier(Token &Identifier) {
806 assert(Identifier.getIdentifierInfo() &&
807 "Can't handle identifiers without identifier info!");
808
809 IdentifierInfo &II = *Identifier.getIdentifierInfo();
810
811 // If the information about this identifier is out of date, update it from
812 // the external source.
813 // We have to treat __VA_ARGS__ in a special way, since it gets
814 // serialized with isPoisoned = true, but our preprocessor may have
815 // unpoisoned it if we're defining a C99 macro.
816 if (II.isOutOfDate()) {
817 bool CurrentIsPoisoned = false;
818 const bool IsSpecialVariadicMacro =
819 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
820 if (IsSpecialVariadicMacro)
821 CurrentIsPoisoned = II.isPoisoned();
822
823 updateOutOfDateIdentifier(II);
824 Identifier.setKind(II.getTokenID());
825
826 if (IsSpecialVariadicMacro)
827 II.setIsPoisoned(CurrentIsPoisoned);
828 }
829
830 // If this identifier was poisoned, and if it was not produced from a macro
831 // expansion, emit an error.
832 if (II.isPoisoned() && CurPPLexer) {
833 HandlePoisonedIdentifier(Identifier);
834 }
835
836 // If this is a macro to be expanded, do it.
837 if (const MacroDefinition MD = getMacroDefinition(II: &II)) {
838 const auto *MI = MD.getMacroInfo();
839 assert(MI && "macro definition with no macro info?");
840 if (!DisableMacroExpansion) {
841 if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
842 // C99 6.10.3p10: If the preprocessing token immediately after the
843 // macro name isn't a '(', this macro should not be expanded.
844 if (!MI->isFunctionLike() || isNextPPTokenOneOf(Ks: tok::l_paren))
845 return HandleMacroExpandedIdentifier(Identifier, MD);
846 } else {
847 // C99 6.10.3.4p2 says that a disabled macro may never again be
848 // expanded, even if it's in a context where it could be expanded in the
849 // future.
850 Identifier.setFlag(Token::DisableExpand);
851 if (MI->isObjectLike() || isNextPPTokenOneOf(Ks: tok::l_paren))
852 Diag(Tok: Identifier, DiagID: diag::pp_disabled_macro_expansion);
853 }
854 }
855 }
856
857 // If this identifier is a keyword in a newer Standard or proposed Standard,
858 // produce a warning. Don't warn if we're not considering macro expansion,
859 // since this identifier might be the name of a macro.
860 // FIXME: This warning is disabled in cases where it shouldn't be, like
861 // "#define constexpr constexpr", "int constexpr;"
862 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
863 Diag(Tok: Identifier, DiagID: getIdentifierTable().getFutureCompatDiagKind(II, LangOpts: getLangOpts()))
864 << II.getName();
865 // Don't diagnose this keyword again in this translation unit.
866 II.setIsFutureCompatKeyword(false);
867 }
868
869 // If this identifier would be a keyword in C++, diagnose as a compatibility
870 // issue.
871 if (II.IsKeywordInCPlusPlus() && !DisableMacroExpansion)
872 Diag(Tok: Identifier, DiagID: diag::warn_pp_identifier_is_cpp_keyword) << &II;
873
874 // If this is an extension token, diagnose its use.
875 // We avoid diagnosing tokens that originate from macro definitions.
876 // FIXME: This warning is disabled in cases where it shouldn't be,
877 // like "#define TY typeof", "TY(1) x".
878 if (II.isExtensionToken() && !DisableMacroExpansion)
879 Diag(Tok: Identifier, DiagID: diag::ext_token_used);
880
881 // If this is the 'import' contextual keyword following an '@', note
882 // that the next token indicates a module name.
883 //
884 // Note that we do not treat 'import' as a contextual
885 // keyword when we're in a caching lexer, because caching lexers only get
886 // used in contexts where import declarations are disallowed.
887 //
888 // Likewise if this is the standard C++ import keyword.
889 if (((LastTokenWasAt && II.isImportKeyword()) ||
890 Identifier.is(K: tok::kw_import)) &&
891 !InMacroArgs &&
892 (!DisableMacroExpansion || MacroExpansionInDirectivesOverride) &&
893 CurLexerCallback != CLK_CachingLexer) {
894 ModuleImportLoc = Identifier.getLocation();
895 IsAtImport = true;
896 CurLexerCallback = CLK_LexAfterModuleImport;
897 }
898 return true;
899}
900
901void Preprocessor::Lex(Token &Result) {
902 ++LexLevel;
903
904 // We loop here until a lex function returns a token; this avoids recursion.
905 while (!CurLexerCallback(*this, Result))
906 ;
907
908 if (Result.is(K: tok::unknown) && TheModuleLoader.HadFatalFailure)
909 return;
910
911 if (Result.is(K: tok::code_completion) && Result.getIdentifierInfo()) {
912 // Remember the identifier before code completion token.
913 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
914 setCodeCompletionTokenRange(Start: Result.getLocation(), End: Result.getEndLoc());
915 // Set IdenfitierInfo to null to avoid confusing code that handles both
916 // identifiers and completion tokens.
917 Result.setIdentifierInfo(nullptr);
918 }
919
920 // Update StdCXXImportSeqState to track our position within a C++20 import-seq
921 // if this token is being produced as a result of phase 4 of translation.
922 // Update TrackGMFState to decide if we are currently in a Global Module
923 // Fragment. GMF state updates should precede StdCXXImportSeq ones, since GMF state
924 // depends on the prevailing StdCXXImportSeq state in two cases.
925 if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
926 !Result.getFlag(Flag: Token::IsReinjected)) {
927 switch (Result.getKind()) {
928 case tok::l_paren: case tok::l_square: case tok::l_brace:
929 StdCXXImportSeqState.handleOpenBracket();
930 break;
931 case tok::r_paren: case tok::r_square:
932 StdCXXImportSeqState.handleCloseBracket();
933 break;
934 case tok::r_brace:
935 StdCXXImportSeqState.handleCloseBrace();
936 break;
937#define PRAGMA_ANNOTATION(X) case tok::annot_##X:
938// For `#pragma ...` mimic ';'.
939#include "clang/Basic/TokenKinds.def"
940#undef PRAGMA_ANNOTATION
941 // This token is injected to represent the translation of '#include "a.h"'
942 // into "import a.h;". Mimic the notional ';'.
943 case tok::annot_module_include:
944 case tok::annot_repl_input_end:
945 case tok::semi:
946 TrackGMFState.handleSemi();
947 StdCXXImportSeqState.handleSemi();
948 ModuleDeclState.handleSemi();
949 break;
950 case tok::header_name:
951 case tok::annot_header_unit:
952 StdCXXImportSeqState.handleHeaderName();
953 break;
954 case tok::kw_export:
955 if (hasSeenNoTrivialPPDirective())
956 Result.setFlag(Token::HasSeenNoTrivialPPDirective);
957 TrackGMFState.handleExport();
958 StdCXXImportSeqState.handleExport();
959 ModuleDeclState.handleExport();
960 break;
961 case tok::colon:
962 ModuleDeclState.handleColon();
963 break;
964 case tok::kw_import:
965 if (StdCXXImportSeqState.atTopLevel()) {
966 TrackGMFState.handleImport(AfterTopLevelTokenSeq: StdCXXImportSeqState.afterTopLevelSeq());
967 StdCXXImportSeqState.handleImport();
968 }
969 break;
970 case tok::kw_module:
971 if (StdCXXImportSeqState.atTopLevel()) {
972 if (hasSeenNoTrivialPPDirective())
973 Result.setFlag(Token::HasSeenNoTrivialPPDirective);
974 TrackGMFState.handleModule(AfterTopLevelTokenSeq: StdCXXImportSeqState.afterTopLevelSeq());
975 ModuleDeclState.handleModule();
976 }
977 break;
978 case tok::annot_module_name:
979 ModuleDeclState.handleModuleName(
980 NameLoc: static_cast<ModuleNameLoc *>(Result.getAnnotationValue()));
981 if (ModuleDeclState.isModuleCandidate())
982 break;
983 [[fallthrough]];
984 default:
985 TrackGMFState.handleMisc();
986 StdCXXImportSeqState.handleMisc();
987 ModuleDeclState.handleMisc();
988 break;
989 }
990 }
991
992 if (CurLexer && ++CheckPointCounter == CheckPointStepSize) {
993 CheckPoints[CurLexer->getFileID()].push_back(Elt: CurLexer->BufferPtr);
994 CheckPointCounter = 0;
995 }
996
997 LastTokenWasAt = Result.is(K: tok::at);
998 if (Result.isNot(K: tok::kw_export))
999 LastTokenWasExportKeyword.reset();
1000
1001 --LexLevel;
1002
1003 // Destroy any lexers that were deferred while we were in nested Lex calls.
1004 // This must happen after decrementing LexLevel but before any other
1005 // processing that might re-enter Lex.
1006 if (LexLevel == 0 && !PendingDestroyLexers.empty())
1007 PendingDestroyLexers.clear();
1008
1009 if ((LexLevel == 0 || PreprocessToken) &&
1010 !Result.getFlag(Flag: Token::IsReinjected)) {
1011 if (LexLevel == 0)
1012 ++TokenCount;
1013 if (OnToken)
1014 OnToken(Result);
1015 }
1016}
1017
1018void Preprocessor::LexTokensUntilEOF(std::vector<Token> *Tokens) {
1019 while (1) {
1020 Token Tok;
1021 Lex(Result&: Tok);
1022 if (Tok.isOneOf(Ks: tok::unknown, Ks: tok::eof, Ks: tok::eod,
1023 Ks: tok::annot_repl_input_end))
1024 break;
1025 if (Tokens != nullptr)
1026 Tokens->push_back(x: Tok);
1027 }
1028}
1029
1030/// Lex a header-name token (including one formed from header-name-tokens if
1031/// \p AllowMacroExpansion is \c true).
1032///
1033/// \param FilenameTok Filled in with the next token. On success, this will
1034/// be either a header_name token. On failure, it will be whatever other
1035/// token was found instead.
1036/// \param AllowMacroExpansion If \c true, allow the header name to be formed
1037/// by macro expansion (concatenating tokens as necessary if the first
1038/// token is a '<').
1039/// \return \c true if we reached EOD or EOF while looking for a > token in
1040/// a concatenated header name and diagnosed it. \c false otherwise.
1041bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
1042 // Lex using header-name tokenization rules if tokens are being lexed from
1043 // a file. Just grab a token normally if we're in a macro expansion.
1044 if (CurPPLexer)
1045 CurPPLexer->LexIncludeFilename(FilenameTok);
1046 else
1047 Lex(Result&: FilenameTok);
1048
1049 // This could be a <foo/bar.h> file coming from a macro expansion. In this
1050 // case, glue the tokens together into an angle_string_literal token.
1051 SmallString<128> FilenameBuffer;
1052 if (FilenameTok.is(K: tok::less) && AllowMacroExpansion) {
1053 bool StartOfLine = FilenameTok.isAtStartOfLine();
1054 bool LeadingSpace = FilenameTok.hasLeadingSpace();
1055 bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
1056
1057 SourceLocation Start = FilenameTok.getLocation();
1058 SourceLocation End;
1059 FilenameBuffer.push_back(Elt: '<');
1060
1061 // Consume tokens until we find a '>'.
1062 // FIXME: A header-name could be formed starting or ending with an
1063 // alternative token. It's not clear whether that's ill-formed in all
1064 // cases.
1065 while (FilenameTok.isNot(K: tok::greater)) {
1066 Lex(Result&: FilenameTok);
1067 if (FilenameTok.isOneOf(Ks: tok::eod, Ks: tok::eof)) {
1068 Diag(Loc: FilenameTok.getLocation(), DiagID: diag::err_expected) << tok::greater;
1069 Diag(Loc: Start, DiagID: diag::note_matching) << tok::less;
1070 return true;
1071 }
1072
1073 End = FilenameTok.getLocation();
1074
1075 // FIXME: Provide code completion for #includes.
1076 if (FilenameTok.is(K: tok::code_completion)) {
1077 setCodeCompletionReached();
1078 Lex(Result&: FilenameTok);
1079 continue;
1080 }
1081
1082 // Append the spelling of this token to the buffer. If there was a space
1083 // before it, add it now.
1084 if (FilenameTok.hasLeadingSpace())
1085 FilenameBuffer.push_back(Elt: ' ');
1086
1087 // Get the spelling of the token, directly into FilenameBuffer if
1088 // possible.
1089 size_t PreAppendSize = FilenameBuffer.size();
1090 FilenameBuffer.resize(N: PreAppendSize + FilenameTok.getLength());
1091
1092 const char *BufPtr = &FilenameBuffer[PreAppendSize];
1093 unsigned ActualLen = getSpelling(Tok: FilenameTok, Buffer&: BufPtr);
1094
1095 // If the token was spelled somewhere else, copy it into FilenameBuffer.
1096 if (BufPtr != &FilenameBuffer[PreAppendSize])
1097 memcpy(dest: &FilenameBuffer[PreAppendSize], src: BufPtr, n: ActualLen);
1098
1099 // Resize FilenameBuffer to the correct size.
1100 if (FilenameTok.getLength() != ActualLen)
1101 FilenameBuffer.resize(N: PreAppendSize + ActualLen);
1102 }
1103
1104 FilenameTok.startToken();
1105 FilenameTok.setKind(tok::header_name);
1106 FilenameTok.setFlagValue(Flag: Token::StartOfLine, Val: StartOfLine);
1107 FilenameTok.setFlagValue(Flag: Token::LeadingSpace, Val: LeadingSpace);
1108 FilenameTok.setFlagValue(Flag: Token::LeadingEmptyMacro, Val: LeadingEmptyMacro);
1109 CreateString(Str: FilenameBuffer, Tok&: FilenameTok, ExpansionLocStart: Start, ExpansionLocEnd: End);
1110 } else if (FilenameTok.is(K: tok::string_literal) && AllowMacroExpansion) {
1111 // Convert a string-literal token of the form " h-char-sequence "
1112 // (produced by macro expansion) into a header-name token.
1113 //
1114 // The rules for header-names don't quite match the rules for
1115 // string-literals, but all the places where they differ result in
1116 // undefined behavior, so we can and do treat them the same.
1117 //
1118 // A string-literal with a prefix or suffix is not translated into a
1119 // header-name. This could theoretically be observable via the C++20
1120 // context-sensitive header-name formation rules.
1121 StringRef Str = getSpelling(Tok: FilenameTok, Buffer&: FilenameBuffer);
1122 if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
1123 FilenameTok.setKind(tok::header_name);
1124 }
1125
1126 return false;
1127}
1128
1129std::optional<Token> Preprocessor::peekNextPPToken() const {
1130 // Do some quick tests for rejection cases.
1131 std::optional<Token> Val;
1132 if (CurLexer)
1133 Val = CurLexer->peekNextPPToken();
1134 else
1135 Val = CurTokenLexer->peekNextPPToken();
1136
1137 if (!Val) {
1138 // We have run off the end. If it's a source file we don't
1139 // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
1140 // macro stack.
1141 if (CurPPLexer)
1142 return std::nullopt;
1143 for (const IncludeStackInfo &Entry : llvm::reverse(C: IncludeMacroStack)) {
1144 if (Entry.TheLexer)
1145 Val = Entry.TheLexer->peekNextPPToken();
1146 else
1147 Val = Entry.TheTokenLexer->peekNextPPToken();
1148
1149 if (Val)
1150 break;
1151
1152 // Ran off the end of a source file?
1153 if (Entry.ThePPLexer)
1154 return std::nullopt;
1155 }
1156 }
1157
1158 // Okay, we found the token and return. Otherwise we found the end of the
1159 // translation unit.
1160 return Val;
1161}
1162
1163// We represent the primary and partition names as 'Paths' which are sections
1164// of the hierarchical access path for a clang module. However for C++20
1165// the periods in a name are just another character, and we will need to
1166// flatten them into a string.
1167std::string ModuleLoader::getFlatNameFromPath(ModuleIdPath Path) {
1168 std::string Name;
1169 if (Path.empty())
1170 return Name;
1171
1172 for (auto &Piece : Path) {
1173 assert(Piece.getIdentifierInfo() && Piece.getLoc().isValid());
1174 if (!Name.empty())
1175 Name += ".";
1176 Name += Piece.getIdentifierInfo()->getName();
1177 }
1178 return Name;
1179}
1180
1181ModuleNameLoc *ModuleNameLoc::Create(Preprocessor &PP, ModuleIdPath Path) {
1182 assert(!Path.empty() && "expect at least one identifier in a module name");
1183 void *Mem = PP.getPreprocessorAllocator().Allocate(
1184 Size: totalSizeToAlloc<IdentifierLoc>(Counts: Path.size()), Alignment: alignof(ModuleNameLoc));
1185 return new (Mem) ModuleNameLoc(Path);
1186}
1187
1188bool Preprocessor::LexModuleNameContinue(Token &Tok, SourceLocation UseLoc,
1189 SmallVectorImpl<Token> &Suffix,
1190 SmallVectorImpl<IdentifierLoc> &Path,
1191 bool AllowMacroExpansion,
1192 bool IsPartition) {
1193 auto ConsumeToken = [&]() {
1194 if (AllowMacroExpansion)
1195 Lex(Result&: Tok);
1196 else
1197 LexUnexpandedToken(Result&: Tok);
1198 Suffix.push_back(Elt: Tok);
1199 };
1200
1201 while (true) {
1202 if (Tok.isNot(K: tok::identifier)) {
1203 if (Tok.is(K: tok::code_completion)) {
1204 CurLexer->cutOffLexing();
1205 CodeComplete->CodeCompleteModuleImport(ImportLoc: UseLoc, Path);
1206 return true;
1207 }
1208
1209 Diag(Tok, DiagID: diag::err_pp_module_expected_ident) << Path.empty();
1210 return true;
1211 }
1212
1213 // [cpp.pre]/p2:
1214 // No identifier in the pp-module-name or pp-module-partition shall
1215 // currently be defined as an object-like macro.
1216 if (MacroInfo *MI = getMacroInfo(II: Tok.getIdentifierInfo());
1217 MI && MI->isObjectLike() && getLangOpts().CPlusPlus20 &&
1218 !AllowMacroExpansion) {
1219 Diag(Tok, DiagID: diag::err_pp_module_name_is_macro)
1220 << IsPartition << Tok.getIdentifierInfo();
1221 Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::note_macro_here)
1222 << Tok.getIdentifierInfo();
1223 }
1224
1225 // Record this part of the module path.
1226 Path.emplace_back(Args: Tok.getLocation(), Args: Tok.getIdentifierInfo());
1227 ConsumeToken();
1228
1229 if (Tok.isNot(K: tok::period))
1230 return false;
1231
1232 ConsumeToken();
1233 }
1234}
1235
1236/// [cpp.pre]/p2:
1237/// A preprocessing directive consists of a sequence of preprocessing tokens
1238/// that satisfies the following constraints: At the start of translation phase
1239/// 4, the first preprocessing token in the sequence, referred to as a
1240/// directive-introducing token, begins with the first character in the source
1241/// file (optionally after whitespace containing no new-line characters) or
1242/// follows whitespace containing at least one new-line character, and is:
1243/// - a # preprocessing token, or
1244/// - an import preprocessing token immediately followed on the same logical
1245/// source line by a header-name, <, identifier, or : preprocessing token, or
1246/// - a module preprocessing token immediately followed on the same logical
1247/// source line by an identifier, :, or ; preprocessing token, or
1248/// - an export preprocessing token immediately followed on the same logical
1249/// source line by one of the two preceding forms.
1250///
1251///
1252/// At the start of phase 4 an import or module token is treated as starting a
1253/// directive and are converted to their respective keywords iff:
1254/// - After skipping horizontal whitespace are
1255/// - at the start of a logical line, or
1256/// - preceded by an 'export' at the start of the logical line.
1257/// - Are followed by an identifier pp token (before macro expansion), or
1258/// - <, ", or : (but not ::) pp tokens for 'import', or
1259/// - ; for 'module'
1260/// Otherwise the token is treated as an identifier.
1261bool Preprocessor::HandleModuleContextualKeyword(
1262 Token &Result, bool TokAtPhysicalStartOfLine) {
1263 if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword())
1264 return false;
1265
1266 if (Result.is(K: tok::kw_export)) {
1267 LastTokenWasExportKeyword = {Result, TokAtPhysicalStartOfLine};
1268 return false;
1269 }
1270
1271 /// Trait 'module' and 'import' as a identifier when the main file is a
1272 /// preprocessed module file. We only allow '__preprocessed_module' and
1273 /// '__preprocessed_import' in this context.
1274 IdentifierInfo *II = Result.getIdentifierInfo();
1275 if (isPreprocessedModuleFile() &&
1276 (II->isStr(Str: tok::getKeywordSpelling(Kind: tok::kw_import)) ||
1277 II->isStr(Str: tok::getKeywordSpelling(Kind: tok::kw_module))))
1278 return false;
1279
1280 if (LastTokenWasExportKeyword.isValid()) {
1281 // The export keyword was not at the start of line, it's not a
1282 // directive-introducing token.
1283 if (!LastTokenWasExportKeyword.isAtPhysicalStartOfLine())
1284 return false;
1285 // [cpp.pre]/1.4
1286 // export // not a preprocessing directive
1287 // import foo; // preprocessing directive (ill-formed at phase7)
1288 if (TokAtPhysicalStartOfLine)
1289 return false;
1290 } else if (!TokAtPhysicalStartOfLine)
1291 return false;
1292
1293 llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
1294 CurPPLexer->ParsingPreprocessorDirective, true);
1295
1296 // The next token may be an angled string literal after import keyword.
1297 llvm::SaveAndRestore<bool> SavedParsingFilemame(
1298 CurPPLexer->ParsingFilename,
1299 Result.getIdentifierInfo()->isImportKeyword());
1300
1301 std::optional<Token> NextTok =
1302 CurLexer ? CurLexer->peekNextPPToken() : CurTokenLexer->peekNextPPToken();
1303 if (!NextTok)
1304 return false;
1305
1306 if (NextTok->is(K: tok::raw_identifier))
1307 LookUpIdentifierInfo(Identifier&: *NextTok);
1308
1309 if (Result.getIdentifierInfo()->isImportKeyword()) {
1310 if (NextTok->isOneOf(Ks: tok::identifier, Ks: tok::less, Ks: tok::colon,
1311 Ks: tok::header_name)) {
1312 Result.setKind(tok::kw_import);
1313 ModuleImportLoc = Result.getLocation();
1314 IsAtImport = false;
1315 return true;
1316 }
1317 }
1318
1319 if (Result.getIdentifierInfo()->isModuleKeyword() &&
1320 NextTok->isOneOf(Ks: tok::identifier, Ks: tok::colon, Ks: tok::semi)) {
1321 Result.setKind(tok::kw_module);
1322 ModuleDeclLoc = Result.getLocation();
1323 return true;
1324 }
1325
1326 // Ok, it's an identifier.
1327 return false;
1328}
1329
1330bool Preprocessor::CollectPPImportSuffixAndEnterStream(
1331 SmallVectorImpl<Token> &Toks, bool StopUntilEOD) {
1332 CollectPPImportSuffix(Toks);
1333 EnterModuleSuffixTokenStream(Toks);
1334 return false;
1335}
1336
1337/// Collect the tokens of a C++20 pp-import-suffix.
1338void Preprocessor::CollectPPImportSuffix(SmallVectorImpl<Token> &Toks,
1339 bool StopUntilEOD) {
1340 while (true) {
1341 Toks.emplace_back();
1342 Lex(Result&: Toks.back());
1343
1344 switch (Toks.back().getKind()) {
1345 case tok::semi:
1346 if (!StopUntilEOD)
1347 return;
1348 [[fallthrough]];
1349 case tok::eod:
1350 case tok::eof:
1351 return;
1352 default:
1353 break;
1354 }
1355 }
1356}
1357
1358// Allocate a holding buffer for a sequence of tokens and introduce it into
1359// the token stream.
1360void Preprocessor::EnterModuleSuffixTokenStream(ArrayRef<Token> Toks) {
1361 if (Toks.empty())
1362 return;
1363 auto ToksCopy = std::make_unique<Token[]>(num: Toks.size());
1364 std::copy(first: Toks.begin(), last: Toks.end(), result: ToksCopy.get());
1365 EnterTokenStream(Toks: std::move(ToksCopy), NumToks: Toks.size(),
1366 /*DisableMacroExpansion*/ false, /*IsReinject*/ false);
1367 assert(CurTokenLexer && "Must have a TokenLexer");
1368 CurTokenLexer->setLexingCXXModuleDirective();
1369}
1370
1371/// Lex a token following the 'import' contextual keyword.
1372///
1373/// pp-import: [C++20]
1374/// import header-name pp-import-suffix[opt] ;
1375/// import header-name-tokens pp-import-suffix[opt] ;
1376/// [ObjC] @ import module-name ;
1377/// [Clang] import module-name ;
1378///
1379/// header-name-tokens:
1380/// string-literal
1381/// < [any sequence of preprocessing-tokens other than >] >
1382///
1383/// module-name:
1384/// module-name-qualifier[opt] identifier
1385///
1386/// module-name-qualifier
1387/// module-name-qualifier[opt] identifier .
1388///
1389/// We respond to a pp-import by importing macros from the named module.
1390bool Preprocessor::LexAfterModuleImport(Token &Result) {
1391 // Figure out what kind of lexer we actually have.
1392 recomputeCurLexerKind();
1393
1394 SmallVector<Token, 32> Suffix;
1395 SmallVector<IdentifierLoc, 3> Path;
1396 Lex(Result);
1397 if (LexModuleNameContinue(Tok&: Result, UseLoc: ModuleImportLoc, Suffix, Path))
1398 return CollectPPImportSuffixAndEnterStream(Toks&: Suffix);
1399
1400 ModuleNameLoc *NameLoc = ModuleNameLoc::Create(PP&: *this, Path);
1401 Suffix.clear();
1402 Suffix.emplace_back();
1403 Suffix.back().setKind(tok::annot_module_name);
1404 Suffix.back().setAnnotationRange(NameLoc->getRange());
1405 Suffix.back().setAnnotationValue(static_cast<void *>(NameLoc));
1406 Suffix.push_back(Elt: Result);
1407
1408 // Consume the pp-import-suffix and expand any macros in it now, if we're not
1409 // at the semicolon already.
1410 SourceLocation SemiLoc = Result.getLocation();
1411 if (Suffix.back().isNot(K: tok::semi)) {
1412 if (Suffix.back().isNot(K: tok::eof))
1413 CollectPPImportSuffix(Toks&: Suffix);
1414 if (Suffix.back().isNot(K: tok::semi)) {
1415 // This is not an import after all.
1416 EnterModuleSuffixTokenStream(Toks: Suffix);
1417 return false;
1418 }
1419 SemiLoc = Suffix.back().getLocation();
1420 }
1421
1422 Module *Imported = nullptr;
1423 if (getLangOpts().Modules) {
1424 Imported = TheModuleLoader.loadModule(ImportLoc: ModuleImportLoc, Path, Visibility: Module::Hidden,
1425 /*IsInclusionDirective=*/false);
1426 if (Imported)
1427 makeModuleVisible(M: Imported, Loc: SemiLoc);
1428 }
1429
1430 if (Callbacks)
1431 Callbacks->moduleImport(ImportLoc: ModuleImportLoc, Path, Imported);
1432
1433 if (!Suffix.empty()) {
1434 EnterModuleSuffixTokenStream(Toks: Suffix);
1435 return false;
1436 }
1437 return true;
1438}
1439
1440void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc,
1441 bool IncludeExports) {
1442 CurSubmoduleState->VisibleModules.setVisible(
1443 M, Loc, IncludeExports, Vis: [](Module *) {},
1444 Cb: [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
1445 // FIXME: Include the path in the diagnostic.
1446 // FIXME: Include the import location for the conflicting module.
1447 Diag(Loc: ModuleImportLoc, DiagID: diag::warn_module_conflict)
1448 << Path[0]->getFullModuleName()
1449 << Conflict->getFullModuleName()
1450 << Message;
1451 });
1452
1453 // Add this module to the imports list of the currently-built submodule.
1454 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
1455 BuildingSubmoduleStack.back().M->Imports.insert(X: M);
1456}
1457
1458bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
1459 const char *DiagnosticTag,
1460 bool AllowMacroExpansion) {
1461 // We need at least one string literal.
1462 if (Result.isNot(K: tok::string_literal)) {
1463 Diag(Tok: Result, DiagID: diag::err_expected_string_literal)
1464 << /*Source='in...'*/0 << DiagnosticTag;
1465 return false;
1466 }
1467
1468 // Lex string literal tokens, optionally with macro expansion.
1469 SmallVector<Token, 4> StrToks;
1470 do {
1471 StrToks.push_back(Elt: Result);
1472
1473 if (Result.hasUDSuffix())
1474 Diag(Tok: Result, DiagID: diag::err_invalid_string_udl);
1475
1476 if (AllowMacroExpansion)
1477 Lex(Result);
1478 else
1479 LexUnexpandedToken(Result);
1480 } while (Result.is(K: tok::string_literal));
1481
1482 // Concatenate and parse the strings.
1483 StringLiteralParser Literal(StrToks, *this);
1484 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1485
1486 if (Literal.hadError)
1487 return false;
1488
1489 if (Literal.Pascal) {
1490 Diag(Loc: StrToks[0].getLocation(), DiagID: diag::err_expected_string_literal)
1491 << /*Source='in...'*/0 << DiagnosticTag;
1492 return false;
1493 }
1494
1495 String = std::string(Literal.GetString());
1496 return true;
1497}
1498
1499bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
1500 assert(Tok.is(tok::numeric_constant));
1501 SmallString<8> IntegerBuffer;
1502 bool NumberInvalid = false;
1503 StringRef Spelling = getSpelling(Tok, Buffer&: IntegerBuffer, Invalid: &NumberInvalid);
1504 if (NumberInvalid)
1505 return false;
1506 NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
1507 getLangOpts(), getTargetInfo(),
1508 getDiagnostics());
1509 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
1510 return false;
1511 llvm::APInt APVal(64, 0);
1512 if (Literal.GetIntegerValue(Val&: APVal))
1513 return false;
1514 Lex(Result&: Tok);
1515 Value = APVal.getLimitedValue();
1516 return true;
1517}
1518
1519void Preprocessor::addCommentHandler(CommentHandler *Handler) {
1520 assert(Handler && "NULL comment handler");
1521 assert(!llvm::is_contained(CommentHandlers, Handler) &&
1522 "Comment handler already registered");
1523 CommentHandlers.push_back(x: Handler);
1524}
1525
1526void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
1527 std::vector<CommentHandler *>::iterator Pos =
1528 llvm::find(Range&: CommentHandlers, Val: Handler);
1529 assert(Pos != CommentHandlers.end() && "Comment handler not registered");
1530 CommentHandlers.erase(position: Pos);
1531}
1532
1533bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
1534 bool AnyPendingTokens = false;
1535 for (CommentHandler *H : CommentHandlers) {
1536 if (H->HandleComment(PP&: *this, Comment))
1537 AnyPendingTokens = true;
1538 }
1539 if (!AnyPendingTokens || getCommentRetentionState())
1540 return false;
1541 Lex(Result&: result);
1542 return true;
1543}
1544
1545void Preprocessor::emitMacroDeprecationWarning(const Token &Identifier) const {
1546 const MacroAnnotations &A =
1547 getMacroAnnotations(II: Identifier.getIdentifierInfo());
1548 assert(A.DeprecationInfo &&
1549 "Macro deprecation warning without recorded annotation!");
1550 const MacroAnnotationInfo &Info = *A.DeprecationInfo;
1551 if (Info.Message.empty())
1552 Diag(Tok: Identifier, DiagID: diag::warn_pragma_deprecated_macro_use)
1553 << Identifier.getIdentifierInfo() << 0;
1554 else
1555 Diag(Tok: Identifier, DiagID: diag::warn_pragma_deprecated_macro_use)
1556 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1557 Diag(Loc: Info.Location, DiagID: diag::note_pp_macro_annotation) << 0;
1558}
1559
1560void Preprocessor::emitRestrictExpansionWarning(const Token &Identifier) const {
1561 const MacroAnnotations &A =
1562 getMacroAnnotations(II: Identifier.getIdentifierInfo());
1563 assert(A.RestrictExpansionInfo &&
1564 "Macro restricted expansion warning without recorded annotation!");
1565 const MacroAnnotationInfo &Info = *A.RestrictExpansionInfo;
1566 if (Info.Message.empty())
1567 Diag(Tok: Identifier, DiagID: diag::warn_pragma_restrict_expansion_macro_use)
1568 << Identifier.getIdentifierInfo() << 0;
1569 else
1570 Diag(Tok: Identifier, DiagID: diag::warn_pragma_restrict_expansion_macro_use)
1571 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1572 Diag(Loc: Info.Location, DiagID: diag::note_pp_macro_annotation) << 1;
1573}
1574
1575void Preprocessor::emitRestrictInfNaNWarning(const Token &Identifier,
1576 unsigned DiagSelection) const {
1577 Diag(Tok: Identifier, DiagID: diag::warn_fp_nan_inf_when_disabled) << DiagSelection << 1;
1578}
1579
1580void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
1581 bool IsUndef) const {
1582 const MacroAnnotations &A =
1583 getMacroAnnotations(II: Identifier.getIdentifierInfo());
1584 assert(A.FinalAnnotationLoc &&
1585 "Final macro warning without recorded annotation!");
1586
1587 Diag(Tok: Identifier, DiagID: diag::warn_pragma_final_macro)
1588 << Identifier.getIdentifierInfo() << (IsUndef ? 0 : 1);
1589 Diag(Loc: *A.FinalAnnotationLoc, DiagID: diag::note_pp_macro_annotation) << 2;
1590}
1591
1592bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
1593 const SourceLocation &Loc) const {
1594 // The lambda that tests if a `Loc` is in an opt-out region given one opt-out
1595 // region map:
1596 auto TestInMap = [&SourceMgr](const SafeBufferOptOutRegionsTy &Map,
1597 const SourceLocation &Loc) -> bool {
1598 // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
1599 auto FirstRegionEndingAfterLoc = llvm::partition_point(
1600 Range: Map, P: [&SourceMgr,
1601 &Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
1602 return SourceMgr.isBeforeInTranslationUnit(LHS: Region.second, RHS: Loc);
1603 });
1604
1605 if (FirstRegionEndingAfterLoc != Map.end()) {
1606 // To test if the start location of the found region precedes `Loc`:
1607 return SourceMgr.isBeforeInTranslationUnit(
1608 LHS: FirstRegionEndingAfterLoc->first, RHS: Loc);
1609 }
1610 // If we do not find a region whose end location passes `Loc`, we want to
1611 // check if the current region is still open:
1612 if (!Map.empty() && Map.back().first == Map.back().second)
1613 return SourceMgr.isBeforeInTranslationUnit(LHS: Map.back().first, RHS: Loc);
1614 return false;
1615 };
1616
1617 // What the following does:
1618 //
1619 // If `Loc` belongs to the local TU, we just look up `SafeBufferOptOutMap`.
1620 // Otherwise, `Loc` is from a loaded AST. We look up the
1621 // `LoadedSafeBufferOptOutMap` first to get the opt-out region map of the
1622 // loaded AST where `Loc` is at. Then we find if `Loc` is in an opt-out
1623 // region w.r.t. the region map. If the region map is absent, it means there
1624 // is no opt-out pragma in that loaded AST.
1625 //
1626 // Opt-out pragmas in the local TU or a loaded AST is not visible to another
1627 // one of them. That means if you put the pragmas around a `#include
1628 // "module.h"`, where module.h is a module, it is not actually suppressing
1629 // warnings in module.h. This is fine because warnings in module.h will be
1630 // reported when module.h is compiled in isolation and nothing in module.h
1631 // will be analyzed ever again. So you will not see warnings from the file
1632 // that imports module.h anyway. And you can't even do the same thing for PCHs
1633 // because they can only be included from the command line.
1634
1635 if (SourceMgr.isLocalSourceLocation(Loc))
1636 return TestInMap(SafeBufferOptOutMap, Loc);
1637
1638 const SafeBufferOptOutRegionsTy *LoadedRegions =
1639 LoadedSafeBufferOptOutMap.lookupLoadedOptOutMap(Loc, SrcMgr: SourceMgr);
1640
1641 if (LoadedRegions)
1642 return TestInMap(*LoadedRegions, Loc);
1643 return false;
1644}
1645
1646bool Preprocessor::enterOrExitSafeBufferOptOutRegion(
1647 bool isEnter, const SourceLocation &Loc) {
1648 if (isEnter) {
1649 if (isPPInSafeBufferOptOutRegion())
1650 return true; // invalid enter action
1651 InSafeBufferOptOutRegion = true;
1652 CurrentSafeBufferOptOutStart = Loc;
1653
1654 // To set the start location of a new region:
1655
1656 if (!SafeBufferOptOutMap.empty()) {
1657 [[maybe_unused]] auto *PrevRegion = &SafeBufferOptOutMap.back();
1658 assert(PrevRegion->first != PrevRegion->second &&
1659 "Shall not begin a safe buffer opt-out region before closing the "
1660 "previous one.");
1661 }
1662 // If the start location equals to the end location, we call the region a
1663 // open region or a unclosed region (i.e., end location has not been set
1664 // yet).
1665 SafeBufferOptOutMap.emplace_back(Args: Loc, Args: Loc);
1666 } else {
1667 if (!isPPInSafeBufferOptOutRegion())
1668 return true; // invalid enter action
1669 InSafeBufferOptOutRegion = false;
1670
1671 // To set the end location of the current open region:
1672
1673 assert(!SafeBufferOptOutMap.empty() &&
1674 "Misordered safe buffer opt-out regions");
1675 auto *CurrRegion = &SafeBufferOptOutMap.back();
1676 assert(CurrRegion->first == CurrRegion->second &&
1677 "Set end location to a closed safe buffer opt-out region");
1678 CurrRegion->second = Loc;
1679 }
1680 return false;
1681}
1682
1683bool Preprocessor::isPPInSafeBufferOptOutRegion() {
1684 return InSafeBufferOptOutRegion;
1685}
1686bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) {
1687 StartLoc = CurrentSafeBufferOptOutStart;
1688 return InSafeBufferOptOutRegion;
1689}
1690
1691SmallVector<SourceLocation, 64>
1692Preprocessor::serializeSafeBufferOptOutMap() const {
1693 assert(!InSafeBufferOptOutRegion &&
1694 "Attempt to serialize safe buffer opt-out regions before file being "
1695 "completely preprocessed");
1696
1697 SmallVector<SourceLocation, 64> SrcSeq;
1698
1699 for (const auto &[begin, end] : SafeBufferOptOutMap) {
1700 SrcSeq.push_back(Elt: begin);
1701 SrcSeq.push_back(Elt: end);
1702 }
1703 // Only `SafeBufferOptOutMap` gets serialized. No need to serialize
1704 // `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
1705 // pch/module in the pch-chain/module-DAG will be loaded one by one in order.
1706 // It means that for each loading pch/module m, it just needs to load m's own
1707 // `SafeBufferOptOutMap`.
1708 return SrcSeq;
1709}
1710
1711bool Preprocessor::setDeserializedSafeBufferOptOutMap(
1712 const SmallVectorImpl<SourceLocation> &SourceLocations) {
1713 if (SourceLocations.size() == 0)
1714 return false;
1715
1716 assert(SourceLocations.size() % 2 == 0 &&
1717 "ill-formed SourceLocation sequence");
1718
1719 auto It = SourceLocations.begin();
1720 SafeBufferOptOutRegionsTy &Regions =
1721 LoadedSafeBufferOptOutMap.findAndConsLoadedOptOutMap(Loc: *It, SrcMgr&: SourceMgr);
1722
1723 do {
1724 SourceLocation Begin = *It++;
1725 SourceLocation End = *It++;
1726
1727 Regions.emplace_back(Args&: Begin, Args&: End);
1728 } while (It != SourceLocations.end());
1729 return true;
1730}
1731
1732ModuleLoader::~ModuleLoader() = default;
1733
1734CommentHandler::~CommentHandler() = default;
1735
1736EmptylineHandler::~EmptylineHandler() = default;
1737
1738CodeCompletionHandler::~CodeCompletionHandler() = default;
1739
1740void Preprocessor::createPreprocessingRecord() {
1741 if (Record)
1742 return;
1743
1744 Record = new PreprocessingRecord(getSourceManager());
1745 addPPCallbacks(C: std::unique_ptr<PPCallbacks>(Record));
1746}
1747
1748const char *Preprocessor::getCheckPoint(FileID FID, const char *Start) const {
1749 if (auto It = CheckPoints.find(Val: FID); It != CheckPoints.end()) {
1750 const SmallVector<const char *> &FileCheckPoints = It->second;
1751 const char *Last = nullptr;
1752 // FIXME: Do better than a linear search.
1753 for (const char *P : FileCheckPoints) {
1754 if (P > Start)
1755 break;
1756 Last = P;
1757 }
1758 return Last;
1759 }
1760
1761 return nullptr;
1762}
1763
1764bool Preprocessor::hasSeenNoTrivialPPDirective() const {
1765 return DirTracer && DirTracer->hasSeenNoTrivialPPDirective();
1766}
1767
1768bool NoTrivialPPDirectiveTracer::hasSeenNoTrivialPPDirective() const {
1769 return SeenNoTrivialPPDirective;
1770}
1771
1772void NoTrivialPPDirectiveTracer::setSeenNoTrivialPPDirective() {
1773 if (InMainFile && !SeenNoTrivialPPDirective)
1774 SeenNoTrivialPPDirective = true;
1775}
1776
1777void NoTrivialPPDirectiveTracer::LexedFileChanged(
1778 FileID FID, LexedFileChangeReason Reason,
1779 SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) {
1780 InMainFile = (FID == PP.getSourceManager().getMainFileID());
1781}
1782
1783void NoTrivialPPDirectiveTracer::MacroExpands(const Token &MacroNameTok,
1784 const MacroDefinition &MD,
1785 SourceRange Range,
1786 const MacroArgs *Args) {
1787 // FIXME: Does only enable builtin macro expansion make sense?
1788 if (!MD.getMacroInfo()->isBuiltinMacro())
1789 setSeenNoTrivialPPDirective();
1790}
1791