1//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Preprocessor interface.
10//
11//===----------------------------------------------------------------------===//
12//
13// Options to support:
14// -H - Print the name of each header file used.
15// -d[DNI] - Dump various things.
16// -fworking-directory - #line's with preprocessor's working dir.
17// -fpreprocessed
18// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
19// -W*
20// -w
21//
22// Messages to emit:
23// "Multiple include guards may be useful for:\n"
24//
25//===----------------------------------------------------------------------===//
26
27#include "clang/Lex/Preprocessor.h"
28#include "clang/Basic/Builtins.h"
29#include "clang/Basic/FileManager.h"
30#include "clang/Basic/IdentifierTable.h"
31#include "clang/Basic/LLVM.h"
32#include "clang/Basic/LangOptions.h"
33#include "clang/Basic/Module.h"
34#include "clang/Basic/SourceLocation.h"
35#include "clang/Basic/SourceManager.h"
36#include "clang/Basic/TargetInfo.h"
37#include "clang/Lex/CodeCompletionHandler.h"
38#include "clang/Lex/DependencyDirectivesScanner.h"
39#include "clang/Lex/ExternalPreprocessorSource.h"
40#include "clang/Lex/HeaderSearch.h"
41#include "clang/Lex/LexDiagnostic.h"
42#include "clang/Lex/Lexer.h"
43#include "clang/Lex/LiteralSupport.h"
44#include "clang/Lex/MacroArgs.h"
45#include "clang/Lex/MacroInfo.h"
46#include "clang/Lex/ModuleLoader.h"
47#include "clang/Lex/NoTrivialPPDirectiveTracer.h"
48#include "clang/Lex/Pragma.h"
49#include "clang/Lex/PreprocessingRecord.h"
50#include "clang/Lex/PreprocessorLexer.h"
51#include "clang/Lex/PreprocessorOptions.h"
52#include "clang/Lex/ScratchBuffer.h"
53#include "clang/Lex/Token.h"
54#include "clang/Lex/TokenLexer.h"
55#include "llvm/ADT/APInt.h"
56#include "llvm/ADT/ArrayRef.h"
57#include "llvm/ADT/DenseMap.h"
58#include "llvm/ADT/STLExtras.h"
59#include "llvm/ADT/ScopeExit.h"
60#include "llvm/ADT/SmallVector.h"
61#include "llvm/ADT/StringRef.h"
62#include "llvm/Support/Capacity.h"
63#include "llvm/Support/ErrorHandling.h"
64#include "llvm/Support/MemoryBuffer.h"
65#include "llvm/Support/MemoryBufferRef.h"
66#include "llvm/Support/SaveAndRestore.h"
67#include "llvm/Support/raw_ostream.h"
68#include <algorithm>
69#include <cassert>
70#include <memory>
71#include <optional>
72#include <string>
73#include <utility>
74#include <vector>
75
76using namespace clang;
77
/// Minimum distance between two check points, in tokens.
static constexpr unsigned CheckPointStepSize = 1024;

// Provide the definition of the pragma handler registry in this translation
// unit.
LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)

// The destructor is defined out of line here, using the default behavior.
ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
84
85Preprocessor::Preprocessor(const PreprocessorOptions &PPOpts,
86 DiagnosticsEngine &diags, const LangOptions &opts,
87 SourceManager &SM, HeaderSearch &Headers,
88 ModuleLoader &TheModuleLoader,
89 IdentifierInfoLookup *IILookup, bool OwnsHeaders,
90 TranslationUnitKind TUKind)
91 : PPOpts(PPOpts), Diags(&diags), LangOpts(opts),
92 FileMgr(Headers.getFileMgr()), SourceMgr(SM),
93 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
94 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
95 // As the language options may have not been loaded yet (when
96 // deserializing an ASTUnit), adding keywords to the identifier table is
97 // deferred to Preprocessor::Initialize().
98 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
99 TUKind(TUKind), SkipMainFilePreamble(0, true),
100 CurSubmoduleState(&NullSubmoduleState) {
101 OwnsHeaderSearch = OwnsHeaders;
102
103 // Default to discarding comments.
104 KeepComments = false;
105 KeepMacroComments = false;
106 SuppressIncludeNotFoundError = false;
107
108 // Macro expansion is enabled.
109 DisableMacroExpansion = false;
110 MacroExpansionInDirectivesOverride = false;
111 InMacroArgs = false;
112 ArgMacro = nullptr;
113 InMacroArgPreExpansion = false;
114 NumCachedTokenLexers = 0;
115 PragmasEnabled = true;
116 ParsingIfOrElifDirective = false;
117 PreprocessedOutput = false;
118
119 // We haven't read anything from the external source.
120 ReadMacrosFromExternalSource = false;
121
122 LastExportKeyword.startToken();
123
124 BuiltinInfo = std::make_unique<Builtin::Context>();
125
126 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
127 // a macro. They get unpoisoned where it is allowed.
128 (Ident__VA_ARGS__ = getIdentifierInfo(Name: "__VA_ARGS__"))->setIsPoisoned();
129 SetPoisonReason(II: Ident__VA_ARGS__,DiagID: diag::ext_pp_bad_vaargs_use);
130 (Ident__VA_OPT__ = getIdentifierInfo(Name: "__VA_OPT__"))->setIsPoisoned();
131 SetPoisonReason(II: Ident__VA_OPT__,DiagID: diag::ext_pp_bad_vaopt_use);
132
133 // Initialize the pragma handlers.
134 RegisterBuiltinPragmas();
135
136 // Initialize builtin macros like __LINE__ and friends.
137 RegisterBuiltinMacros();
138
139 if(LangOpts.Borland) {
140 Ident__exception_info = getIdentifierInfo(Name: "_exception_info");
141 Ident___exception_info = getIdentifierInfo(Name: "__exception_info");
142 Ident_GetExceptionInfo = getIdentifierInfo(Name: "GetExceptionInformation");
143 Ident__exception_code = getIdentifierInfo(Name: "_exception_code");
144 Ident___exception_code = getIdentifierInfo(Name: "__exception_code");
145 Ident_GetExceptionCode = getIdentifierInfo(Name: "GetExceptionCode");
146 Ident__abnormal_termination = getIdentifierInfo(Name: "_abnormal_termination");
147 Ident___abnormal_termination = getIdentifierInfo(Name: "__abnormal_termination");
148 Ident_AbnormalTermination = getIdentifierInfo(Name: "AbnormalTermination");
149 } else {
150 Ident__exception_info = Ident__exception_code = nullptr;
151 Ident__abnormal_termination = Ident___exception_info = nullptr;
152 Ident___exception_code = Ident___abnormal_termination = nullptr;
153 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
154 Ident_AbnormalTermination = nullptr;
155 }
156
157 // Default incremental processing to -fincremental-extensions, clients can
158 // override with `enableIncrementalProcessing` if desired.
159 IncrementalProcessing = LangOpts.IncrementalExtensions;
160
161 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
162 if (usingPCHWithPragmaHdrStop())
163 SkippingUntilPragmaHdrStop = true;
164
165 // If using a PCH with a through header, start skipping tokens.
166 if (!this->PPOpts.PCHThroughHeader.empty() &&
167 !this->PPOpts.ImplicitPCHInclude.empty())
168 SkippingUntilPCHThroughHeader = true;
169
170 if (this->PPOpts.GeneratePreamble)
171 PreambleConditionalStack.startRecording();
172
173 MaxTokens = LangOpts.MaxTokens;
174}
175
176Preprocessor::~Preprocessor() {
177 assert(!isBacktrackEnabled() && "EnableBacktrack/Backtrack imbalance!");
178
179 IncludeMacroStack.clear();
180
181 // Free any cached macro expanders.
182 // This populates MacroArgCache, so all TokenLexers need to be destroyed
183 // before the code below that frees up the MacroArgCache list.
184 std::fill(first: TokenLexerCache, last: TokenLexerCache + NumCachedTokenLexers, value: nullptr);
185 CurTokenLexer.reset();
186
187 // Free any cached MacroArgs.
188 for (MacroArgs *ArgList = MacroArgCache; ArgList;)
189 ArgList = ArgList->deallocate();
190
191 // Delete the header search info, if we own it.
192 if (OwnsHeaderSearch)
193 delete &HeaderInfo;
194}
195
196void Preprocessor::Initialize(const TargetInfo &Target,
197 const TargetInfo *AuxTarget) {
198 assert((!this->Target || this->Target == &Target) &&
199 "Invalid override of target information");
200 this->Target = &Target;
201
202 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
203 "Invalid override of aux target information.");
204 this->AuxTarget = AuxTarget;
205
206 // Initialize information about built-ins.
207 BuiltinInfo->InitializeTarget(Target, AuxTarget);
208 HeaderInfo.setTarget(Target);
209
210 // Populate the identifier table with info about keywords for the current language.
211 Identifiers.AddKeywords(LangOpts);
212
213 // Initialize the __FTL_EVAL_METHOD__ macro to the TargetInfo.
214 setTUFPEvalMethod(getTargetInfo().getFPEvalMethod());
215
216 if (getLangOpts().getFPEvalMethod() == LangOptions::FEM_UnsetOnCommandLine)
217 // Use setting from TargetInfo.
218 setCurrentFPEvalMethod(PragmaLoc: SourceLocation(), Val: Target.getFPEvalMethod());
219 else
220 // Set initial value of __FLT_EVAL_METHOD__ from the command line.
221 setCurrentFPEvalMethod(PragmaLoc: SourceLocation(), Val: getLangOpts().getFPEvalMethod());
222}
223
224void Preprocessor::InitializeForModelFile() {
225 NumEnteredSourceFiles = 0;
226
227 // Reset pragmas
228 PragmaHandlersBackup = std::move(PragmaHandlers);
229 PragmaHandlers = std::make_unique<PragmaNamespace>(args: StringRef());
230 RegisterBuiltinPragmas();
231
232 // Reset PredefinesFileID
233 PredefinesFileID = FileID();
234}
235
236void Preprocessor::FinalizeForModelFile() {
237 NumEnteredSourceFiles = 1;
238
239 PragmaHandlers = std::move(PragmaHandlersBackup);
240}
241
242void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
243 llvm::errs() << tok::getTokenName(Kind: Tok.getKind());
244
245 if (!Tok.isAnnotation())
246 llvm::errs() << " '" << getSpelling(Tok) << "'";
247
248 if (!DumpFlags) return;
249
250 llvm::errs() << "\t";
251 if (Tok.isAtStartOfLine())
252 llvm::errs() << " [StartOfLine]";
253 if (Tok.hasLeadingSpace())
254 llvm::errs() << " [LeadingSpace]";
255 if (Tok.isExpandDisabled())
256 llvm::errs() << " [ExpandDisabled]";
257 if (Tok.needsCleaning()) {
258 const char *Start = SourceMgr.getCharacterData(SL: Tok.getLocation());
259 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
260 << "']";
261 }
262
263 llvm::errs() << "\tLoc=<";
264 DumpLocation(Loc: Tok.getLocation());
265 llvm::errs() << ">";
266}
267
268void Preprocessor::DumpLocation(SourceLocation Loc) const {
269 Loc.print(OS&: llvm::errs(), SM: SourceMgr);
270}
271
272void Preprocessor::DumpMacro(const MacroInfo &MI) const {
273 llvm::errs() << "MACRO: ";
274 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
275 DumpToken(Tok: MI.getReplacementToken(Tok: i));
276 llvm::errs() << " ";
277 }
278 llvm::errs() << "\n";
279}
280
281void Preprocessor::PrintStats() {
282 llvm::errs() << "\n*** Preprocessor Stats:\n";
283 llvm::errs() << NumDirectives << " directives found:\n";
284 llvm::errs() << " " << NumDefined << " #define.\n";
285 llvm::errs() << " " << NumUndefined << " #undef.\n";
286 llvm::errs() << " #include/#include_next/#import:\n";
287 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
288 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
289 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
290 llvm::errs() << " " << NumElse << " #else/#elif/#elifdef/#elifndef.\n";
291 llvm::errs() << " " << NumEndif << " #endif.\n";
292 llvm::errs() << " " << NumPragma << " #pragma.\n";
293 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
294
295 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
296 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
297 << NumFastMacroExpanded << " on the fast path.\n";
298 llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
299 << " token paste (##) operations performed, "
300 << NumFastTokenPaste << " on the fast path.\n";
301
302 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
303
304 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
305 llvm::errs() << "\n Macro Expanded Tokens: "
306 << llvm::capacity_in_bytes(X: MacroExpandedTokens);
307 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
308 // FIXME: List information for all submodules.
309 llvm::errs() << "\n Macros: "
310 << llvm::capacity_in_bytes(X: CurSubmoduleState->Macros);
311 llvm::errs() << "\n #pragma push_macro Info: "
312 << llvm::capacity_in_bytes(X: PragmaPushMacroInfo);
313 llvm::errs() << "\n Poison Reasons: "
314 << llvm::capacity_in_bytes(X: PoisonReasons);
315 llvm::errs() << "\n Comment Handlers: "
316 << llvm::capacity_in_bytes(x: CommentHandlers) << "\n";
317}
318
319Preprocessor::macro_iterator
320Preprocessor::macro_begin(bool IncludeExternalMacros) const {
321 if (IncludeExternalMacros && ExternalSource &&
322 !ReadMacrosFromExternalSource) {
323 ReadMacrosFromExternalSource = true;
324 ExternalSource->ReadDefinedMacros();
325 }
326
327 // Make sure we cover all macros in visible modules.
328 for (const ModuleMacro &Macro : ModuleMacros)
329 CurSubmoduleState->Macros.try_emplace(Key: Macro.II);
330
331 return CurSubmoduleState->Macros.begin();
332}
333
334size_t Preprocessor::getTotalMemory() const {
335 return BP.getTotalMemory()
336 + llvm::capacity_in_bytes(X: MacroExpandedTokens)
337 + Predefines.capacity() /* Predefines buffer. */
338 // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
339 // and ModuleMacros.
340 + llvm::capacity_in_bytes(X: CurSubmoduleState->Macros)
341 + llvm::capacity_in_bytes(X: PragmaPushMacroInfo)
342 + llvm::capacity_in_bytes(X: PoisonReasons)
343 + llvm::capacity_in_bytes(x: CommentHandlers);
344}
345
346Preprocessor::macro_iterator
347Preprocessor::macro_end(bool IncludeExternalMacros) const {
348 if (IncludeExternalMacros && ExternalSource &&
349 !ReadMacrosFromExternalSource) {
350 ReadMacrosFromExternalSource = true;
351 ExternalSource->ReadDefinedMacros();
352 }
353
354 return CurSubmoduleState->Macros.end();
355}
356
357/// Compares macro tokens with a specified token value sequence.
358static bool MacroDefinitionEquals(const MacroInfo *MI,
359 ArrayRef<TokenValue> Tokens) {
360 return Tokens.size() == MI->getNumTokens() &&
361 std::equal(first1: Tokens.begin(), last1: Tokens.end(), first2: MI->tokens_begin());
362}
363
364StringRef Preprocessor::getLastMacroWithSpelling(
365 SourceLocation Loc,
366 ArrayRef<TokenValue> Tokens) const {
367 SourceLocation BestLocation;
368 StringRef BestSpelling;
369 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
370 I != E; ++I) {
371 const MacroDirective::DefInfo
372 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
373 if (!Def || !Def.getMacroInfo())
374 continue;
375 if (!Def.getMacroInfo()->isObjectLike())
376 continue;
377 if (!MacroDefinitionEquals(MI: Def.getMacroInfo(), Tokens))
378 continue;
379 SourceLocation Location = Def.getLocation();
380 // Choose the macro defined latest.
381 if (BestLocation.isInvalid() ||
382 (Location.isValid() &&
383 SourceMgr.isBeforeInTranslationUnit(LHS: BestLocation, RHS: Location))) {
384 BestLocation = Location;
385 BestSpelling = I->first->getName();
386 }
387 }
388 return BestSpelling;
389}
390
391void Preprocessor::recomputeCurLexerKind() {
392 if (CurLexer)
393 CurLexerCallback = CurLexer->isDependencyDirectivesLexer()
394 ? CLK_DependencyDirectivesLexer
395 : CLK_Lexer;
396 else if (CurTokenLexer)
397 CurLexerCallback = CLK_TokenLexer;
398 else
399 CurLexerCallback = CLK_CachingLexer;
400}
401
402bool Preprocessor::SetCodeCompletionPoint(FileEntryRef File,
403 unsigned CompleteLine,
404 unsigned CompleteColumn) {
405 assert(CompleteLine && CompleteColumn && "Starts from 1:1");
406 assert(!CodeCompletionFile && "Already set");
407
408 // Load the actual file's contents.
409 std::optional<llvm::MemoryBufferRef> Buffer =
410 SourceMgr.getMemoryBufferForFileOrNone(File);
411 if (!Buffer)
412 return true;
413
414 // Find the byte position of the truncation point.
415 const char *Position = Buffer->getBufferStart();
416 for (unsigned Line = 1; Line < CompleteLine; ++Line) {
417 for (; *Position; ++Position) {
418 if (*Position != '\r' && *Position != '\n')
419 continue;
420
421 // Eat \r\n or \n\r as a single line.
422 if ((Position[1] == '\r' || Position[1] == '\n') &&
423 Position[0] != Position[1])
424 ++Position;
425 ++Position;
426 break;
427 }
428 }
429
430 Position += CompleteColumn - 1;
431
432 // If pointing inside the preamble, adjust the position at the beginning of
433 // the file after the preamble.
434 if (SkipMainFilePreamble.first &&
435 SourceMgr.getFileEntryForID(FID: SourceMgr.getMainFileID()) == File) {
436 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
437 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
438 }
439
440 if (Position > Buffer->getBufferEnd())
441 Position = Buffer->getBufferEnd();
442
443 CodeCompletionFile = File;
444 CodeCompletionOffset = Position - Buffer->getBufferStart();
445
446 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
447 Size: Buffer->getBufferSize() + 1, BufferName: Buffer->getBufferIdentifier());
448 char *NewBuf = NewBuffer->getBufferStart();
449 char *NewPos = std::copy(first: Buffer->getBufferStart(), last: Position, result: NewBuf);
450 *NewPos = '\0';
451 std::copy(first: Position, last: Buffer->getBufferEnd(), result: NewPos+1);
452 SourceMgr.overrideFileContents(SourceFile: File, Buffer: std::move(NewBuffer));
453
454 return false;
455}
456
457void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir,
458 bool IsAngled) {
459 setCodeCompletionReached();
460 if (CodeComplete)
461 CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
462}
463
464void Preprocessor::CodeCompleteNaturalLanguage() {
465 setCodeCompletionReached();
466 if (CodeComplete)
467 CodeComplete->CodeCompleteNaturalLanguage();
468}
469
470/// getSpelling - This method is used to get the spelling of a token into a
471/// SmallVector. Note that the returned StringRef may not point to the
472/// supplied buffer if a copy can be avoided.
473StringRef Preprocessor::getSpelling(const Token &Tok,
474 SmallVectorImpl<char> &Buffer,
475 bool *Invalid) const {
476 // NOTE: this has to be checked *before* testing for an IdentifierInfo.
477 if (Tok.isNot(K: tok::raw_identifier) && !Tok.hasUCN()) {
478 // Try the fast path.
479 if (const IdentifierInfo *II = Tok.getIdentifierInfo())
480 return II->getName();
481 }
482
483 // Resize the buffer if we need to copy into it.
484 if (Tok.needsCleaning())
485 Buffer.resize(N: Tok.getLength());
486
487 const char *Ptr = Buffer.data();
488 unsigned Len = getSpelling(Tok, Buffer&: Ptr, Invalid);
489 return StringRef(Ptr, Len);
490}
491
492/// CreateString - Plop the specified string into a scratch buffer and return a
493/// location for it. If specified, the source location provides a source
494/// location for the token.
495void Preprocessor::CreateString(StringRef Str, Token &Tok,
496 SourceLocation ExpansionLocStart,
497 SourceLocation ExpansionLocEnd) {
498 Tok.setLength(Str.size());
499
500 const char *DestPtr;
501 SourceLocation Loc = ScratchBuf->getToken(Buf: Str.data(), Len: Str.size(), DestPtr);
502
503 if (ExpansionLocStart.isValid())
504 Loc = SourceMgr.createExpansionLoc(SpellingLoc: Loc, ExpansionLocStart,
505 ExpansionLocEnd, Length: Str.size());
506 Tok.setLocation(Loc);
507
508 // If this is a raw identifier or a literal token, set the pointer data.
509 if (Tok.is(K: tok::raw_identifier))
510 Tok.setRawIdentifierData(DestPtr);
511 else if (Tok.isLiteral())
512 Tok.setLiteralData(DestPtr);
513}
514
515SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
516 auto &SM = getSourceManager();
517 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
518 FileIDAndOffset LocInfo = SM.getDecomposedLoc(Loc: SpellingLoc);
519 bool Invalid = false;
520 StringRef Buffer = SM.getBufferData(FID: LocInfo.first, Invalid: &Invalid);
521 if (Invalid)
522 return SourceLocation();
523
524 // FIXME: We could consider re-using spelling for tokens we see repeatedly.
525 const char *DestPtr;
526 SourceLocation Spelling =
527 ScratchBuf->getToken(Buf: Buffer.data() + LocInfo.second, Len: Length, DestPtr);
528 return SM.createTokenSplitLoc(SpellingLoc: Spelling, TokenStart: Loc, TokenEnd: Loc.getLocWithOffset(Offset: Length));
529}
530
531Module *Preprocessor::getCurrentModule() {
532 if (!getLangOpts().isCompilingModule())
533 return nullptr;
534
535 return getHeaderSearchInfo().lookupModule(ModuleName: getLangOpts().CurrentModule);
536}
537
538Module *Preprocessor::getCurrentModuleImplementation() {
539 if (!getLangOpts().isCompilingModuleImplementation())
540 return nullptr;
541
542 return getHeaderSearchInfo().lookupModule(ModuleName: getLangOpts().ModuleName);
543}
544
545//===----------------------------------------------------------------------===//
546// Preprocessor Initialization Methods
547//===----------------------------------------------------------------------===//
548
549/// EnterMainSourceFile - Enter the specified FileID as the main source file,
550/// which implicitly adds the builtin defines etc.
551void Preprocessor::EnterMainSourceFile() {
552 // We do not allow the preprocessor to reenter the main file. Doing so will
553 // cause FileID's to accumulate information from both runs (e.g. #line
554 // information) and predefined macros aren't guaranteed to be set properly.
555 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
556 FileID MainFileID = SourceMgr.getMainFileID();
557
558 // If MainFileID is loaded it means we loaded an AST file, no need to enter
559 // a main file.
560 if (!SourceMgr.isLoadedFileID(FID: MainFileID)) {
561 // Enter the main file source buffer.
562 EnterSourceFile(FID: MainFileID, Dir: nullptr, Loc: SourceLocation());
563
564 // If we've been asked to skip bytes in the main file (e.g., as part of a
565 // precompiled preamble), do so now.
566 if (SkipMainFilePreamble.first > 0)
567 CurLexer->SetByteOffset(Offset: SkipMainFilePreamble.first,
568 StartOfLine: SkipMainFilePreamble.second);
569
570 // Tell the header info that the main file was entered. If the file is later
571 // #imported, it won't be re-entered.
572 if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(FID: MainFileID))
573 markIncluded(File: *FE);
574
575 // Record the first PP token in the main file. This is used to generate
576 // better diagnostics for C++ modules.
577 //
578 // // This is a comment.
579 // #define FOO int // note: add 'module;' to the start of the file
580 // ^ FirstPPToken // to introduce a global module fragment.
581 //
582 // export module M; // error: module declaration must occur
583 // // at the start of the translation unit.
584 if (getLangOpts().CPlusPlusModules) {
585 std::optional<StringRef> Input =
586 getSourceManager().getBufferDataOrNone(FID: MainFileID);
587 if (!isPreprocessedModuleFile() && Input)
588 MainFileIsPreprocessedModuleFile =
589 clang::isPreprocessedModuleFile(Source: *Input);
590 auto Tracer = std::make_unique<NoTrivialPPDirectiveTracer>(args&: *this);
591 DirTracer = Tracer.get();
592 addPPCallbacks(C: std::move(Tracer));
593 std::optional<Token> FirstPPTok = CurLexer->peekNextPPToken();
594 if (FirstPPTok)
595 FirstPPTokenLoc = FirstPPTok->getLocation();
596 }
597 }
598
599 // Preprocess Predefines to populate the initial preprocessor state.
600 std::unique_ptr<llvm::MemoryBuffer> SB =
601 llvm::MemoryBuffer::getMemBufferCopy(InputData: Predefines, BufferName: "<built-in>");
602 assert(SB && "Cannot create predefined source buffer");
603 FileID FID = SourceMgr.createFileID(Buffer: std::move(SB));
604 assert(FID.isValid() && "Could not create FileID for predefines?");
605 setPredefinesFileID(FID);
606
607 // Start parsing the predefines.
608 EnterSourceFile(FID, Dir: nullptr, Loc: SourceLocation());
609
610 if (!PPOpts.PCHThroughHeader.empty()) {
611 // Lookup and save the FileID for the through header. If it isn't found
612 // in the search path, it's a fatal error.
613 OptionalFileEntryRef File = LookupFile(
614 FilenameLoc: SourceLocation(), Filename: PPOpts.PCHThroughHeader,
615 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr,
616 /*CurDir=*/nullptr, /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
617 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
618 /*IsFrameworkFound=*/nullptr);
619 if (!File) {
620 Diag(Loc: SourceLocation(), DiagID: diag::err_pp_through_header_not_found)
621 << PPOpts.PCHThroughHeader;
622 return;
623 }
624 setPCHThroughHeaderFileID(
625 SourceMgr.createFileID(SourceFile: *File, IncludePos: SourceLocation(), FileCharacter: SrcMgr::C_User));
626 }
627
628 // Skip tokens from the Predefines and if needed the main file.
629 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
630 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
631 SkipTokensWhileUsingPCH();
632}
633
634void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
635 assert(PCHThroughHeaderFileID.isInvalid() &&
636 "PCHThroughHeaderFileID already set!");
637 PCHThroughHeaderFileID = FID;
638}
639
640bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) {
641 assert(PCHThroughHeaderFileID.isValid() &&
642 "Invalid PCH through header FileID");
643 return FE == SourceMgr.getFileEntryForID(FID: PCHThroughHeaderFileID);
644}
645
646bool Preprocessor::creatingPCHWithThroughHeader() {
647 return TUKind == TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
648 PCHThroughHeaderFileID.isValid();
649}
650
651bool Preprocessor::usingPCHWithThroughHeader() {
652 return TUKind != TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
653 PCHThroughHeaderFileID.isValid();
654}
655
656bool Preprocessor::creatingPCHWithPragmaHdrStop() {
657 return TUKind == TU_Prefix && PPOpts.PCHWithHdrStop;
658}
659
660bool Preprocessor::usingPCHWithPragmaHdrStop() {
661 return TUKind != TU_Prefix && PPOpts.PCHWithHdrStop;
662}
663
664/// Skip tokens until after the #include of the through header or
665/// until after a #pragma hdrstop is seen. Tokens in the predefines file
666/// and the main file may be skipped. If the end of the predefines file
667/// is reached, skipping continues into the main file. If the end of the
668/// main file is reached, it's a fatal error.
669void Preprocessor::SkipTokensWhileUsingPCH() {
670 bool ReachedMainFileEOF = false;
671 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
672 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
673 Token Tok;
674 while (true) {
675 bool InPredefines =
676 (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
677 CurLexerCallback(*this, Tok);
678 if (Tok.is(K: tok::eof) && !InPredefines) {
679 ReachedMainFileEOF = true;
680 break;
681 }
682 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
683 break;
684 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
685 break;
686 }
687 if (ReachedMainFileEOF) {
688 if (UsingPCHThroughHeader)
689 Diag(Loc: SourceLocation(), DiagID: diag::err_pp_through_header_not_seen)
690 << PPOpts.PCHThroughHeader << 1;
691 else if (!PPOpts.PCHWithHdrStopCreate)
692 Diag(Loc: SourceLocation(), DiagID: diag::err_pp_pragma_hdrstop_not_seen);
693 }
694}
695
696void Preprocessor::replayPreambleConditionalStack() {
697 // Restore the conditional stack from the preamble, if there is one.
698 if (PreambleConditionalStack.isReplaying()) {
699 assert(CurPPLexer &&
700 "CurPPLexer is null when calling replayPreambleConditionalStack.");
701 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
702 PreambleConditionalStack.doneReplaying();
703 if (PreambleConditionalStack.reachedEOFWhileSkipping())
704 SkipExcludedConditionalBlock(
705 HashTokenLoc: PreambleConditionalStack.SkipInfo->HashTokenLoc,
706 IfTokenLoc: PreambleConditionalStack.SkipInfo->IfTokenLoc,
707 FoundNonSkipPortion: PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
708 FoundElse: PreambleConditionalStack.SkipInfo->FoundElse,
709 ElseLoc: PreambleConditionalStack.SkipInfo->ElseLoc);
710 }
711}
712
713void Preprocessor::EndSourceFile() {
714 // Notify the client that we reached the end of the source file.
715 if (Callbacks)
716 Callbacks->EndOfMainFile();
717}
718
719//===----------------------------------------------------------------------===//
720// Lexer Event Handling.
721//===----------------------------------------------------------------------===//
722
723/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
724/// identifier information for the token and install it into the token,
725/// updating the token kind accordingly.
726IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
727 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
728
729 // Look up this token, see if it is a macro, or if it is a language keyword.
730 IdentifierInfo *II;
731 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
732 // No cleaning needed, just use the characters from the lexed buffer.
733 II = getIdentifierInfo(Name: Identifier.getRawIdentifier());
734 } else {
735 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
736 SmallString<64> IdentifierBuffer;
737 StringRef CleanedStr = getSpelling(Tok: Identifier, Buffer&: IdentifierBuffer);
738
739 if (Identifier.hasUCN()) {
740 SmallString<64> UCNIdentifierBuffer;
741 expandUCNs(Buf&: UCNIdentifierBuffer, Input: CleanedStr);
742 II = getIdentifierInfo(Name: UCNIdentifierBuffer);
743 } else {
744 II = getIdentifierInfo(Name: CleanedStr);
745 }
746 }
747
748 // Update the token info (identifier info and appropriate token kind).
749 // FIXME: the raw_identifier may contain leading whitespace which is removed
750 // from the cleaned identifier token. The SourceLocation should be updated to
751 // refer to the non-whitespace character. For instance, the text "\\\nB" (a
752 // line continuation before 'B') is parsed as a single tok::raw_identifier and
753 // is cleaned to tok::identifier "B". After cleaning the token's length is
754 // still 3 and the SourceLocation refers to the location of the backslash.
755 Identifier.setIdentifierInfo(II);
756 Identifier.setKind(II->getTokenID());
757
758 return II;
759}
760
761void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
762 PoisonReasons[II] = DiagID;
763}
764
765void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
766 assert(Ident__exception_code && Ident__exception_info);
767 assert(Ident___exception_code && Ident___exception_info);
768 Ident__exception_code->setIsPoisoned(Poison);
769 Ident___exception_code->setIsPoisoned(Poison);
770 Ident_GetExceptionCode->setIsPoisoned(Poison);
771 Ident__exception_info->setIsPoisoned(Poison);
772 Ident___exception_info->setIsPoisoned(Poison);
773 Ident_GetExceptionInfo->setIsPoisoned(Poison);
774 Ident__abnormal_termination->setIsPoisoned(Poison);
775 Ident___abnormal_termination->setIsPoisoned(Poison);
776 Ident_AbnormalTermination->setIsPoisoned(Poison);
777}
778
779void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
780 assert(Identifier.getIdentifierInfo() &&
781 "Can't handle identifiers without identifier info!");
782 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
783 PoisonReasons.find(Val: Identifier.getIdentifierInfo());
784 if(it == PoisonReasons.end())
785 Diag(Tok: Identifier, DiagID: diag::err_pp_used_poisoned_id);
786 else
787 Diag(Tok: Identifier,DiagID: it->second) << Identifier.getIdentifierInfo();
788}
789
790void Preprocessor::updateOutOfDateIdentifier(const IdentifierInfo &II) const {
791 assert(II.isOutOfDate() && "not out of date");
792 assert(getExternalSource() &&
793 "getExternalSource() should not return nullptr");
794 getExternalSource()->updateOutOfDateIdentifier(II);
795}
796
797/// HandleIdentifier - This callback is invoked when the lexer reads an
798/// identifier. This callback looks up the identifier in the map and/or
799/// potentially macro expands it or turns it into a named token (like 'for').
800///
801/// Note that callers of this method are guarded by checking the
802/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
803/// IdentifierInfo methods that compute these properties will need to change to
804/// match.
805bool Preprocessor::HandleIdentifier(Token &Identifier) {
806 assert(Identifier.getIdentifierInfo() &&
807 "Can't handle identifiers without identifier info!");
808
809 IdentifierInfo &II = *Identifier.getIdentifierInfo();
810
811 // If the information about this identifier is out of date, update it from
812 // the external source.
813 // We have to treat __VA_ARGS__ in a special way, since it gets
814 // serialized with isPoisoned = true, but our preprocessor may have
815 // unpoisoned it if we're defining a C99 macro.
816 if (II.isOutOfDate()) {
817 bool CurrentIsPoisoned = false;
818 const bool IsSpecialVariadicMacro =
819 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
820 if (IsSpecialVariadicMacro)
821 CurrentIsPoisoned = II.isPoisoned();
822
823 updateOutOfDateIdentifier(II);
824 Identifier.setKind(II.getTokenID());
825
826 if (IsSpecialVariadicMacro)
827 II.setIsPoisoned(CurrentIsPoisoned);
828 }
829
830 // If this identifier was poisoned, and if it was not produced from a macro
831 // expansion, emit an error.
832 if (II.isPoisoned() && CurPPLexer) {
833 HandlePoisonedIdentifier(Identifier);
834 }
835
836 // If this is a macro to be expanded, do it.
837 if (const MacroDefinition MD = getMacroDefinition(II: &II)) {
838 const auto *MI = MD.getMacroInfo();
839 assert(MI && "macro definition with no macro info?");
840 if (!DisableMacroExpansion) {
841 if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
842 // C99 6.10.3p10: If the preprocessing token immediately after the
843 // macro name isn't a '(', this macro should not be expanded.
844 if (!MI->isFunctionLike() || isNextPPTokenOneOf(Ks: tok::l_paren))
845 return HandleMacroExpandedIdentifier(Identifier, MD);
846 } else {
847 // C99 6.10.3.4p2 says that a disabled macro may never again be
848 // expanded, even if it's in a context where it could be expanded in the
849 // future.
850 Identifier.setFlag(Token::DisableExpand);
851 if (MI->isObjectLike() || isNextPPTokenOneOf(Ks: tok::l_paren))
852 Diag(Tok: Identifier, DiagID: diag::pp_disabled_macro_expansion);
853 }
854 }
855 }
856
857 // If this identifier is a keyword in a newer Standard or proposed Standard,
858 // produce a warning. Don't warn if we're not considering macro expansion,
859 // since this identifier might be the name of a macro.
860 // FIXME: This warning is disabled in cases where it shouldn't be, like
861 // "#define constexpr constexpr", "int constexpr;"
862 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
863 Diag(Tok: Identifier, DiagID: getIdentifierTable().getFutureCompatDiagKind(II, LangOpts: getLangOpts()))
864 << II.getName();
865 // Don't diagnose this keyword again in this translation unit.
866 II.setIsFutureCompatKeyword(false);
867 }
868
869 // If this identifier would be a keyword in C++, diagnose as a compatibility
870 // issue.
871 if (II.IsKeywordInCPlusPlus() && !DisableMacroExpansion)
872 Diag(Tok: Identifier, DiagID: diag::warn_pp_identifier_is_cpp_keyword) << &II;
873
874 // If this is an extension token, diagnose its use.
875 // We avoid diagnosing tokens that originate from macro definitions.
876 // FIXME: This warning is disabled in cases where it shouldn't be,
877 // like "#define TY typeof", "TY(1) x".
878 if (II.isExtensionToken() && !DisableMacroExpansion)
879 Diag(Tok: Identifier, DiagID: diag::ext_token_used);
880
881 // Handle module contextual keywords.
882 if (getLangOpts().CPlusPlusModules && CurLexer &&
883 !CurLexer->isLexingRawMode() && !CurLexer->isPragmaLexer() &&
884 !CurLexer->ParsingPreprocessorDirective &&
885 Identifier.isModuleContextualKeyword() &&
886 HandleModuleContextualKeyword(Result&: Identifier)) {
887 HandleDirective(Result&: Identifier);
888 // With a fatal failure in the module loader, we abort parsing.
889 return hadModuleLoaderFatalFailure();
890 }
891
892 // If this is the 'import' contextual keyword following an '@', note
893 // that the next token indicates a module name.
894 //
895 // Note that we do not treat 'import' as a contextual
896 // keyword when we're in a caching lexer, because caching lexers only get
897 // used in contexts where import declarations are disallowed.
898 //
899 // Likewise if this is the standard C++ import keyword.
900 if (((LastTokenWasAt && II.isImportKeyword()) ||
901 Identifier.is(K: tok::kw_import)) &&
902 !InMacroArgs &&
903 (!DisableMacroExpansion || MacroExpansionInDirectivesOverride) &&
904 CurLexerCallback != CLK_CachingLexer) {
905 ModuleImportLoc = Identifier.getLocation();
906 IsAtImport = true;
907 CurLexerCallback = CLK_LexAfterModuleImport;
908 }
909 return true;
910}
911
912void Preprocessor::Lex(Token &Result) {
913 ++LexLevel;
914
915 // We loop here until a lex function returns a token; this avoids recursion.
916 while (!CurLexerCallback(*this, Result))
917 ;
918
919 if (Result.is(K: tok::unknown) && TheModuleLoader.HadFatalFailure)
920 return;
921
922 if (Result.is(K: tok::code_completion) && Result.getIdentifierInfo()) {
923 // Remember the identifier before code completion token.
924 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
925 setCodeCompletionTokenRange(Start: Result.getLocation(), End: Result.getEndLoc());
926 // Set IdenfitierInfo to null to avoid confusing code that handles both
927 // identifiers and completion tokens.
928 Result.setIdentifierInfo(nullptr);
929 }
930
931 // Update StdCXXImportSeqState to track our position within a C++20 import-seq
932 // if this token is being produced as a result of phase 4 of translation.
933 // Update TrackGMFState to decide if we are currently in a Global Module
934 // Fragment. GMF state updates should precede StdCXXImportSeq ones, since GMF state
935 // depends on the prevailing StdCXXImportSeq state in two cases.
936 if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
937 !Result.getFlag(Flag: Token::IsReinjected)) {
938 switch (Result.getKind()) {
939 case tok::l_paren: case tok::l_square: case tok::l_brace:
940 StdCXXImportSeqState.handleOpenBracket();
941 break;
942 case tok::r_paren: case tok::r_square:
943 StdCXXImportSeqState.handleCloseBracket();
944 break;
945 case tok::r_brace:
946 StdCXXImportSeqState.handleCloseBrace();
947 break;
948#define PRAGMA_ANNOTATION(X) case tok::annot_##X:
949// For `#pragma ...` mimic ';'.
950#include "clang/Basic/TokenKinds.def"
951#undef PRAGMA_ANNOTATION
952 // This token is injected to represent the translation of '#include "a.h"'
953 // into "import a.h;". Mimic the notional ';'.
954 case tok::annot_module_include:
955 case tok::annot_repl_input_end:
956 case tok::semi:
957 TrackGMFState.handleSemi();
958 StdCXXImportSeqState.handleSemi();
959 ModuleDeclState.handleSemi();
960 break;
961 case tok::header_name:
962 case tok::annot_header_unit:
963 StdCXXImportSeqState.handleHeaderName();
964 break;
965 case tok::kw_export:
966 if (hasSeenNoTrivialPPDirective())
967 Result.setFlag(Token::HasSeenNoTrivialPPDirective);
968 TrackGMFState.handleExport();
969 StdCXXImportSeqState.handleExport();
970 ModuleDeclState.handleExport();
971 break;
972 case tok::colon:
973 ModuleDeclState.handleColon();
974 break;
975 case tok::kw_import:
976 if (StdCXXImportSeqState.atTopLevel()) {
977 TrackGMFState.handleImport(AfterTopLevelTokenSeq: StdCXXImportSeqState.afterTopLevelSeq());
978 StdCXXImportSeqState.handleImport();
979 }
980 break;
981 case tok::kw_module:
982 if (StdCXXImportSeqState.atTopLevel()) {
983 if (hasSeenNoTrivialPPDirective())
984 Result.setFlag(Token::HasSeenNoTrivialPPDirective);
985 TrackGMFState.handleModule(AfterTopLevelTokenSeq: StdCXXImportSeqState.afterTopLevelSeq());
986 ModuleDeclState.handleModule();
987 }
988 break;
989 case tok::annot_module_name:
990 ModuleDeclState.handleModuleName(
991 NameLoc: static_cast<ModuleNameLoc *>(Result.getAnnotationValue()));
992 if (ModuleDeclState.isModuleCandidate())
993 break;
994 [[fallthrough]];
995 default:
996 TrackGMFState.handleMisc();
997 StdCXXImportSeqState.handleMisc();
998 ModuleDeclState.handleMisc();
999 break;
1000 }
1001 }
1002
1003 if (CurLexer && ++CheckPointCounter == CheckPointStepSize) {
1004 CheckPoints[CurLexer->getFileID()].push_back(Elt: CurLexer->BufferPtr);
1005 CheckPointCounter = 0;
1006 }
1007
1008 LastTokenWasAt = Result.is(K: tok::at);
1009 if (Result.isNot(K: tok::kw_export))
1010 LastExportKeyword.startToken();
1011
1012 --LexLevel;
1013
1014 // Destroy any lexers that were deferred while we were in nested Lex calls.
1015 // This must happen after decrementing LexLevel but before any other
1016 // processing that might re-enter Lex.
1017 if (LexLevel == 0 && !PendingDestroyLexers.empty())
1018 PendingDestroyLexers.clear();
1019
1020 if ((LexLevel == 0 || PreprocessToken) &&
1021 !Result.getFlag(Flag: Token::IsReinjected)) {
1022 if (LexLevel == 0)
1023 ++TokenCount;
1024 if (OnToken)
1025 OnToken(Result);
1026 }
1027}
1028
1029void Preprocessor::LexTokensUntilEOF(std::vector<Token> *Tokens) {
1030 while (1) {
1031 Token Tok;
1032 Lex(Result&: Tok);
1033 if (Tok.isOneOf(Ks: tok::unknown, Ks: tok::eof, Ks: tok::eod,
1034 Ks: tok::annot_repl_input_end))
1035 break;
1036 if (Tokens != nullptr)
1037 Tokens->push_back(x: Tok);
1038 }
1039}
1040
1041/// Lex a header-name token (including one formed from header-name-tokens if
1042/// \p AllowMacroExpansion is \c true).
1043///
1044/// \param FilenameTok Filled in with the next token. On success, this will
1045/// be either a header_name token. On failure, it will be whatever other
1046/// token was found instead.
1047/// \param AllowMacroExpansion If \c true, allow the header name to be formed
1048/// by macro expansion (concatenating tokens as necessary if the first
1049/// token is a '<').
1050/// \return \c true if we reached EOD or EOF while looking for a > token in
1051/// a concatenated header name and diagnosed it. \c false otherwise.
1052bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
1053 // Lex using header-name tokenization rules if tokens are being lexed from
1054 // a file. Just grab a token normally if we're in a macro expansion.
1055 if (CurPPLexer) {
1056 // Avoid nested header-name lexing when macro expansion recurses
1057 // __has_include(__has_include))
1058 if (CurPPLexer->ParsingFilename)
1059 LexUnexpandedToken(Result&: FilenameTok);
1060 else
1061 CurPPLexer->LexIncludeFilename(FilenameTok);
1062 } else {
1063 Lex(Result&: FilenameTok);
1064 }
1065
1066 // This could be a <foo/bar.h> file coming from a macro expansion. In this
1067 // case, glue the tokens together into an angle_string_literal token.
1068 SmallString<128> FilenameBuffer;
1069 if (FilenameTok.is(K: tok::less) && AllowMacroExpansion) {
1070 bool StartOfLine = FilenameTok.isAtStartOfLine();
1071 bool LeadingSpace = FilenameTok.hasLeadingSpace();
1072 bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
1073
1074 SourceLocation Start = FilenameTok.getLocation();
1075 SourceLocation End;
1076 FilenameBuffer.push_back(Elt: '<');
1077
1078 // Consume tokens until we find a '>'.
1079 // FIXME: A header-name could be formed starting or ending with an
1080 // alternative token. It's not clear whether that's ill-formed in all
1081 // cases.
1082 while (FilenameTok.isNot(K: tok::greater)) {
1083 Lex(Result&: FilenameTok);
1084 if (FilenameTok.isOneOf(Ks: tok::eod, Ks: tok::eof)) {
1085 Diag(Loc: FilenameTok.getLocation(), DiagID: diag::err_expected) << tok::greater;
1086 Diag(Loc: Start, DiagID: diag::note_matching) << tok::less;
1087 return true;
1088 }
1089
1090 End = FilenameTok.getLocation();
1091
1092 // FIXME: Provide code completion for #includes.
1093 if (FilenameTok.is(K: tok::code_completion)) {
1094 setCodeCompletionReached();
1095 Lex(Result&: FilenameTok);
1096 continue;
1097 }
1098
1099 // Append the spelling of this token to the buffer. If there was a space
1100 // before it, add it now.
1101 if (FilenameTok.hasLeadingSpace())
1102 FilenameBuffer.push_back(Elt: ' ');
1103
1104 // Get the spelling of the token, directly into FilenameBuffer if
1105 // possible.
1106 size_t PreAppendSize = FilenameBuffer.size();
1107 FilenameBuffer.resize(N: PreAppendSize + FilenameTok.getLength());
1108
1109 const char *BufPtr = &FilenameBuffer[PreAppendSize];
1110 unsigned ActualLen = getSpelling(Tok: FilenameTok, Buffer&: BufPtr);
1111
1112 // If the token was spelled somewhere else, copy it into FilenameBuffer.
1113 if (BufPtr != &FilenameBuffer[PreAppendSize])
1114 memcpy(dest: &FilenameBuffer[PreAppendSize], src: BufPtr, n: ActualLen);
1115
1116 // Resize FilenameBuffer to the correct size.
1117 if (FilenameTok.getLength() != ActualLen)
1118 FilenameBuffer.resize(N: PreAppendSize + ActualLen);
1119 }
1120
1121 FilenameTok.startToken();
1122 FilenameTok.setKind(tok::header_name);
1123 FilenameTok.setFlagValue(Flag: Token::StartOfLine, Val: StartOfLine);
1124 FilenameTok.setFlagValue(Flag: Token::LeadingSpace, Val: LeadingSpace);
1125 FilenameTok.setFlagValue(Flag: Token::LeadingEmptyMacro, Val: LeadingEmptyMacro);
1126 CreateString(Str: FilenameBuffer, Tok&: FilenameTok, ExpansionLocStart: Start, ExpansionLocEnd: End);
1127 } else if (FilenameTok.is(K: tok::string_literal) && AllowMacroExpansion) {
1128 // Convert a string-literal token of the form " h-char-sequence "
1129 // (produced by macro expansion) into a header-name token.
1130 //
1131 // The rules for header-names don't quite match the rules for
1132 // string-literals, but all the places where they differ result in
1133 // undefined behavior, so we can and do treat them the same.
1134 //
1135 // A string-literal with a prefix or suffix is not translated into a
1136 // header-name. This could theoretically be observable via the C++20
1137 // context-sensitive header-name formation rules.
1138 StringRef Str = getSpelling(Tok: FilenameTok, Buffer&: FilenameBuffer);
1139 if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
1140 FilenameTok.setKind(tok::header_name);
1141 }
1142
1143 return false;
1144}
1145
1146std::optional<Token> Preprocessor::peekNextPPToken() const {
1147 // Do some quick tests for rejection cases.
1148 std::optional<Token> Val;
1149 if (CurLexer)
1150 Val = CurLexer->peekNextPPToken();
1151 else
1152 Val = CurTokenLexer->peekNextPPToken();
1153
1154 if (!Val) {
1155 // We have run off the end. If it's a source file we don't
1156 // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
1157 // macro stack.
1158 if (CurPPLexer)
1159 return std::nullopt;
1160 for (const IncludeStackInfo &Entry : llvm::reverse(C: IncludeMacroStack)) {
1161 if (Entry.TheLexer)
1162 Val = Entry.TheLexer->peekNextPPToken();
1163 else
1164 Val = Entry.TheTokenLexer->peekNextPPToken();
1165
1166 if (Val)
1167 break;
1168
1169 // Ran off the end of a source file?
1170 if (Entry.ThePPLexer)
1171 return std::nullopt;
1172 }
1173 }
1174
1175 // Okay, we found the token and return. Otherwise we found the end of the
1176 // translation unit.
1177 return Val;
1178}
1179
1180// We represent the primary and partition names as 'Paths' which are sections
1181// of the hierarchical access path for a clang module. However for C++20
1182// the periods in a name are just another character, and we will need to
1183// flatten them into a string.
1184std::string ModuleLoader::getFlatNameFromPath(ModuleIdPath Path) {
1185 std::string Name;
1186 if (Path.empty())
1187 return Name;
1188
1189 for (auto &Piece : Path) {
1190 assert(Piece.getIdentifierInfo() && Piece.getLoc().isValid());
1191 if (!Name.empty())
1192 Name += ".";
1193 Name += Piece.getIdentifierInfo()->getName();
1194 }
1195 return Name;
1196}
1197
1198ModuleNameLoc *ModuleNameLoc::Create(Preprocessor &PP, ModuleIdPath Path) {
1199 assert(!Path.empty() && "expect at least one identifier in a module name");
1200 void *Mem = PP.getPreprocessorAllocator().Allocate(
1201 Size: totalSizeToAlloc<IdentifierLoc>(Counts: Path.size()), Alignment: alignof(ModuleNameLoc));
1202 return new (Mem) ModuleNameLoc(Path);
1203}
1204
1205bool Preprocessor::LexModuleNameContinue(Token &Tok, SourceLocation UseLoc,
1206 SmallVectorImpl<Token> &Suffix,
1207 SmallVectorImpl<IdentifierLoc> &Path,
1208 bool AllowMacroExpansion,
1209 bool IsPartition) {
1210 auto ConsumeToken = [&]() {
1211 if (AllowMacroExpansion)
1212 Lex(Result&: Tok);
1213 else
1214 LexUnexpandedToken(Result&: Tok);
1215 Suffix.push_back(Elt: Tok);
1216 };
1217
1218 while (true) {
1219 if (Tok.isNot(K: tok::identifier)) {
1220 if (Tok.is(K: tok::code_completion)) {
1221 CurLexer->cutOffLexing();
1222 CodeComplete->CodeCompleteModuleImport(ImportLoc: UseLoc, Path);
1223 return true;
1224 }
1225
1226 Diag(Tok, DiagID: diag::err_pp_module_expected_ident) << Path.empty();
1227 return true;
1228 }
1229
1230 // [cpp.pre]/p2:
1231 // No identifier in the pp-module-name or pp-module-partition shall
1232 // currently be defined as an object-like macro.
1233 if (MacroInfo *MI = getMacroInfo(II: Tok.getIdentifierInfo());
1234 MI && MI->isObjectLike() && getLangOpts().CPlusPlus20 &&
1235 !AllowMacroExpansion) {
1236 Diag(Tok, DiagID: diag::err_pp_module_name_is_macro)
1237 << IsPartition << Tok.getIdentifierInfo();
1238 Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::note_macro_here)
1239 << Tok.getIdentifierInfo();
1240 }
1241
1242 // Record this part of the module path.
1243 Path.emplace_back(Args: Tok.getLocation(), Args: Tok.getIdentifierInfo());
1244 ConsumeToken();
1245
1246 if (Tok.isNot(K: tok::period))
1247 return false;
1248
1249 ConsumeToken();
1250 }
1251}
1252
1253/// [cpp.pre]/p2:
1254/// A preprocessing directive consists of a sequence of preprocessing tokens
1255/// that satisfies the following constraints: At the start of translation phase
1256/// 4, the first preprocessing token in the sequence, referred to as a
1257/// directive-introducing token, begins with the first character in the source
1258/// file (optionally after whitespace containing no new-line characters) or
1259/// follows whitespace containing at least one new-line character, and is:
1260/// - a # preprocessing token, or
1261/// - an import preprocessing token immediately followed on the same logical
1262/// source line by a header-name, <, identifier, or : preprocessing token, or
1263/// - a module preprocessing token immediately followed on the same logical
1264/// source line by an identifier, :, or ; preprocessing token, or
1265/// - an export preprocessing token immediately followed on the same logical
1266/// source line by one of the two preceding forms.
1267///
1268///
1269/// At the start of phase 4 an import or module token is treated as starting a
1270/// directive and are converted to their respective keywords iff:
1271/// - After skipping horizontal whitespace are
1272/// - at the start of a logical line, or
1273/// - preceded by an 'export' at the start of the logical line.
1274/// - Are followed by an identifier pp token (before macro expansion), or
1275/// - <, ", or : (but not ::) pp tokens for 'import', or
1276/// - ; for 'module'
1277/// Otherwise the token is treated as an identifier.
1278bool Preprocessor::HandleModuleContextualKeyword(Token &Result) {
1279 if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword())
1280 return false;
1281
1282 if (Result.is(K: tok::kw_export)) {
1283 LastExportKeyword = Result;
1284 return false;
1285 }
1286
1287 /// Trait 'module' and 'import' as a identifier when the main file is a
1288 /// preprocessed module file. We only allow '__preprocessed_module' and
1289 /// '__preprocessed_import' in this context.
1290 IdentifierInfo *II = Result.getIdentifierInfo();
1291 if (isPreprocessedModuleFile() &&
1292 (II->isStr(Str: tok::getKeywordSpelling(Kind: tok::kw_import)) ||
1293 II->isStr(Str: tok::getKeywordSpelling(Kind: tok::kw_module))))
1294 return false;
1295
1296 if (LastExportKeyword.is(K: tok::kw_export)) {
1297 // The export keyword was not at the start of line, it's not a
1298 // directive-introducing token.
1299 if (!LastExportKeyword.isAtPhysicalStartOfLine())
1300 return false;
1301 // [cpp.pre]/1.4
1302 // export // not a preprocessing directive
1303 // import foo; // preprocessing directive (ill-formed at phase7)
1304 if (Result.isAtPhysicalStartOfLine())
1305 return false;
1306 } else if (!Result.isAtPhysicalStartOfLine())
1307 return false;
1308
1309 llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
1310 CurPPLexer->ParsingPreprocessorDirective, true);
1311
1312 // The next token may be an angled string literal after import keyword.
1313 llvm::SaveAndRestore<bool> SavedParsingFilemame(
1314 CurPPLexer->ParsingFilename,
1315 Result.getIdentifierInfo()->isImportKeyword());
1316
1317 std::optional<Token> NextTok =
1318 CurLexer ? CurLexer->peekNextPPToken() : CurTokenLexer->peekNextPPToken();
1319 if (!NextTok)
1320 return false;
1321
1322 if (NextTok->is(K: tok::raw_identifier))
1323 LookUpIdentifierInfo(Identifier&: *NextTok);
1324
1325 if (Result.getIdentifierInfo()->isImportKeyword()) {
1326 if (NextTok->isOneOf(Ks: tok::identifier, Ks: tok::less, Ks: tok::colon,
1327 Ks: tok::header_name)) {
1328 Result.setKind(tok::kw_import);
1329 ModuleImportLoc = Result.getLocation();
1330 IsAtImport = false;
1331 return true;
1332 }
1333 }
1334
1335 if (Result.getIdentifierInfo()->isModuleKeyword() &&
1336 NextTok->isOneOf(Ks: tok::identifier, Ks: tok::colon, Ks: tok::semi)) {
1337 Result.setKind(tok::kw_module);
1338 ModuleDeclLoc = Result.getLocation();
1339 return true;
1340 }
1341
1342 // Ok, it's an identifier.
1343 return false;
1344}
1345
1346bool Preprocessor::CollectPPImportSuffixAndEnterStream(
1347 SmallVectorImpl<Token> &Toks, bool StopUntilEOD) {
1348 CollectPPImportSuffix(Toks);
1349 EnterModuleSuffixTokenStream(Toks);
1350 return false;
1351}
1352
1353/// Collect the tokens of a C++20 pp-import-suffix.
1354void Preprocessor::CollectPPImportSuffix(SmallVectorImpl<Token> &Toks,
1355 bool StopUntilEOD) {
1356 while (true) {
1357 Toks.emplace_back();
1358 Lex(Result&: Toks.back());
1359
1360 switch (Toks.back().getKind()) {
1361 case tok::semi:
1362 if (!StopUntilEOD)
1363 return;
1364 [[fallthrough]];
1365 case tok::eod:
1366 case tok::eof:
1367 return;
1368 default:
1369 break;
1370 }
1371 }
1372}
1373
1374// Allocate a holding buffer for a sequence of tokens and introduce it into
1375// the token stream.
1376void Preprocessor::EnterModuleSuffixTokenStream(ArrayRef<Token> Toks) {
1377 if (Toks.empty())
1378 return;
1379 auto ToksCopy = std::make_unique<Token[]>(num: Toks.size());
1380 std::copy(first: Toks.begin(), last: Toks.end(), result: ToksCopy.get());
1381 EnterTokenStream(Toks: std::move(ToksCopy), NumToks: Toks.size(),
1382 /*DisableMacroExpansion*/ false, /*IsReinject*/ false);
1383 assert(CurTokenLexer && "Must have a TokenLexer");
1384 CurTokenLexer->setLexingCXXModuleDirective();
1385}
1386
1387/// Lex a token following the 'import' contextual keyword.
1388///
1389/// pp-import: [C++20]
1390/// import header-name pp-import-suffix[opt] ;
1391/// import header-name-tokens pp-import-suffix[opt] ;
1392/// [ObjC] @ import module-name ;
1393/// [Clang] import module-name ;
1394///
1395/// header-name-tokens:
1396/// string-literal
1397/// < [any sequence of preprocessing-tokens other than >] >
1398///
1399/// module-name:
1400/// module-name-qualifier[opt] identifier
1401///
1402/// module-name-qualifier
1403/// module-name-qualifier[opt] identifier .
1404///
1405/// We respond to a pp-import by importing macros from the named module.
1406bool Preprocessor::LexAfterModuleImport(Token &Result) {
1407 // Figure out what kind of lexer we actually have.
1408 recomputeCurLexerKind();
1409
1410 SmallVector<Token, 32> Suffix;
1411 SmallVector<IdentifierLoc, 3> Path;
1412 Lex(Result);
1413 if (LexModuleNameContinue(Tok&: Result, UseLoc: ModuleImportLoc, Suffix, Path))
1414 return CollectPPImportSuffixAndEnterStream(Toks&: Suffix);
1415
1416 ModuleNameLoc *NameLoc = ModuleNameLoc::Create(PP&: *this, Path);
1417 Suffix.clear();
1418 Suffix.emplace_back();
1419 Suffix.back().setKind(tok::annot_module_name);
1420 Suffix.back().setAnnotationRange(NameLoc->getRange());
1421 Suffix.back().setAnnotationValue(static_cast<void *>(NameLoc));
1422 Suffix.push_back(Elt: Result);
1423
1424 // Consume the pp-import-suffix and expand any macros in it now, if we're not
1425 // at the semicolon already.
1426 SourceLocation SemiLoc = Result.getLocation();
1427 if (Suffix.back().isNot(K: tok::semi)) {
1428 if (Suffix.back().isNot(K: tok::eof))
1429 CollectPPImportSuffix(Toks&: Suffix);
1430 if (Suffix.back().isNot(K: tok::semi)) {
1431 // This is not an import after all.
1432 EnterModuleSuffixTokenStream(Toks: Suffix);
1433 return false;
1434 }
1435 SemiLoc = Suffix.back().getLocation();
1436 }
1437
1438 Module *Imported = nullptr;
1439 if (getLangOpts().Modules) {
1440 Imported = TheModuleLoader.loadModule(ImportLoc: ModuleImportLoc, Path, Visibility: Module::Hidden,
1441 /*IsInclusionDirective=*/false);
1442 if (Imported)
1443 makeModuleVisible(M: Imported, Loc: SemiLoc);
1444 }
1445
1446 if (Callbacks)
1447 Callbacks->moduleImport(ImportLoc: ModuleImportLoc, Path, Imported);
1448
1449 if (!Suffix.empty()) {
1450 EnterModuleSuffixTokenStream(Toks: Suffix);
1451 return false;
1452 }
1453 return true;
1454}
1455
1456void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc,
1457 bool IncludeExports) {
1458 CurSubmoduleState->VisibleModules.setVisible(
1459 M, Loc, IncludeExports, Vis: [](Module *) {},
1460 Cb: [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
1461 // FIXME: Include the path in the diagnostic.
1462 // FIXME: Include the import location for the conflicting module.
1463 Diag(Loc: ModuleImportLoc, DiagID: diag::warn_module_conflict)
1464 << Path[0]->getFullModuleName()
1465 << Conflict->getFullModuleName()
1466 << Message;
1467 });
1468
1469 // Add this module to the imports list of the currently-built submodule.
1470 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
1471 BuildingSubmoduleStack.back().M->Imports.insert(X: M);
1472}
1473
1474bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
1475 const char *DiagnosticTag,
1476 bool AllowMacroExpansion) {
1477 // We need at least one string literal.
1478 if (Result.isNot(K: tok::string_literal)) {
1479 Diag(Tok: Result, DiagID: diag::err_expected_string_literal)
1480 << /*Source='in...'*/0 << DiagnosticTag;
1481 return false;
1482 }
1483
1484 // Lex string literal tokens, optionally with macro expansion.
1485 SmallVector<Token, 4> StrToks;
1486 do {
1487 StrToks.push_back(Elt: Result);
1488
1489 if (Result.hasUDSuffix())
1490 Diag(Tok: Result, DiagID: diag::err_invalid_string_udl);
1491
1492 if (AllowMacroExpansion)
1493 Lex(Result);
1494 else
1495 LexUnexpandedToken(Result);
1496 } while (Result.is(K: tok::string_literal));
1497
1498 // Concatenate and parse the strings.
1499 StringLiteralParser Literal(StrToks, *this);
1500 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1501
1502 if (Literal.hadError)
1503 return false;
1504
1505 if (Literal.Pascal) {
1506 Diag(Loc: StrToks[0].getLocation(), DiagID: diag::err_expected_string_literal)
1507 << /*Source='in...'*/0 << DiagnosticTag;
1508 return false;
1509 }
1510
1511 String = std::string(Literal.GetString());
1512 return true;
1513}
1514
1515bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
1516 assert(Tok.is(tok::numeric_constant));
1517 SmallString<8> IntegerBuffer;
1518 bool NumberInvalid = false;
1519 StringRef Spelling = getSpelling(Tok, Buffer&: IntegerBuffer, Invalid: &NumberInvalid);
1520 if (NumberInvalid)
1521 return false;
1522 NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
1523 getLangOpts(), getTargetInfo(),
1524 getDiagnostics());
1525 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
1526 return false;
1527 llvm::APInt APVal(64, 0);
1528 if (Literal.GetIntegerValue(Val&: APVal))
1529 return false;
1530 Lex(Result&: Tok);
1531 Value = APVal.getLimitedValue();
1532 return true;
1533}
1534
1535void Preprocessor::addCommentHandler(CommentHandler *Handler) {
1536 assert(Handler && "NULL comment handler");
1537 assert(!llvm::is_contained(CommentHandlers, Handler) &&
1538 "Comment handler already registered");
1539 CommentHandlers.push_back(x: Handler);
1540}
1541
1542void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
1543 std::vector<CommentHandler *>::iterator Pos =
1544 llvm::find(Range&: CommentHandlers, Val: Handler);
1545 assert(Pos != CommentHandlers.end() && "Comment handler not registered");
1546 CommentHandlers.erase(position: Pos);
1547}
1548
1549bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
1550 bool AnyPendingTokens = false;
1551 for (CommentHandler *H : CommentHandlers) {
1552 if (H->HandleComment(PP&: *this, Comment))
1553 AnyPendingTokens = true;
1554 }
1555 if (!AnyPendingTokens || getCommentRetentionState())
1556 return false;
1557 Lex(Result&: result);
1558 return true;
1559}
1560
1561void Preprocessor::emitMacroDeprecationWarning(const Token &Identifier) const {
1562 const MacroAnnotations &A =
1563 getMacroAnnotations(II: Identifier.getIdentifierInfo());
1564 assert(A.DeprecationInfo &&
1565 "Macro deprecation warning without recorded annotation!");
1566 const MacroAnnotationInfo &Info = *A.DeprecationInfo;
1567 if (Info.Message.empty())
1568 Diag(Tok: Identifier, DiagID: diag::warn_pragma_deprecated_macro_use)
1569 << Identifier.getIdentifierInfo() << 0;
1570 else
1571 Diag(Tok: Identifier, DiagID: diag::warn_pragma_deprecated_macro_use)
1572 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1573 Diag(Loc: Info.Location, DiagID: diag::note_pp_macro_annotation) << 0;
1574}
1575
1576void Preprocessor::emitRestrictExpansionWarning(const Token &Identifier) const {
1577 const MacroAnnotations &A =
1578 getMacroAnnotations(II: Identifier.getIdentifierInfo());
1579 assert(A.RestrictExpansionInfo &&
1580 "Macro restricted expansion warning without recorded annotation!");
1581 const MacroAnnotationInfo &Info = *A.RestrictExpansionInfo;
1582 if (Info.Message.empty())
1583 Diag(Tok: Identifier, DiagID: diag::warn_pragma_restrict_expansion_macro_use)
1584 << Identifier.getIdentifierInfo() << 0;
1585 else
1586 Diag(Tok: Identifier, DiagID: diag::warn_pragma_restrict_expansion_macro_use)
1587 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1588 Diag(Loc: Info.Location, DiagID: diag::note_pp_macro_annotation) << 1;
1589}
1590
1591void Preprocessor::emitRestrictInfNaNWarning(const Token &Identifier,
1592 unsigned DiagSelection) const {
1593 Diag(Tok: Identifier, DiagID: diag::warn_fp_nan_inf_when_disabled) << DiagSelection << 1;
1594}
1595
1596void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
1597 bool IsUndef) const {
1598 const MacroAnnotations &A =
1599 getMacroAnnotations(II: Identifier.getIdentifierInfo());
1600 assert(A.FinalAnnotationLoc &&
1601 "Final macro warning without recorded annotation!");
1602
1603 Diag(Tok: Identifier, DiagID: diag::warn_pragma_final_macro)
1604 << Identifier.getIdentifierInfo() << (IsUndef ? 0 : 1);
1605 Diag(Loc: *A.FinalAnnotationLoc, DiagID: diag::note_pp_macro_annotation) << 2;
1606}
1607
1608bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
1609 const SourceLocation &Loc) const {
1610 // The lambda that tests if a `Loc` is in an opt-out region given one opt-out
1611 // region map:
1612 auto TestInMap = [&SourceMgr](const SafeBufferOptOutRegionsTy &Map,
1613 const SourceLocation &Loc) -> bool {
1614 // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
1615 auto FirstRegionEndingAfterLoc = llvm::partition_point(
1616 Range: Map, P: [&SourceMgr,
1617 &Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
1618 return SourceMgr.isBeforeInTranslationUnit(LHS: Region.second, RHS: Loc);
1619 });
1620
1621 if (FirstRegionEndingAfterLoc != Map.end()) {
1622 // To test if the start location of the found region precedes `Loc`:
1623 return SourceMgr.isBeforeInTranslationUnit(
1624 LHS: FirstRegionEndingAfterLoc->first, RHS: Loc);
1625 }
1626 // If we do not find a region whose end location passes `Loc`, we want to
1627 // check if the current region is still open:
1628 if (!Map.empty() && Map.back().first == Map.back().second)
1629 return SourceMgr.isBeforeInTranslationUnit(LHS: Map.back().first, RHS: Loc);
1630 return false;
1631 };
1632
1633 // What the following does:
1634 //
1635 // If `Loc` belongs to the local TU, we just look up `SafeBufferOptOutMap`.
1636 // Otherwise, `Loc` is from a loaded AST. We look up the
1637 // `LoadedSafeBufferOptOutMap` first to get the opt-out region map of the
1638 // loaded AST where `Loc` is at. Then we find if `Loc` is in an opt-out
1639 // region w.r.t. the region map. If the region map is absent, it means there
1640 // is no opt-out pragma in that loaded AST.
1641 //
1642 // Opt-out pragmas in the local TU or a loaded AST is not visible to another
1643 // one of them. That means if you put the pragmas around a `#include
1644 // "module.h"`, where module.h is a module, it is not actually suppressing
1645 // warnings in module.h. This is fine because warnings in module.h will be
1646 // reported when module.h is compiled in isolation and nothing in module.h
1647 // will be analyzed ever again. So you will not see warnings from the file
1648 // that imports module.h anyway. And you can't even do the same thing for PCHs
1649 // because they can only be included from the command line.
1650
1651 if (SourceMgr.isLocalSourceLocation(Loc))
1652 return TestInMap(SafeBufferOptOutMap, Loc);
1653
1654 const SafeBufferOptOutRegionsTy *LoadedRegions =
1655 LoadedSafeBufferOptOutMap.lookupLoadedOptOutMap(Loc, SrcMgr: SourceMgr);
1656
1657 if (LoadedRegions)
1658 return TestInMap(*LoadedRegions, Loc);
1659 return false;
1660}
1661
1662bool Preprocessor::enterOrExitSafeBufferOptOutRegion(
1663 bool isEnter, const SourceLocation &Loc) {
1664 if (isEnter) {
1665 if (isPPInSafeBufferOptOutRegion())
1666 return true; // invalid enter action
1667 InSafeBufferOptOutRegion = true;
1668 CurrentSafeBufferOptOutStart = Loc;
1669
1670 // To set the start location of a new region:
1671
1672 if (!SafeBufferOptOutMap.empty()) {
1673 [[maybe_unused]] auto *PrevRegion = &SafeBufferOptOutMap.back();
1674 assert(PrevRegion->first != PrevRegion->second &&
1675 "Shall not begin a safe buffer opt-out region before closing the "
1676 "previous one.");
1677 }
1678 // If the start location equals to the end location, we call the region a
1679 // open region or a unclosed region (i.e., end location has not been set
1680 // yet).
1681 SafeBufferOptOutMap.emplace_back(Args: Loc, Args: Loc);
1682 } else {
1683 if (!isPPInSafeBufferOptOutRegion())
1684 return true; // invalid enter action
1685 InSafeBufferOptOutRegion = false;
1686
1687 // To set the end location of the current open region:
1688
1689 assert(!SafeBufferOptOutMap.empty() &&
1690 "Misordered safe buffer opt-out regions");
1691 auto *CurrRegion = &SafeBufferOptOutMap.back();
1692 assert(CurrRegion->first == CurrRegion->second &&
1693 "Set end location to a closed safe buffer opt-out region");
1694 CurrRegion->second = Loc;
1695 }
1696 return false;
1697}
1698
/// Returns the current value of `InSafeBufferOptOutRegion`, i.e. whether a
/// safe buffer opt-out region has been entered and not yet exited.
bool Preprocessor::isPPInSafeBufferOptOutRegion() {
  return InSafeBufferOptOutRegion;
}
1702bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) {
1703 StartLoc = CurrentSafeBufferOptOutStart;
1704 return InSafeBufferOptOutRegion;
1705}
1706
1707SmallVector<SourceLocation, 64>
1708Preprocessor::serializeSafeBufferOptOutMap() const {
1709 assert(!InSafeBufferOptOutRegion &&
1710 "Attempt to serialize safe buffer opt-out regions before file being "
1711 "completely preprocessed");
1712
1713 SmallVector<SourceLocation, 64> SrcSeq;
1714
1715 for (const auto &[begin, end] : SafeBufferOptOutMap) {
1716 SrcSeq.push_back(Elt: begin);
1717 SrcSeq.push_back(Elt: end);
1718 }
1719 // Only `SafeBufferOptOutMap` gets serialized. No need to serialize
1720 // `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
1721 // pch/module in the pch-chain/module-DAG will be loaded one by one in order.
1722 // It means that for each loading pch/module m, it just needs to load m's own
1723 // `SafeBufferOptOutMap`.
1724 return SrcSeq;
1725}
1726
1727bool Preprocessor::setDeserializedSafeBufferOptOutMap(
1728 const SmallVectorImpl<SourceLocation> &SourceLocations) {
1729 if (SourceLocations.size() == 0)
1730 return false;
1731
1732 assert(SourceLocations.size() % 2 == 0 &&
1733 "ill-formed SourceLocation sequence");
1734
1735 auto It = SourceLocations.begin();
1736 SafeBufferOptOutRegionsTy &Regions =
1737 LoadedSafeBufferOptOutMap.findAndConsLoadedOptOutMap(Loc: *It, SrcMgr&: SourceMgr);
1738
1739 do {
1740 SourceLocation Begin = *It++;
1741 SourceLocation End = *It++;
1742
1743 Regions.emplace_back(Args&: Begin, Args&: End);
1744 } while (It != SourceLocations.end());
1745 return true;
1746}
1747
// Out-of-line definitions of the defaulted destructors of ModuleLoader,
// CommentHandler, EmptylineHandler and CodeCompletionHandler.
// NOTE(review): presumably defined here so that each class's vtable is
// emitted in this translation unit -- confirm against the class declarations.
ModuleLoader::~ModuleLoader() = default;

CommentHandler::~CommentHandler() = default;

EmptylineHandler::~EmptylineHandler() = default;

CodeCompletionHandler::~CodeCompletionHandler() = default;
1755
1756void Preprocessor::createPreprocessingRecord() {
1757 if (Record)
1758 return;
1759
1760 Record = new PreprocessingRecord(getSourceManager());
1761 addPPCallbacks(C: std::unique_ptr<PPCallbacks>(Record));
1762}
1763
1764const char *Preprocessor::getCheckPoint(FileID FID, const char *Start) const {
1765 if (auto It = CheckPoints.find(Val: FID); It != CheckPoints.end()) {
1766 const SmallVector<const char *> &FileCheckPoints = It->second;
1767 const char *Last = nullptr;
1768 // FIXME: Do better than a linear search.
1769 for (const char *P : FileCheckPoints) {
1770 if (P > Start)
1771 break;
1772 Last = P;
1773 }
1774 return Last;
1775 }
1776
1777 return nullptr;
1778}
1779
1780bool Preprocessor::hasSeenNoTrivialPPDirective() const {
1781 return DirTracer && DirTracer->hasSeenNoTrivialPPDirective();
1782}
1783
/// Returns the current value of the `SeenNoTrivialPPDirective` flag.
bool NoTrivialPPDirectiveTracer::hasSeenNoTrivialPPDirective() const {
  return SeenNoTrivialPPDirective;
}
1787
1788void NoTrivialPPDirectiveTracer::setSeenNoTrivialPPDirective() {
1789 if (InMainFile && !SeenNoTrivialPPDirective)
1790 SeenNoTrivialPPDirective = true;
1791}
1792
1793void NoTrivialPPDirectiveTracer::LexedFileChanged(
1794 FileID FID, LexedFileChangeReason Reason,
1795 SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) {
1796 InMainFile = (FID == PP.getSourceManager().getMainFileID());
1797}
1798
1799void NoTrivialPPDirectiveTracer::MacroExpands(const Token &MacroNameTok,
1800 const MacroDefinition &MD,
1801 SourceRange Range,
1802 const MacroArgs *Args) {
1803 // FIXME: Does only enable builtin macro expansion make sense?
1804 if (!MD.getMacroInfo()->isBuiltinMacro())
1805 setSeenNoTrivialPPDirective();
1806}
1807