1//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Preprocessor interface.
10//
11//===----------------------------------------------------------------------===//
12//
13// Options to support:
14// -H - Print the name of each header file used.
15// -d[DNI] - Dump various things.
16// -fworking-directory - #line's with preprocessor's working dir.
17// -fpreprocessed
18// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
19// -W*
20// -w
21//
22// Messages to emit:
23// "Multiple include guards may be useful for:\n"
24//
25//===----------------------------------------------------------------------===//
26
27#include "clang/Lex/Preprocessor.h"
28#include "clang/Basic/Builtins.h"
29#include "clang/Basic/FileManager.h"
30#include "clang/Basic/IdentifierTable.h"
31#include "clang/Basic/LLVM.h"
32#include "clang/Basic/LangOptions.h"
33#include "clang/Basic/Module.h"
34#include "clang/Basic/SourceLocation.h"
35#include "clang/Basic/SourceManager.h"
36#include "clang/Basic/TargetInfo.h"
37#include "clang/Lex/CodeCompletionHandler.h"
38#include "clang/Lex/ExternalPreprocessorSource.h"
39#include "clang/Lex/HeaderSearch.h"
40#include "clang/Lex/LexDiagnostic.h"
41#include "clang/Lex/Lexer.h"
42#include "clang/Lex/LiteralSupport.h"
43#include "clang/Lex/MacroArgs.h"
44#include "clang/Lex/MacroInfo.h"
45#include "clang/Lex/ModuleLoader.h"
46#include "clang/Lex/Pragma.h"
47#include "clang/Lex/PreprocessingRecord.h"
48#include "clang/Lex/PreprocessorLexer.h"
49#include "clang/Lex/PreprocessorOptions.h"
50#include "clang/Lex/ScratchBuffer.h"
51#include "clang/Lex/Token.h"
52#include "clang/Lex/TokenLexer.h"
53#include "llvm/ADT/APInt.h"
54#include "llvm/ADT/ArrayRef.h"
55#include "llvm/ADT/DenseMap.h"
56#include "llvm/ADT/STLExtras.h"
57#include "llvm/ADT/SmallVector.h"
58#include "llvm/ADT/StringRef.h"
59#include "llvm/Support/Capacity.h"
60#include "llvm/Support/ErrorHandling.h"
61#include "llvm/Support/MemoryBuffer.h"
62#include "llvm/Support/raw_ostream.h"
63#include <algorithm>
64#include <cassert>
65#include <memory>
66#include <optional>
67#include <string>
68#include <utility>
69#include <vector>
70
71using namespace clang;
72
/// Smallest allowed spacing, counted in tokens, between two consecutive
/// check points.
static constexpr unsigned CheckPointStepSize = 1U << 10; // 1024 tokens
75
76LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
77
78ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
79
80Preprocessor::Preprocessor(const PreprocessorOptions &PPOpts,
81 DiagnosticsEngine &diags, const LangOptions &opts,
82 SourceManager &SM, HeaderSearch &Headers,
83 ModuleLoader &TheModuleLoader,
84 IdentifierInfoLookup *IILookup, bool OwnsHeaders,
85 TranslationUnitKind TUKind)
86 : PPOpts(PPOpts), Diags(&diags), LangOpts(opts),
87 FileMgr(Headers.getFileMgr()), SourceMgr(SM),
88 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
89 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
90 // As the language options may have not been loaded yet (when
91 // deserializing an ASTUnit), adding keywords to the identifier table is
92 // deferred to Preprocessor::Initialize().
93 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
94 TUKind(TUKind), SkipMainFilePreamble(0, true),
95 CurSubmoduleState(&NullSubmoduleState) {
96 OwnsHeaderSearch = OwnsHeaders;
97
98 // Default to discarding comments.
99 KeepComments = false;
100 KeepMacroComments = false;
101 SuppressIncludeNotFoundError = false;
102
103 // Macro expansion is enabled.
104 DisableMacroExpansion = false;
105 MacroExpansionInDirectivesOverride = false;
106 InMacroArgs = false;
107 ArgMacro = nullptr;
108 InMacroArgPreExpansion = false;
109 NumCachedTokenLexers = 0;
110 PragmasEnabled = true;
111 ParsingIfOrElifDirective = false;
112 PreprocessedOutput = false;
113
114 // We haven't read anything from the external source.
115 ReadMacrosFromExternalSource = false;
116
117 BuiltinInfo = std::make_unique<Builtin::Context>();
118
119 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
120 // a macro. They get unpoisoned where it is allowed.
121 (Ident__VA_ARGS__ = getIdentifierInfo(Name: "__VA_ARGS__"))->setIsPoisoned();
122 SetPoisonReason(II: Ident__VA_ARGS__,DiagID: diag::ext_pp_bad_vaargs_use);
123 (Ident__VA_OPT__ = getIdentifierInfo(Name: "__VA_OPT__"))->setIsPoisoned();
124 SetPoisonReason(II: Ident__VA_OPT__,DiagID: diag::ext_pp_bad_vaopt_use);
125
126 // Initialize the pragma handlers.
127 RegisterBuiltinPragmas();
128
129 // Initialize builtin macros like __LINE__ and friends.
130 RegisterBuiltinMacros();
131
132 if(LangOpts.Borland) {
133 Ident__exception_info = getIdentifierInfo(Name: "_exception_info");
134 Ident___exception_info = getIdentifierInfo(Name: "__exception_info");
135 Ident_GetExceptionInfo = getIdentifierInfo(Name: "GetExceptionInformation");
136 Ident__exception_code = getIdentifierInfo(Name: "_exception_code");
137 Ident___exception_code = getIdentifierInfo(Name: "__exception_code");
138 Ident_GetExceptionCode = getIdentifierInfo(Name: "GetExceptionCode");
139 Ident__abnormal_termination = getIdentifierInfo(Name: "_abnormal_termination");
140 Ident___abnormal_termination = getIdentifierInfo(Name: "__abnormal_termination");
141 Ident_AbnormalTermination = getIdentifierInfo(Name: "AbnormalTermination");
142 } else {
143 Ident__exception_info = Ident__exception_code = nullptr;
144 Ident__abnormal_termination = Ident___exception_info = nullptr;
145 Ident___exception_code = Ident___abnormal_termination = nullptr;
146 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
147 Ident_AbnormalTermination = nullptr;
148 }
149
150 // Default incremental processing to -fincremental-extensions, clients can
151 // override with `enableIncrementalProcessing` if desired.
152 IncrementalProcessing = LangOpts.IncrementalExtensions;
153
154 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
155 if (usingPCHWithPragmaHdrStop())
156 SkippingUntilPragmaHdrStop = true;
157
158 // If using a PCH with a through header, start skipping tokens.
159 if (!this->PPOpts.PCHThroughHeader.empty() &&
160 !this->PPOpts.ImplicitPCHInclude.empty())
161 SkippingUntilPCHThroughHeader = true;
162
163 if (this->PPOpts.GeneratePreamble)
164 PreambleConditionalStack.startRecording();
165
166 MaxTokens = LangOpts.MaxTokens;
167}
168
169Preprocessor::~Preprocessor() {
170 assert(!isBacktrackEnabled() && "EnableBacktrack/Backtrack imbalance!");
171
172 IncludeMacroStack.clear();
173
174 // Free any cached macro expanders.
175 // This populates MacroArgCache, so all TokenLexers need to be destroyed
176 // before the code below that frees up the MacroArgCache list.
177 std::fill(first: TokenLexerCache, last: TokenLexerCache + NumCachedTokenLexers, value: nullptr);
178 CurTokenLexer.reset();
179
180 // Free any cached MacroArgs.
181 for (MacroArgs *ArgList = MacroArgCache; ArgList;)
182 ArgList = ArgList->deallocate();
183
184 // Delete the header search info, if we own it.
185 if (OwnsHeaderSearch)
186 delete &HeaderInfo;
187}
188
189void Preprocessor::Initialize(const TargetInfo &Target,
190 const TargetInfo *AuxTarget) {
191 assert((!this->Target || this->Target == &Target) &&
192 "Invalid override of target information");
193 this->Target = &Target;
194
195 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
196 "Invalid override of aux target information.");
197 this->AuxTarget = AuxTarget;
198
199 // Initialize information about built-ins.
200 BuiltinInfo->InitializeTarget(Target, AuxTarget);
201 HeaderInfo.setTarget(Target);
202
203 // Populate the identifier table with info about keywords for the current language.
204 Identifiers.AddKeywords(LangOpts);
205
206 // Initialize the __FTL_EVAL_METHOD__ macro to the TargetInfo.
207 setTUFPEvalMethod(getTargetInfo().getFPEvalMethod());
208
209 if (getLangOpts().getFPEvalMethod() == LangOptions::FEM_UnsetOnCommandLine)
210 // Use setting from TargetInfo.
211 setCurrentFPEvalMethod(PragmaLoc: SourceLocation(), Val: Target.getFPEvalMethod());
212 else
213 // Set initial value of __FLT_EVAL_METHOD__ from the command line.
214 setCurrentFPEvalMethod(PragmaLoc: SourceLocation(), Val: getLangOpts().getFPEvalMethod());
215}
216
217void Preprocessor::InitializeForModelFile() {
218 NumEnteredSourceFiles = 0;
219
220 // Reset pragmas
221 PragmaHandlersBackup = std::move(PragmaHandlers);
222 PragmaHandlers = std::make_unique<PragmaNamespace>(args: StringRef());
223 RegisterBuiltinPragmas();
224
225 // Reset PredefinesFileID
226 PredefinesFileID = FileID();
227}
228
229void Preprocessor::FinalizeForModelFile() {
230 NumEnteredSourceFiles = 1;
231
232 PragmaHandlers = std::move(PragmaHandlersBackup);
233}
234
235void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
236 llvm::errs() << tok::getTokenName(Kind: Tok.getKind());
237
238 if (!Tok.isAnnotation())
239 llvm::errs() << " '" << getSpelling(Tok) << "'";
240
241 if (!DumpFlags) return;
242
243 llvm::errs() << "\t";
244 if (Tok.isAtStartOfLine())
245 llvm::errs() << " [StartOfLine]";
246 if (Tok.hasLeadingSpace())
247 llvm::errs() << " [LeadingSpace]";
248 if (Tok.isExpandDisabled())
249 llvm::errs() << " [ExpandDisabled]";
250 if (Tok.isFirstPPToken())
251 llvm::errs() << " [First pp-token]";
252 if (Tok.needsCleaning()) {
253 const char *Start = SourceMgr.getCharacterData(SL: Tok.getLocation());
254 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
255 << "']";
256 }
257
258 llvm::errs() << "\tLoc=<";
259 DumpLocation(Loc: Tok.getLocation());
260 llvm::errs() << ">";
261}
262
263void Preprocessor::DumpLocation(SourceLocation Loc) const {
264 Loc.print(OS&: llvm::errs(), SM: SourceMgr);
265}
266
267void Preprocessor::DumpMacro(const MacroInfo &MI) const {
268 llvm::errs() << "MACRO: ";
269 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
270 DumpToken(Tok: MI.getReplacementToken(Tok: i));
271 llvm::errs() << " ";
272 }
273 llvm::errs() << "\n";
274}
275
276void Preprocessor::PrintStats() {
277 llvm::errs() << "\n*** Preprocessor Stats:\n";
278 llvm::errs() << NumDirectives << " directives found:\n";
279 llvm::errs() << " " << NumDefined << " #define.\n";
280 llvm::errs() << " " << NumUndefined << " #undef.\n";
281 llvm::errs() << " #include/#include_next/#import:\n";
282 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
283 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
284 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
285 llvm::errs() << " " << NumElse << " #else/#elif/#elifdef/#elifndef.\n";
286 llvm::errs() << " " << NumEndif << " #endif.\n";
287 llvm::errs() << " " << NumPragma << " #pragma.\n";
288 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
289
290 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
291 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
292 << NumFastMacroExpanded << " on the fast path.\n";
293 llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
294 << " token paste (##) operations performed, "
295 << NumFastTokenPaste << " on the fast path.\n";
296
297 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
298
299 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
300 llvm::errs() << "\n Macro Expanded Tokens: "
301 << llvm::capacity_in_bytes(X: MacroExpandedTokens);
302 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
303 // FIXME: List information for all submodules.
304 llvm::errs() << "\n Macros: "
305 << llvm::capacity_in_bytes(X: CurSubmoduleState->Macros);
306 llvm::errs() << "\n #pragma push_macro Info: "
307 << llvm::capacity_in_bytes(X: PragmaPushMacroInfo);
308 llvm::errs() << "\n Poison Reasons: "
309 << llvm::capacity_in_bytes(X: PoisonReasons);
310 llvm::errs() << "\n Comment Handlers: "
311 << llvm::capacity_in_bytes(x: CommentHandlers) << "\n";
312}
313
314Preprocessor::macro_iterator
315Preprocessor::macro_begin(bool IncludeExternalMacros) const {
316 if (IncludeExternalMacros && ExternalSource &&
317 !ReadMacrosFromExternalSource) {
318 ReadMacrosFromExternalSource = true;
319 ExternalSource->ReadDefinedMacros();
320 }
321
322 // Make sure we cover all macros in visible modules.
323 for (const ModuleMacro &Macro : ModuleMacros)
324 CurSubmoduleState->Macros.try_emplace(Key: Macro.II);
325
326 return CurSubmoduleState->Macros.begin();
327}
328
329size_t Preprocessor::getTotalMemory() const {
330 return BP.getTotalMemory()
331 + llvm::capacity_in_bytes(X: MacroExpandedTokens)
332 + Predefines.capacity() /* Predefines buffer. */
333 // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
334 // and ModuleMacros.
335 + llvm::capacity_in_bytes(X: CurSubmoduleState->Macros)
336 + llvm::capacity_in_bytes(X: PragmaPushMacroInfo)
337 + llvm::capacity_in_bytes(X: PoisonReasons)
338 + llvm::capacity_in_bytes(x: CommentHandlers);
339}
340
341Preprocessor::macro_iterator
342Preprocessor::macro_end(bool IncludeExternalMacros) const {
343 if (IncludeExternalMacros && ExternalSource &&
344 !ReadMacrosFromExternalSource) {
345 ReadMacrosFromExternalSource = true;
346 ExternalSource->ReadDefinedMacros();
347 }
348
349 return CurSubmoduleState->Macros.end();
350}
351
352/// Compares macro tokens with a specified token value sequence.
353static bool MacroDefinitionEquals(const MacroInfo *MI,
354 ArrayRef<TokenValue> Tokens) {
355 return Tokens.size() == MI->getNumTokens() &&
356 std::equal(first1: Tokens.begin(), last1: Tokens.end(), first2: MI->tokens_begin());
357}
358
359StringRef Preprocessor::getLastMacroWithSpelling(
360 SourceLocation Loc,
361 ArrayRef<TokenValue> Tokens) const {
362 SourceLocation BestLocation;
363 StringRef BestSpelling;
364 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
365 I != E; ++I) {
366 const MacroDirective::DefInfo
367 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
368 if (!Def || !Def.getMacroInfo())
369 continue;
370 if (!Def.getMacroInfo()->isObjectLike())
371 continue;
372 if (!MacroDefinitionEquals(MI: Def.getMacroInfo(), Tokens))
373 continue;
374 SourceLocation Location = Def.getLocation();
375 // Choose the macro defined latest.
376 if (BestLocation.isInvalid() ||
377 (Location.isValid() &&
378 SourceMgr.isBeforeInTranslationUnit(LHS: BestLocation, RHS: Location))) {
379 BestLocation = Location;
380 BestSpelling = I->first->getName();
381 }
382 }
383 return BestSpelling;
384}
385
386void Preprocessor::recomputeCurLexerKind() {
387 if (CurLexer)
388 CurLexerCallback = CurLexer->isDependencyDirectivesLexer()
389 ? CLK_DependencyDirectivesLexer
390 : CLK_Lexer;
391 else if (CurTokenLexer)
392 CurLexerCallback = CLK_TokenLexer;
393 else
394 CurLexerCallback = CLK_CachingLexer;
395}
396
397bool Preprocessor::SetCodeCompletionPoint(FileEntryRef File,
398 unsigned CompleteLine,
399 unsigned CompleteColumn) {
400 assert(CompleteLine && CompleteColumn && "Starts from 1:1");
401 assert(!CodeCompletionFile && "Already set");
402
403 // Load the actual file's contents.
404 std::optional<llvm::MemoryBufferRef> Buffer =
405 SourceMgr.getMemoryBufferForFileOrNone(File);
406 if (!Buffer)
407 return true;
408
409 // Find the byte position of the truncation point.
410 const char *Position = Buffer->getBufferStart();
411 for (unsigned Line = 1; Line < CompleteLine; ++Line) {
412 for (; *Position; ++Position) {
413 if (*Position != '\r' && *Position != '\n')
414 continue;
415
416 // Eat \r\n or \n\r as a single line.
417 if ((Position[1] == '\r' || Position[1] == '\n') &&
418 Position[0] != Position[1])
419 ++Position;
420 ++Position;
421 break;
422 }
423 }
424
425 Position += CompleteColumn - 1;
426
427 // If pointing inside the preamble, adjust the position at the beginning of
428 // the file after the preamble.
429 if (SkipMainFilePreamble.first &&
430 SourceMgr.getFileEntryForID(FID: SourceMgr.getMainFileID()) == File) {
431 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
432 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
433 }
434
435 if (Position > Buffer->getBufferEnd())
436 Position = Buffer->getBufferEnd();
437
438 CodeCompletionFile = File;
439 CodeCompletionOffset = Position - Buffer->getBufferStart();
440
441 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
442 Size: Buffer->getBufferSize() + 1, BufferName: Buffer->getBufferIdentifier());
443 char *NewBuf = NewBuffer->getBufferStart();
444 char *NewPos = std::copy(first: Buffer->getBufferStart(), last: Position, result: NewBuf);
445 *NewPos = '\0';
446 std::copy(first: Position, last: Buffer->getBufferEnd(), result: NewPos+1);
447 SourceMgr.overrideFileContents(SourceFile: File, Buffer: std::move(NewBuffer));
448
449 return false;
450}
451
452void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir,
453 bool IsAngled) {
454 setCodeCompletionReached();
455 if (CodeComplete)
456 CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
457}
458
459void Preprocessor::CodeCompleteNaturalLanguage() {
460 setCodeCompletionReached();
461 if (CodeComplete)
462 CodeComplete->CodeCompleteNaturalLanguage();
463}
464
465/// getSpelling - This method is used to get the spelling of a token into a
466/// SmallVector. Note that the returned StringRef may not point to the
467/// supplied buffer if a copy can be avoided.
468StringRef Preprocessor::getSpelling(const Token &Tok,
469 SmallVectorImpl<char> &Buffer,
470 bool *Invalid) const {
471 // NOTE: this has to be checked *before* testing for an IdentifierInfo.
472 if (Tok.isNot(K: tok::raw_identifier) && !Tok.hasUCN()) {
473 // Try the fast path.
474 if (const IdentifierInfo *II = Tok.getIdentifierInfo())
475 return II->getName();
476 }
477
478 // Resize the buffer if we need to copy into it.
479 if (Tok.needsCleaning())
480 Buffer.resize(N: Tok.getLength());
481
482 const char *Ptr = Buffer.data();
483 unsigned Len = getSpelling(Tok, Buffer&: Ptr, Invalid);
484 return StringRef(Ptr, Len);
485}
486
487/// CreateString - Plop the specified string into a scratch buffer and return a
488/// location for it. If specified, the source location provides a source
489/// location for the token.
490void Preprocessor::CreateString(StringRef Str, Token &Tok,
491 SourceLocation ExpansionLocStart,
492 SourceLocation ExpansionLocEnd) {
493 Tok.setLength(Str.size());
494
495 const char *DestPtr;
496 SourceLocation Loc = ScratchBuf->getToken(Buf: Str.data(), Len: Str.size(), DestPtr);
497
498 if (ExpansionLocStart.isValid())
499 Loc = SourceMgr.createExpansionLoc(SpellingLoc: Loc, ExpansionLocStart,
500 ExpansionLocEnd, Length: Str.size());
501 Tok.setLocation(Loc);
502
503 // If this is a raw identifier or a literal token, set the pointer data.
504 if (Tok.is(K: tok::raw_identifier))
505 Tok.setRawIdentifierData(DestPtr);
506 else if (Tok.isLiteral())
507 Tok.setLiteralData(DestPtr);
508}
509
510SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
511 auto &SM = getSourceManager();
512 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
513 FileIDAndOffset LocInfo = SM.getDecomposedLoc(Loc: SpellingLoc);
514 bool Invalid = false;
515 StringRef Buffer = SM.getBufferData(FID: LocInfo.first, Invalid: &Invalid);
516 if (Invalid)
517 return SourceLocation();
518
519 // FIXME: We could consider re-using spelling for tokens we see repeatedly.
520 const char *DestPtr;
521 SourceLocation Spelling =
522 ScratchBuf->getToken(Buf: Buffer.data() + LocInfo.second, Len: Length, DestPtr);
523 return SM.createTokenSplitLoc(SpellingLoc: Spelling, TokenStart: Loc, TokenEnd: Loc.getLocWithOffset(Offset: Length));
524}
525
526Module *Preprocessor::getCurrentModule() {
527 if (!getLangOpts().isCompilingModule())
528 return nullptr;
529
530 return getHeaderSearchInfo().lookupModule(ModuleName: getLangOpts().CurrentModule);
531}
532
533Module *Preprocessor::getCurrentModuleImplementation() {
534 if (!getLangOpts().isCompilingModuleImplementation())
535 return nullptr;
536
537 return getHeaderSearchInfo().lookupModule(ModuleName: getLangOpts().ModuleName);
538}
539
540//===----------------------------------------------------------------------===//
541// Preprocessor Initialization Methods
542//===----------------------------------------------------------------------===//
543
544/// EnterMainSourceFile - Enter the specified FileID as the main source file,
545/// which implicitly adds the builtin defines etc.
546void Preprocessor::EnterMainSourceFile() {
547 // We do not allow the preprocessor to reenter the main file. Doing so will
548 // cause FileID's to accumulate information from both runs (e.g. #line
549 // information) and predefined macros aren't guaranteed to be set properly.
550 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
551 FileID MainFileID = SourceMgr.getMainFileID();
552
553 // If MainFileID is loaded it means we loaded an AST file, no need to enter
554 // a main file.
555 if (!SourceMgr.isLoadedFileID(FID: MainFileID)) {
556 // Enter the main file source buffer.
557 EnterSourceFile(FID: MainFileID, Dir: nullptr, Loc: SourceLocation());
558
559 // If we've been asked to skip bytes in the main file (e.g., as part of a
560 // precompiled preamble), do so now.
561 if (SkipMainFilePreamble.first > 0)
562 CurLexer->SetByteOffset(Offset: SkipMainFilePreamble.first,
563 StartOfLine: SkipMainFilePreamble.second);
564
565 // Tell the header info that the main file was entered. If the file is later
566 // #imported, it won't be re-entered.
567 if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(FID: MainFileID))
568 markIncluded(File: *FE);
569
570 // Record the first PP token in the main file. This is used to generate
571 // better diagnostics for C++ modules.
572 //
573 // // This is a comment.
574 // #define FOO int // note: add 'module;' to the start of the file
575 // ^ FirstPPToken // to introduce a global module fragment.
576 //
577 // export module M; // error: module declaration must occur
578 // // at the start of the translation unit.
579 if (getLangOpts().CPlusPlusModules) {
580 std::optional<Token> FirstPPTok = CurLexer->peekNextPPToken();
581 if (FirstPPTok && FirstPPTok->isFirstPPToken())
582 FirstPPTokenLoc = FirstPPTok->getLocation();
583 }
584 }
585
586 // Preprocess Predefines to populate the initial preprocessor state.
587 std::unique_ptr<llvm::MemoryBuffer> SB =
588 llvm::MemoryBuffer::getMemBufferCopy(InputData: Predefines, BufferName: "<built-in>");
589 assert(SB && "Cannot create predefined source buffer");
590 FileID FID = SourceMgr.createFileID(Buffer: std::move(SB));
591 assert(FID.isValid() && "Could not create FileID for predefines?");
592 setPredefinesFileID(FID);
593
594 // Start parsing the predefines.
595 EnterSourceFile(FID, Dir: nullptr, Loc: SourceLocation());
596
597 if (!PPOpts.PCHThroughHeader.empty()) {
598 // Lookup and save the FileID for the through header. If it isn't found
599 // in the search path, it's a fatal error.
600 OptionalFileEntryRef File = LookupFile(
601 FilenameLoc: SourceLocation(), Filename: PPOpts.PCHThroughHeader,
602 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr,
603 /*CurDir=*/nullptr, /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
604 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
605 /*IsFrameworkFound=*/nullptr);
606 if (!File) {
607 Diag(Loc: SourceLocation(), DiagID: diag::err_pp_through_header_not_found)
608 << PPOpts.PCHThroughHeader;
609 return;
610 }
611 setPCHThroughHeaderFileID(
612 SourceMgr.createFileID(SourceFile: *File, IncludePos: SourceLocation(), FileCharacter: SrcMgr::C_User));
613 }
614
615 // Skip tokens from the Predefines and if needed the main file.
616 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
617 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
618 SkipTokensWhileUsingPCH();
619}
620
621void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
622 assert(PCHThroughHeaderFileID.isInvalid() &&
623 "PCHThroughHeaderFileID already set!");
624 PCHThroughHeaderFileID = FID;
625}
626
627bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) {
628 assert(PCHThroughHeaderFileID.isValid() &&
629 "Invalid PCH through header FileID");
630 return FE == SourceMgr.getFileEntryForID(FID: PCHThroughHeaderFileID);
631}
632
633bool Preprocessor::creatingPCHWithThroughHeader() {
634 return TUKind == TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
635 PCHThroughHeaderFileID.isValid();
636}
637
638bool Preprocessor::usingPCHWithThroughHeader() {
639 return TUKind != TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
640 PCHThroughHeaderFileID.isValid();
641}
642
643bool Preprocessor::creatingPCHWithPragmaHdrStop() {
644 return TUKind == TU_Prefix && PPOpts.PCHWithHdrStop;
645}
646
647bool Preprocessor::usingPCHWithPragmaHdrStop() {
648 return TUKind != TU_Prefix && PPOpts.PCHWithHdrStop;
649}
650
651/// Skip tokens until after the #include of the through header or
652/// until after a #pragma hdrstop is seen. Tokens in the predefines file
653/// and the main file may be skipped. If the end of the predefines file
654/// is reached, skipping continues into the main file. If the end of the
655/// main file is reached, it's a fatal error.
656void Preprocessor::SkipTokensWhileUsingPCH() {
657 bool ReachedMainFileEOF = false;
658 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
659 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
660 Token Tok;
661 while (true) {
662 bool InPredefines =
663 (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
664 CurLexerCallback(*this, Tok);
665 if (Tok.is(K: tok::eof) && !InPredefines) {
666 ReachedMainFileEOF = true;
667 break;
668 }
669 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
670 break;
671 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
672 break;
673 }
674 if (ReachedMainFileEOF) {
675 if (UsingPCHThroughHeader)
676 Diag(Loc: SourceLocation(), DiagID: diag::err_pp_through_header_not_seen)
677 << PPOpts.PCHThroughHeader << 1;
678 else if (!PPOpts.PCHWithHdrStopCreate)
679 Diag(Loc: SourceLocation(), DiagID: diag::err_pp_pragma_hdrstop_not_seen);
680 }
681}
682
683void Preprocessor::replayPreambleConditionalStack() {
684 // Restore the conditional stack from the preamble, if there is one.
685 if (PreambleConditionalStack.isReplaying()) {
686 assert(CurPPLexer &&
687 "CurPPLexer is null when calling replayPreambleConditionalStack.");
688 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
689 PreambleConditionalStack.doneReplaying();
690 if (PreambleConditionalStack.reachedEOFWhileSkipping())
691 SkipExcludedConditionalBlock(
692 HashTokenLoc: PreambleConditionalStack.SkipInfo->HashTokenLoc,
693 IfTokenLoc: PreambleConditionalStack.SkipInfo->IfTokenLoc,
694 FoundNonSkipPortion: PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
695 FoundElse: PreambleConditionalStack.SkipInfo->FoundElse,
696 ElseLoc: PreambleConditionalStack.SkipInfo->ElseLoc);
697 }
698}
699
700void Preprocessor::EndSourceFile() {
701 // Notify the client that we reached the end of the source file.
702 if (Callbacks)
703 Callbacks->EndOfMainFile();
704}
705
706//===----------------------------------------------------------------------===//
707// Lexer Event Handling.
708//===----------------------------------------------------------------------===//
709
710/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
711/// identifier information for the token and install it into the token,
712/// updating the token kind accordingly.
713IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
714 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
715
716 // Look up this token, see if it is a macro, or if it is a language keyword.
717 IdentifierInfo *II;
718 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
719 // No cleaning needed, just use the characters from the lexed buffer.
720 II = getIdentifierInfo(Name: Identifier.getRawIdentifier());
721 } else {
722 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
723 SmallString<64> IdentifierBuffer;
724 StringRef CleanedStr = getSpelling(Tok: Identifier, Buffer&: IdentifierBuffer);
725
726 if (Identifier.hasUCN()) {
727 SmallString<64> UCNIdentifierBuffer;
728 expandUCNs(Buf&: UCNIdentifierBuffer, Input: CleanedStr);
729 II = getIdentifierInfo(Name: UCNIdentifierBuffer);
730 } else {
731 II = getIdentifierInfo(Name: CleanedStr);
732 }
733 }
734
735 // Update the token info (identifier info and appropriate token kind).
736 // FIXME: the raw_identifier may contain leading whitespace which is removed
737 // from the cleaned identifier token. The SourceLocation should be updated to
738 // refer to the non-whitespace character. For instance, the text "\\\nB" (a
739 // line continuation before 'B') is parsed as a single tok::raw_identifier and
740 // is cleaned to tok::identifier "B". After cleaning the token's length is
741 // still 3 and the SourceLocation refers to the location of the backslash.
742 Identifier.setIdentifierInfo(II);
743 Identifier.setKind(II->getTokenID());
744
745 return II;
746}
747
748void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
749 PoisonReasons[II] = DiagID;
750}
751
752void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
753 assert(Ident__exception_code && Ident__exception_info);
754 assert(Ident___exception_code && Ident___exception_info);
755 Ident__exception_code->setIsPoisoned(Poison);
756 Ident___exception_code->setIsPoisoned(Poison);
757 Ident_GetExceptionCode->setIsPoisoned(Poison);
758 Ident__exception_info->setIsPoisoned(Poison);
759 Ident___exception_info->setIsPoisoned(Poison);
760 Ident_GetExceptionInfo->setIsPoisoned(Poison);
761 Ident__abnormal_termination->setIsPoisoned(Poison);
762 Ident___abnormal_termination->setIsPoisoned(Poison);
763 Ident_AbnormalTermination->setIsPoisoned(Poison);
764}
765
766void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
767 assert(Identifier.getIdentifierInfo() &&
768 "Can't handle identifiers without identifier info!");
769 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
770 PoisonReasons.find(Val: Identifier.getIdentifierInfo());
771 if(it == PoisonReasons.end())
772 Diag(Tok: Identifier, DiagID: diag::err_pp_used_poisoned_id);
773 else
774 Diag(Tok: Identifier,DiagID: it->second) << Identifier.getIdentifierInfo();
775}
776
777void Preprocessor::updateOutOfDateIdentifier(const IdentifierInfo &II) const {
778 assert(II.isOutOfDate() && "not out of date");
779 assert(getExternalSource() &&
780 "getExternalSource() should not return nullptr");
781 getExternalSource()->updateOutOfDateIdentifier(II);
782}
783
784/// HandleIdentifier - This callback is invoked when the lexer reads an
785/// identifier. This callback looks up the identifier in the map and/or
786/// potentially macro expands it or turns it into a named token (like 'for').
787///
788/// Note that callers of this method are guarded by checking the
789/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
790/// IdentifierInfo methods that compute these properties will need to change to
791/// match.
///
/// \param Identifier The identifier token being processed; it must carry an
/// IdentifierInfo (asserted). Its kind and flags may be updated in place.
/// \returns The result of HandleMacroExpandedIdentifier() when the identifier
/// is handed off for macro expansion; \c true on every other path.
792bool Preprocessor::HandleIdentifier(Token &Identifier) {
793 assert(Identifier.getIdentifierInfo() &&
794 "Can't handle identifiers without identifier info!");
795
796 IdentifierInfo &II = *Identifier.getIdentifierInfo();
797
798 // If the information about this identifier is out of date, update it from
799 // the external source.
800 // We have to treat __VA_ARGS__ in a special way, since it gets
801 // serialized with isPoisoned = true, but our preprocessor may have
802 // unpoisoned it if we're defining a C99 macro.
 // The same save/restore of the poison bit is applied to __VA_OPT__.
803 if (II.isOutOfDate()) {
804 bool CurrentIsPoisoned = false;
805 const bool IsSpecialVariadicMacro =
806 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
807 if (IsSpecialVariadicMacro)
808 CurrentIsPoisoned = II.isPoisoned();
809
810 updateOutOfDateIdentifier(II);
 // Re-derive the token kind from the refreshed identifier info.
811 Identifier.setKind(II.getTokenID());
812
 // Put back the poison state that was captured before the update.
813 if (IsSpecialVariadicMacro)
814 II.setIsPoisoned(CurrentIsPoisoned);
815 }
816
817 // If this identifier was poisoned, and if it was not produced from a macro
818 // expansion, emit an error.
819 if (II.isPoisoned() && CurPPLexer) {
820 HandlePoisonedIdentifier(Identifier);
821 }
822
823 // If this is a macro to be expanded, do it.
824 if (const MacroDefinition MD = getMacroDefinition(II: &II)) {
825 const auto *MI = MD.getMacroInfo();
826 assert(MI && "macro definition with no macro info?");
827 if (!DisableMacroExpansion) {
828 if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
829 // C99 6.10.3p10: If the preprocessing token immediately after the
830 // macro name isn't a '(', this macro should not be expanded.
831 if (!MI->isFunctionLike() || isNextPPTokenOneOf(Ks: tok::l_paren))
832 return HandleMacroExpandedIdentifier(Identifier, MD);
833 } else {
834 // C99 6.10.3.4p2 says that a disabled macro may never again be
835 // expanded, even if it's in a context where it could be expanded in the
836 // future.
837 Identifier.setFlag(Token::DisableExpand);
 // Only diagnose when this use would otherwise have been an expansion:
 // an object-like macro, or a function-like macro followed by '('.
838 if (MI->isObjectLike() || isNextPPTokenOneOf(Ks: tok::l_paren))
839 Diag(Tok: Identifier, DiagID: diag::pp_disabled_macro_expansion);
840 }
841 }
842 }
843
844 // If this identifier is a keyword in a newer Standard or proposed Standard,
845 // produce a warning. Don't warn if we're not considering macro expansion,
846 // since this identifier might be the name of a macro.
847 // FIXME: This warning is disabled in cases where it shouldn't be, like
848 // "#define constexpr constexpr", "int constexpr;"
849 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
850 Diag(Tok: Identifier, DiagID: getIdentifierTable().getFutureCompatDiagKind(II, LangOpts: getLangOpts()))
851 << II.getName();
852 // Don't diagnose this keyword again in this translation unit.
853 II.setIsFutureCompatKeyword(false);
854 }
855
856 // If this identifier would be a keyword in C++, diagnose as a compatibility
857 // issue.
858 if (II.IsKeywordInCPlusPlus() && !DisableMacroExpansion)
859 Diag(Tok: Identifier, DiagID: diag::warn_pp_identifier_is_cpp_keyword) << &II;
860
861 // If this is an extension token, diagnose its use.
862 // We avoid diagnosing tokens that originate from macro definitions.
863 // FIXME: This warning is disabled in cases where it shouldn't be,
864 // like "#define TY typeof", "TY(1) x".
865 if (II.isExtensionToken() && !DisableMacroExpansion)
866 Diag(Tok: Identifier, DiagID: diag::ext_token_used);
867
868 // If this is the 'import' contextual keyword following an '@', note
869 // that the next token indicates a module name.
870 //
871 // Note that we do not treat 'import' as a contextual
872 // keyword when we're in a caching lexer, because caching lexers only get
873 // used in contexts where import declarations are disallowed.
874 //
875 // Likewise if this is the standard C++ import keyword.
876 if (((LastTokenWasAt && II.isModulesImport()) ||
877 Identifier.is(K: tok::kw_import)) &&
878 !InMacroArgs && !DisableMacroExpansion &&
879 (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
880 CurLexerCallback != CLK_CachingLexer) {
 // Remember where the import started, reset the accumulated module path,
 // and switch the lexer callback to CLK_LexAfterModuleImport.
881 ModuleImportLoc = Identifier.getLocation();
882 NamedModuleImportPath.clear();
883 IsAtImport = true;
884 ModuleImportExpectsIdentifier = true;
885 CurLexerCallback = CLK_LexAfterModuleImport;
886 }
887 return true;
888}
889
/// Lex the next token into \p Result.
///
/// The current lexer callback is invoked repeatedly until it reports that a
/// token was produced. Afterwards this performs the per-token bookkeeping:
/// capturing the identifier attached to a code-completion token, updating the
/// C++20 module / import-seq / global-module-fragment trackers, recording
/// lexer checkpoints, remembering whether the token was '@', and counting the
/// token and notifying OnToken.
890void Preprocessor::Lex(Token &Result) {
891 ++LexLevel;
892
893 // We loop here until a lex function returns a token; this avoids recursion.
894 while (!CurLexerCallback(*this, Result))
895 ;
896
 // Bail out before any bookkeeping when an unknown token comes back and the
 // module loader has recorded a fatal failure. Note that LexLevel is not
 // decremented on this path.
897 if (Result.is(K: tok::unknown) && TheModuleLoader.HadFatalFailure)
898 return;
899
900 if (Result.is(K: tok::code_completion) && Result.getIdentifierInfo()) {
901 // Remember the identifier before code completion token.
902 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
903 setCodeCompletionTokenRange(Start: Result.getLocation(), End: Result.getEndLoc());
904 // Set IdentifierInfo to null to avoid confusing code that handles both
905 // identifiers and completion tokens.
906 Result.setIdentifierInfo(nullptr);
907 }
908
909 // Update StdCXXImportSeqState to track our position within a C++20 import-seq
910 // if this token is being produced as a result of phase 4 of translation.
911 // Update TrackGMFState to decide if we are currently in a Global Module
912 // Fragment. GMF state updates should precede StdCXXImportSeq ones, since GMF state
913 // depends on the prevailing StdCXXImportSeq state in two cases.
914 if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
915 !Result.getFlag(Flag: Token::IsReinjected)) {
916 switch (Result.getKind()) {
917 case tok::l_paren: case tok::l_square: case tok::l_brace:
918 StdCXXImportSeqState.handleOpenBracket();
919 break;
920 case tok::r_paren: case tok::r_square:
921 StdCXXImportSeqState.handleCloseBracket();
922 break;
923 case tok::r_brace:
924 StdCXXImportSeqState.handleCloseBrace();
925 break;
 // The macro below expands to one 'case tok::annot_<X>:' label per
 // PRAGMA_ANNOTATION entry of TokenKinds.def; those labels fall into the
 // ';' handling that follows.
926#define PRAGMA_ANNOTATION(X) case tok::annot_##X:
927// For `#pragma ...` mimic ';'.
928#include "clang/Basic/TokenKinds.def"
929#undef PRAGMA_ANNOTATION
930 // This token is injected to represent the translation of '#include "a.h"'
931 // into "import a.h;". Mimic the notional ';'.
932 case tok::annot_module_include:
933 case tok::semi:
934 TrackGMFState.handleSemi();
935 StdCXXImportSeqState.handleSemi();
936 ModuleDeclState.handleSemi();
937 break;
938 case tok::header_name:
939 case tok::annot_header_unit:
940 StdCXXImportSeqState.handleHeaderName();
941 break;
942 case tok::kw_export:
943 TrackGMFState.handleExport();
944 StdCXXImportSeqState.handleExport();
945 ModuleDeclState.handleExport();
946 break;
947 case tok::colon:
948 ModuleDeclState.handleColon();
949 break;
950 case tok::period:
951 ModuleDeclState.handlePeriod();
952 break;
953 case tok::identifier:
954 // Check "import" and "module" when there is no open bracket. The two
955 // identifiers are not meaningful with open brackets.
956 if (StdCXXImportSeqState.atTopLevel()) {
957 if (Result.getIdentifierInfo()->isModulesImport()) {
958 TrackGMFState.handleImport(AfterTopLevelTokenSeq: StdCXXImportSeqState.afterTopLevelSeq());
959 StdCXXImportSeqState.handleImport();
 // Once the tracker reports that we are past an import-seq, start
 // collecting the import: record its location, reset the path and
 // switch the lexer callback to CLK_LexAfterModuleImport.
960 if (StdCXXImportSeqState.afterImportSeq()) {
961 ModuleImportLoc = Result.getLocation();
962 NamedModuleImportPath.clear();
963 IsAtImport = false;
964 ModuleImportExpectsIdentifier = true;
965 CurLexerCallback = CLK_LexAfterModuleImport;
966 }
967 break;
968 } else if (Result.getIdentifierInfo() == getIdentifierInfo(Name: "module")) {
969 TrackGMFState.handleModule(AfterTopLevelTokenSeq: StdCXXImportSeqState.afterTopLevelSeq());
970 ModuleDeclState.handleModule();
971 break;
972 }
973 }
 // Any other identifier is fed to the module-declaration tracker; unless
 // it is still a module candidate, treat the token like any other
 // ("misc") token below.
974 ModuleDeclState.handleIdentifier(Identifier: Result.getIdentifierInfo());
975 if (ModuleDeclState.isModuleCandidate())
976 break;
977 [[fallthrough]];
978 default:
979 TrackGMFState.handleMisc();
980 StdCXXImportSeqState.handleMisc();
981 ModuleDeclState.handleMisc();
982 break;
983 }
984 }
985
 // Every CheckPointStepSize tokens lexed while a CurLexer is active, record
 // that lexer's current buffer pointer as a checkpoint for its file.
986 if (CurLexer && ++CheckPointCounter == CheckPointStepSize) {
987 CheckPoints[CurLexer->getFileID()].push_back(Elt: CurLexer->BufferPtr);
988 CheckPointCounter = 0;
989 }
990
 // Remember whether this token was '@'; HandleIdentifier consults this flag
 // when deciding whether an identifier is the '@import' contextual keyword.
991 LastTokenWasAt = Result.is(K: tok::at);
992 --LexLevel;
993
 // Tokens that are not reinjected are reported to OnToken (when set) at the
 // outermost lex level or whenever PreprocessToken is set; only
 // outermost-level tokens are counted in TokenCount.
994 if ((LexLevel == 0 || PreprocessToken) &&
995 !Result.getFlag(Flag: Token::IsReinjected)) {
996 if (LexLevel == 0)
997 ++TokenCount;
998 if (OnToken)
999 OnToken(Result);
1000 }
1001}
1002
1003void Preprocessor::LexTokensUntilEOF(std::vector<Token> *Tokens) {
1004 while (1) {
1005 Token Tok;
1006 Lex(Result&: Tok);
1007 if (Tok.isOneOf(Ks: tok::unknown, Ks: tok::eof, Ks: tok::eod,
1008 Ks: tok::annot_repl_input_end))
1009 break;
1010 if (Tokens != nullptr)
1011 Tokens->push_back(x: Tok);
1012 }
1013}
1014
1015/// Lex a header-name token (including one formed from header-name-tokens if
1016/// \p AllowMacroExpansion is \c true).
1017///
1018/// \param FilenameTok Filled in with the next token. On success, this will
1019/// be either a header_name token. On failure, it will be whatever other
1020/// token was found instead.
1021/// \param AllowMacroExpansion If \c true, allow the header name to be formed
1022/// by macro expansion (concatenating tokens as necessary if the first
1023/// token is a '<').
1024/// \return \c true if we reached EOD or EOF while looking for a > token in
1025/// a concatenated header name and diagnosed it. \c false otherwise.
1026bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
1027 // Lex using header-name tokenization rules if tokens are being lexed from
1028 // a file. Just grab a token normally if we're in a macro expansion.
1029 if (CurPPLexer)
1030 CurPPLexer->LexIncludeFilename(FilenameTok);
1031 else
1032 Lex(Result&: FilenameTok);
1033
1034 // This could be a <foo/bar.h> file coming from a macro expansion. In this
1035 // case, glue the tokens together into an angle_string_literal token.
1036 SmallString<128> FilenameBuffer;
1037 if (FilenameTok.is(K: tok::less) && AllowMacroExpansion) {
1038 bool StartOfLine = FilenameTok.isAtStartOfLine();
1039 bool LeadingSpace = FilenameTok.hasLeadingSpace();
1040 bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
1041
1042 SourceLocation Start = FilenameTok.getLocation();
1043 SourceLocation End;
1044 FilenameBuffer.push_back(Elt: '<');
1045
1046 // Consume tokens until we find a '>'.
1047 // FIXME: A header-name could be formed starting or ending with an
1048 // alternative token. It's not clear whether that's ill-formed in all
1049 // cases.
1050 while (FilenameTok.isNot(K: tok::greater)) {
1051 Lex(Result&: FilenameTok);
1052 if (FilenameTok.isOneOf(Ks: tok::eod, Ks: tok::eof)) {
1053 Diag(Loc: FilenameTok.getLocation(), DiagID: diag::err_expected) << tok::greater;
1054 Diag(Loc: Start, DiagID: diag::note_matching) << tok::less;
1055 return true;
1056 }
1057
1058 End = FilenameTok.getLocation();
1059
1060 // FIXME: Provide code completion for #includes.
1061 if (FilenameTok.is(K: tok::code_completion)) {
1062 setCodeCompletionReached();
1063 Lex(Result&: FilenameTok);
1064 continue;
1065 }
1066
1067 // Append the spelling of this token to the buffer. If there was a space
1068 // before it, add it now.
1069 if (FilenameTok.hasLeadingSpace())
1070 FilenameBuffer.push_back(Elt: ' ');
1071
1072 // Get the spelling of the token, directly into FilenameBuffer if
1073 // possible.
1074 size_t PreAppendSize = FilenameBuffer.size();
1075 FilenameBuffer.resize(N: PreAppendSize + FilenameTok.getLength());
1076
1077 const char *BufPtr = &FilenameBuffer[PreAppendSize];
1078 unsigned ActualLen = getSpelling(Tok: FilenameTok, Buffer&: BufPtr);
1079
1080 // If the token was spelled somewhere else, copy it into FilenameBuffer.
1081 if (BufPtr != &FilenameBuffer[PreAppendSize])
1082 memcpy(dest: &FilenameBuffer[PreAppendSize], src: BufPtr, n: ActualLen);
1083
1084 // Resize FilenameBuffer to the correct size.
1085 if (FilenameTok.getLength() != ActualLen)
1086 FilenameBuffer.resize(N: PreAppendSize + ActualLen);
1087 }
1088
1089 FilenameTok.startToken();
1090 FilenameTok.setKind(tok::header_name);
1091 FilenameTok.setFlagValue(Flag: Token::StartOfLine, Val: StartOfLine);
1092 FilenameTok.setFlagValue(Flag: Token::LeadingSpace, Val: LeadingSpace);
1093 FilenameTok.setFlagValue(Flag: Token::LeadingEmptyMacro, Val: LeadingEmptyMacro);
1094 CreateString(Str: FilenameBuffer, Tok&: FilenameTok, ExpansionLocStart: Start, ExpansionLocEnd: End);
1095 } else if (FilenameTok.is(K: tok::string_literal) && AllowMacroExpansion) {
1096 // Convert a string-literal token of the form " h-char-sequence "
1097 // (produced by macro expansion) into a header-name token.
1098 //
1099 // The rules for header-names don't quite match the rules for
1100 // string-literals, but all the places where they differ result in
1101 // undefined behavior, so we can and do treat them the same.
1102 //
1103 // A string-literal with a prefix or suffix is not translated into a
1104 // header-name. This could theoretically be observable via the C++20
1105 // context-sensitive header-name formation rules.
1106 StringRef Str = getSpelling(Tok: FilenameTok, Buffer&: FilenameBuffer);
1107 if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
1108 FilenameTok.setKind(tok::header_name);
1109 }
1110
1111 return false;
1112}
1113
1114/// Collect the tokens of a C++20 pp-import-suffix.
1115void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
1116 // FIXME: For error recovery, consider recognizing attribute syntax here
1117 // and terminating / diagnosing a missing semicolon if we find anything
1118 // else? (Can we leave that to the parser?)
1119 unsigned BracketDepth = 0;
1120 while (true) {
1121 Toks.emplace_back();
1122 Lex(Result&: Toks.back());
1123
1124 switch (Toks.back().getKind()) {
1125 case tok::l_paren: case tok::l_square: case tok::l_brace:
1126 ++BracketDepth;
1127 break;
1128
1129 case tok::r_paren: case tok::r_square: case tok::r_brace:
1130 if (BracketDepth == 0)
1131 return;
1132 --BracketDepth;
1133 break;
1134
1135 case tok::semi:
1136 if (BracketDepth == 0)
1137 return;
1138 break;
1139
1140 case tok::eof:
1141 return;
1142
1143 default:
1144 break;
1145 }
1146 }
1147}
1148
1149
1150/// Lex a token following the 'import' contextual keyword.
1151///
1152/// pp-import: [C++20]
1153/// import header-name pp-import-suffix[opt] ;
1154/// import header-name-tokens pp-import-suffix[opt] ;
1155/// [ObjC] @ import module-name ;
1156/// [Clang] import module-name ;
1157///
1158/// header-name-tokens:
1159/// string-literal
1160/// < [any sequence of preprocessing-tokens other than >] >
1161///
1162/// module-name:
1163/// module-name-qualifier[opt] identifier
1164///
1165/// module-name-qualifier
1166/// module-name-qualifier[opt] identifier .
1167///
1168/// We respond to a pp-import by importing macros from the named module.
///
/// \param Result Receives the token lexed after 'import'.
/// \returns \c false on the paths that push collected tokens back into the
/// token stream through EnterTokenStream (so the tokens are delivered by a
/// later lex step); \c true on all other paths.
/// NOTE(review): presumably the return value feeds the callback loop in
/// Preprocessor::Lex, where \c true means "Result holds a token" - confirm.
1169bool Preprocessor::LexAfterModuleImport(Token &Result) {
1170 // Figure out what kind of lexer we actually have.
1171 recomputeCurLexerKind();
1172
1173 // Lex the next token. The header-name lexing rules are used at the start of
1174 // a pp-import.
1175 //
1176 // For now, we only support header-name imports in C++20 mode.
1177 // FIXME: Should we allow this in all language modes that support an import
1178 // declaration as an extension?
1179 if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
1180 if (LexHeaderName(FilenameTok&: Result))
1181 return true;
1182
 // 'import :partition' inside a named module: seed the import path with
 // "<primary-name>:" so that the identifiers that follow are appended to
 // it, and keep lexing through this callback.
1183 if (Result.is(K: tok::colon) && ModuleDeclState.isNamedModule()) {
1184 std::string Name = ModuleDeclState.getPrimaryName().str();
1185 Name += ":";
1186 NamedModuleImportPath.emplace_back(Args: Result.getLocation(),
1187 Args: getIdentifierInfo(Name));
1188 CurLexerCallback = CLK_LexAfterModuleImport;
1189 return true;
1190 }
1191 } else {
1192 Lex(Result);
1193 }
1194
1195 // Allocate a holding buffer for a sequence of tokens and introduce it into
1196 // the token stream.
1197 auto EnterTokens = [this](ArrayRef<Token> Toks) {
1198 auto ToksCopy = std::make_unique<Token[]>(num: Toks.size());
1199 std::copy(first: Toks.begin(), last: Toks.end(), result: ToksCopy.get());
1200 EnterTokenStream(Toks: std::move(ToksCopy), NumToks: Toks.size(),
1201 /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
1202 };
1203
1204 bool ImportingHeader = Result.is(K: tok::header_name);
1205 // Check for a header-name.
1206 SmallVector<Token, 32> Suffix;
1207 if (ImportingHeader) {
1208 // Enter the header-name token into the token stream; a Lex action cannot
1209 // both return a token and cache tokens (doing so would corrupt the token
1210 // cache if the call to Lex comes from CachingLex / PeekAhead).
1211 Suffix.push_back(Elt: Result);
1212
1213 // Consume the pp-import-suffix and expand any macros in it now. We'll add
1214 // it back into the token stream later.
1215 CollectPpImportSuffix(Toks&: Suffix);
1216 if (Suffix.back().isNot(K: tok::semi)) {
1217 // This is not a pp-import after all.
1218 EnterTokens(Suffix);
1219 return false;
1220 }
1221
1222 // C++2a [cpp.module]p1:
1223 // The ';' preprocessing-token terminating a pp-import shall not have
1224 // been produced by macro replacement.
1225 SourceLocation SemiLoc = Suffix.back().getLocation();
1226 if (SemiLoc.isMacroID())
1227 Diag(Loc: SemiLoc, DiagID: diag::err_header_import_semi_in_macro);
1228
1229 // Reconstitute the import token.
1230 Token ImportTok;
1231 ImportTok.startToken();
1232 ImportTok.setKind(tok::kw_import);
1233 ImportTok.setLocation(ModuleImportLoc);
1234 ImportTok.setIdentifierInfo(getIdentifierInfo(Name: "import"));
 // 6 == strlen("import").
1235 ImportTok.setLength(6);
1236
 // Resolve the header the same way an include directive would; no '#'
 // location exists here, so an empty SourceLocation is passed for HashLoc.
1237 auto Action = HandleHeaderIncludeOrImport(
1238 /*HashLoc*/ SourceLocation(), IncludeTok&: ImportTok, FilenameTok&: Suffix.front(), EndLoc: SemiLoc);
1239 switch (Action.Kind) {
1240 case ImportAction::None:
1241 break;
1242
1243 case ImportAction::ModuleBegin:
1244 // Let the parser know we're textually entering the module.
1245 Suffix.emplace_back();
1246 Suffix.back().startToken();
1247 Suffix.back().setKind(tok::annot_module_begin);
1248 Suffix.back().setLocation(SemiLoc);
1249 Suffix.back().setAnnotationEndLoc(SemiLoc);
1250 Suffix.back().setAnnotationValue(Action.ModuleForHeader);
1251 [[fallthrough]];
1252
1253 case ImportAction::ModuleImport:
1254 case ImportAction::HeaderUnitImport:
1255 case ImportAction::SkippedModuleImport:
1256 // We chose to import (or textually enter) the file. Convert the
1257 // header-name token into a header unit annotation token.
1258 Suffix[0].setKind(tok::annot_header_unit);
1259 Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
1260 Suffix[0].setAnnotationValue(Action.ModuleForHeader);
1261 // FIXME: Call the moduleImport callback?
1262 break;
1263 case ImportAction::Failure:
1264 assert(TheModuleLoader.HadFatalFailure &&
1265 "This should be an early exit only to a fatal error");
 // Fatal failure: hand back an eof token, stop the current lexer, and
 // still re-enter the tokens collected so far.
 // NOTE(review): CurLexer is dereferenced without a null check here -
 // confirm it cannot be null on this path.
1266 Result.setKind(tok::eof);
1267 CurLexer->cutOffLexing();
1268 EnterTokens(Suffix);
1269 return true;
1270 }
1271
1272 EnterTokens(Suffix);
1273 return false;
1274 }
1275
1276 // The token sequence
1277 //
1278 // import identifier (. identifier)*
1279 //
1280 // indicates a module import directive. We already saw the 'import'
1281 // contextual keyword, so now we're looking for the identifiers.
1282 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
1283 // We expected to see an identifier here, and we did; continue handling
1284 // identifiers.
1285 NamedModuleImportPath.emplace_back(Args: Result.getLocation(),
1286 Args: Result.getIdentifierInfo());
1287 ModuleImportExpectsIdentifier = false;
1288 CurLexerCallback = CLK_LexAfterModuleImport;
1289 return true;
1290 }
1291
1292 // If we're expecting a '.' or a ';', and we got a '.', then wait until we
1293 // see the next identifier. (We can also see a '[[' that begins an
1294 // attribute-specifier-seq here under the Standard C++ Modules.)
1295 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
1296 ModuleImportExpectsIdentifier = true;
1297 CurLexerCallback = CLK_LexAfterModuleImport;
1298 return true;
1299 }
1300
1301 // If we didn't recognize a module name at all, this is not a (valid) import.
1302 if (NamedModuleImportPath.empty() || Result.is(K: tok::eof))
1303 return true;
1304
1305 // Consume the pp-import-suffix and expand any macros in it now, if we're not
1306 // at the semicolon already.
1307 SourceLocation SemiLoc = Result.getLocation();
1308 if (Result.isNot(K: tok::semi)) {
1309 Suffix.push_back(Elt: Result);
1310 CollectPpImportSuffix(Toks&: Suffix);
1311 if (Suffix.back().isNot(K: tok::semi)) {
1312 // This is not an import after all.
1313 EnterTokens(Suffix);
1314 return false;
1315 }
1316 SemiLoc = Suffix.back().getLocation();
1317 }
1318
1319 // Under the standard C++ Modules, the dot is just part of the module name,
1320 // and not a real hierarchy separator. Flatten such module names now.
1321 //
1322 // FIXME: Is this the right level to be performing this transformation?
1323 std::string FlatModuleName;
1324 if (getLangOpts().CPlusPlusModules) {
1325 for (auto &Piece : NamedModuleImportPath) {
1326 // If the FlatModuleName ends with colon, it implies it is a partition.
1327 if (!FlatModuleName.empty() && FlatModuleName.back() != ':')
1328 FlatModuleName += ".";
1329 FlatModuleName += Piece.getIdentifierInfo()->getName();
1330 }
 // Replace the multi-component path by a single component that carries
 // the flattened name at the location of the first component.
1331 SourceLocation FirstPathLoc = NamedModuleImportPath[0].getLoc();
1332 NamedModuleImportPath.clear();
1333 NamedModuleImportPath.emplace_back(Args&: FirstPathLoc,
1334 Args: getIdentifierInfo(Name: FlatModuleName));
1335 }
1336
1337 Module *Imported = nullptr;
1338 // We don't/shouldn't load the standard c++20 modules when preprocessing.
1339 if (getLangOpts().Modules && !isInImportingCXXNamedModules()) {
1340 Imported = TheModuleLoader.loadModule(ImportLoc: ModuleImportLoc,
1341 Path: NamedModuleImportPath,
1342 Visibility: Module::Hidden,
1343 /*IsInclusionDirective=*/false);
1344 if (Imported)
1345 makeModuleVisible(M: Imported, Loc: SemiLoc);
1346 }
1347
 // Callbacks are notified even when nothing was loaded; Imported is then
 // null.
1348 if (Callbacks)
1349 Callbacks->moduleImport(ImportLoc: ModuleImportLoc, Path: NamedModuleImportPath, Imported);
1350
 // Tokens collected past the module name (a non-empty Suffix) are pushed
 // back into the stream instead of returning Result directly.
1351 if (!Suffix.empty()) {
1352 EnterTokens(Suffix);
1353 return false;
1354 }
1355 return true;
1356}
1357
1358void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc,
1359 bool IncludeExports) {
1360 CurSubmoduleState->VisibleModules.setVisible(
1361 M, Loc, IncludeExports, Vis: [](Module *) {},
1362 Cb: [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
1363 // FIXME: Include the path in the diagnostic.
1364 // FIXME: Include the import location for the conflicting module.
1365 Diag(Loc: ModuleImportLoc, DiagID: diag::warn_module_conflict)
1366 << Path[0]->getFullModuleName()
1367 << Conflict->getFullModuleName()
1368 << Message;
1369 });
1370
1371 // Add this module to the imports list of the currently-built submodule.
1372 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
1373 BuildingSubmoduleStack.back().M->Imports.insert(X: M);
1374}
1375
1376bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
1377 const char *DiagnosticTag,
1378 bool AllowMacroExpansion) {
1379 // We need at least one string literal.
1380 if (Result.isNot(K: tok::string_literal)) {
1381 Diag(Tok: Result, DiagID: diag::err_expected_string_literal)
1382 << /*Source='in...'*/0 << DiagnosticTag;
1383 return false;
1384 }
1385
1386 // Lex string literal tokens, optionally with macro expansion.
1387 SmallVector<Token, 4> StrToks;
1388 do {
1389 StrToks.push_back(Elt: Result);
1390
1391 if (Result.hasUDSuffix())
1392 Diag(Tok: Result, DiagID: diag::err_invalid_string_udl);
1393
1394 if (AllowMacroExpansion)
1395 Lex(Result);
1396 else
1397 LexUnexpandedToken(Result);
1398 } while (Result.is(K: tok::string_literal));
1399
1400 // Concatenate and parse the strings.
1401 StringLiteralParser Literal(StrToks, *this);
1402 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1403
1404 if (Literal.hadError)
1405 return false;
1406
1407 if (Literal.Pascal) {
1408 Diag(Loc: StrToks[0].getLocation(), DiagID: diag::err_expected_string_literal)
1409 << /*Source='in...'*/0 << DiagnosticTag;
1410 return false;
1411 }
1412
1413 String = std::string(Literal.GetString());
1414 return true;
1415}
1416
1417bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
1418 assert(Tok.is(tok::numeric_constant));
1419 SmallString<8> IntegerBuffer;
1420 bool NumberInvalid = false;
1421 StringRef Spelling = getSpelling(Tok, Buffer&: IntegerBuffer, Invalid: &NumberInvalid);
1422 if (NumberInvalid)
1423 return false;
1424 NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
1425 getLangOpts(), getTargetInfo(),
1426 getDiagnostics());
1427 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
1428 return false;
1429 llvm::APInt APVal(64, 0);
1430 if (Literal.GetIntegerValue(Val&: APVal))
1431 return false;
1432 Lex(Result&: Tok);
1433 Value = APVal.getLimitedValue();
1434 return true;
1435}
1436
1437void Preprocessor::addCommentHandler(CommentHandler *Handler) {
1438 assert(Handler && "NULL comment handler");
1439 assert(!llvm::is_contained(CommentHandlers, Handler) &&
1440 "Comment handler already registered");
1441 CommentHandlers.push_back(x: Handler);
1442}
1443
1444void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
1445 std::vector<CommentHandler *>::iterator Pos =
1446 llvm::find(Range&: CommentHandlers, Val: Handler);
1447 assert(Pos != CommentHandlers.end() && "Comment handler not registered");
1448 CommentHandlers.erase(position: Pos);
1449}
1450
1451bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
1452 bool AnyPendingTokens = false;
1453 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
1454 HEnd = CommentHandlers.end();
1455 H != HEnd; ++H) {
1456 if ((*H)->HandleComment(PP&: *this, Comment))
1457 AnyPendingTokens = true;
1458 }
1459 if (!AnyPendingTokens || getCommentRetentionState())
1460 return false;
1461 Lex(Result&: result);
1462 return true;
1463}
1464
1465void Preprocessor::emitMacroDeprecationWarning(const Token &Identifier) const {
1466 const MacroAnnotations &A =
1467 getMacroAnnotations(II: Identifier.getIdentifierInfo());
1468 assert(A.DeprecationInfo &&
1469 "Macro deprecation warning without recorded annotation!");
1470 const MacroAnnotationInfo &Info = *A.DeprecationInfo;
1471 if (Info.Message.empty())
1472 Diag(Tok: Identifier, DiagID: diag::warn_pragma_deprecated_macro_use)
1473 << Identifier.getIdentifierInfo() << 0;
1474 else
1475 Diag(Tok: Identifier, DiagID: diag::warn_pragma_deprecated_macro_use)
1476 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1477 Diag(Loc: Info.Location, DiagID: diag::note_pp_macro_annotation) << 0;
1478}
1479
1480void Preprocessor::emitRestrictExpansionWarning(const Token &Identifier) const {
1481 const MacroAnnotations &A =
1482 getMacroAnnotations(II: Identifier.getIdentifierInfo());
1483 assert(A.RestrictExpansionInfo &&
1484 "Macro restricted expansion warning without recorded annotation!");
1485 const MacroAnnotationInfo &Info = *A.RestrictExpansionInfo;
1486 if (Info.Message.empty())
1487 Diag(Tok: Identifier, DiagID: diag::warn_pragma_restrict_expansion_macro_use)
1488 << Identifier.getIdentifierInfo() << 0;
1489 else
1490 Diag(Tok: Identifier, DiagID: diag::warn_pragma_restrict_expansion_macro_use)
1491 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1492 Diag(Loc: Info.Location, DiagID: diag::note_pp_macro_annotation) << 1;
1493}
1494
1495void Preprocessor::emitRestrictInfNaNWarning(const Token &Identifier,
1496 unsigned DiagSelection) const {
1497 Diag(Tok: Identifier, DiagID: diag::warn_fp_nan_inf_when_disabled) << DiagSelection << 1;
1498}
1499
1500void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
1501 bool IsUndef) const {
1502 const MacroAnnotations &A =
1503 getMacroAnnotations(II: Identifier.getIdentifierInfo());
1504 assert(A.FinalAnnotationLoc &&
1505 "Final macro warning without recorded annotation!");
1506
1507 Diag(Tok: Identifier, DiagID: diag::warn_pragma_final_macro)
1508 << Identifier.getIdentifierInfo() << (IsUndef ? 0 : 1);
1509 Diag(Loc: *A.FinalAnnotationLoc, DiagID: diag::note_pp_macro_annotation) << 2;
1510}
1511
1512bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
1513 const SourceLocation &Loc) const {
1514 // The lambda that tests if a `Loc` is in an opt-out region given one opt-out
1515 // region map:
1516 auto TestInMap = [&SourceMgr](const SafeBufferOptOutRegionsTy &Map,
1517 const SourceLocation &Loc) -> bool {
1518 // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
1519 auto FirstRegionEndingAfterLoc = llvm::partition_point(
1520 Range: Map, P: [&SourceMgr,
1521 &Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
1522 return SourceMgr.isBeforeInTranslationUnit(LHS: Region.second, RHS: Loc);
1523 });
1524
1525 if (FirstRegionEndingAfterLoc != Map.end()) {
1526 // To test if the start location of the found region precedes `Loc`:
1527 return SourceMgr.isBeforeInTranslationUnit(
1528 LHS: FirstRegionEndingAfterLoc->first, RHS: Loc);
1529 }
1530 // If we do not find a region whose end location passes `Loc`, we want to
1531 // check if the current region is still open:
1532 if (!Map.empty() && Map.back().first == Map.back().second)
1533 return SourceMgr.isBeforeInTranslationUnit(LHS: Map.back().first, RHS: Loc);
1534 return false;
1535 };
1536
1537 // What the following does:
1538 //
1539 // If `Loc` belongs to the local TU, we just look up `SafeBufferOptOutMap`.
1540 // Otherwise, `Loc` is from a loaded AST. We look up the
1541 // `LoadedSafeBufferOptOutMap` first to get the opt-out region map of the
1542 // loaded AST where `Loc` is at. Then we find if `Loc` is in an opt-out
1543 // region w.r.t. the region map. If the region map is absent, it means there
1544 // is no opt-out pragma in that loaded AST.
1545 //
1546 // Opt-out pragmas in the local TU or a loaded AST is not visible to another
1547 // one of them. That means if you put the pragmas around a `#include
1548 // "module.h"`, where module.h is a module, it is not actually suppressing
1549 // warnings in module.h. This is fine because warnings in module.h will be
1550 // reported when module.h is compiled in isolation and nothing in module.h
1551 // will be analyzed ever again. So you will not see warnings from the file
1552 // that imports module.h anyway. And you can't even do the same thing for PCHs
1553 // because they can only be included from the command line.
1554
1555 if (SourceMgr.isLocalSourceLocation(Loc))
1556 return TestInMap(SafeBufferOptOutMap, Loc);
1557
1558 const SafeBufferOptOutRegionsTy *LoadedRegions =
1559 LoadedSafeBufferOptOutMap.lookupLoadedOptOutMap(Loc, SrcMgr: SourceMgr);
1560
1561 if (LoadedRegions)
1562 return TestInMap(*LoadedRegions, Loc);
1563 return false;
1564}
1565
1566bool Preprocessor::enterOrExitSafeBufferOptOutRegion(
1567 bool isEnter, const SourceLocation &Loc) {
1568 if (isEnter) {
1569 if (isPPInSafeBufferOptOutRegion())
1570 return true; // invalid enter action
1571 InSafeBufferOptOutRegion = true;
1572 CurrentSafeBufferOptOutStart = Loc;
1573
1574 // To set the start location of a new region:
1575
1576 if (!SafeBufferOptOutMap.empty()) {
1577 [[maybe_unused]] auto *PrevRegion = &SafeBufferOptOutMap.back();
1578 assert(PrevRegion->first != PrevRegion->second &&
1579 "Shall not begin a safe buffer opt-out region before closing the "
1580 "previous one.");
1581 }
1582 // If the start location equals to the end location, we call the region a
1583 // open region or a unclosed region (i.e., end location has not been set
1584 // yet).
1585 SafeBufferOptOutMap.emplace_back(Args: Loc, Args: Loc);
1586 } else {
1587 if (!isPPInSafeBufferOptOutRegion())
1588 return true; // invalid enter action
1589 InSafeBufferOptOutRegion = false;
1590
1591 // To set the end location of the current open region:
1592
1593 assert(!SafeBufferOptOutMap.empty() &&
1594 "Misordered safe buffer opt-out regions");
1595 auto *CurrRegion = &SafeBufferOptOutMap.back();
1596 assert(CurrRegion->first == CurrRegion->second &&
1597 "Set end location to a closed safe buffer opt-out region");
1598 CurrRegion->second = Loc;
1599 }
1600 return false;
1601}
1602
1603bool Preprocessor::isPPInSafeBufferOptOutRegion() {
1604 return InSafeBufferOptOutRegion;
1605}
1606bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) {
1607 StartLoc = CurrentSafeBufferOptOutStart;
1608 return InSafeBufferOptOutRegion;
1609}
1610
1611SmallVector<SourceLocation, 64>
1612Preprocessor::serializeSafeBufferOptOutMap() const {
1613 assert(!InSafeBufferOptOutRegion &&
1614 "Attempt to serialize safe buffer opt-out regions before file being "
1615 "completely preprocessed");
1616
1617 SmallVector<SourceLocation, 64> SrcSeq;
1618
1619 for (const auto &[begin, end] : SafeBufferOptOutMap) {
1620 SrcSeq.push_back(Elt: begin);
1621 SrcSeq.push_back(Elt: end);
1622 }
1623 // Only `SafeBufferOptOutMap` gets serialized. No need to serialize
1624 // `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
1625 // pch/module in the pch-chain/module-DAG will be loaded one by one in order.
1626 // It means that for each loading pch/module m, it just needs to load m's own
1627 // `SafeBufferOptOutMap`.
1628 return SrcSeq;
1629}
1630
1631bool Preprocessor::setDeserializedSafeBufferOptOutMap(
1632 const SmallVectorImpl<SourceLocation> &SourceLocations) {
1633 if (SourceLocations.size() == 0)
1634 return false;
1635
1636 assert(SourceLocations.size() % 2 == 0 &&
1637 "ill-formed SourceLocation sequence");
1638
1639 auto It = SourceLocations.begin();
1640 SafeBufferOptOutRegionsTy &Regions =
1641 LoadedSafeBufferOptOutMap.findAndConsLoadedOptOutMap(Loc: *It, SrcMgr&: SourceMgr);
1642
1643 do {
1644 SourceLocation Begin = *It++;
1645 SourceLocation End = *It++;
1646
1647 Regions.emplace_back(Args&: Begin, Args&: End);
1648 } while (It != SourceLocations.end());
1649 return true;
1650}
1651
// Out-of-line definitions of the defaulted destructors of four classes that
// are declared elsewhere.
// NOTE(review): presumably these live here so that each class has a single
// translation unit that emits its vtable -- confirm against the class
// declarations.
ModuleLoader::~ModuleLoader() = default;

CommentHandler::~CommentHandler() = default;

EmptylineHandler::~EmptylineHandler() = default;

CodeCompletionHandler::~CodeCompletionHandler() = default;
1659
1660void Preprocessor::createPreprocessingRecord() {
1661 if (Record)
1662 return;
1663
1664 Record = new PreprocessingRecord(getSourceManager());
1665 addPPCallbacks(C: std::unique_ptr<PPCallbacks>(Record));
1666}
1667
1668const char *Preprocessor::getCheckPoint(FileID FID, const char *Start) const {
1669 if (auto It = CheckPoints.find(Val: FID); It != CheckPoints.end()) {
1670 const SmallVector<const char *> &FileCheckPoints = It->second;
1671 const char *Last = nullptr;
1672 // FIXME: Do better than a linear search.
1673 for (const char *P : FileCheckPoints) {
1674 if (P > Start)
1675 break;
1676 Last = P;
1677 }
1678 return Last;
1679 }
1680
1681 return nullptr;
1682}
1683