1//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Preprocessor interface.
10//
11//===----------------------------------------------------------------------===//
12//
13// Options to support:
14// -H - Print the name of each header file used.
15// -d[DNI] - Dump various things.
16// -fworking-directory - #line's with preprocessor's working dir.
17// -fpreprocessed
18// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
19// -W*
20// -w
21//
22// Messages to emit:
23// "Multiple include guards may be useful for:\n"
24//
25//===----------------------------------------------------------------------===//
26
27#include "clang/Lex/Preprocessor.h"
28#include "clang/Basic/Builtins.h"
29#include "clang/Basic/FileManager.h"
30#include "clang/Basic/IdentifierTable.h"
31#include "clang/Basic/LLVM.h"
32#include "clang/Basic/LangOptions.h"
33#include "clang/Basic/Module.h"
34#include "clang/Basic/SourceLocation.h"
35#include "clang/Basic/SourceManager.h"
36#include "clang/Basic/TargetInfo.h"
37#include "clang/Lex/CodeCompletionHandler.h"
38#include "clang/Lex/DependencyDirectivesScanner.h"
39#include "clang/Lex/ExternalPreprocessorSource.h"
40#include "clang/Lex/HeaderSearch.h"
41#include "clang/Lex/LexDiagnostic.h"
42#include "clang/Lex/Lexer.h"
43#include "clang/Lex/LiteralSupport.h"
44#include "clang/Lex/MacroArgs.h"
45#include "clang/Lex/MacroInfo.h"
46#include "clang/Lex/ModuleLoader.h"
47#include "clang/Lex/NoTrivialPPDirectiveTracer.h"
48#include "clang/Lex/Pragma.h"
49#include "clang/Lex/PreprocessingRecord.h"
50#include "clang/Lex/PreprocessorLexer.h"
51#include "clang/Lex/PreprocessorOptions.h"
52#include "clang/Lex/ScratchBuffer.h"
53#include "clang/Lex/Token.h"
54#include "clang/Lex/TokenLexer.h"
55#include "llvm/ADT/APInt.h"
56#include "llvm/ADT/ArrayRef.h"
57#include "llvm/ADT/DenseMap.h"
58#include "llvm/ADT/STLExtras.h"
59#include "llvm/ADT/ScopeExit.h"
60#include "llvm/ADT/SmallVector.h"
61#include "llvm/ADT/StringRef.h"
62#include "llvm/Support/Capacity.h"
63#include "llvm/Support/ErrorHandling.h"
64#include "llvm/Support/FormatVariadic.h"
65#include "llvm/Support/MemoryBuffer.h"
66#include "llvm/Support/MemoryBufferRef.h"
67#include "llvm/Support/SaveAndRestore.h"
68#include "llvm/Support/raw_ostream.h"
69#include <algorithm>
70#include <cassert>
71#include <memory>
72#include <optional>
73#include <string>
74#include <utility>
75#include <vector>
76
77using namespace clang;
78
/// Smallest allowed gap, measured in tokens, between two consecutive check
/// points.
static constexpr unsigned CheckPointStepSize = 1u << 10;
81
82LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
83
84ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
85
86Preprocessor::Preprocessor(const PreprocessorOptions &PPOpts,
87 DiagnosticsEngine &diags, const LangOptions &opts,
88 SourceManager &SM, HeaderSearch &Headers,
89 ModuleLoader &TheModuleLoader,
90 IdentifierInfoLookup *IILookup, bool OwnsHeaders,
91 TranslationUnitKind TUKind)
92 : PPOpts(PPOpts), Diags(&diags), LangOpts(opts),
93 FileMgr(Headers.getFileMgr()), SourceMgr(SM),
94 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
95 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
96 // As the language options may have not been loaded yet (when
97 // deserializing an ASTUnit), adding keywords to the identifier table is
98 // deferred to Preprocessor::Initialize().
99 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
100 TUKind(TUKind), SkipMainFilePreamble(0, true),
101 CurSubmoduleState(&NullSubmoduleState) {
102 OwnsHeaderSearch = OwnsHeaders;
103
104 // Default to discarding comments.
105 KeepComments = false;
106 KeepMacroComments = false;
107 SuppressIncludeNotFoundError = false;
108
109 // Macro expansion is enabled.
110 DisableMacroExpansion = false;
111 MacroExpansionInDirectivesOverride = false;
112 InMacroArgs = false;
113 ArgMacro = nullptr;
114 InMacroArgPreExpansion = false;
115 NumCachedTokenLexers = 0;
116 PragmasEnabled = true;
117 ParsingIfOrElifDirective = false;
118 PreprocessedOutput = false;
119
120 // We haven't read anything from the external source.
121 ReadMacrosFromExternalSource = false;
122
123 LastExportKeyword.startToken();
124
125 BuiltinInfo = std::make_unique<Builtin::Context>();
126
127 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
128 // a macro. They get unpoisoned where it is allowed.
129 (Ident__VA_ARGS__ = getIdentifierInfo(Name: "__VA_ARGS__"))->setIsPoisoned();
130 SetPoisonReason(II: Ident__VA_ARGS__,DiagID: diag::ext_pp_bad_vaargs_use);
131 (Ident__VA_OPT__ = getIdentifierInfo(Name: "__VA_OPT__"))->setIsPoisoned();
132 SetPoisonReason(II: Ident__VA_OPT__,DiagID: diag::ext_pp_bad_vaopt_use);
133
134 // Initialize the pragma handlers.
135 RegisterBuiltinPragmas();
136
137 // Initialize builtin macros like __LINE__ and friends.
138 RegisterBuiltinMacros();
139
140 if(LangOpts.Borland) {
141 Ident__exception_info = getIdentifierInfo(Name: "_exception_info");
142 Ident___exception_info = getIdentifierInfo(Name: "__exception_info");
143 Ident_GetExceptionInfo = getIdentifierInfo(Name: "GetExceptionInformation");
144 Ident__exception_code = getIdentifierInfo(Name: "_exception_code");
145 Ident___exception_code = getIdentifierInfo(Name: "__exception_code");
146 Ident_GetExceptionCode = getIdentifierInfo(Name: "GetExceptionCode");
147 Ident__abnormal_termination = getIdentifierInfo(Name: "_abnormal_termination");
148 Ident___abnormal_termination = getIdentifierInfo(Name: "__abnormal_termination");
149 Ident_AbnormalTermination = getIdentifierInfo(Name: "AbnormalTermination");
150 } else {
151 Ident__exception_info = Ident__exception_code = nullptr;
152 Ident__abnormal_termination = Ident___exception_info = nullptr;
153 Ident___exception_code = Ident___abnormal_termination = nullptr;
154 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
155 Ident_AbnormalTermination = nullptr;
156 }
157
158 // Default incremental processing to -fincremental-extensions, clients can
159 // override with `enableIncrementalProcessing` if desired.
160 IncrementalProcessing = LangOpts.IncrementalExtensions;
161
162 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
163 if (usingPCHWithPragmaHdrStop())
164 SkippingUntilPragmaHdrStop = true;
165
166 // If using a PCH with a through header, start skipping tokens.
167 if (!this->PPOpts.PCHThroughHeader.empty() &&
168 !this->PPOpts.ImplicitPCHInclude.empty())
169 SkippingUntilPCHThroughHeader = true;
170
171 if (this->PPOpts.GeneratePreamble)
172 PreambleConditionalStack.startRecording();
173
174 MaxTokens = LangOpts.MaxTokens;
175}
176
177Preprocessor::~Preprocessor() {
178 assert(!isBacktrackEnabled() && "EnableBacktrack/Backtrack imbalance!");
179
180 IncludeMacroStack.clear();
181
182 // Free any cached macro expanders.
183 // This populates MacroArgCache, so all TokenLexers need to be destroyed
184 // before the code below that frees up the MacroArgCache list.
185 std::fill(first: TokenLexerCache, last: TokenLexerCache + NumCachedTokenLexers, value: nullptr);
186 CurTokenLexer.reset();
187
188 // Free any cached MacroArgs.
189 for (MacroArgs *ArgList = MacroArgCache; ArgList;)
190 ArgList = ArgList->deallocate();
191
192 // Delete the header search info, if we own it.
193 if (OwnsHeaderSearch)
194 delete &HeaderInfo;
195}
196
197void Preprocessor::Initialize(const TargetInfo &Target,
198 const TargetInfo *AuxTarget) {
199 assert((!this->Target || this->Target == &Target) &&
200 "Invalid override of target information");
201 this->Target = &Target;
202
203 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
204 "Invalid override of aux target information.");
205 this->AuxTarget = AuxTarget;
206
207 // Initialize information about built-ins.
208 BuiltinInfo->InitializeTarget(Target, AuxTarget);
209 HeaderInfo.setTarget(Target);
210
211 // Populate the identifier table with info about keywords for the current language.
212 Identifiers.AddKeywords(LangOpts);
213
214 // Initialize the __FTL_EVAL_METHOD__ macro to the TargetInfo.
215 setTUFPEvalMethod(getTargetInfo().getFPEvalMethod());
216
217 if (getLangOpts().getFPEvalMethod() == LangOptions::FEM_UnsetOnCommandLine)
218 // Use setting from TargetInfo.
219 setCurrentFPEvalMethod(PragmaLoc: SourceLocation(), Val: Target.getFPEvalMethod());
220 else
221 // Set initial value of __FLT_EVAL_METHOD__ from the command line.
222 setCurrentFPEvalMethod(PragmaLoc: SourceLocation(), Val: getLangOpts().getFPEvalMethod());
223}
224
225void Preprocessor::InitializeForModelFile() {
226 NumEnteredSourceFiles = 0;
227
228 // Reset pragmas
229 PragmaHandlersBackup = std::move(PragmaHandlers);
230 PragmaHandlers = std::make_unique<PragmaNamespace>(args: StringRef());
231 RegisterBuiltinPragmas();
232
233 // Reset PredefinesFileID
234 PredefinesFileID = FileID();
235}
236
237void Preprocessor::FinalizeForModelFile() {
238 NumEnteredSourceFiles = 1;
239
240 PragmaHandlers = std::move(PragmaHandlersBackup);
241}
242
243void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
244 std::string TokenStr;
245 llvm::raw_string_ostream OS(TokenStr);
246
247 // The alignment of 16 is chosen to comfortably fit most identifiers.
248 OS << llvm::formatv(Fmt: "{0,-16} ", Vals: tok::getTokenName(Kind: Tok.getKind()));
249
250 // Annotation tokens are just markers that don't have a spelling -- they
251 // indicate where something expanded.
252 if (!Tok.isAnnotation()) {
253 OS << "'";
254 // Escape string to prevent token spelling from spanning multiple lines.
255 OS.write_escaped(Str: getSpelling(Tok));
256 OS << "'";
257 }
258
259 // The alignment of 48 (32 characters for the spelling + the 16 for
260 // the identifier name) fits most variable names, keywords and annotations.
261 llvm::errs() << llvm::formatv(Fmt: "{0,-48} ", Vals&: OS.str());
262
263 if (!DumpFlags) return;
264
265 auto Loc = Tok.getLocation();
266 llvm::errs() << "Loc=<";
267 DumpLocation(Loc);
268 llvm::errs() << ">";
269
270 // If the token points directly to a file location (i.e. not a macro
271 // expansion), then add additional padding so that trailing markers
272 // align, provided the line/column numbers are reasonably sized.
273 //
274 // Otherwise, if it's a macro expansion, don't bother with alignment,
275 // as the line will include multiple locations and be very long.
276 //
277 // NOTE: To keep this stateless, it doesn't account for filename
278 // length, so when a header starts markers will be temporarily misaligned.
279 if (Loc.isFileID()) {
280 PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc);
281
282 if (!PLoc.isInvalid()) {
283 int LineWidth = llvm::utostr(X: PLoc.getLine()).size();
284 int ColumnWidth = llvm::utostr(X: PLoc.getColumn()).size();
285
286 // Reserve space for lines up to 9999 and columns up to 99,
287 // which is 4 + 2 = 6 characters in total.
288 const int ReservedSpace = 6;
289
290 int LeftSpace = ReservedSpace - LineWidth - ColumnWidth;
291 int Padding = std::max<int>(a: 0, b: LeftSpace);
292
293 llvm::errs().indent(NumSpaces: Padding);
294 }
295 }
296
297 if (Tok.isAtStartOfLine())
298 llvm::errs() << " [StartOfLine]";
299 if (Tok.hasLeadingSpace())
300 llvm::errs() << " [LeadingSpace]";
301 if (Tok.isExpandDisabled())
302 llvm::errs() << " [ExpandDisabled]";
303 if (Tok.needsCleaning()) {
304 const char *Start = SourceMgr.getCharacterData(SL: Tok.getLocation());
305 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) << "']";
306 }
307}
308
309void Preprocessor::DumpLocation(SourceLocation Loc) const {
310 Loc.print(OS&: llvm::errs(), SM: SourceMgr);
311}
312
313void Preprocessor::DumpMacro(const MacroInfo &MI) const {
314 llvm::errs() << "MACRO: ";
315 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
316 DumpToken(Tok: MI.getReplacementToken(Tok: i));
317 llvm::errs() << " ";
318 }
319 llvm::errs() << "\n";
320}
321
322void Preprocessor::PrintStats() {
323 llvm::errs() << "\n*** Preprocessor Stats:\n";
324 llvm::errs() << NumDirectives << " directives found:\n";
325 llvm::errs() << " " << NumDefined << " #define.\n";
326 llvm::errs() << " " << NumUndefined << " #undef.\n";
327 llvm::errs() << " #include/#include_next/#import:\n";
328 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
329 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
330 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
331 llvm::errs() << " " << NumElse << " #else/#elif/#elifdef/#elifndef.\n";
332 llvm::errs() << " " << NumEndif << " #endif.\n";
333 llvm::errs() << " " << NumPragma << " #pragma.\n";
334 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
335
336 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
337 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
338 << NumFastMacroExpanded << " on the fast path.\n";
339 llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
340 << " token paste (##) operations performed, "
341 << NumFastTokenPaste << " on the fast path.\n";
342
343 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
344
345 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
346 llvm::errs() << "\n Macro Expanded Tokens: "
347 << llvm::capacity_in_bytes(X: MacroExpandedTokens);
348 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
349 // FIXME: List information for all submodules.
350 llvm::errs() << "\n Macros: "
351 << llvm::capacity_in_bytes(X: CurSubmoduleState->Macros);
352 llvm::errs() << "\n #pragma push_macro Info: "
353 << llvm::capacity_in_bytes(X: PragmaPushMacroInfo);
354 llvm::errs() << "\n Poison Reasons: "
355 << llvm::capacity_in_bytes(X: PoisonReasons);
356 llvm::errs() << "\n Comment Handlers: "
357 << llvm::capacity_in_bytes(x: CommentHandlers) << "\n";
358}
359
360llvm::iterator_range<Preprocessor::macro_iterator>
361Preprocessor::macros(bool IncludeExternalMacros) const {
362 if (IncludeExternalMacros && ExternalSource &&
363 !ReadMacrosFromExternalSource) {
364 ReadMacrosFromExternalSource = true;
365 ExternalSource->ReadDefinedMacros();
366 }
367 // Make sure we cover all macros in visible modules.
368 for (const ModuleMacro &Macro : ModuleMacros)
369 CurSubmoduleState->Macros.try_emplace(Key: Macro.II);
370
371 return CurSubmoduleState->Macros;
372}
373
374size_t Preprocessor::getTotalMemory() const {
375 return BP.getTotalMemory()
376 + llvm::capacity_in_bytes(X: MacroExpandedTokens)
377 + Predefines.capacity() /* Predefines buffer. */
378 // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
379 // and ModuleMacros.
380 + llvm::capacity_in_bytes(X: CurSubmoduleState->Macros)
381 + llvm::capacity_in_bytes(X: PragmaPushMacroInfo)
382 + llvm::capacity_in_bytes(X: PoisonReasons)
383 + llvm::capacity_in_bytes(x: CommentHandlers);
384}
385
386/// Compares macro tokens with a specified token value sequence.
387static bool MacroDefinitionEquals(const MacroInfo *MI,
388 ArrayRef<TokenValue> Tokens) {
389 return Tokens.size() == MI->getNumTokens() &&
390 std::equal(first1: Tokens.begin(), last1: Tokens.end(), first2: MI->tokens_begin());
391}
392
393StringRef Preprocessor::getLastMacroWithSpelling(
394 SourceLocation Loc,
395 ArrayRef<TokenValue> Tokens) const {
396 SourceLocation BestLocation;
397 StringRef BestSpelling;
398 for (const auto &M : macros()) {
399 const MacroDirective::DefInfo Def =
400 M.second.findDirectiveAtLoc(Loc, SourceMgr);
401 if (!Def || !Def.getMacroInfo())
402 continue;
403 if (!Def.getMacroInfo()->isObjectLike())
404 continue;
405 if (!MacroDefinitionEquals(MI: Def.getMacroInfo(), Tokens))
406 continue;
407 SourceLocation Location = Def.getLocation();
408 // Choose the macro defined latest.
409 if (BestLocation.isInvalid() ||
410 (Location.isValid() &&
411 SourceMgr.isBeforeInTranslationUnit(LHS: BestLocation, RHS: Location))) {
412 BestLocation = Location;
413 BestSpelling = M.first->getName();
414 }
415 }
416 return BestSpelling;
417}
418
419void Preprocessor::recomputeCurLexerKind() {
420 if (InCachingLexMode())
421 CurLexerCallback = CLK_CachingLexer;
422 else if (CurLexer)
423 CurLexerCallback = CurLexer->isDependencyDirectivesLexer()
424 ? CLK_DependencyDirectivesLexer
425 : CLK_Lexer;
426 else if (CurTokenLexer)
427 CurLexerCallback = CLK_TokenLexer;
428 else
429 CurLexerCallback = CLK_Lexer;
430}
431
432bool Preprocessor::SetCodeCompletionPoint(FileEntryRef File,
433 unsigned CompleteLine,
434 unsigned CompleteColumn) {
435 assert(CompleteLine && CompleteColumn && "Starts from 1:1");
436 assert(!CodeCompletionFile && "Already set");
437
438 // Load the actual file's contents.
439 std::optional<llvm::MemoryBufferRef> Buffer =
440 SourceMgr.getMemoryBufferForFileOrNone(File);
441 if (!Buffer)
442 return true;
443
444 // Find the byte position of the truncation point.
445 const char *Position = Buffer->getBufferStart();
446 for (unsigned Line = 1; Line < CompleteLine; ++Line) {
447 for (; *Position; ++Position) {
448 if (*Position != '\r' && *Position != '\n')
449 continue;
450
451 // Eat \r\n or \n\r as a single line.
452 if ((Position[1] == '\r' || Position[1] == '\n') &&
453 Position[0] != Position[1])
454 ++Position;
455 ++Position;
456 break;
457 }
458 }
459
460 Position += CompleteColumn - 1;
461
462 // If pointing inside the preamble, adjust the position at the beginning of
463 // the file after the preamble.
464 if (SkipMainFilePreamble.first &&
465 SourceMgr.getFileEntryForID(FID: SourceMgr.getMainFileID()) == File) {
466 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
467 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
468 }
469
470 if (Position > Buffer->getBufferEnd())
471 Position = Buffer->getBufferEnd();
472
473 CodeCompletionFile = File;
474 CodeCompletionOffset = Position - Buffer->getBufferStart();
475
476 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
477 Size: Buffer->getBufferSize() + 1, BufferName: Buffer->getBufferIdentifier());
478 char *NewBuf = NewBuffer->getBufferStart();
479 char *NewPos = std::copy(first: Buffer->getBufferStart(), last: Position, result: NewBuf);
480 *NewPos = '\0';
481 std::copy(first: Position, last: Buffer->getBufferEnd(), result: NewPos+1);
482 SourceMgr.overrideFileContents(SourceFile: File, Buffer: std::move(NewBuffer));
483
484 return false;
485}
486
487void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir,
488 bool IsAngled) {
489 setCodeCompletionReached();
490 if (CodeComplete)
491 CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
492}
493
494void Preprocessor::CodeCompleteNaturalLanguage() {
495 setCodeCompletionReached();
496 if (CodeComplete)
497 CodeComplete->CodeCompleteNaturalLanguage();
498}
499
500/// getSpelling - This method is used to get the spelling of a token into a
501/// SmallVector. Note that the returned StringRef may not point to the
502/// supplied buffer if a copy can be avoided.
503StringRef Preprocessor::getSpelling(const Token &Tok,
504 SmallVectorImpl<char> &Buffer,
505 bool *Invalid) const {
506 // NOTE: this has to be checked *before* testing for an IdentifierInfo.
507 if (Tok.isNot(K: tok::raw_identifier) && !Tok.hasUCN()) {
508 // Try the fast path.
509 if (const IdentifierInfo *II = Tok.getIdentifierInfo())
510 return II->getName();
511 }
512
513 // Resize the buffer if we need to copy into it.
514 if (Tok.needsCleaning())
515 Buffer.resize(N: Tok.getLength());
516
517 const char *Ptr = Buffer.data();
518 unsigned Len = getSpelling(Tok, Buffer&: Ptr, Invalid);
519 return StringRef(Ptr, Len);
520}
521
522/// CreateString - Plop the specified string into a scratch buffer and return a
523/// location for it. If specified, the source location provides a source
524/// location for the token.
525void Preprocessor::CreateString(StringRef Str, Token &Tok,
526 SourceLocation ExpansionLocStart,
527 SourceLocation ExpansionLocEnd) {
528 Tok.setLength(Str.size());
529
530 const char *DestPtr;
531 SourceLocation Loc = ScratchBuf->getToken(Buf: Str.data(), Len: Str.size(), DestPtr);
532
533 if (ExpansionLocStart.isValid())
534 Loc = SourceMgr.createExpansionLoc(SpellingLoc: Loc, ExpansionLocStart,
535 ExpansionLocEnd, Length: Str.size());
536 Tok.setLocation(Loc);
537
538 // If this is a raw identifier or a literal token, set the pointer data.
539 if (Tok.is(K: tok::raw_identifier))
540 Tok.setRawIdentifierData(DestPtr);
541 else if (Tok.isLiteral())
542 Tok.setLiteralData(DestPtr);
543}
544
545SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
546 auto &SM = getSourceManager();
547 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
548 FileIDAndOffset LocInfo = SM.getDecomposedLoc(Loc: SpellingLoc);
549 bool Invalid = false;
550 StringRef Buffer = SM.getBufferData(FID: LocInfo.first, Invalid: &Invalid);
551 if (Invalid)
552 return SourceLocation();
553
554 // FIXME: We could consider re-using spelling for tokens we see repeatedly.
555 const char *DestPtr;
556 SourceLocation Spelling =
557 ScratchBuf->getToken(Buf: Buffer.data() + LocInfo.second, Len: Length, DestPtr);
558 return SM.createTokenSplitLoc(SpellingLoc: Spelling, TokenStart: Loc, TokenEnd: Loc.getLocWithOffset(Offset: Length));
559}
560
561Module *Preprocessor::getCurrentModule() {
562 if (!getLangOpts().isCompilingModule())
563 return nullptr;
564
565 return getHeaderSearchInfo().lookupModule(ModuleName: getLangOpts().CurrentModule);
566}
567
568Module *Preprocessor::getCurrentModuleImplementation() {
569 if (!getLangOpts().isCompilingModuleImplementation())
570 return nullptr;
571
572 return getHeaderSearchInfo().lookupModule(ModuleName: getLangOpts().ModuleName);
573}
574
575//===----------------------------------------------------------------------===//
576// Preprocessor Initialization Methods
577//===----------------------------------------------------------------------===//
578
579/// EnterMainSourceFile - Enter the specified FileID as the main source file,
580/// which implicitly adds the builtin defines etc.
581void Preprocessor::EnterMainSourceFile() {
582 // We do not allow the preprocessor to reenter the main file. Doing so will
583 // cause FileID's to accumulate information from both runs (e.g. #line
584 // information) and predefined macros aren't guaranteed to be set properly.
585 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
586 FileID MainFileID = SourceMgr.getMainFileID();
587
588 // If MainFileID is loaded it means we loaded an AST file, no need to enter
589 // a main file.
590 if (!SourceMgr.isLoadedFileID(FID: MainFileID)) {
591 // Enter the main file source buffer.
592 EnterSourceFile(FID: MainFileID, Dir: nullptr, Loc: SourceLocation());
593
594 // If we've been asked to skip bytes in the main file (e.g., as part of a
595 // precompiled preamble), do so now.
596 if (SkipMainFilePreamble.first > 0)
597 CurLexer->SetByteOffset(Offset: SkipMainFilePreamble.first,
598 StartOfLine: SkipMainFilePreamble.second);
599
600 // Tell the header info that the main file was entered. If the file is later
601 // #imported, it won't be re-entered.
602 if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(FID: MainFileID))
603 markIncluded(File: *FE);
604
605 // Record the first PP token in the main file. This is used to generate
606 // better diagnostics for C++ modules.
607 //
608 // // This is a comment.
609 // #define FOO int // note: add 'module;' to the start of the file
610 // ^ FirstPPToken // to introduce a global module fragment.
611 //
612 // export module M; // error: module declaration must occur
613 // // at the start of the translation unit.
614 if (getLangOpts().CPlusPlusModules) {
615 std::optional<StringRef> Input =
616 getSourceManager().getBufferDataOrNone(FID: MainFileID);
617 if (!isPreprocessedModuleFile() && Input)
618 MainFileIsPreprocessedModuleFile =
619 clang::isPreprocessedModuleFile(Source: *Input);
620 auto Tracer = std::make_unique<NoTrivialPPDirectiveTracer>(args&: *this);
621 DirTracer = Tracer.get();
622 addPPCallbacks(C: std::move(Tracer));
623 std::optional<Token> FirstPPTok = CurLexer->peekNextPPToken();
624 if (FirstPPTok)
625 FirstPPTokenLoc = FirstPPTok->getLocation();
626 }
627 }
628
629 // Preprocess Predefines to populate the initial preprocessor state.
630 std::unique_ptr<llvm::MemoryBuffer> SB =
631 llvm::MemoryBuffer::getMemBufferCopy(InputData: Predefines, BufferName: "<built-in>");
632 assert(SB && "Cannot create predefined source buffer");
633 FileID FID = SourceMgr.createFileID(Buffer: std::move(SB));
634 assert(FID.isValid() && "Could not create FileID for predefines?");
635 setPredefinesFileID(FID);
636
637 // Start parsing the predefines.
638 EnterSourceFile(FID, Dir: nullptr, Loc: SourceLocation());
639
640 if (!PPOpts.PCHThroughHeader.empty()) {
641 // Lookup and save the FileID for the through header. If it isn't found
642 // in the search path, it's a fatal error.
643 OptionalFileEntryRef File = LookupFile(
644 FilenameLoc: SourceLocation(), Filename: PPOpts.PCHThroughHeader,
645 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr,
646 /*CurDir=*/nullptr, /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
647 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
648 /*IsFrameworkFound=*/nullptr);
649 if (!File) {
650 Diag(Loc: SourceLocation(), DiagID: diag::err_pp_through_header_not_found)
651 << PPOpts.PCHThroughHeader;
652 return;
653 }
654 setPCHThroughHeaderFileID(
655 SourceMgr.createFileID(SourceFile: *File, IncludePos: SourceLocation(), FileCharacter: SrcMgr::C_User));
656 }
657
658 // Skip tokens from the Predefines and if needed the main file.
659 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
660 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
661 SkipTokensWhileUsingPCH();
662}
663
664void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
665 assert(PCHThroughHeaderFileID.isInvalid() &&
666 "PCHThroughHeaderFileID already set!");
667 PCHThroughHeaderFileID = FID;
668}
669
670bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) {
671 assert(PCHThroughHeaderFileID.isValid() &&
672 "Invalid PCH through header FileID");
673 return FE == SourceMgr.getFileEntryForID(FID: PCHThroughHeaderFileID);
674}
675
676bool Preprocessor::creatingPCHWithThroughHeader() {
677 return TUKind == TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
678 PCHThroughHeaderFileID.isValid();
679}
680
681bool Preprocessor::usingPCHWithThroughHeader() {
682 return TUKind != TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
683 PCHThroughHeaderFileID.isValid();
684}
685
686bool Preprocessor::creatingPCHWithPragmaHdrStop() {
687 return TUKind == TU_Prefix && PPOpts.PCHWithHdrStop;
688}
689
690bool Preprocessor::usingPCHWithPragmaHdrStop() {
691 return TUKind != TU_Prefix && PPOpts.PCHWithHdrStop;
692}
693
694/// Skip tokens until after the #include of the through header or
695/// until after a #pragma hdrstop is seen. Tokens in the predefines file
696/// and the main file may be skipped. If the end of the predefines file
697/// is reached, skipping continues into the main file. If the end of the
698/// main file is reached, it's a fatal error.
699void Preprocessor::SkipTokensWhileUsingPCH() {
700 bool ReachedMainFileEOF = false;
701 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
702 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
703 Token Tok;
704 while (true) {
705 bool InPredefines =
706 (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
707 CurLexerCallback(*this, Tok);
708 if (Tok.is(K: tok::eof) && !InPredefines) {
709 ReachedMainFileEOF = true;
710 break;
711 }
712 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
713 break;
714 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
715 break;
716 }
717 if (ReachedMainFileEOF) {
718 if (UsingPCHThroughHeader)
719 Diag(Loc: SourceLocation(), DiagID: diag::err_pp_through_header_not_seen)
720 << PPOpts.PCHThroughHeader << 1;
721 else if (!PPOpts.PCHWithHdrStopCreate)
722 Diag(Loc: SourceLocation(), DiagID: diag::err_pp_pragma_hdrstop_not_seen);
723 }
724}
725
726void Preprocessor::replayPreambleConditionalStack() {
727 // Restore the conditional stack from the preamble, if there is one.
728 if (PreambleConditionalStack.isReplaying()) {
729 assert(CurPPLexer &&
730 "CurPPLexer is null when calling replayPreambleConditionalStack.");
731 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
732 PreambleConditionalStack.doneReplaying();
733 if (PreambleConditionalStack.reachedEOFWhileSkipping())
734 SkipExcludedConditionalBlock(
735 HashTokenLoc: PreambleConditionalStack.SkipInfo->HashTokenLoc,
736 IfTokenLoc: PreambleConditionalStack.SkipInfo->IfTokenLoc,
737 FoundNonSkipPortion: PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
738 FoundElse: PreambleConditionalStack.SkipInfo->FoundElse,
739 ElseLoc: PreambleConditionalStack.SkipInfo->ElseLoc);
740 }
741}
742
743void Preprocessor::EndSourceFile() {
744 // Notify the client that we reached the end of the source file.
745 if (Callbacks)
746 Callbacks->EndOfMainFile();
747}
748
749//===----------------------------------------------------------------------===//
750// Lexer Event Handling.
751//===----------------------------------------------------------------------===//
752
753/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
754/// identifier information for the token and install it into the token,
755/// updating the token kind accordingly.
756IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
757 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
758
759 // Look up this token, see if it is a macro, or if it is a language keyword.
760 IdentifierInfo *II;
761 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
762 // No cleaning needed, just use the characters from the lexed buffer.
763 II = getIdentifierInfo(Name: Identifier.getRawIdentifier());
764 } else {
765 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
766 SmallString<64> IdentifierBuffer;
767 StringRef CleanedStr = getSpelling(Tok: Identifier, Buffer&: IdentifierBuffer);
768
769 if (Identifier.hasUCN()) {
770 SmallString<64> UCNIdentifierBuffer;
771 expandUCNs(Buf&: UCNIdentifierBuffer, Input: CleanedStr);
772 II = getIdentifierInfo(Name: UCNIdentifierBuffer);
773 } else {
774 II = getIdentifierInfo(Name: CleanedStr);
775 }
776 }
777
778 // Update the token info (identifier info and appropriate token kind).
779 // FIXME: the raw_identifier may contain leading whitespace which is removed
780 // from the cleaned identifier token. The SourceLocation should be updated to
781 // refer to the non-whitespace character. For instance, the text "\\\nB" (a
782 // line continuation before 'B') is parsed as a single tok::raw_identifier and
783 // is cleaned to tok::identifier "B". After cleaning the token's length is
784 // still 3 and the SourceLocation refers to the location of the backslash.
785 Identifier.setIdentifierInfo(II);
786 Identifier.setKind(II->getTokenID());
787
788 return II;
789}
790
791void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
792 PoisonReasons[II] = DiagID;
793}
794
795void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
796 assert(Ident__exception_code && Ident__exception_info);
797 assert(Ident___exception_code && Ident___exception_info);
798 Ident__exception_code->setIsPoisoned(Poison);
799 Ident___exception_code->setIsPoisoned(Poison);
800 Ident_GetExceptionCode->setIsPoisoned(Poison);
801 Ident__exception_info->setIsPoisoned(Poison);
802 Ident___exception_info->setIsPoisoned(Poison);
803 Ident_GetExceptionInfo->setIsPoisoned(Poison);
804 Ident__abnormal_termination->setIsPoisoned(Poison);
805 Ident___abnormal_termination->setIsPoisoned(Poison);
806 Ident_AbnormalTermination->setIsPoisoned(Poison);
807}
808
809void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
810 assert(Identifier.getIdentifierInfo() &&
811 "Can't handle identifiers without identifier info!");
812 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
813 PoisonReasons.find(Val: Identifier.getIdentifierInfo());
814 if(it == PoisonReasons.end())
815 Diag(Tok: Identifier, DiagID: diag::err_pp_used_poisoned_id);
816 else
817 Diag(Tok: Identifier,DiagID: it->second) << Identifier.getIdentifierInfo();
818}
819
820void Preprocessor::updateOutOfDateIdentifier(const IdentifierInfo &II) const {
821 assert(II.isOutOfDate() && "not out of date");
822 assert(getExternalSource() &&
823 "getExternalSource() should not return nullptr");
824 getExternalSource()->updateOutOfDateIdentifier(II);
825}
826
827/// HandleIdentifier - This callback is invoked when the lexer reads an
828/// identifier. This callback looks up the identifier in the map and/or
829/// potentially macro expands it or turns it into a named token (like 'for').
830///
831/// Note that callers of this method are guarded by checking the
832/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
833/// IdentifierInfo methods that compute these properties will need to change to
834/// match.
835bool Preprocessor::HandleIdentifier(Token &Identifier) {
836 assert(Identifier.getIdentifierInfo() &&
837 "Can't handle identifiers without identifier info!");
838
839 IdentifierInfo &II = *Identifier.getIdentifierInfo();
840
841 // If the information about this identifier is out of date, update it from
842 // the external source.
843 // We have to treat __VA_ARGS__ in a special way, since it gets
844 // serialized with isPoisoned = true, but our preprocessor may have
845 // unpoisoned it if we're defining a C99 macro.
846 if (II.isOutOfDate()) {
847 bool CurrentIsPoisoned = false;
848 const bool IsSpecialVariadicMacro =
849 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
850 if (IsSpecialVariadicMacro)
851 CurrentIsPoisoned = II.isPoisoned();
852
853 updateOutOfDateIdentifier(II);
854 Identifier.setKind(II.getTokenID());
855
856 if (IsSpecialVariadicMacro)
857 II.setIsPoisoned(CurrentIsPoisoned);
858 }
859
860 // If this identifier was poisoned, and if it was not produced from a macro
861 // expansion, emit an error.
862 if (II.isPoisoned() && CurPPLexer) {
863 HandlePoisonedIdentifier(Identifier);
864 }
865
866 // If this is a macro to be expanded, do it.
867 if (const MacroDefinition MD = getMacroDefinition(II: &II)) {
868 const auto *MI = MD.getMacroInfo();
869 assert(MI && "macro definition with no macro info?");
870 if (!DisableMacroExpansion) {
871 if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
872 // C99 6.10.3p10: If the preprocessing token immediately after the
873 // macro name isn't a '(', this macro should not be expanded.
874 if (!MI->isFunctionLike() || isNextPPTokenOneOf(Ks: tok::l_paren))
875 return HandleMacroExpandedIdentifier(Identifier, MD);
876 } else {
877 // C99 6.10.3.4p2 says that a disabled macro may never again be
878 // expanded, even if it's in a context where it could be expanded in the
879 // future.
880 Identifier.setFlag(Token::DisableExpand);
881 if (MI->isObjectLike() || isNextPPTokenOneOf(Ks: tok::l_paren))
882 Diag(Tok: Identifier, DiagID: diag::pp_disabled_macro_expansion);
883 }
884 }
885 }
886
887 // If this identifier is a keyword in a newer Standard or proposed Standard,
888 // produce a warning. Don't warn if we're not considering macro expansion,
889 // since this identifier might be the name of a macro.
890 // FIXME: This warning is disabled in cases where it shouldn't be, like
891 // "#define constexpr constexpr", "int constexpr;"
892 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
893 Diag(Tok: Identifier, DiagID: getIdentifierTable().getFutureCompatDiagKind(II, LangOpts: getLangOpts()))
894 << II.getName();
895 // Don't diagnose this keyword again in this translation unit.
896 II.setIsFutureCompatKeyword(false);
897 }
898
899 // If this identifier would be a keyword in C++, diagnose as a compatibility
900 // issue.
901 if (II.IsKeywordInCPlusPlus() && !DisableMacroExpansion)
902 Diag(Tok: Identifier, DiagID: diag::warn_pp_identifier_is_cpp_keyword) << &II;
903
904 // If this is an extension token, diagnose its use.
905 // We avoid diagnosing tokens that originate from macro definitions.
906 // FIXME: This warning is disabled in cases where it shouldn't be,
907 // like "#define TY typeof", "TY(1) x".
908 if (II.isExtensionToken() && !DisableMacroExpansion)
909 Diag(Tok: Identifier, DiagID: diag::ext_token_used);
910
911 // Handle module contextual keywords.
912 if (getLangOpts().CPlusPlusModules && CurLexer &&
913 !CurLexer->isLexingRawMode() && !CurLexer->isPragmaLexer() &&
914 !CurLexer->ParsingPreprocessorDirective &&
915 Identifier.isModuleContextualKeyword() &&
916 HandleModuleContextualKeyword(Result&: Identifier)) {
917 HandleDirective(Result&: Identifier);
918 // With a fatal failure in the module loader, we abort parsing.
919 return hadModuleLoaderFatalFailure();
920 }
921
922 return true;
923}
924
925void Preprocessor::Lex(Token &Result) {
926 ++LexLevel;
927
928 // We loop here until a lex function returns a token; this avoids recursion.
929 while (!CurLexerCallback(*this, Result))
930 ;
931
932 if (Result.is(K: tok::unknown) && TheModuleLoader.HadFatalFailure)
933 return;
934
935 if (Result.is(K: tok::code_completion) && Result.getIdentifierInfo()) {
936 // Remember the identifier before code completion token.
937 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
938 setCodeCompletionTokenRange(Start: Result.getLocation(), End: Result.getEndLoc());
939 // Set IdenfitierInfo to null to avoid confusing code that handles both
940 // identifiers and completion tokens.
941 Result.setIdentifierInfo(nullptr);
942 }
943
944 // Update StdCXXImportSeqState to track our position within a C++20 import-seq
945 // if this token is being produced as a result of phase 4 of translation.
946 // Update TrackGMFState to decide if we are currently in a Global Module
947 // Fragment. GMF state updates should precede StdCXXImportSeq ones, since GMF state
948 // depends on the prevailing StdCXXImportSeq state in two cases.
949 if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
950 !Result.getFlag(Flag: Token::IsReinjected)) {
951 switch (Result.getKind()) {
952 case tok::l_paren: case tok::l_square: case tok::l_brace:
953 StdCXXImportSeqState.handleOpenBracket();
954 break;
955 case tok::r_paren: case tok::r_square:
956 StdCXXImportSeqState.handleCloseBracket();
957 break;
958 case tok::r_brace:
959 StdCXXImportSeqState.handleCloseBrace();
960 break;
961#define PRAGMA_ANNOTATION(X) case tok::annot_##X:
962// For `#pragma ...` mimic ';'.
963#include "clang/Basic/TokenKinds.def"
964#undef PRAGMA_ANNOTATION
965 // This token is injected to represent the translation of '#include "a.h"'
966 // into "import a.h;". Mimic the notional ';'.
967 case tok::annot_module_include:
968 case tok::annot_repl_input_end:
969 case tok::semi:
970 TrackGMFState.handleSemi();
971 StdCXXImportSeqState.handleSemi();
972 ModuleDeclState.handleSemi();
973 break;
974 case tok::header_name:
975 case tok::annot_header_unit:
976 StdCXXImportSeqState.handleHeaderName();
977 break;
978 case tok::kw_export:
979 if (hasSeenNoTrivialPPDirective())
980 Result.setFlag(Token::HasSeenNoTrivialPPDirective);
981 TrackGMFState.handleExport();
982 StdCXXImportSeqState.handleExport();
983 ModuleDeclState.handleExport();
984 break;
985 case tok::colon:
986 ModuleDeclState.handleColon();
987 break;
988 case tok::kw_import:
989 if (StdCXXImportSeqState.atTopLevel()) {
990 TrackGMFState.handleImport(AfterTopLevelTokenSeq: StdCXXImportSeqState.afterTopLevelSeq());
991 StdCXXImportSeqState.handleImport();
992 }
993 break;
994 case tok::kw_module:
995 if (StdCXXImportSeqState.atTopLevel()) {
996 if (hasSeenNoTrivialPPDirective())
997 Result.setFlag(Token::HasSeenNoTrivialPPDirective);
998 TrackGMFState.handleModule(AfterTopLevelTokenSeq: StdCXXImportSeqState.afterTopLevelSeq());
999 ModuleDeclState.handleModule();
1000 }
1001 break;
1002 case tok::annot_module_name:
1003 ModuleDeclState.handleModuleName(
1004 NameLoc: static_cast<ModuleNameLoc *>(Result.getAnnotationValue()));
1005 if (ModuleDeclState.isModuleCandidate())
1006 break;
1007 [[fallthrough]];
1008 default:
1009 TrackGMFState.handleMisc();
1010 StdCXXImportSeqState.handleMisc();
1011 ModuleDeclState.handleMisc();
1012 break;
1013 }
1014 }
1015
1016 if (CurLexer && ++CheckPointCounter == CheckPointStepSize) {
1017 CheckPoints[CurLexer->getFileID()].push_back(Elt: CurLexer->BufferPtr);
1018 CheckPointCounter = 0;
1019 }
1020
1021 if (Result.isNot(K: tok::kw_export))
1022 LastExportKeyword.startToken();
1023
1024 --LexLevel;
1025
1026 // Destroy any lexers that were deferred while we were in nested Lex calls.
1027 // This must happen after decrementing LexLevel but before any other
1028 // processing that might re-enter Lex.
1029 if (LexLevel == 0 && !PendingDestroyLexers.empty())
1030 PendingDestroyLexers.clear();
1031
1032 if ((LexLevel == 0 || PreprocessToken) &&
1033 !Result.getFlag(Flag: Token::IsReinjected)) {
1034 if (LexLevel == 0)
1035 ++TokenCount;
1036 if (OnToken)
1037 OnToken(Result);
1038 }
1039}
1040
1041void Preprocessor::LexTokensUntilEOF(std::vector<Token> *Tokens) {
1042 while (1) {
1043 Token Tok;
1044 Lex(Result&: Tok);
1045 if (Tok.isOneOf(Ks: tok::unknown, Ks: tok::eof, Ks: tok::eod,
1046 Ks: tok::annot_repl_input_end))
1047 break;
1048 if (Tokens != nullptr)
1049 Tokens->push_back(x: Tok);
1050 }
1051}
1052
1053/// Lex a header-name token (including one formed from header-name-tokens if
1054/// \p AllowMacroExpansion is \c true).
1055///
1056/// \param FilenameTok Filled in with the next token. On success, this will
1057/// be either a header_name token. On failure, it will be whatever other
1058/// token was found instead.
1059/// \param AllowMacroExpansion If \c true, allow the header name to be formed
1060/// by macro expansion (concatenating tokens as necessary if the first
1061/// token is a '<').
1062/// \return \c true if we reached EOD or EOF while looking for a > token in
1063/// a concatenated header name and diagnosed it. \c false otherwise.
1064bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
1065 // Lex using header-name tokenization rules if tokens are being lexed from
1066 // a file. Just grab a token normally if we're in a macro expansion.
1067 if (CurPPLexer) {
1068 // Avoid nested header-name lexing when macro expansion recurses
1069 // __has_include(__has_include))
1070 if (CurPPLexer->ParsingFilename)
1071 LexUnexpandedToken(Result&: FilenameTok);
1072 else
1073 CurPPLexer->LexIncludeFilename(FilenameTok);
1074 } else {
1075 Lex(Result&: FilenameTok);
1076 }
1077
1078 // This could be a <foo/bar.h> file coming from a macro expansion. In this
1079 // case, glue the tokens together into an angle_string_literal token.
1080 SmallString<128> FilenameBuffer;
1081 if (FilenameTok.is(K: tok::less) && AllowMacroExpansion) {
1082 bool StartOfLine = FilenameTok.isAtStartOfLine();
1083 bool LeadingSpace = FilenameTok.hasLeadingSpace();
1084 bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
1085
1086 SourceLocation Start = FilenameTok.getLocation();
1087 SourceLocation End;
1088 FilenameBuffer.push_back(Elt: '<');
1089
1090 // Consume tokens until we find a '>'.
1091 // FIXME: A header-name could be formed starting or ending with an
1092 // alternative token. It's not clear whether that's ill-formed in all
1093 // cases.
1094 while (FilenameTok.isNot(K: tok::greater)) {
1095 Lex(Result&: FilenameTok);
1096 if (FilenameTok.isOneOf(Ks: tok::eod, Ks: tok::eof)) {
1097 Diag(Loc: FilenameTok.getLocation(), DiagID: diag::err_expected) << tok::greater;
1098 Diag(Loc: Start, DiagID: diag::note_matching) << tok::less;
1099 return true;
1100 }
1101
1102 End = FilenameTok.getLocation();
1103
1104 // FIXME: Provide code completion for #includes.
1105 if (FilenameTok.is(K: tok::code_completion)) {
1106 setCodeCompletionReached();
1107 Lex(Result&: FilenameTok);
1108 continue;
1109 }
1110
1111 // Append the spelling of this token to the buffer. If there was a space
1112 // before it, add it now.
1113 if (FilenameTok.hasLeadingSpace())
1114 FilenameBuffer.push_back(Elt: ' ');
1115
1116 // Get the spelling of the token, directly into FilenameBuffer if
1117 // possible.
1118 size_t PreAppendSize = FilenameBuffer.size();
1119 FilenameBuffer.resize(N: PreAppendSize + FilenameTok.getLength());
1120
1121 const char *BufPtr = &FilenameBuffer[PreAppendSize];
1122 unsigned ActualLen = getSpelling(Tok: FilenameTok, Buffer&: BufPtr);
1123
1124 // If the token was spelled somewhere else, copy it into FilenameBuffer.
1125 if (BufPtr != &FilenameBuffer[PreAppendSize])
1126 memcpy(dest: &FilenameBuffer[PreAppendSize], src: BufPtr, n: ActualLen);
1127
1128 // Resize FilenameBuffer to the correct size.
1129 if (FilenameTok.getLength() != ActualLen)
1130 FilenameBuffer.resize(N: PreAppendSize + ActualLen);
1131 }
1132
1133 FilenameTok.startToken();
1134 FilenameTok.setKind(tok::header_name);
1135 FilenameTok.setFlagValue(Flag: Token::StartOfLine, Val: StartOfLine);
1136 FilenameTok.setFlagValue(Flag: Token::LeadingSpace, Val: LeadingSpace);
1137 FilenameTok.setFlagValue(Flag: Token::LeadingEmptyMacro, Val: LeadingEmptyMacro);
1138 CreateString(Str: FilenameBuffer, Tok&: FilenameTok, ExpansionLocStart: Start, ExpansionLocEnd: End);
1139 } else if (FilenameTok.is(K: tok::string_literal) && AllowMacroExpansion) {
1140 // Convert a string-literal token of the form " h-char-sequence "
1141 // (produced by macro expansion) into a header-name token.
1142 //
1143 // The rules for header-names don't quite match the rules for
1144 // string-literals, but all the places where they differ result in
1145 // undefined behavior, so we can and do treat them the same.
1146 //
1147 // A string-literal with a prefix or suffix is not translated into a
1148 // header-name. This could theoretically be observable via the C++20
1149 // context-sensitive header-name formation rules.
1150 StringRef Str = getSpelling(Tok: FilenameTok, Buffer&: FilenameBuffer);
1151 if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
1152 FilenameTok.setKind(tok::header_name);
1153 }
1154
1155 return false;
1156}
1157
1158std::optional<Token> Preprocessor::peekNextPPToken() const {
1159 // Do some quick tests for rejection cases.
1160 std::optional<Token> Val;
1161 if (CurLexer)
1162 Val = CurLexer->peekNextPPToken();
1163 else
1164 Val = CurTokenLexer->peekNextPPToken();
1165
1166 if (!Val) {
1167 // We have run off the end. If it's a source file we don't
1168 // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
1169 // macro stack.
1170 if (CurPPLexer)
1171 return std::nullopt;
1172 for (const IncludeStackInfo &Entry : llvm::reverse(C: IncludeMacroStack)) {
1173 if (Entry.TheLexer)
1174 Val = Entry.TheLexer->peekNextPPToken();
1175 else
1176 Val = Entry.TheTokenLexer->peekNextPPToken();
1177
1178 if (Val)
1179 break;
1180
1181 // Ran off the end of a source file?
1182 if (Entry.ThePPLexer)
1183 return std::nullopt;
1184 }
1185 }
1186
1187 // Okay, we found the token and return. Otherwise we found the end of the
1188 // translation unit.
1189 return Val;
1190}
1191
1192// We represent the primary and partition names as 'Paths' which are sections
1193// of the hierarchical access path for a clang module. However for C++20
1194// the periods in a name are just another character, and we will need to
1195// flatten them into a string.
1196std::string ModuleLoader::getFlatNameFromPath(ModuleIdPath Path) {
1197 std::string Name;
1198 if (Path.empty())
1199 return Name;
1200
1201 for (auto &Piece : Path) {
1202 assert(Piece.getIdentifierInfo() && Piece.getLoc().isValid());
1203 if (!Name.empty())
1204 Name += ".";
1205 Name += Piece.getIdentifierInfo()->getName();
1206 }
1207 return Name;
1208}
1209
1210ModuleNameLoc *ModuleNameLoc::Create(Preprocessor &PP, ModuleIdPath Path) {
1211 assert(!Path.empty() && "expect at least one identifier in a module name");
1212 void *Mem = PP.getPreprocessorAllocator().Allocate(
1213 Size: totalSizeToAlloc<IdentifierLoc>(Counts: Path.size()), Alignment: alignof(ModuleNameLoc));
1214 return new (Mem) ModuleNameLoc(Path);
1215}
1216
1217bool Preprocessor::LexModuleNameContinue(Token &Tok, SourceLocation UseLoc,
1218 SmallVectorImpl<Token> &Suffix,
1219 SmallVectorImpl<IdentifierLoc> &Path,
1220 bool AllowMacroExpansion,
1221 bool IsPartition) {
1222 auto ConsumeToken = [&]() {
1223 if (AllowMacroExpansion)
1224 Lex(Result&: Tok);
1225 else
1226 LexUnexpandedToken(Result&: Tok);
1227 Suffix.push_back(Elt: Tok);
1228 };
1229
1230 while (true) {
1231 if (Tok.isNot(K: tok::identifier)) {
1232 if (Tok.is(K: tok::code_completion)) {
1233 CurLexer->cutOffLexing();
1234 CodeComplete->CodeCompleteModuleImport(ImportLoc: UseLoc, Path);
1235 return true;
1236 }
1237
1238 Diag(Tok, DiagID: diag::err_pp_module_expected_ident) << Path.empty();
1239 return true;
1240 }
1241
1242 // [cpp.pre]/p2:
1243 // No identifier in the pp-module-name or pp-module-partition shall
1244 // currently be defined as an object-like macro.
1245 if (MacroInfo *MI = getMacroInfo(II: Tok.getIdentifierInfo());
1246 MI && MI->isObjectLike() && getLangOpts().CPlusPlus20 &&
1247 !AllowMacroExpansion) {
1248 Diag(Tok, DiagID: diag::err_pp_module_name_is_macro)
1249 << IsPartition << Tok.getIdentifierInfo();
1250 Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::note_macro_here)
1251 << Tok.getIdentifierInfo();
1252 }
1253
1254 // Record this part of the module path.
1255 Path.emplace_back(Args: Tok.getLocation(), Args: Tok.getIdentifierInfo());
1256 ConsumeToken();
1257
1258 if (Tok.isNot(K: tok::period))
1259 return false;
1260
1261 ConsumeToken();
1262 }
1263}
1264
1265bool Preprocessor::HandleModuleName(StringRef DirType, SourceLocation UseLoc,
1266 Token &Tok,
1267 SmallVectorImpl<IdentifierLoc> &Path,
1268 SmallVectorImpl<Token> &DirToks,
1269 bool AllowMacroExpansion,
1270 bool IsPartition) {
1271 bool LeadingSpace = Tok.hasLeadingSpace();
1272 unsigned NumToksInDirective = DirToks.size();
1273 if (LexModuleNameContinue(Tok, UseLoc, Suffix&: DirToks, Path, AllowMacroExpansion,
1274 IsPartition)) {
1275 if (Tok.isNot(K: tok::eod))
1276 CheckEndOfDirective(DirType,
1277 /*EnableMacros=*/false, ExtraToks: &DirToks);
1278 EnterModuleSuffixTokenStream(Toks: DirToks);
1279 return true;
1280 }
1281
1282 // Clean the module-name tokens and replace these tokens with
1283 // annot_module_name.
1284 DirToks.resize(N: NumToksInDirective);
1285 ModuleNameLoc *NameLoc = ModuleNameLoc::Create(PP&: *this, Path);
1286 DirToks.emplace_back();
1287 DirToks.back().setKind(tok::annot_module_name);
1288 DirToks.back().setAnnotationRange(NameLoc->getRange());
1289 DirToks.back().setAnnotationValue(static_cast<void *>(NameLoc));
1290 DirToks.back().setFlagValue(Flag: Token::LeadingSpace, Val: LeadingSpace);
1291 DirToks.push_back(Elt: Tok);
1292 return false;
1293}
1294
1295/// [cpp.pre]/p2:
1296/// A preprocessing directive consists of a sequence of preprocessing tokens
1297/// that satisfies the following constraints: At the start of translation phase
1298/// 4, the first preprocessing token in the sequence, referred to as a
1299/// directive-introducing token, begins with the first character in the source
1300/// file (optionally after whitespace containing no new-line characters) or
1301/// follows whitespace containing at least one new-line character, and is:
1302/// - a # preprocessing token, or
1303/// - an import preprocessing token immediately followed on the same logical
1304/// source line by a header-name, <, identifier, or : preprocessing token, or
1305/// - a module preprocessing token immediately followed on the same logical
1306/// source line by an identifier, :, or ; preprocessing token, or
1307/// - an export preprocessing token immediately followed on the same logical
1308/// source line by one of the two preceding forms.
1309///
1310///
1311/// At the start of phase 4 an import or module token is treated as starting a
1312/// directive and are converted to their respective keywords iff:
1313/// - After skipping horizontal whitespace are
1314/// - at the start of a logical line, or
1315/// - preceded by an 'export' at the start of the logical line.
1316/// - Are followed by an identifier pp token (before macro expansion), or
1317/// - <, ", or : (but not ::) pp tokens for 'import', or
1318/// - ; for 'module'
1319/// Otherwise the token is treated as an identifier.
1320bool Preprocessor::HandleModuleContextualKeyword(Token &Result) {
1321 if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword())
1322 return false;
1323
1324 if (Result.is(K: tok::kw_export)) {
1325 LastExportKeyword = Result;
1326 return false;
1327 }
1328
1329 /// Trait 'module' and 'import' as a identifier when the main file is a
1330 /// preprocessed module file. We only allow '__preprocessed_module' and
1331 /// '__preprocessed_import' in this context.
1332 IdentifierInfo *II = Result.getIdentifierInfo();
1333 if (isPreprocessedModuleFile() &&
1334 (II->isStr(Str: tok::getKeywordSpelling(Kind: tok::kw_import)) ||
1335 II->isStr(Str: tok::getKeywordSpelling(Kind: tok::kw_module))))
1336 return false;
1337
1338 if (LastExportKeyword.is(K: tok::kw_export)) {
1339 // The export keyword was not at the start of line, it's not a
1340 // directive-introducing token.
1341 if (!LastExportKeyword.isAtPhysicalStartOfLine())
1342 return false;
1343 // [cpp.pre]/1.4
1344 // export // not a preprocessing directive
1345 // import foo; // preprocessing directive (ill-formed at phase7)
1346 if (Result.isAtPhysicalStartOfLine())
1347 return false;
1348 } else if (!Result.isAtPhysicalStartOfLine())
1349 return false;
1350
1351 llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
1352 CurPPLexer->ParsingPreprocessorDirective, true);
1353
1354 // The next token may be an angled string literal after import keyword.
1355 llvm::SaveAndRestore<bool> SavedParsingFilemame(
1356 CurPPLexer->ParsingFilename,
1357 Result.getIdentifierInfo()->isImportKeyword());
1358
1359 std::optional<Token> NextTok = peekNextPPToken();
1360 if (!NextTok)
1361 return false;
1362
1363 if (NextTok->is(K: tok::raw_identifier))
1364 LookUpIdentifierInfo(Identifier&: *NextTok);
1365
1366 if (Result.getIdentifierInfo()->isImportKeyword()) {
1367 if (NextTok->isOneOf(Ks: tok::identifier, Ks: tok::less, Ks: tok::colon,
1368 Ks: tok::header_name)) {
1369 Result.setKind(tok::kw_import);
1370 ModuleImportLoc = Result.getLocation();
1371 return true;
1372 }
1373 }
1374
1375 if (Result.getIdentifierInfo()->isModuleKeyword() &&
1376 NextTok->isOneOf(Ks: tok::identifier, Ks: tok::colon, Ks: tok::semi)) {
1377 Result.setKind(tok::kw_module);
1378 ModuleDeclLoc = Result.getLocation();
1379 return true;
1380 }
1381
1382 // Ok, it's an identifier.
1383 return false;
1384}
1385
1386bool Preprocessor::CollectPPImportSuffixAndEnterStream(
1387 SmallVectorImpl<Token> &Toks, bool StopUntilEOD) {
1388 CollectPPImportSuffix(Toks);
1389 EnterModuleSuffixTokenStream(Toks);
1390 return false;
1391}
1392
1393/// Collect the tokens of a C++20 pp-import-suffix.
1394void Preprocessor::CollectPPImportSuffix(SmallVectorImpl<Token> &Toks,
1395 bool StopUntilEOD) {
1396 while (true) {
1397 Toks.emplace_back();
1398 Lex(Result&: Toks.back());
1399
1400 switch (Toks.back().getKind()) {
1401 case tok::semi:
1402 if (!StopUntilEOD)
1403 return;
1404 [[fallthrough]];
1405 case tok::eod:
1406 case tok::eof:
1407 return;
1408 default:
1409 break;
1410 }
1411 }
1412}
1413
1414// Allocate a holding buffer for a sequence of tokens and introduce it into
1415// the token stream.
1416void Preprocessor::EnterModuleSuffixTokenStream(ArrayRef<Token> Toks) {
1417 if (Toks.empty())
1418 return;
1419 auto ToksCopy = std::make_unique<Token[]>(num: Toks.size());
1420 std::copy(first: Toks.begin(), last: Toks.end(), result: ToksCopy.get());
1421 EnterTokenStream(Toks: std::move(ToksCopy), NumToks: Toks.size(),
1422 /*DisableMacroExpansion*/ false, /*IsReinject*/ false);
1423 assert(CurTokenLexer && "Must have a TokenLexer");
1424 CurTokenLexer->setLexingCXXModuleDirective();
1425}
1426
1427void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc,
1428 bool IncludeExports) {
1429 CurSubmoduleState->VisibleModules.setVisible(
1430 M, Loc, IncludeExports, Vis: [](Module *) {},
1431 Cb: [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
1432 // FIXME: Include the path in the diagnostic.
1433 // FIXME: Include the import location for the conflicting module.
1434 Diag(Loc: ModuleImportLoc, DiagID: diag::warn_module_conflict)
1435 << Path[0]->getFullModuleName()
1436 << Conflict->getFullModuleName()
1437 << Message;
1438 });
1439
1440 // Add this module to the imports list of the currently-built submodule.
1441 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
1442 BuildingSubmoduleStack.back().M->Imports.push_back(Elt: M);
1443}
1444
1445bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
1446 const char *DiagnosticTag,
1447 bool AllowMacroExpansion) {
1448 // We need at least one string literal.
1449 if (Result.isNot(K: tok::string_literal)) {
1450 Diag(Tok: Result, DiagID: diag::err_expected_string_literal)
1451 << /*Source='in...'*/0 << DiagnosticTag;
1452 return false;
1453 }
1454
1455 // Lex string literal tokens, optionally with macro expansion.
1456 SmallVector<Token, 4> StrToks;
1457 do {
1458 StrToks.push_back(Elt: Result);
1459
1460 if (Result.hasUDSuffix())
1461 Diag(Tok: Result, DiagID: diag::err_invalid_string_udl);
1462
1463 if (AllowMacroExpansion)
1464 Lex(Result);
1465 else
1466 LexUnexpandedToken(Result);
1467 } while (Result.is(K: tok::string_literal));
1468
1469 // Concatenate and parse the strings.
1470 StringLiteralParser Literal(StrToks, *this);
1471 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1472
1473 if (Literal.hadError)
1474 return false;
1475
1476 if (Literal.Pascal) {
1477 Diag(Loc: StrToks[0].getLocation(), DiagID: diag::err_expected_string_literal)
1478 << /*Source='in...'*/0 << DiagnosticTag;
1479 return false;
1480 }
1481
1482 String = std::string(Literal.GetString());
1483 return true;
1484}
1485
1486bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
1487 assert(Tok.is(tok::numeric_constant));
1488 SmallString<8> IntegerBuffer;
1489 bool NumberInvalid = false;
1490 StringRef Spelling = getSpelling(Tok, Buffer&: IntegerBuffer, Invalid: &NumberInvalid);
1491 if (NumberInvalid)
1492 return false;
1493 NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
1494 getLangOpts(), getTargetInfo(),
1495 getDiagnostics());
1496 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
1497 return false;
1498 llvm::APInt APVal(64, 0);
1499 if (Literal.GetIntegerValue(Val&: APVal))
1500 return false;
1501 Lex(Result&: Tok);
1502 Value = APVal.getLimitedValue();
1503 return true;
1504}
1505
1506void Preprocessor::addCommentHandler(CommentHandler *Handler) {
1507 assert(Handler && "NULL comment handler");
1508 assert(!llvm::is_contained(CommentHandlers, Handler) &&
1509 "Comment handler already registered");
1510 CommentHandlers.push_back(x: Handler);
1511}
1512
1513void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
1514 std::vector<CommentHandler *>::iterator Pos =
1515 llvm::find(Range&: CommentHandlers, Val: Handler);
1516 assert(Pos != CommentHandlers.end() && "Comment handler not registered");
1517 CommentHandlers.erase(position: Pos);
1518}
1519
1520bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
1521 bool AnyPendingTokens = false;
1522 for (CommentHandler *H : CommentHandlers) {
1523 if (H->HandleComment(PP&: *this, Comment))
1524 AnyPendingTokens = true;
1525 }
1526 if (!AnyPendingTokens || getCommentRetentionState())
1527 return false;
1528 Lex(Result&: result);
1529 return true;
1530}
1531
1532void Preprocessor::emitMacroDeprecationWarning(const Token &Identifier) const {
1533 const MacroAnnotations &A =
1534 getMacroAnnotations(II: Identifier.getIdentifierInfo());
1535 assert(A.DeprecationInfo &&
1536 "Macro deprecation warning without recorded annotation!");
1537 const MacroAnnotationInfo &Info = *A.DeprecationInfo;
1538 if (Info.Message.empty())
1539 Diag(Tok: Identifier, DiagID: diag::warn_pragma_deprecated_macro_use)
1540 << Identifier.getIdentifierInfo() << 0;
1541 else
1542 Diag(Tok: Identifier, DiagID: diag::warn_pragma_deprecated_macro_use)
1543 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1544 Diag(Loc: Info.Location, DiagID: diag::note_pp_macro_annotation) << 0;
1545}
1546
1547void Preprocessor::emitRestrictExpansionWarning(const Token &Identifier) const {
1548 const MacroAnnotations &A =
1549 getMacroAnnotations(II: Identifier.getIdentifierInfo());
1550 assert(A.RestrictExpansionInfo &&
1551 "Macro restricted expansion warning without recorded annotation!");
1552 const MacroAnnotationInfo &Info = *A.RestrictExpansionInfo;
1553 if (Info.Message.empty())
1554 Diag(Tok: Identifier, DiagID: diag::warn_pragma_restrict_expansion_macro_use)
1555 << Identifier.getIdentifierInfo() << 0;
1556 else
1557 Diag(Tok: Identifier, DiagID: diag::warn_pragma_restrict_expansion_macro_use)
1558 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1559 Diag(Loc: Info.Location, DiagID: diag::note_pp_macro_annotation) << 1;
1560}
1561
1562void Preprocessor::emitRestrictInfNaNWarning(const Token &Identifier,
1563 unsigned DiagSelection) const {
1564 Diag(Tok: Identifier, DiagID: diag::warn_fp_nan_inf_when_disabled) << DiagSelection << 1;
1565}
1566
1567void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
1568 bool IsUndef) const {
1569 const MacroAnnotations &A =
1570 getMacroAnnotations(II: Identifier.getIdentifierInfo());
1571 assert(A.FinalAnnotationLoc &&
1572 "Final macro warning without recorded annotation!");
1573
1574 Diag(Tok: Identifier, DiagID: diag::warn_pragma_final_macro)
1575 << Identifier.getIdentifierInfo() << (IsUndef ? 0 : 1);
1576 Diag(Loc: *A.FinalAnnotationLoc, DiagID: diag::note_pp_macro_annotation) << 2;
1577}
1578
1579bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
1580 const SourceLocation &Loc) const {
1581 // The lambda that tests if a `Loc` is in an opt-out region given one opt-out
1582 // region map:
1583 auto TestInMap = [&SourceMgr](const SafeBufferOptOutRegionsTy &Map,
1584 const SourceLocation &Loc) -> bool {
1585 // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
1586 auto FirstRegionEndingAfterLoc = llvm::partition_point(
1587 Range: Map, P: [&SourceMgr,
1588 &Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
1589 return SourceMgr.isBeforeInTranslationUnit(LHS: Region.second, RHS: Loc);
1590 });
1591
1592 if (FirstRegionEndingAfterLoc != Map.end()) {
1593 // To test if the start location of the found region precedes `Loc`:
1594 return SourceMgr.isBeforeInTranslationUnit(
1595 LHS: FirstRegionEndingAfterLoc->first, RHS: Loc);
1596 }
1597 // If we do not find a region whose end location passes `Loc`, we want to
1598 // check if the current region is still open:
1599 if (!Map.empty() && Map.back().first == Map.back().second)
1600 return SourceMgr.isBeforeInTranslationUnit(LHS: Map.back().first, RHS: Loc);
1601 return false;
1602 };
1603
1604 // What the following does:
1605 //
1606 // If `Loc` belongs to the local TU, we just look up `SafeBufferOptOutMap`.
1607 // Otherwise, `Loc` is from a loaded AST. We look up the
1608 // `LoadedSafeBufferOptOutMap` first to get the opt-out region map of the
1609 // loaded AST where `Loc` is at. Then we find if `Loc` is in an opt-out
1610 // region w.r.t. the region map. If the region map is absent, it means there
1611 // is no opt-out pragma in that loaded AST.
1612 //
1613 // Opt-out pragmas in the local TU or a loaded AST is not visible to another
1614 // one of them. That means if you put the pragmas around a `#include
1615 // "module.h"`, where module.h is a module, it is not actually suppressing
1616 // warnings in module.h. This is fine because warnings in module.h will be
1617 // reported when module.h is compiled in isolation and nothing in module.h
1618 // will be analyzed ever again. So you will not see warnings from the file
1619 // that imports module.h anyway. And you can't even do the same thing for PCHs
1620 // because they can only be included from the command line.
1621
1622 if (SourceMgr.isLocalSourceLocation(Loc))
1623 return TestInMap(SafeBufferOptOutMap, Loc);
1624
1625 const SafeBufferOptOutRegionsTy *LoadedRegions =
1626 LoadedSafeBufferOptOutMap.lookupLoadedOptOutMap(Loc, SrcMgr: SourceMgr);
1627
1628 if (LoadedRegions)
1629 return TestInMap(*LoadedRegions, Loc);
1630 return false;
1631}
1632
1633bool Preprocessor::enterOrExitSafeBufferOptOutRegion(
1634 bool isEnter, const SourceLocation &Loc) {
1635 if (isEnter) {
1636 if (isPPInSafeBufferOptOutRegion())
1637 return true; // invalid enter action
1638 InSafeBufferOptOutRegion = true;
1639 CurrentSafeBufferOptOutStart = Loc;
1640
1641 // To set the start location of a new region:
1642
1643 if (!SafeBufferOptOutMap.empty()) {
1644 [[maybe_unused]] auto *PrevRegion = &SafeBufferOptOutMap.back();
1645 assert(PrevRegion->first != PrevRegion->second &&
1646 "Shall not begin a safe buffer opt-out region before closing the "
1647 "previous one.");
1648 }
1649 // If the start location equals to the end location, we call the region a
1650 // open region or a unclosed region (i.e., end location has not been set
1651 // yet).
1652 SafeBufferOptOutMap.emplace_back(Args: Loc, Args: Loc);
1653 } else {
1654 if (!isPPInSafeBufferOptOutRegion())
1655 return true; // invalid enter action
1656 InSafeBufferOptOutRegion = false;
1657
1658 // To set the end location of the current open region:
1659
1660 assert(!SafeBufferOptOutMap.empty() &&
1661 "Misordered safe buffer opt-out regions");
1662 auto *CurrRegion = &SafeBufferOptOutMap.back();
1663 assert(CurrRegion->first == CurrRegion->second &&
1664 "Set end location to a closed safe buffer opt-out region");
1665 CurrRegion->second = Loc;
1666 }
1667 return false;
1668}
1669
1670bool Preprocessor::isPPInSafeBufferOptOutRegion() {
1671 return InSafeBufferOptOutRegion;
1672}
1673bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) {
1674 StartLoc = CurrentSafeBufferOptOutStart;
1675 return InSafeBufferOptOutRegion;
1676}
1677
1678SmallVector<SourceLocation, 64>
1679Preprocessor::serializeSafeBufferOptOutMap() const {
1680 assert(!InSafeBufferOptOutRegion &&
1681 "Attempt to serialize safe buffer opt-out regions before file being "
1682 "completely preprocessed");
1683
1684 SmallVector<SourceLocation, 64> SrcSeq;
1685
1686 for (const auto &[begin, end] : SafeBufferOptOutMap) {
1687 SrcSeq.push_back(Elt: begin);
1688 SrcSeq.push_back(Elt: end);
1689 }
1690 // Only `SafeBufferOptOutMap` gets serialized. No need to serialize
1691 // `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
1692 // pch/module in the pch-chain/module-DAG will be loaded one by one in order.
1693 // It means that for each loading pch/module m, it just needs to load m's own
1694 // `SafeBufferOptOutMap`.
1695 return SrcSeq;
1696}
1697
1698bool Preprocessor::setDeserializedSafeBufferOptOutMap(
1699 const SmallVectorImpl<SourceLocation> &SourceLocations) {
1700 if (SourceLocations.size() == 0)
1701 return false;
1702
1703 assert(SourceLocations.size() % 2 == 0 &&
1704 "ill-formed SourceLocation sequence");
1705
1706 auto It = SourceLocations.begin();
1707 SafeBufferOptOutRegionsTy &Regions =
1708 LoadedSafeBufferOptOutMap.findAndConsLoadedOptOutMap(Loc: *It, SrcMgr&: SourceMgr);
1709
1710 do {
1711 SourceLocation Begin = *It++;
1712 SourceLocation End = *It++;
1713
1714 Regions.emplace_back(Args&: Begin, Args&: End);
1715 } while (It != SourceLocations.end());
1716 return true;
1717}
1718
// Out-of-line definitions of the defaulted destructors of several
// preprocessor-related interfaces. They are defined in this translation unit
// rather than inline in their headers.
// NOTE(review): presumably this gives each class a single home for its
// vtable -- confirm against the class declarations.
ModuleLoader::~ModuleLoader() = default;

CommentHandler::~CommentHandler() = default;

EmptylineHandler::~EmptylineHandler() = default;

CodeCompletionHandler::~CodeCompletionHandler() = default;
1726
1727void Preprocessor::createPreprocessingRecord() {
1728 if (Record)
1729 return;
1730
1731 Record = new PreprocessingRecord(getSourceManager());
1732 addPPCallbacks(C: std::unique_ptr<PPCallbacks>(Record));
1733}
1734
1735void Preprocessor::removePPCallbacks() {
1736 auto IsPreserved = [&](PPCallbacks *C) {
1737 return C == Record || C == DirTracer;
1738 };
1739 SmallVector<PPCallbacks *, 2> Released;
1740 PPCallbacks::releaseIfPreserved(CB&: Callbacks, Pred: IsPreserved, Released);
1741 Callbacks.reset();
1742 for (auto *P : Released)
1743 addPPCallbacks(C: std::unique_ptr<PPCallbacks>(P));
1744}
1745
1746const char *Preprocessor::getCheckPoint(FileID FID, const char *Start) const {
1747 if (auto It = CheckPoints.find(Val: FID); It != CheckPoints.end()) {
1748 const SmallVector<const char *> &FileCheckPoints = It->second;
1749 auto P = llvm::upper_bound(Range: FileCheckPoints, Value&: Start);
1750 if (P == FileCheckPoints.begin())
1751 return nullptr;
1752 return *std::prev(x: P);
1753 }
1754 return nullptr;
1755}
1756
1757bool Preprocessor::hasSeenNoTrivialPPDirective() const {
1758 return DirTracer && DirTracer->hasSeenNoTrivialPPDirective();
1759}
1760
/// \returns the current value of the `SeenNoTrivialPPDirective` flag, which
///          is raised by setSeenNoTrivialPPDirective().
bool NoTrivialPPDirectiveTracer::hasSeenNoTrivialPPDirective() const {
  return SeenNoTrivialPPDirective;
}
1764
1765void NoTrivialPPDirectiveTracer::setSeenNoTrivialPPDirective() {
1766 if (InMainFile && !SeenNoTrivialPPDirective)
1767 SeenNoTrivialPPDirective = true;
1768}
1769
1770void NoTrivialPPDirectiveTracer::LexedFileChanged(
1771 FileID FID, LexedFileChangeReason Reason,
1772 SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) {
1773 InMainFile = (FID == PP.getSourceManager().getMainFileID());
1774}
1775
1776void NoTrivialPPDirectiveTracer::MacroExpands(const Token &MacroNameTok,
1777 const MacroDefinition &MD,
1778 SourceRange Range,
1779 const MacroArgs *Args) {
1780 // FIXME: Does only enable builtin macro expansion make sense?
1781 if (!MD.getMacroInfo()->isBuiltinMacro())
1782 setSeenNoTrivialPPDirective();
1783}
1784