Preprocessor.cpp source code [llvm_projects/clang/lib/Lex/Preprocessor.cpp]

1	//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the Preprocessor interface.
10	//
11	//===----------------------------------------------------------------------===//
12	//
13	// Options to support:
14	// -H - Print the name of each header file used.
15	// -d[DNI] - Dump various things.
16	// -fworking-directory - #line's with preprocessor's working dir.
17	// -fpreprocessed
18	// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
19	// -W*
20	// -w
21	//
22	// Messages to emit:
23	// "Multiple include guards may be useful for:\n"
24	//
25	//===----------------------------------------------------------------------===//
26
27	#include "clang/Lex/Preprocessor.h"
28	#include "clang/Basic/Builtins.h"
29	#include "clang/Basic/FileManager.h"
30	#include "clang/Basic/IdentifierTable.h"
31	#include "clang/Basic/LLVM.h"
32	#include "clang/Basic/LangOptions.h"
33	#include "clang/Basic/Module.h"
34	#include "clang/Basic/SourceLocation.h"
35	#include "clang/Basic/SourceManager.h"
36	#include "clang/Basic/TargetInfo.h"
37	#include "clang/Lex/CodeCompletionHandler.h"
38	#include "clang/Lex/DependencyDirectivesScanner.h"
39	#include "clang/Lex/ExternalPreprocessorSource.h"
40	#include "clang/Lex/HeaderSearch.h"
41	#include "clang/Lex/LexDiagnostic.h"
42	#include "clang/Lex/Lexer.h"
43	#include "clang/Lex/LiteralSupport.h"
44	#include "clang/Lex/MacroArgs.h"
45	#include "clang/Lex/MacroInfo.h"
46	#include "clang/Lex/ModuleLoader.h"
47	#include "clang/Lex/NoTrivialPPDirectiveTracer.h"
48	#include "clang/Lex/Pragma.h"
49	#include "clang/Lex/PreprocessingRecord.h"
50	#include "clang/Lex/PreprocessorLexer.h"
51	#include "clang/Lex/PreprocessorOptions.h"
52	#include "clang/Lex/ScratchBuffer.h"
53	#include "clang/Lex/Token.h"
54	#include "clang/Lex/TokenLexer.h"
55	#include "llvm/ADT/APInt.h"
56	#include "llvm/ADT/ArrayRef.h"
57	#include "llvm/ADT/DenseMap.h"
58	#include "llvm/ADT/STLExtras.h"
59	#include "llvm/ADT/ScopeExit.h"
60	#include "llvm/ADT/SmallVector.h"
61	#include "llvm/ADT/StringRef.h"
62	#include "llvm/Support/Capacity.h"
63	#include "llvm/Support/ErrorHandling.h"
64	#include "llvm/Support/MemoryBuffer.h"
65	#include "llvm/Support/MemoryBufferRef.h"
66	#include "llvm/Support/SaveAndRestore.h"
67	#include "llvm/Support/raw_ostream.h"
68	#include <algorithm>
69	#include <cassert>
70	#include <memory>
71	#include <optional>
72	#include <string>
73	#include <utility>
74	#include <vector>
75
76	using namespace clang;
77
78	/// Minimum distance between two check points, in tokens.
79	static constexpr unsigned CheckPointStepSize = `1024`;
80
81	LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
82
83	ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
84
85	Preprocessor::Preprocessor(const PreprocessorOptions &PPOpts,
86	DiagnosticsEngine &diags, const LangOptions &opts,
87	SourceManager &SM, HeaderSearch &Headers,
88	ModuleLoader &TheModuleLoader,
89	IdentifierInfoLookup IILookup, bool* OwnsHeaders,
90	TranslationUnitKind TUKind)
91	: PPOpts(PPOpts), Diags(&diags), LangOpts(opts),
92	FileMgr(Headers.getFileMgr()), SourceMgr(SM),
93	ScratchBuf (new ScratchBuffer (SourceMgr)), HeaderInfo(Headers),
94	TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
95	// As the language options may have not been loaded yet (when
96	// deserializing an ASTUnit), adding keywords to the identifier table is
97	// deferred to Preprocessor::Initialize().
98	Identifiers (IILookup), PragmaHandlers (new PragmaNamespace (StringRef())),
99	TUKind(TUKind), SkipMainFilePreamble (`0`, true),
100	CurSubmoduleState(&NullSubmoduleState) {
101	OwnsHeaderSearch = OwnsHeaders;
102
103	// Default to discarding comments.
104	KeepComments = false;
105	KeepMacroComments = false;
106	SuppressIncludeNotFoundError = false;
107
108	// Macro expansion is enabled.
109	DisableMacroExpansion = false;
110	MacroExpansionInDirectivesOverride = false;
111	InMacroArgs = false;
112	ArgMacro = nullptr;
113	InMacroArgPreExpansion = false;
114	NumCachedTokenLexers = `0`;
115	PragmasEnabled = true;
116	ParsingIfOrElifDirective = false;
117	PreprocessedOutput = false;
118
119	// We haven't read anything from the external source.
120	ReadMacrosFromExternalSource = false;
121
122	LastExportKeyword.startToken();
123
124	BuiltinInfo = std::make_unique<Builtin::Context>();
125
126	// "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
127	// a macro. They get unpoisoned where it is allowed.
128	(Ident__VA_ARGS__ = getIdentifierInfo(Name: "__VA_ARGS__"))->setIsPoisoned();
129	SetPoisonReason(II: Ident__VA_ARGS__,DiagID: diag::ext_pp_bad_vaargs_use);
130	(Ident__VA_OPT__ = getIdentifierInfo(Name: "__VA_OPT__"))->setIsPoisoned();
131	SetPoisonReason(II: Ident__VA_OPT__,DiagID: diag::ext_pp_bad_vaopt_use);
132
133	// Initialize the pragma handlers.
134	RegisterBuiltinPragmas();
135
136	// Initialize builtin macros like __LINE__ and friends.
137	RegisterBuiltinMacros();
138
139	if(LangOpts.Borland) {
140	Ident__exception_info = getIdentifierInfo(Name: "_exception_info");
141	Ident___exception_info = getIdentifierInfo(Name: "__exception_info");
142	Ident_GetExceptionInfo = getIdentifierInfo(Name: "GetExceptionInformation");
143	Ident__exception_code = getIdentifierInfo(Name: "_exception_code");
144	Ident___exception_code = getIdentifierInfo(Name: "__exception_code");
145	Ident_GetExceptionCode = getIdentifierInfo(Name: "GetExceptionCode");
146	Ident__abnormal_termination = getIdentifierInfo(Name: "_abnormal_termination");
147	Ident___abnormal_termination = getIdentifierInfo(Name: "__abnormal_termination");
148	Ident_AbnormalTermination = getIdentifierInfo(Name: "AbnormalTermination");
149	} else {
150	Ident__exception_info = Ident__exception_code = nullptr;
151	Ident__abnormal_termination = Ident___exception_info = nullptr;
152	Ident___exception_code = Ident___abnormal_termination = nullptr;
153	Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
154	Ident_AbnormalTermination = nullptr;
155	}
156
157	// Default incremental processing to -fincremental-extensions, clients can
158	// override with `enableIncrementalProcessing` if desired.
159	IncrementalProcessing = LangOpts.IncrementalExtensions;
160
161	// If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
162	if (usingPCHWithPragmaHdrStop())
163	SkippingUntilPragmaHdrStop = true;
164
165	// If using a PCH with a through header, start skipping tokens.
166	if (!this->PPOpts.PCHThroughHeader.empty() &&
167	!this->PPOpts.ImplicitPCHInclude.empty())
168	SkippingUntilPCHThroughHeader = true;
169
170	if (this->PPOpts.GeneratePreamble)
171	PreambleConditionalStack.startRecording();
172
173	MaxTokens = LangOpts.MaxTokens;
174	}
175
176	Preprocessor::~Preprocessor() {
177	assert(!isBacktrackEnabled() && "EnableBacktrack/Backtrack imbalance!");
178
179	IncludeMacroStack.clear();
180
181	// Free any cached macro expanders.
182	// This populates MacroArgCache, so all TokenLexers need to be destroyed
183	// before the code below that frees up the MacroArgCache list.
184	std::fill(first: TokenLexerCache, last: TokenLexerCache + NumCachedTokenLexers, value: nullptr);
185	CurTokenLexer.reset();
186
187	// Free any cached MacroArgs.
188	for (MacroArgs *ArgList = MacroArgCache; ArgList;)
189	ArgList = ArgList->deallocate();
190
191	// Delete the header search info, if we own it.
192	if (OwnsHeaderSearch)
193	delete &HeaderInfo;
194	}
195
196	void Preprocessor::Initialize(const TargetInfo &Target,
197	const TargetInfo *AuxTarget) {
198	assert((!this->Target \|\| this->Target == &Target) &&
199	"Invalid override of target information");
200	this->Target = &Target;
201
202	assert((!this->AuxTarget \|\| this->AuxTarget == AuxTarget) &&
203	"Invalid override of aux target information.");
204	this->AuxTarget = AuxTarget;
205
206	// Initialize information about built-ins.
207	BuiltinInfo ->InitializeTarget(Target, AuxTarget);
208	HeaderInfo.setTarget(Target);
209
210	// Populate the identifier table with info about keywords for the current language.
211	Identifiers.AddKeywords(LangOpts);
212
213	// Initialize the __FTL_EVAL_METHOD__ macro to the TargetInfo.
214	setTUFPEvalMethod(getTargetInfo().getFPEvalMethod());
215
216	if (getLangOpts().getFPEvalMethod() == LangOptions::FEM_UnsetOnCommandLine)
217	// Use setting from TargetInfo.
218	setCurrentFPEvalMethod(PragmaLoc: SourceLocation (), Val: Target.getFPEvalMethod());
219	else
220	// Set initial value of __FLT_EVAL_METHOD__ from the command line.
221	setCurrentFPEvalMethod(PragmaLoc: SourceLocation (), Val: getLangOpts().getFPEvalMethod());
222	}
223
224	void Preprocessor::InitializeForModelFile() {
225	NumEnteredSourceFiles = `0`;
226
227	// Reset pragmas
228	PragmaHandlersBackup = std::move(PragmaHandlers);
229	PragmaHandlers = std::make_unique<PragmaNamespace>(args: StringRef());
230	RegisterBuiltinPragmas();
231
232	// Reset PredefinesFileID
233	PredefinesFileID = FileID ();
234	}
235
236	void Preprocessor::FinalizeForModelFile() {
237	NumEnteredSourceFiles = `1`;
238
239	PragmaHandlers = std::move(PragmaHandlersBackup);
240	}
241
242	void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
243	llvm::errs() << tok::getTokenName(Kind: Tok.getKind());
244
245	if (!Tok.isAnnotation())
246	llvm::errs() << " '" << getSpelling(Tok) << "'";
247
248	if (!DumpFlags) return;
249
250	llvm::errs() << "\t";
251	if (Tok.isAtStartOfLine())
252	llvm::errs() << " [StartOfLine]";
253	if (Tok.hasLeadingSpace())
254	llvm::errs() << " [LeadingSpace]";
255	if (Tok.isExpandDisabled())
256	llvm::errs() << " [ExpandDisabled]";
257	if (Tok.needsCleaning()) {
258	const char *Start = SourceMgr.getCharacterData(SL: Tok.getLocation());
259	llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
260	<< "']";
261	}
262
263	llvm::errs() << "\tLoc=<";
264	DumpLocation(Loc: Tok.getLocation());
265	llvm::errs() << ">";
266	}
267
268	void Preprocessor::DumpLocation(SourceLocation Loc) const {
269	Loc.print(OS&: llvm::errs(), SM: SourceMgr);
270	}
271
272	void Preprocessor::DumpMacro(const MacroInfo &MI) const {
273	llvm::errs() << "MACRO: ";
274	for (unsigned i = `0`, e = MI.getNumTokens(); i != e; ++i) {
275	DumpToken(Tok: MI.getReplacementToken(Tok: i));
276	llvm::errs() << " ";
277	}
278	llvm::errs() << "\n";
279	}
280
281	void Preprocessor::PrintStats() {
282	llvm::errs() << "\n*** Preprocessor Stats:\n";
283	llvm::errs() << NumDirectives << " directives found:\n";
284	llvm::errs() << " " << NumDefined << " #define.\n";
285	llvm::errs() << " " << NumUndefined << " #undef.\n";
286	llvm::errs() << " #include/#include_next/#import:\n";
287	llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
288	llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
289	llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
290	llvm::errs() << " " << NumElse << " #else/#elif/#elifdef/#elifndef.\n";
291	llvm::errs() << " " << NumEndif << " #endif.\n";
292	llvm::errs() << " " << NumPragma << " #pragma.\n";
293	llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
294
295	llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
296	<< NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
297	<< NumFastMacroExpanded << " on the fast path.\n";
298	llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
299	<< " token paste (##) operations performed, "
300	<< NumFastTokenPaste << " on the fast path.\n";
301
302	llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
303
304	llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
305	llvm::errs() << "\n Macro Expanded Tokens: "
306	<< llvm::capacity_in_bytes(X: MacroExpandedTokens);
307	llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
308	// FIXME: List information for all submodules.
309	llvm::errs() << "\n Macros: "
310	<< llvm::capacity_in_bytes(X: CurSubmoduleState->Macros);
311	llvm::errs() << "\n #pragma push_macro Info: "
312	<< llvm::capacity_in_bytes(X: PragmaPushMacroInfo);
313	llvm::errs() << "\n Poison Reasons: "
314	<< llvm::capacity_in_bytes(X: PoisonReasons);
315	llvm::errs() << "\n Comment Handlers: "
316	<< llvm::capacity_in_bytes(x: CommentHandlers) << "\n";
317	}
318
319	Preprocessor::macro_iterator
320	Preprocessor::macro_begin(bool IncludeExternalMacros) const {
321	if (IncludeExternalMacros && ExternalSource &&
322	!ReadMacrosFromExternalSource) {
323	ReadMacrosFromExternalSource = true;
324	ExternalSource->ReadDefinedMacros();
325	}
326
327	// Make sure we cover all macros in visible modules.
328	for (const ModuleMacro &Macro : ModuleMacros)
329	CurSubmoduleState->Macros.try_emplace(Key: Macro.II);
330
331	return CurSubmoduleState->Macros.begin();
332	}
333
334	size_t Preprocessor::getTotalMemory() const {
335	return BP.getTotalMemory()
336	+ llvm::capacity_in_bytes(X: MacroExpandedTokens)
337	+ Predefines.capacity() / Predefines buffer. /
338	// FIXME: Include sizes from all submodules, and include MacroInfo sizes,
339	// and ModuleMacros.
340	+ llvm::capacity_in_bytes(X: CurSubmoduleState->Macros)
341	+ llvm::capacity_in_bytes(X: PragmaPushMacroInfo)
342	+ llvm::capacity_in_bytes(X: PoisonReasons)
343	+ llvm::capacity_in_bytes(x: CommentHandlers);
344	}
345
346	Preprocessor::macro_iterator
347	Preprocessor::macro_end(bool IncludeExternalMacros) const {
348	if (IncludeExternalMacros && ExternalSource &&
349	!ReadMacrosFromExternalSource) {
350	ReadMacrosFromExternalSource = true;
351	ExternalSource->ReadDefinedMacros();
352	}
353
354	return CurSubmoduleState->Macros.end();
355	}
356
357	/// Compares macro tokens with a specified token value sequence.
358	static bool MacroDefinitionEquals(const MacroInfo *MI,
359	ArrayRef<TokenValue> Tokens) {
360	return Tokens.size() == MI->getNumTokens() &&
361	std::equal(first1: Tokens.begin(), last1: Tokens.end(), first2: MI->tokens_begin());
362	}
363
364	StringRef Preprocessor::getLastMacroWithSpelling(
365	SourceLocation Loc,
366	ArrayRef<TokenValue> Tokens) const {
367	SourceLocation BestLocation;
368	StringRef BestSpelling;
369	for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
370	I != E; ++I) {
371	const MacroDirective::DefInfo
372	Def = I ->second.findDirectiveAtLoc(Loc, SourceMgr);
373	if (!Def \|\| !Def.getMacroInfo())
374	continue;
375	if (!Def.getMacroInfo()->isObjectLike())
376	continue;
377	if (!MacroDefinitionEquals(MI: Def.getMacroInfo(), Tokens))
378	continue;
379	SourceLocation Location = Def.getLocation();
380	// Choose the macro defined latest.
381	if (BestLocation.isInvalid() \|\|
382	(Location.isValid() &&
383	SourceMgr.isBeforeInTranslationUnit(LHS: BestLocation, RHS: Location))) {
384	BestLocation = Location;
385	BestSpelling = I ->first->getName();
386	}
387	}
388	return BestSpelling;
389	}
390
391	void Preprocessor::recomputeCurLexerKind() {
392	if (CurLexer)
393	CurLexerCallback = CurLexer ->isDependencyDirectivesLexer()
394	? CLK_DependencyDirectivesLexer
395	: CLK_Lexer;
396	else if (CurTokenLexer)
397	CurLexerCallback = CLK_TokenLexer;
398	else
399	CurLexerCallback = CLK_CachingLexer;
400	}
401
402	bool Preprocessor::SetCodeCompletionPoint(FileEntryRef File,
403	unsigned CompleteLine,
404	unsigned CompleteColumn) {
405	assert(CompleteLine && CompleteColumn && "Starts from 1:1");
406	assert(!CodeCompletionFile && "Already set");
407
408	// Load the actual file's contents.
409	std::optional<llvm::MemoryBufferRef> Buffer =
410	SourceMgr.getMemoryBufferForFileOrNone(File);
411	if (!Buffer)
412	return true;
413
414	// Find the byte position of the truncation point.
415	const char *Position = Buffer ->getBufferStart();
416	for (unsigned Line = `1`; Line < CompleteLine; ++Line) {
417	for (; *Position; ++Position) {
418	if (Position != `'\r'` && Position != `'\n'`)
419	continue;
420
421	// Eat \r\n or \n\r as a single line.
422	if ((Position[`1`] == `'\r'` \|\| Position[`1`] == `'\n'`) &&
423	Position[`0`] != Position[`1`])
424	++Position;
425	++Position;
426	break;
427	}
428	}
429
430	Position += CompleteColumn - `1`;
431
432	// If pointing inside the preamble, adjust the position at the beginning of
433	// the file after the preamble.
434	if (SkipMainFilePreamble.first &&
435	SourceMgr.getFileEntryForID(FID: SourceMgr.getMainFileID()) == File) {
436	if (Position - Buffer ->getBufferStart() < SkipMainFilePreamble.first)
437	Position = Buffer ->getBufferStart() + SkipMainFilePreamble.first;
438	}
439
440	if (Position > Buffer ->getBufferEnd())
441	Position = Buffer ->getBufferEnd();
442
443	CodeCompletionFile = File;
444	CodeCompletionOffset = Position - Buffer ->getBufferStart();
445
446	auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
447	Size: Buffer ->getBufferSize() + `1`, BufferName: Buffer ->getBufferIdentifier());
448	char *NewBuf = NewBuffer ->getBufferStart();
449	char *NewPos = std::copy(first: Buffer ->getBufferStart(), last: Position, result: NewBuf);
450	*NewPos = `'\0'`;
451	std::copy(first: Position, last: Buffer ->getBufferEnd(), result: NewPos+`1`);
452	SourceMgr.overrideFileContents(SourceFile: File, Buffer: std::move(NewBuffer));
453
454	return false;
455	}
456
457	void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir,
458	bool IsAngled) {
459	setCodeCompletionReached();
460	if (CodeComplete)
461	CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
462	}
463
464	void Preprocessor::CodeCompleteNaturalLanguage() {
465	setCodeCompletionReached();
466	if (CodeComplete)
467	CodeComplete->CodeCompleteNaturalLanguage();
468	}
469
470	/// getSpelling - This method is used to get the spelling of a token into a
471	/// SmallVector. Note that the returned StringRef may not point to the
472	/// supplied buffer if a copy can be avoided.
473	StringRef Preprocessor::getSpelling(const Token &Tok,
474	SmallVectorImpl<char> &Buffer,
475	bool Invalid) const* {
476	// NOTE: this has to be checked before* testing for an IdentifierInfo.*
477	if (Tok.isNot(K: tok::raw_identifier) && !Tok.hasUCN()) {
478	// Try the fast path.
479	if (const IdentifierInfo *II = Tok.getIdentifierInfo())
480	return II->getName();
481	}
482
483	// Resize the buffer if we need to copy into it.
484	if (Tok.needsCleaning())
485	Buffer.resize(N: Tok.getLength());
486
487	const char *Ptr = Buffer.data();
488	unsigned Len = getSpelling(Tok, Buffer&: Ptr, Invalid);
489	return StringRef(Ptr, Len);
490	}
491
492	/// CreateString - Plop the specified string into a scratch buffer and return a
493	/// location for it. If specified, the source location provides a source
494	/// location for the token.
495	void Preprocessor::CreateString(StringRef Str, Token &Tok,
496	SourceLocation ExpansionLocStart,
497	SourceLocation ExpansionLocEnd) {
498	Tok.setLength(Str.size());
499
500	const char *DestPtr;
501	SourceLocation Loc = ScratchBuf ->getToken(Buf: Str.data(), Len: Str.size(), DestPtr);
502
503	if (ExpansionLocStart.isValid())
504	Loc = SourceMgr.createExpansionLoc(SpellingLoc: Loc, ExpansionLocStart,
505	ExpansionLocEnd, Length: Str.size());
506	Tok.setLocation(Loc);
507
508	// If this is a raw identifier or a literal token, set the pointer data.
509	if (Tok.is(K: tok::raw_identifier))
510	Tok.setRawIdentifierData(DestPtr);
511	else if (Tok.isLiteral())
512	Tok.setLiteralData(DestPtr);
513	}
514
515	SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
516	auto &SM = getSourceManager();
517	SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
518	FileIDAndOffset LocInfo = SM.getDecomposedLoc(Loc: SpellingLoc);
519	bool Invalid = false;
520	StringRef Buffer = SM.getBufferData(FID: LocInfo.first, Invalid: &Invalid);
521	if (Invalid)
522	return SourceLocation ();
523
524	// FIXME: We could consider re-using spelling for tokens we see repeatedly.
525	const char *DestPtr;
526	SourceLocation Spelling =
527	ScratchBuf ->getToken(Buf: Buffer.data() + LocInfo.second, Len: Length, DestPtr);
528	return SM.createTokenSplitLoc(SpellingLoc: Spelling, TokenStart: Loc, TokenEnd: Loc.getLocWithOffset(Offset: Length));
529	}
530
531	Module *Preprocessor::getCurrentModule() {
532	if (!getLangOpts().isCompilingModule())
533	return nullptr;
534
535	return getHeaderSearchInfo().lookupModule(ModuleName: getLangOpts().CurrentModule);
536	}
537
538	Module *Preprocessor::getCurrentModuleImplementation() {
539	if (!getLangOpts().isCompilingModuleImplementation())
540	return nullptr;
541
542	return getHeaderSearchInfo().lookupModule(ModuleName: getLangOpts().ModuleName);
543	}
544
545	//===----------------------------------------------------------------------===//
546	// Preprocessor Initialization Methods
547	//===----------------------------------------------------------------------===//
548
549	/// EnterMainSourceFile - Enter the specified FileID as the main source file,
550	/// which implicitly adds the builtin defines etc.
551	void Preprocessor::EnterMainSourceFile() {
552	// We do not allow the preprocessor to reenter the main file. Doing so will
553	// cause FileID's to accumulate information from both runs (e.g. #line
554	// information) and predefined macros aren't guaranteed to be set properly.
555	assert(NumEnteredSourceFiles == `0` && "Cannot reenter the main file!");
556	FileID MainFileID = SourceMgr.getMainFileID();
557
558	// If MainFileID is loaded it means we loaded an AST file, no need to enter
559	// a main file.
560	if (!SourceMgr.isLoadedFileID(FID: MainFileID)) {
561	// Enter the main file source buffer.
562	EnterSourceFile(FID: MainFileID, Dir: nullptr, Loc: SourceLocation ());
563
564	// If we've been asked to skip bytes in the main file (e.g., as part of a
565	// precompiled preamble), do so now.
566	if (SkipMainFilePreamble.first > `0`)
567	CurLexer ->SetByteOffset(Offset: SkipMainFilePreamble.first,
568	StartOfLine: SkipMainFilePreamble.second);
569
570	// Tell the header info that the main file was entered. If the file is later
571	// #imported, it won't be re-entered.
572	if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(FID: MainFileID))
573	markIncluded(File: *FE);
574
575	// Record the first PP token in the main file. This is used to generate
576	// better diagnostics for C++ modules.
577	//
578	// // This is a comment.
579	// #define FOO int // note: add 'module;' to the start of the file
580	// ^ FirstPPToken // to introduce a global module fragment.
581	//
582	// export module M; // error: module declaration must occur
583	// // at the start of the translation unit.
584	if (getLangOpts().CPlusPlusModules) {
585	std::optional<StringRef> Input =
586	getSourceManager().getBufferDataOrNone(FID: MainFileID);
587	if (!isPreprocessedModuleFile() && Input)
588	MainFileIsPreprocessedModuleFile =
589	clang::isPreprocessedModuleFile(Source: *Input);
590	auto Tracer = std::make_unique<NoTrivialPPDirectiveTracer>(args&: *this);
591	DirTracer = Tracer.get();
592	addPPCallbacks(C: std::move(Tracer));
593	std::optional<Token> FirstPPTok = CurLexer ->peekNextPPToken();
594	if (FirstPPTok)
595	FirstPPTokenLoc = FirstPPTok ->getLocation();
596	}
597	}
598
599	// Preprocess Predefines to populate the initial preprocessor state.
600	std::unique_ptr<llvm::MemoryBuffer> SB =
601	llvm::MemoryBuffer::getMemBufferCopy(InputData: Predefines, BufferName: "<built-in>");
602	assert(SB && "Cannot create predefined source buffer");
603	FileID FID = SourceMgr.createFileID(Buffer: std::move(SB));
604	assert(FID.isValid() && "Could not create FileID for predefines?");
605	setPredefinesFileID(FID);
606
607	// Start parsing the predefines.
608	EnterSourceFile(FID, Dir: nullptr, Loc: SourceLocation ());
609
610	if (!PPOpts.PCHThroughHeader.empty()) {
611	// Lookup and save the FileID for the through header. If it isn't found
612	// in the search path, it's a fatal error.
613	OptionalFileEntryRef File = LookupFile(
614	FilenameLoc: SourceLocation (), Filename: PPOpts.PCHThroughHeader,
615	/isAngled=/false, /FromDir=/nullptr, /FromFile=/nullptr,
616	/CurDir=/nullptr, /SearchPath=/nullptr, /RelativePath=/nullptr,
617	/SuggestedModule=/nullptr, /IsMapped=/nullptr,
618	/IsFrameworkFound=/nullptr);
619	if (!File) {
620	Diag(Loc: SourceLocation (), DiagID: diag::err_pp_through_header_not_found)
621	<< PPOpts.PCHThroughHeader;
622	return;
623	}
624	setPCHThroughHeaderFileID(
625	SourceMgr.createFileID(SourceFile: *File, IncludePos: SourceLocation (), FileCharacter: SrcMgr::C_User));
626	}
627
628	// Skip tokens from the Predefines and if needed the main file.
629	if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) \|\|
630	(usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
631	SkipTokensWhileUsingPCH();
632	}
633
634	void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
635	assert(PCHThroughHeaderFileID.isInvalid() &&
636	"PCHThroughHeaderFileID already set!");
637	PCHThroughHeaderFileID = FID;
638	}
639
640	bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) {
641	assert(PCHThroughHeaderFileID.isValid() &&
642	"Invalid PCH through header FileID");
643	return FE == SourceMgr.getFileEntryForID(FID: PCHThroughHeaderFileID);
644	}
645
646	bool Preprocessor::creatingPCHWithThroughHeader() {
647	return TUKind == TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
648	PCHThroughHeaderFileID.isValid();
649	}
650
651	bool Preprocessor::usingPCHWithThroughHeader() {
652	return TUKind != TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
653	PCHThroughHeaderFileID.isValid();
654	}
655
656	bool Preprocessor::creatingPCHWithPragmaHdrStop() {
657	return TUKind == TU_Prefix && PPOpts.PCHWithHdrStop;
658	}
659
660	bool Preprocessor::usingPCHWithPragmaHdrStop() {
661	return TUKind != TU_Prefix && PPOpts.PCHWithHdrStop;
662	}
663
664	/// Skip tokens until after the #include of the through header or
665	/// until after a #pragma hdrstop is seen. Tokens in the predefines file
666	/// and the main file may be skipped. If the end of the predefines file
667	/// is reached, skipping continues into the main file. If the end of the
668	/// main file is reached, it's a fatal error.
669	void Preprocessor::SkipTokensWhileUsingPCH() {
670	bool ReachedMainFileEOF = false;
671	bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
672	bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
673	Token Tok;
674	while (true) {
675	bool InPredefines =
676	(CurLexer && CurLexer ->getFileID() == getPredefinesFileID());
677	CurLexerCallback(*this, Tok);
678	if (Tok.is(K: tok::eof) && !InPredefines) {
679	ReachedMainFileEOF = true;
680	break;
681	}
682	if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
683	break;
684	if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
685	break;
686	}
687	if (ReachedMainFileEOF) {
688	if (UsingPCHThroughHeader)
689	Diag(Loc: SourceLocation (), DiagID: diag::err_pp_through_header_not_seen)
690	<< PPOpts.PCHThroughHeader << `1`;
691	else if (!PPOpts.PCHWithHdrStopCreate)
692	Diag(Loc: SourceLocation (), DiagID: diag::err_pp_pragma_hdrstop_not_seen);
693	}
694	}
695
696	void Preprocessor::replayPreambleConditionalStack() {
697	// Restore the conditional stack from the preamble, if there is one.
698	if (PreambleConditionalStack.isReplaying()) {
699	assert(CurPPLexer &&
700	"CurPPLexer is null when calling replayPreambleConditionalStack.");
701	CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
702	PreambleConditionalStack.doneReplaying();
703	if (PreambleConditionalStack.reachedEOFWhileSkipping())
704	SkipExcludedConditionalBlock(
705	HashTokenLoc: PreambleConditionalStack.SkipInfo ->HashTokenLoc,
706	IfTokenLoc: PreambleConditionalStack.SkipInfo ->IfTokenLoc,
707	FoundNonSkipPortion: PreambleConditionalStack.SkipInfo ->FoundNonSkipPortion,
708	FoundElse: PreambleConditionalStack.SkipInfo ->FoundElse,
709	ElseLoc: PreambleConditionalStack.SkipInfo ->ElseLoc);
710	}
711	}
712
713	void Preprocessor::EndSourceFile() {
714	// Notify the client that we reached the end of the source file.
715	if (Callbacks)
716	Callbacks ->EndOfMainFile();
717	}
718
719	//===----------------------------------------------------------------------===//
720	// Lexer Event Handling.
721	//===----------------------------------------------------------------------===//
722
723	/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
724	/// identifier information for the token and install it into the token,
725	/// updating the token kind accordingly.
726	IdentifierInfo Preprocessor::LookUpIdentifierInfo(Token &Identifier) const* {
727	assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
728
729	// Look up this token, see if it is a macro, or if it is a language keyword.
730	IdentifierInfo *II;
731	if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
732	// No cleaning needed, just use the characters from the lexed buffer.
733	II = getIdentifierInfo(Name: Identifier.getRawIdentifier());
734	} else {
735	// Cleaning needed, alloca a buffer, clean into it, then use the buffer.
736	SmallString<`64`> IdentifierBuffer;
737	StringRef CleanedStr = getSpelling(Tok: Identifier, Buffer&: IdentifierBuffer);
738
739	if (Identifier.hasUCN()) {
740	SmallString<`64`> UCNIdentifierBuffer;
741	expandUCNs(Buf&: UCNIdentifierBuffer, Input: CleanedStr);
742	II = getIdentifierInfo(Name: UCNIdentifierBuffer);
743	} else {
744	II = getIdentifierInfo(Name: CleanedStr);
745	}
746	}
747
748	// Update the token info (identifier info and appropriate token kind).
749	// FIXME: the raw_identifier may contain leading whitespace which is removed
750	// from the cleaned identifier token. The SourceLocation should be updated to
751	// refer to the non-whitespace character. For instance, the text "\\\nB" (a
752	// line continuation before 'B') is parsed as a single tok::raw_identifier and
753	// is cleaned to tok::identifier "B". After cleaning the token's length is
754	// still 3 and the SourceLocation refers to the location of the backslash.
755	Identifier.setIdentifierInfo(II);
756	Identifier.setKind(II->getTokenID());
757
758	return II;
759	}
760
761	void Preprocessor::SetPoisonReason(IdentifierInfo II, unsigned* DiagID) {
762	PoisonReasons [II] = DiagID;
763	}
764
765	void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
766	assert(Ident__exception_code && Ident__exception_info);
767	assert(Ident___exception_code && Ident___exception_info);
768	Ident__exception_code->setIsPoisoned(Poison);
769	Ident___exception_code->setIsPoisoned(Poison);
770	Ident_GetExceptionCode->setIsPoisoned(Poison);
771	Ident__exception_info->setIsPoisoned(Poison);
772	Ident___exception_info->setIsPoisoned(Poison);
773	Ident_GetExceptionInfo->setIsPoisoned(Poison);
774	Ident__abnormal_termination->setIsPoisoned(Poison);
775	Ident___abnormal_termination->setIsPoisoned(Poison);
776	Ident_AbnormalTermination->setIsPoisoned(Poison);
777	}
778
779	void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
780	assert(Identifier.getIdentifierInfo() &&
781	"Can't handle identifiers without identifier info!");
782	llvm::DenseMap<IdentifierInfo,unsigned*>::const_iterator it =
783	PoisonReasons.find(Val: Identifier.getIdentifierInfo());
784	if(it == PoisonReasons.end())
785	Diag(Tok: Identifier, DiagID: diag::err_pp_used_poisoned_id);
786	else
787	Diag(Tok: Identifier,DiagID: it ->second) << Identifier.getIdentifierInfo();
788	}
789
790	void Preprocessor::updateOutOfDateIdentifier(const IdentifierInfo &II) const {
791	assert(II.isOutOfDate() && "not out of date");
792	assert(getExternalSource() &&
793	"getExternalSource() should not return nullptr");
794	getExternalSource()->updateOutOfDateIdentifier(II);
795	}
796
797	/// HandleIdentifier - This callback is invoked when the lexer reads an
798	/// identifier. This callback looks up the identifier in the map and/or
799	/// potentially macro expands it or turns it into a named token (like 'for').
800	///
801	/// Note that callers of this method are guarded by checking the
802	/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
803	/// IdentifierInfo methods that compute these properties will need to change to
804	/// match.
805	bool Preprocessor::HandleIdentifier(Token &Identifier) {
806	assert(Identifier.getIdentifierInfo() &&
807	"Can't handle identifiers without identifier info!");
808
809	IdentifierInfo &II = *Identifier.getIdentifierInfo();
810
811	// If the information about this identifier is out of date, update it from
812	// the external source.
813	// We have to treat __VA_ARGS__ in a special way, since it gets
814	// serialized with isPoisoned = true, but our preprocessor may have
815	// unpoisoned it if we're defining a C99 macro.
816	if (II.isOutOfDate()) {
817	bool CurrentIsPoisoned = false;
818	const bool IsSpecialVariadicMacro =
819	&II == Ident__VA_ARGS__ \|\| &II == Ident__VA_OPT__;
820	if (IsSpecialVariadicMacro)
821	CurrentIsPoisoned = II.isPoisoned();
822
823	updateOutOfDateIdentifier(II);
824	Identifier.setKind(II.getTokenID());
825
826	if (IsSpecialVariadicMacro)
827	II.setIsPoisoned(CurrentIsPoisoned);
828	}
829
830	// If this identifier was poisoned, and if it was not produced from a macro
831	// expansion, emit an error.
832	if (II.isPoisoned() && CurPPLexer) {
833	HandlePoisonedIdentifier(Identifier);
834	}
835
836	// If this is a macro to be expanded, do it.
837	if (const MacroDefinition MD = getMacroDefinition(II: &II)) {
838	const auto *MI = MD.getMacroInfo();
839	assert(MI && "macro definition with no macro info?");
840	if (!DisableMacroExpansion) {
841	if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
842	// C99 6.10.3p10: If the preprocessing token immediately after the
843	// macro name isn't a '(', this macro should not be expanded.
844	if (!MI->isFunctionLike() \|\| isNextPPTokenOneOf(Ks: tok::l_paren))
845	return HandleMacroExpandedIdentifier(Identifier, MD);
846	} else {
847	// C99 6.10.3.4p2 says that a disabled macro may never again be
848	// expanded, even if it's in a context where it could be expanded in the
849	// future.
850	Identifier.setFlag(Token::DisableExpand);
851	if (MI->isObjectLike() \|\| isNextPPTokenOneOf(Ks: tok::l_paren))
852	Diag(Tok: Identifier, DiagID: diag::pp_disabled_macro_expansion);
853	}
854	}
855	}
856
857	// If this identifier is a keyword in a newer Standard or proposed Standard,
858	// produce a warning. Don't warn if we're not considering macro expansion,
859	// since this identifier might be the name of a macro.
860	// FIXME: This warning is disabled in cases where it shouldn't be, like
861	// "#define constexpr constexpr", "int constexpr;"
862	if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
863	Diag(Tok: Identifier, DiagID: getIdentifierTable().getFutureCompatDiagKind(II, LangOpts: getLangOpts()))
864	<< II.getName();
865	// Don't diagnose this keyword again in this translation unit.
866	II.setIsFutureCompatKeyword(false);
867	}
868
869	// If this identifier would be a keyword in C++, diagnose as a compatibility
870	// issue.
871	if (II.IsKeywordInCPlusPlus() && !DisableMacroExpansion)
872	Diag(Tok: Identifier, DiagID: diag::warn_pp_identifier_is_cpp_keyword) << &II;
873
874	// If this is an extension token, diagnose its use.
875	// We avoid diagnosing tokens that originate from macro definitions.
876	// FIXME: This warning is disabled in cases where it shouldn't be,
877	// like "#define TY typeof", "TY(1) x".
878	if (II.isExtensionToken() && !DisableMacroExpansion)
879	Diag(Tok: Identifier, DiagID: diag::ext_token_used);
880
881	// Handle module contextual keywords.
882	if (getLangOpts().CPlusPlusModules && CurLexer &&
883	!CurLexer ->isLexingRawMode() && !CurLexer ->isPragmaLexer() &&
884	!CurLexer ->ParsingPreprocessorDirective &&
885	Identifier.isModuleContextualKeyword() &&
886	HandleModuleContextualKeyword(Result&: Identifier)) {
887	HandleDirective(Result&: Identifier);
888	// With a fatal failure in the module loader, we abort parsing.
889	return hadModuleLoaderFatalFailure();
890	}
891
892	// If this is the 'import' contextual keyword following an '@', note
893	// that the next token indicates a module name.
894	//
895	// Note that we do not treat 'import' as a contextual
896	// keyword when we're in a caching lexer, because caching lexers only get
897	// used in contexts where import declarations are disallowed.
898	//
899	// Likewise if this is the standard C++ import keyword.
900	if (((LastTokenWasAt && II.isImportKeyword()) \|\|
901	Identifier.is(K: tok::kw_import)) &&
902	!InMacroArgs &&
903	(!DisableMacroExpansion \|\| MacroExpansionInDirectivesOverride) &&
904	CurLexerCallback != CLK_CachingLexer) {
905	ModuleImportLoc = Identifier.getLocation();
906	IsAtImport = true;
907	CurLexerCallback = CLK_LexAfterModuleImport;
908	}
909	return true;
910	}
911
912	void Preprocessor::Lex(Token &Result) {
913	++LexLevel;
914
915	// We loop here until a lex function returns a token; this avoids recursion.
916	while (!CurLexerCallback(*this, Result))
917	;
918
919	if (Result.is(K: tok::unknown) && TheModuleLoader.HadFatalFailure)
920	return;
921
922	if (Result.is(K: tok::code_completion) && Result.getIdentifierInfo()) {
923	// Remember the identifier before code completion token.
924	setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
925	setCodeCompletionTokenRange(Start: Result.getLocation(), End: Result.getEndLoc());
926	// Set IdenfitierInfo to null to avoid confusing code that handles both
927	// identifiers and completion tokens.
928	Result.setIdentifierInfo(nullptr);
929	}
930
931	// Update StdCXXImportSeqState to track our position within a C++20 import-seq
932	// if this token is being produced as a result of phase 4 of translation.
933	// Update TrackGMFState to decide if we are currently in a Global Module
934	// Fragment. GMF state updates should precede StdCXXImportSeq ones, since GMF state
935	// depends on the prevailing StdCXXImportSeq state in two cases.
936	if (getLangOpts().CPlusPlusModules && LexLevel == `1` &&
937	!Result.getFlag(Flag: Token::IsReinjected)) {
938	switch (Result.getKind()) {
939	case tok::l_paren: case tok::l_square: case tok::l_brace:
940	StdCXXImportSeqState.handleOpenBracket();
941	break;
942	case tok::r_paren: case tok::r_square:
943	StdCXXImportSeqState.handleCloseBracket();
944	break;
945	case tok::r_brace:
946	StdCXXImportSeqState.handleCloseBrace();
947	break;
948	#define PRAGMA_ANNOTATION(X) case tok::annot_##X:
949	// For `#pragma ...` mimic ';'.
950	#include "clang/Basic/TokenKinds.def"
951	#undef PRAGMA_ANNOTATION
952	// This token is injected to represent the translation of '#include "a.h"'
953	// into "import a.h;". Mimic the notional ';'.
954	case tok::annot_module_include:
955	case tok::annot_repl_input_end:
956	case tok::semi:
957	TrackGMFState.handleSemi();
958	StdCXXImportSeqState.handleSemi();
959	ModuleDeclState.handleSemi();
960	break;
961	case tok::header_name:
962	case tok::annot_header_unit:
963	StdCXXImportSeqState.handleHeaderName();
964	break;
965	case tok::kw_export:
966	if (hasSeenNoTrivialPPDirective())
967	Result.setFlag(Token::HasSeenNoTrivialPPDirective);
968	TrackGMFState.handleExport();
969	StdCXXImportSeqState.handleExport();
970	ModuleDeclState.handleExport();
971	break;
972	case tok::colon:
973	ModuleDeclState.handleColon();
974	break;
975	case tok::kw_import:
976	if (StdCXXImportSeqState.atTopLevel()) {
977	TrackGMFState.handleImport(AfterTopLevelTokenSeq: StdCXXImportSeqState.afterTopLevelSeq());
978	StdCXXImportSeqState.handleImport();
979	}
980	break;
981	case tok::kw_module:
982	if (StdCXXImportSeqState.atTopLevel()) {
983	if (hasSeenNoTrivialPPDirective())
984	Result.setFlag(Token::HasSeenNoTrivialPPDirective);
985	TrackGMFState.handleModule(AfterTopLevelTokenSeq: StdCXXImportSeqState.afterTopLevelSeq());
986	ModuleDeclState.handleModule();
987	}
988	break;
989	case tok::annot_module_name:
990	ModuleDeclState.handleModuleName(
991	NameLoc: static_cast<ModuleNameLoc *>(Result.getAnnotationValue()));
992	if (ModuleDeclState.isModuleCandidate())
993	break;
994	[[fallthrough]];
995	default:
996	TrackGMFState.handleMisc();
997	StdCXXImportSeqState.handleMisc();
998	ModuleDeclState.handleMisc();
999	break;
1000	}
1001	}
1002
1003	if (CurLexer && ++CheckPointCounter == CheckPointStepSize) {
1004	CheckPoints [CurLexer ->getFileID()].push_back(Elt: CurLexer ->BufferPtr);
1005	CheckPointCounter = `0`;
1006	}
1007
1008	LastTokenWasAt = Result.is(K: tok::at);
1009	if (Result.isNot(K: tok::kw_export))
1010	LastExportKeyword.startToken();
1011
1012	--LexLevel;
1013
1014	// Destroy any lexers that were deferred while we were in nested Lex calls.
1015	// This must happen after decrementing LexLevel but before any other
1016	// processing that might re-enter Lex.
1017	if (LexLevel == `0` && !PendingDestroyLexers.empty())
1018	PendingDestroyLexers.clear();
1019
1020	if ((LexLevel == `0` \|\| PreprocessToken) &&
1021	!Result.getFlag(Flag: Token::IsReinjected)) {
1022	if (LexLevel == `0`)
1023	++TokenCount;
1024	if (OnToken)
1025	OnToken (Result);
1026	}
1027	}
1028
1029	void Preprocessor::LexTokensUntilEOF(std::vector<Token> *Tokens) {
1030	while (`1`) {
1031	Token Tok;
1032	Lex(Result&: Tok);
1033	if (Tok.isOneOf(Ks: tok::unknown, Ks: tok::eof, Ks: tok::eod,
1034	Ks: tok::annot_repl_input_end))
1035	break;
1036	if (Tokens != nullptr)
1037	Tokens->push_back(x: Tok);
1038	}
1039	}
1040
1041	/// Lex a header-name token (including one formed from header-name-tokens if
1042	/// \p AllowMacroExpansion is \c true).
1043	///
1044	/// \param FilenameTok Filled in with the next token. On success, this will
1045	/// be either a header_name token. On failure, it will be whatever other
1046	/// token was found instead.
1047	/// \param AllowMacroExpansion If \c true, allow the header name to be formed
1048	/// by macro expansion (concatenating tokens as necessary if the first
1049	/// token is a '<').
1050	/// \return \c true if we reached EOD or EOF while looking for a > token in
1051	/// a concatenated header name and diagnosed it. \c false otherwise.
1052	bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
1053	// Lex using header-name tokenization rules if tokens are being lexed from
1054	// a file. Just grab a token normally if we're in a macro expansion.
1055	if (CurPPLexer) {
1056	// Avoid nested header-name lexing when macro expansion recurses
1057	// __has_include(__has_include))
1058	if (CurPPLexer->ParsingFilename)
1059	LexUnexpandedToken(Result&: FilenameTok);
1060	else
1061	CurPPLexer->LexIncludeFilename(FilenameTok);
1062	} else {
1063	Lex(Result&: FilenameTok);
1064	}
1065
1066	// This could be a <foo/bar.h> file coming from a macro expansion. In this
1067	// case, glue the tokens together into an angle_string_literal token.
1068	SmallString<`128`> FilenameBuffer;
1069	if (FilenameTok.is(K: tok::less) && AllowMacroExpansion) {
1070	bool StartOfLine = FilenameTok.isAtStartOfLine();
1071	bool LeadingSpace = FilenameTok.hasLeadingSpace();
1072	bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
1073
1074	SourceLocation Start = FilenameTok.getLocation();
1075	SourceLocation End;
1076	FilenameBuffer.push_back(Elt: `'<'`);
1077
1078	// Consume tokens until we find a '>'.
1079	// FIXME: A header-name could be formed starting or ending with an
1080	// alternative token. It's not clear whether that's ill-formed in all
1081	// cases.
1082	while (FilenameTok.isNot(K: tok::greater)) {
1083	Lex(Result&: FilenameTok);
1084	if (FilenameTok.isOneOf(Ks: tok::eod, Ks: tok::eof)) {
1085	Diag(Loc: FilenameTok.getLocation(), DiagID: diag::err_expected) << tok::greater;
1086	Diag(Loc: Start, DiagID: diag::note_matching) << tok::less;
1087	return true;
1088	}
1089
1090	End = FilenameTok.getLocation();
1091
1092	// FIXME: Provide code completion for #includes.
1093	if (FilenameTok.is(K: tok::code_completion)) {
1094	setCodeCompletionReached();
1095	Lex(Result&: FilenameTok);
1096	continue;
1097	}
1098
1099	// Append the spelling of this token to the buffer. If there was a space
1100	// before it, add it now.
1101	if (FilenameTok.hasLeadingSpace())
1102	FilenameBuffer.push_back(Elt: `' '`);
1103
1104	// Get the spelling of the token, directly into FilenameBuffer if
1105	// possible.
1106	size_t PreAppendSize = FilenameBuffer.size();
1107	FilenameBuffer.resize(N: PreAppendSize + FilenameTok.getLength());
1108
1109	const char *BufPtr = &FilenameBuffer [PreAppendSize];
1110	unsigned ActualLen = getSpelling(Tok: FilenameTok, Buffer&: BufPtr);
1111
1112	// If the token was spelled somewhere else, copy it into FilenameBuffer.
1113	if (BufPtr != &FilenameBuffer [PreAppendSize])
1114	memcpy(dest: &FilenameBuffer [PreAppendSize], src: BufPtr, n: ActualLen);
1115
1116	// Resize FilenameBuffer to the correct size.
1117	if (FilenameTok.getLength() != ActualLen)
1118	FilenameBuffer.resize(N: PreAppendSize + ActualLen);
1119	}
1120
1121	FilenameTok.startToken();
1122	FilenameTok.setKind(tok::header_name);
1123	FilenameTok.setFlagValue(Flag: Token::StartOfLine, Val: StartOfLine);
1124	FilenameTok.setFlagValue(Flag: Token::LeadingSpace, Val: LeadingSpace);
1125	FilenameTok.setFlagValue(Flag: Token::LeadingEmptyMacro, Val: LeadingEmptyMacro);
1126	CreateString(Str: FilenameBuffer, Tok&: FilenameTok, ExpansionLocStart: Start, ExpansionLocEnd: End);
1127	} else if (FilenameTok.is(K: tok::string_literal) && AllowMacroExpansion) {
1128	// Convert a string-literal token of the form " h-char-sequence "
1129	// (produced by macro expansion) into a header-name token.
1130	//
1131	// The rules for header-names don't quite match the rules for
1132	// string-literals, but all the places where they differ result in
1133	// undefined behavior, so we can and do treat them the same.
1134	//
1135	// A string-literal with a prefix or suffix is not translated into a
1136	// header-name. This could theoretically be observable via the C++20
1137	// context-sensitive header-name formation rules.
1138	StringRef Str = getSpelling(Tok: FilenameTok, Buffer&: FilenameBuffer);
1139	if (Str.size() >= `2` && Str.front() == `'"'` && Str.back() == `'"'`)
1140	FilenameTok.setKind(tok::header_name);
1141	}
1142
1143	return false;
1144	}
1145
1146	std::optional<Token> Preprocessor::peekNextPPToken() const {
1147	// Do some quick tests for rejection cases.
1148	std::optional<Token> Val;
1149	if (CurLexer)
1150	Val = CurLexer ->peekNextPPToken();
1151	else
1152	Val = CurTokenLexer ->peekNextPPToken();
1153
1154	if (!Val) {
1155	// We have run off the end. If it's a source file we don't
1156	// examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
1157	// macro stack.
1158	if (CurPPLexer)
1159	return std::nullopt;
1160	for (const IncludeStackInfo &Entry : llvm::reverse(C: IncludeMacroStack)) {
1161	if (Entry.TheLexer)
1162	Val = Entry.TheLexer ->peekNextPPToken();
1163	else
1164	Val = Entry.TheTokenLexer ->peekNextPPToken();
1165
1166	if (Val)
1167	break;
1168
1169	// Ran off the end of a source file?
1170	if (Entry.ThePPLexer)
1171	return std::nullopt;
1172	}
1173	}
1174
1175	// Okay, we found the token and return. Otherwise we found the end of the
1176	// translation unit.
1177	return Val;
1178	}
1179
1180	// We represent the primary and partition names as 'Paths' which are sections
1181	// of the hierarchical access path for a clang module. However for C++20
1182	// the periods in a name are just another character, and we will need to
1183	// flatten them into a string.
1184	std::string ModuleLoader::getFlatNameFromPath(ModuleIdPath Path) {
1185	std::string Name;
1186	if (Path.empty())
1187	return Name;
1188
1189	for (auto &Piece : Path) {
1190	assert(Piece.getIdentifierInfo() && Piece.getLoc().isValid());
1191	if (!Name.empty())
1192	Name += ".";
1193	Name += Piece.getIdentifierInfo()->getName();
1194	}
1195	return Name;
1196	}
1197
1198	ModuleNameLoc *ModuleNameLoc::Create(Preprocessor &PP, ModuleIdPath Path) {
1199	assert(!Path.empty() && "expect at least one identifier in a module name");
1200	void *Mem = PP.getPreprocessorAllocator().Allocate(
1201	Size: totalSizeToAlloc<IdentifierLoc>(Counts: Path.size()), Alignment: alignof(ModuleNameLoc));
1202	return new (Mem) ModuleNameLoc (Path);
1203	}
1204
1205	bool Preprocessor::LexModuleNameContinue(Token &Tok, SourceLocation UseLoc,
1206	SmallVectorImpl<Token> &Suffix,
1207	SmallVectorImpl<IdentifierLoc> &Path,
1208	bool AllowMacroExpansion,
1209	bool IsPartition) {
1210	auto ConsumeToken = [&]() {
1211	if (AllowMacroExpansion)
1212	Lex(Result&: Tok);
1213	else
1214	LexUnexpandedToken(Result&: Tok);
1215	Suffix.push_back(Elt: Tok);
1216	};
1217
1218	while (true) {
1219	if (Tok.isNot(K: tok::identifier)) {
1220	if (Tok.is(K: tok::code_completion)) {
1221	CurLexer ->cutOffLexing();
1222	CodeComplete->CodeCompleteModuleImport(ImportLoc: UseLoc, Path);
1223	return true;
1224	}
1225
1226	Diag(Tok, DiagID: diag::err_pp_module_expected_ident) << Path.empty();
1227	return true;
1228	}
1229
1230	// [cpp.pre]/p2:
1231	// No identifier in the pp-module-name or pp-module-partition shall
1232	// currently be defined as an object-like macro.
1233	if (MacroInfo *MI = getMacroInfo(II: Tok.getIdentifierInfo());
1234	MI && MI->isObjectLike() && getLangOpts().CPlusPlus20 &&
1235	!AllowMacroExpansion) {
1236	Diag(Tok, DiagID: diag::err_pp_module_name_is_macro)
1237	<< IsPartition << Tok.getIdentifierInfo();
1238	Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::note_macro_here)
1239	<< Tok.getIdentifierInfo();
1240	}
1241
1242	// Record this part of the module path.
1243	Path.emplace_back(Args: Tok.getLocation(), Args: Tok.getIdentifierInfo());
1244	ConsumeToken ();
1245
1246	if (Tok.isNot(K: tok::period))
1247	return false;
1248
1249	ConsumeToken ();
1250	}
1251	}
1252
1253	/// [cpp.pre]/p2:
1254	/// A preprocessing directive consists of a sequence of preprocessing tokens
1255	/// that satisfies the following constraints: At the start of translation phase
1256	/// 4, the first preprocessing token in the sequence, referred to as a
1257	/// directive-introducing token, begins with the first character in the source
1258	/// file (optionally after whitespace containing no new-line characters) or
1259	/// follows whitespace containing at least one new-line character, and is:
1260	/// - a # preprocessing token, or
1261	/// - an import preprocessing token immediately followed on the same logical
1262	/// source line by a header-name, <, identifier, or : preprocessing token, or
1263	/// - a module preprocessing token immediately followed on the same logical
1264	/// source line by an identifier, :, or ; preprocessing token, or
1265	/// - an export preprocessing token immediately followed on the same logical
1266	/// source line by one of the two preceding forms.
1267	///
1268	///
1269	/// At the start of phase 4 an import or module token is treated as starting a
1270	/// directive and are converted to their respective keywords iff:
1271	/// - After skipping horizontal whitespace are
1272	/// - at the start of a logical line, or
1273	/// - preceded by an 'export' at the start of the logical line.
1274	/// - Are followed by an identifier pp token (before macro expansion), or
1275	/// - <, ", or : (but not ::) pp tokens for 'import', or
1276	/// - ; for 'module'
1277	/// Otherwise the token is treated as an identifier.
1278	bool Preprocessor::HandleModuleContextualKeyword(Token &Result) {
1279	if (!getLangOpts().CPlusPlusModules \|\| !Result.isModuleContextualKeyword())
1280	return false;
1281
1282	if (Result.is(K: tok::kw_export)) {
1283	LastExportKeyword = Result;
1284	return false;
1285	}
1286
1287	/// Trait 'module' and 'import' as a identifier when the main file is a
1288	/// preprocessed module file. We only allow '__preprocessed_module' and
1289	/// '__preprocessed_import' in this context.
1290	IdentifierInfo *II = Result.getIdentifierInfo();
1291	if (isPreprocessedModuleFile() &&
1292	(II->isStr(Str: tok::getKeywordSpelling(Kind: tok::kw_import)) \|\|
1293	II->isStr(Str: tok::getKeywordSpelling(Kind: tok::kw_module))))
1294	return false;
1295
1296	if (LastExportKeyword.is(K: tok::kw_export)) {
1297	// The export keyword was not at the start of line, it's not a
1298	// directive-introducing token.
1299	if (!LastExportKeyword.isAtPhysicalStartOfLine())
1300	return false;
1301	// [cpp.pre]/1.4
1302	// export // not a preprocessing directive
1303	// import foo; // preprocessing directive (ill-formed at phase7)
1304	if (Result.isAtPhysicalStartOfLine())
1305	return false;
1306	} else if (!Result.isAtPhysicalStartOfLine())
1307	return false;
1308
1309	llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
1310	CurPPLexer->ParsingPreprocessorDirective, true);
1311
1312	// The next token may be an angled string literal after import keyword.
1313	llvm::SaveAndRestore<bool> SavedParsingFilemame(
1314	CurPPLexer->ParsingFilename,
1315	Result.getIdentifierInfo()->isImportKeyword());
1316
1317	std::optional<Token> NextTok =
1318	CurLexer ? CurLexer ->peekNextPPToken() : CurTokenLexer ->peekNextPPToken();
1319	if (!NextTok)
1320	return false;
1321
1322	if (NextTok ->is(K: tok::raw_identifier))
1323	LookUpIdentifierInfo(Identifier&: *NextTok);
1324
1325	if (Result.getIdentifierInfo()->isImportKeyword()) {
1326	if (NextTok ->isOneOf(Ks: tok::identifier, Ks: tok::less, Ks: tok::colon,
1327	Ks: tok::header_name)) {
1328	Result.setKind(tok::kw_import);
1329	ModuleImportLoc = Result.getLocation();
1330	IsAtImport = false;
1331	return true;
1332	}
1333	}
1334
1335	if (Result.getIdentifierInfo()->isModuleKeyword() &&
1336	NextTok ->isOneOf(Ks: tok::identifier, Ks: tok::colon, Ks: tok::semi)) {
1337	Result.setKind(tok::kw_module);
1338	ModuleDeclLoc = Result.getLocation();
1339	return true;
1340	}
1341
1342	// Ok, it's an identifier.
1343	return false;
1344	}
1345
1346	bool Preprocessor::CollectPPImportSuffixAndEnterStream(
1347	SmallVectorImpl<Token> &Toks, bool StopUntilEOD) {
1348	CollectPPImportSuffix(Toks);
1349	EnterModuleSuffixTokenStream(Toks);
1350	return false;
1351	}
1352
1353	/// Collect the tokens of a C++20 pp-import-suffix.
1354	void Preprocessor::CollectPPImportSuffix(SmallVectorImpl<Token> &Toks,
1355	bool StopUntilEOD) {
1356	while (true) {
1357	Toks.emplace_back();
1358	Lex(Result&: Toks.back());
1359
1360	switch (Toks.back().getKind()) {
1361	case tok::semi:
1362	if (!StopUntilEOD)
1363	return;
1364	[[fallthrough]];
1365	case tok::eod:
1366	case tok::eof:
1367	return;
1368	default:
1369	break;
1370	}
1371	}
1372	}
1373
1374	// Allocate a holding buffer for a sequence of tokens and introduce it into
1375	// the token stream.
1376	void Preprocessor::EnterModuleSuffixTokenStream(ArrayRef<Token> Toks) {
1377	if (Toks.empty())
1378	return;
1379	auto ToksCopy = std::make_unique<Token[]>(num: Toks.size());
1380	std::copy(first: Toks.begin(), last: Toks.end(), result: ToksCopy.get());
1381	EnterTokenStream(Toks: std::move(ToksCopy), NumToks: Toks.size(),
1382	/DisableMacroExpansion/ false, /IsReinject/ false);
1383	assert(CurTokenLexer && "Must have a TokenLexer");
1384	CurTokenLexer ->setLexingCXXModuleDirective();
1385	}
1386
1387	/// Lex a token following the 'import' contextual keyword.
1388	///
1389	/// pp-import: [C++20]
1390	/// import header-name pp-import-suffix[opt] ;
1391	/// import header-name-tokens pp-import-suffix[opt] ;
1392	/// [ObjC] @ import module-name ;
1393	/// [Clang] import module-name ;
1394	///
1395	/// header-name-tokens:
1396	/// string-literal
1397	/// < [any sequence of preprocessing-tokens other than >] >
1398	///
1399	/// module-name:
1400	/// module-name-qualifier[opt] identifier
1401	///
1402	/// module-name-qualifier
1403	/// module-name-qualifier[opt] identifier .
1404	///
1405	/// We respond to a pp-import by importing macros from the named module.
1406	bool Preprocessor::LexAfterModuleImport(Token &Result) {
1407	// Figure out what kind of lexer we actually have.
1408	recomputeCurLexerKind();
1409
1410	SmallVector<Token, `32`> Suffix;
1411	SmallVector<IdentifierLoc, `3`> Path;
1412	Lex(Result);
1413	if (LexModuleNameContinue(Tok&: Result, UseLoc: ModuleImportLoc, Suffix, Path))
1414	return CollectPPImportSuffixAndEnterStream(Toks&: Suffix);
1415
1416	ModuleNameLoc NameLoc = ModuleNameLoc::Create(PP&: this, Path);
1417	Suffix.clear();
1418	Suffix.emplace_back();
1419	Suffix.back().setKind(tok::annot_module_name);
1420	Suffix.back().setAnnotationRange(NameLoc->getRange());
1421	Suffix.back().setAnnotationValue(static_cast<void *>(NameLoc));
1422	Suffix.push_back(Elt: Result);
1423
1424	// Consume the pp-import-suffix and expand any macros in it now, if we're not
1425	// at the semicolon already.
1426	SourceLocation SemiLoc = Result.getLocation();
1427	if (Suffix.back().isNot(K: tok::semi)) {
1428	if (Suffix.back().isNot(K: tok::eof))
1429	CollectPPImportSuffix(Toks&: Suffix);
1430	if (Suffix.back().isNot(K: tok::semi)) {
1431	// This is not an import after all.
1432	EnterModuleSuffixTokenStream(Toks: Suffix);
1433	return false;
1434	}
1435	SemiLoc = Suffix.back().getLocation();
1436	}
1437
1438	Module Imported = nullptr*;
1439	if (getLangOpts().Modules) {
1440	Imported = TheModuleLoader.loadModule(ImportLoc: ModuleImportLoc, Path, Visibility: Module::Hidden,
1441	/IsInclusionDirective=/false);
1442	if (Imported)
1443	makeModuleVisible(M: Imported, Loc: SemiLoc);
1444	}
1445
1446	if (Callbacks)
1447	Callbacks ->moduleImport(ImportLoc: ModuleImportLoc, Path, Imported);
1448
1449	if (!Suffix.empty()) {
1450	EnterModuleSuffixTokenStream(Toks: Suffix);
1451	return false;
1452	}
1453	return true;
1454	}
1455
1456	void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc,
1457	bool IncludeExports) {
1458	CurSubmoduleState->VisibleModules.setVisible(
1459	M, Loc, IncludeExports, Vis: [](Module *) {},
1460	Cb: [&](ArrayRef<Module > Path, Module Conflict, StringRef Message) {
1461	// FIXME: Include the path in the diagnostic.
1462	// FIXME: Include the import location for the conflicting module.
1463	Diag(Loc: ModuleImportLoc, DiagID: diag::warn_module_conflict)
1464	<< Path [`0`]->getFullModuleName()
1465	<< Conflict->getFullModuleName()
1466	<< Message;
1467	});
1468
1469	// Add this module to the imports list of the currently-built submodule.
1470	if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
1471	BuildingSubmoduleStack.back().M->Imports.insert(X: M);
1472	}
1473
1474	bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
1475	const char *DiagnosticTag,
1476	bool AllowMacroExpansion) {
1477	// We need at least one string literal.
1478	if (Result.isNot(K: tok::string_literal)) {
1479	Diag(Tok: Result, DiagID: diag::err_expected_string_literal)
1480	<< /Source='in...'/`0` << DiagnosticTag;
1481	return false;
1482	}
1483
1484	// Lex string literal tokens, optionally with macro expansion.
1485	SmallVector<Token, `4`> StrToks;
1486	do {
1487	StrToks.push_back(Elt: Result);
1488
1489	if (Result.hasUDSuffix())
1490	Diag(Tok: Result, DiagID: diag::err_invalid_string_udl);
1491
1492	if (AllowMacroExpansion)
1493	Lex(Result);
1494	else
1495	LexUnexpandedToken(Result);
1496	} while (Result.is(K: tok::string_literal));
1497
1498	// Concatenate and parse the strings.
1499	StringLiteralParser Literal(StrToks, *this);
1500	assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1501
1502	if (Literal.hadError)
1503	return false;
1504
1505	if (Literal.Pascal) {
1506	Diag(Loc: StrToks [`0`].getLocation(), DiagID: diag::err_expected_string_literal)
1507	<< /Source='in...'/`0` << DiagnosticTag;
1508	return false;
1509	}
1510
1511	String = std::string (Literal.GetString());
1512	return true;
1513	}
1514
1515	bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
1516	assert(Tok.is(tok::numeric_constant));
1517	SmallString<`8`> IntegerBuffer;
1518	bool NumberInvalid = false;
1519	StringRef Spelling = getSpelling(Tok, Buffer&: IntegerBuffer, Invalid: &NumberInvalid);
1520	if (NumberInvalid)
1521	return false;
1522	NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
1523	getLangOpts(), getTargetInfo(),
1524	getDiagnostics());
1525	if (Literal.hadError \|\| !Literal.isIntegerLiteral() \|\| Literal.hasUDSuffix())
1526	return false;
1527	llvm::APInt APVal(`64`, `0`);
1528	if (Literal.GetIntegerValue(Val&: APVal))
1529	return false;
1530	Lex(Result&: Tok);
1531	Value = APVal.getLimitedValue();
1532	return true;
1533	}
1534
1535	void Preprocessor::addCommentHandler(CommentHandler *Handler) {
1536	assert(Handler && "NULL comment handler");
1537	assert(!llvm::is_contained(CommentHandlers, Handler) &&
1538	"Comment handler already registered");
1539	CommentHandlers.push_back(x: Handler);
1540	}
1541
1542	void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
1543	std::vector<CommentHandler *>::iterator Pos =
1544	llvm::find(Range&: CommentHandlers, Val: Handler);
1545	assert(Pos != CommentHandlers.end() && "Comment handler not registered");
1546	CommentHandlers.erase(position: Pos);
1547	}
1548
1549	bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
1550	bool AnyPendingTokens = false;
1551	for (CommentHandler *H : CommentHandlers) {
1552	if (H->HandleComment(PP&: *this, Comment))
1553	AnyPendingTokens = true;
1554	}
1555	if (!AnyPendingTokens \|\| getCommentRetentionState())
1556	return false;
1557	Lex(Result&: result);
1558	return true;
1559	}
1560
1561	void Preprocessor::emitMacroDeprecationWarning(const Token &Identifier) const {
1562	const MacroAnnotations &A =
1563	getMacroAnnotations(II: Identifier.getIdentifierInfo());
1564	assert(A.DeprecationInfo &&
1565	"Macro deprecation warning without recorded annotation!");
1566	const MacroAnnotationInfo &Info = *A.DeprecationInfo;
1567	if (Info.Message.empty())
1568	Diag(Tok: Identifier, DiagID: diag::warn_pragma_deprecated_macro_use)
1569	<< Identifier.getIdentifierInfo() << `0`;
1570	else
1571	Diag(Tok: Identifier, DiagID: diag::warn_pragma_deprecated_macro_use)
1572	<< Identifier.getIdentifierInfo() << `1` << Info.Message;
1573	Diag(Loc: Info.Location, DiagID: diag::note_pp_macro_annotation) << `0`;
1574	}
1575
1576	void Preprocessor::emitRestrictExpansionWarning(const Token &Identifier) const {
1577	const MacroAnnotations &A =
1578	getMacroAnnotations(II: Identifier.getIdentifierInfo());
1579	assert(A.RestrictExpansionInfo &&
1580	"Macro restricted expansion warning without recorded annotation!");
1581	const MacroAnnotationInfo &Info = *A.RestrictExpansionInfo;
1582	if (Info.Message.empty())
1583	Diag(Tok: Identifier, DiagID: diag::warn_pragma_restrict_expansion_macro_use)
1584	<< Identifier.getIdentifierInfo() << `0`;
1585	else
1586	Diag(Tok: Identifier, DiagID: diag::warn_pragma_restrict_expansion_macro_use)
1587	<< Identifier.getIdentifierInfo() << `1` << Info.Message;
1588	Diag(Loc: Info.Location, DiagID: diag::note_pp_macro_annotation) << `1`;
1589	}
1590
1591	void Preprocessor::emitRestrictInfNaNWarning(const Token &Identifier,
1592	unsigned DiagSelection) const {
1593	Diag(Tok: Identifier, DiagID: diag::warn_fp_nan_inf_when_disabled) << DiagSelection << `1`;
1594	}
1595
1596	void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
1597	bool IsUndef) const {
1598	const MacroAnnotations &A =
1599	getMacroAnnotations(II: Identifier.getIdentifierInfo());
1600	assert(A.FinalAnnotationLoc &&
1601	"Final macro warning without recorded annotation!");
1602
1603	Diag(Tok: Identifier, DiagID: diag::warn_pragma_final_macro)
1604	<< Identifier.getIdentifierInfo() << (IsUndef ? `0` : `1`);
1605	Diag(Loc: *A.FinalAnnotationLoc, DiagID: diag::note_pp_macro_annotation) << `2`;
1606	}
1607
1608	bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
1609	const SourceLocation &Loc) const {
1610	// The lambda that tests if a `Loc` is in an opt-out region given one opt-out
1611	// region map:
1612	auto TestInMap = [&SourceMgr](const SafeBufferOptOutRegionsTy &Map,
1613	const SourceLocation &Loc) -> bool {
1614	// Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
1615	auto FirstRegionEndingAfterLoc = llvm::partition_point(
1616	Range: Map, P: [&SourceMgr,
1617	&Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
1618	return SourceMgr.isBeforeInTranslationUnit(LHS: Region.second, RHS: Loc);
1619	});
1620
1621	if (FirstRegionEndingAfterLoc != Map.end()) {
1622	// To test if the start location of the found region precedes `Loc`:
1623	return SourceMgr.isBeforeInTranslationUnit(
1624	LHS: FirstRegionEndingAfterLoc->first, RHS: Loc);
1625	}
1626	// If we do not find a region whose end location passes `Loc`, we want to
1627	// check if the current region is still open:
1628	if (!Map.empty() && Map.back().first == Map.back().second)
1629	return SourceMgr.isBeforeInTranslationUnit(LHS: Map.back().first, RHS: Loc);
1630	return false;
1631	};
1632
1633	// What the following does:
1634	//
1635	// If `Loc` belongs to the local TU, we just look up `SafeBufferOptOutMap`.
1636	// Otherwise, `Loc` is from a loaded AST. We look up the
1637	// `LoadedSafeBufferOptOutMap` first to get the opt-out region map of the
1638	// loaded AST where `Loc` is at. Then we find if `Loc` is in an opt-out
1639	// region w.r.t. the region map. If the region map is absent, it means there
1640	// is no opt-out pragma in that loaded AST.
1641	//
1642	// Opt-out pragmas in the local TU or a loaded AST is not visible to another
1643	// one of them. That means if you put the pragmas around a `#include
1644	// "module.h"`, where module.h is a module, it is not actually suppressing
1645	// warnings in module.h. This is fine because warnings in module.h will be
1646	// reported when module.h is compiled in isolation and nothing in module.h
1647	// will be analyzed ever again. So you will not see warnings from the file
1648	// that imports module.h anyway. And you can't even do the same thing for PCHs
1649	// because they can only be included from the command line.
1650
1651	if (SourceMgr.isLocalSourceLocation(Loc))
1652	return TestInMap (SafeBufferOptOutMap, Loc);
1653
1654	const SafeBufferOptOutRegionsTy *LoadedRegions =
1655	LoadedSafeBufferOptOutMap.lookupLoadedOptOutMap(Loc, SrcMgr: SourceMgr);
1656
1657	if (LoadedRegions)
1658	return TestInMap (*LoadedRegions, Loc);
1659	return false;
1660	}
1661
1662	bool Preprocessor::enterOrExitSafeBufferOptOutRegion(
1663	bool isEnter, const SourceLocation &Loc) {
1664	if (isEnter) {
1665	if (isPPInSafeBufferOptOutRegion())
1666	return true; // invalid enter action
1667	InSafeBufferOptOutRegion = true;
1668	CurrentSafeBufferOptOutStart = Loc;
1669
1670	// To set the start location of a new region:
1671
1672	if (!SafeBufferOptOutMap.empty()) {
1673	[[maybe_unused]] auto *PrevRegion = &SafeBufferOptOutMap.back();
1674	assert(PrevRegion->first != PrevRegion->second &&
1675	"Shall not begin a safe buffer opt-out region before closing the "
1676	"previous one.");
1677	}
1678	// If the start location equals to the end location, we call the region a
1679	// open region or a unclosed region (i.e., end location has not been set
1680	// yet).
1681	SafeBufferOptOutMap.emplace_back(Args: Loc, Args: Loc);
1682	} else {
1683	if (!isPPInSafeBufferOptOutRegion())
1684	return true; // invalid enter action
1685	InSafeBufferOptOutRegion = false;
1686
1687	// To set the end location of the current open region:
1688
1689	assert(!SafeBufferOptOutMap.empty() &&
1690	"Misordered safe buffer opt-out regions");
1691	auto *CurrRegion = &SafeBufferOptOutMap.back();
1692	assert(CurrRegion->first == CurrRegion->second &&
1693	"Set end location to a closed safe buffer opt-out region");
1694	CurrRegion->second = Loc;
1695	}
1696	return false;
1697	}
1698
1699	bool Preprocessor::isPPInSafeBufferOptOutRegion() {
1700	return InSafeBufferOptOutRegion;
1701	}
1702	bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) {
1703	StartLoc = CurrentSafeBufferOptOutStart;
1704	return InSafeBufferOptOutRegion;
1705	}
1706
1707	SmallVector<SourceLocation, `64`>
1708	Preprocessor::serializeSafeBufferOptOutMap() const {
1709	assert(!InSafeBufferOptOutRegion &&
1710	"Attempt to serialize safe buffer opt-out regions before file being "
1711	"completely preprocessed");
1712
1713	SmallVector<SourceLocation, `64`> SrcSeq;
1714
1715	for (const auto &[begin, end] : SafeBufferOptOutMap) {
1716	SrcSeq.push_back(Elt: begin);
1717	SrcSeq.push_back(Elt: end);
1718	}
1719	// Only `SafeBufferOptOutMap` gets serialized. No need to serialize
1720	// `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
1721	// pch/module in the pch-chain/module-DAG will be loaded one by one in order.
1722	// It means that for each loading pch/module m, it just needs to load m's own
1723	// `SafeBufferOptOutMap`.
1724	return SrcSeq;
1725	}
1726
1727	bool Preprocessor::setDeserializedSafeBufferOptOutMap(
1728	const SmallVectorImpl<SourceLocation> &SourceLocations) {
1729	if (SourceLocations.size() == `0`)
1730	return false;
1731
1732	assert(SourceLocations.size() % `2` == `0` &&
1733	"ill-formed SourceLocation sequence");
1734
1735	auto It = SourceLocations.begin();
1736	SafeBufferOptOutRegionsTy &Regions =
1737	LoadedSafeBufferOptOutMap.findAndConsLoadedOptOutMap(Loc: *It, SrcMgr&: SourceMgr);
1738
1739	do {
1740	SourceLocation Begin = *It++;
1741	SourceLocation End = *It++;
1742
1743	Regions.emplace_back(Args&: Begin, Args&: End);
1744	} while (It != SourceLocations.end());
1745	return true;
1746	}
1747
1748	ModuleLoader::~ModuleLoader() = default;
1749
1750	CommentHandler::~CommentHandler() = default;
1751
1752	EmptylineHandler::~EmptylineHandler() = default;
1753
1754	CodeCompletionHandler::~CodeCompletionHandler() = default;
1755
1756	void Preprocessor::createPreprocessingRecord() {
1757	if (Record)
1758	return;
1759
1760	Record = new PreprocessingRecord (getSourceManager());
1761	addPPCallbacks(C: std::unique_ptr<PPCallbacks>(Record));
1762	}
1763
1764	const char Preprocessor::getCheckPoint(FileID FID, const* char Start) const* {
1765	if (auto It = CheckPoints.find(Val: FID); It != CheckPoints.end()) {
1766	const SmallVector<const char *> &FileCheckPoints = It ->second;
1767	const char Last = nullptr*;
1768	// FIXME: Do better than a linear search.
1769	for (const char *P : FileCheckPoints) {
1770	if (P > Start)
1771	break;
1772	Last = P;
1773	}
1774	return Last;
1775	}
1776
1777	return nullptr;
1778	}
1779
1780	bool Preprocessor::hasSeenNoTrivialPPDirective() const {
1781	return DirTracer && DirTracer->hasSeenNoTrivialPPDirective();
1782	}
1783
1784	bool NoTrivialPPDirectiveTracer::hasSeenNoTrivialPPDirective() const {
1785	return SeenNoTrivialPPDirective;
1786	}
1787
1788	void NoTrivialPPDirectiveTracer::setSeenNoTrivialPPDirective() {
1789	if (InMainFile && !SeenNoTrivialPPDirective)
1790	SeenNoTrivialPPDirective = true;
1791	}
1792
1793	void NoTrivialPPDirectiveTracer::LexedFileChanged(
1794	FileID FID, LexedFileChangeReason Reason,
1795	SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) {
1796	InMainFile = (FID == PP.getSourceManager().getMainFileID());
1797	}
1798
1799	void NoTrivialPPDirectiveTracer::MacroExpands(const Token &MacroNameTok,
1800	const MacroDefinition &MD,
1801	SourceRange Range,
1802	const MacroArgs *Args) {
1803	// FIXME: Does only enable builtin macro expansion make sense?
1804	if (!MD.getMacroInfo()->isBuiltinMacro())
1805	setSeenNoTrivialPPDirective();
1806	}
1807

// Source: llvm_projects/clang/lib/Lex/Preprocessor.cpp