1//===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Implements # directive processing for the Preprocessor.
11///
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/AttributeCommonInfo.h"
15#include "clang/Basic/Attributes.h"
16#include "clang/Basic/CharInfo.h"
17#include "clang/Basic/DirectoryEntry.h"
18#include "clang/Basic/FileManager.h"
19#include "clang/Basic/IdentifierTable.h"
20#include "clang/Basic/LangOptions.h"
21#include "clang/Basic/Module.h"
22#include "clang/Basic/SourceLocation.h"
23#include "clang/Basic/SourceManager.h"
24#include "clang/Basic/TargetInfo.h"
25#include "clang/Basic/TokenKinds.h"
26#include "clang/Lex/CodeCompletionHandler.h"
27#include "clang/Lex/HeaderSearch.h"
28#include "clang/Lex/LexDiagnostic.h"
29#include "clang/Lex/LiteralSupport.h"
30#include "clang/Lex/MacroInfo.h"
31#include "clang/Lex/ModuleLoader.h"
32#include "clang/Lex/ModuleMap.h"
33#include "clang/Lex/PPCallbacks.h"
34#include "clang/Lex/Pragma.h"
35#include "clang/Lex/Preprocessor.h"
36#include "clang/Lex/PreprocessorOptions.h"
37#include "clang/Lex/Token.h"
38#include "clang/Lex/VariadicMacroSupport.h"
39#include "llvm/ADT/ArrayRef.h"
40#include "llvm/ADT/STLExtras.h"
41#include "llvm/ADT/ScopeExit.h"
42#include "llvm/ADT/SmallVector.h"
43#include "llvm/ADT/StringExtras.h"
44#include "llvm/ADT/StringRef.h"
45#include "llvm/ADT/StringSwitch.h"
46#include "llvm/Support/ErrorHandling.h"
47#include "llvm/Support/Path.h"
48#include "llvm/Support/SaveAndRestore.h"
49#include <algorithm>
50#include <cassert>
51#include <cstddef>
52#include <cstring>
53#include <optional>
54#include <string>
55#include <utility>
56
57using namespace clang;
58
59//===----------------------------------------------------------------------===//
60// Utility Methods for Preprocessor Directive Handling.
61//===----------------------------------------------------------------------===//
62
63MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
64 static_assert(std::is_trivially_destructible_v<MacroInfo>, "");
65 return new (BP) MacroInfo(L);
66}
67
68DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
69 SourceLocation Loc) {
70 return new (BP) DefMacroDirective(MI, Loc);
71}
72
73UndefMacroDirective *
74Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
75 return new (BP) UndefMacroDirective(UndefLoc);
76}
77
78VisibilityMacroDirective *
79Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
80 bool isPublic) {
81 return new (BP) VisibilityMacroDirective(Loc, isPublic);
82}
83
84/// Read and discard all tokens remaining on the current line until
85/// the tok::eod token is found.
86SourceRange Preprocessor::DiscardUntilEndOfDirective(
87 Token &Tmp, SmallVectorImpl<Token> *DiscardedToks) {
88 SourceRange Res;
89 auto ReadNextTok = [&]() {
90 LexUnexpandedToken(Result&: Tmp);
91 if (DiscardedToks && Tmp.isNot(K: tok::eod))
92 DiscardedToks->push_back(Elt: Tmp);
93 };
94 ReadNextTok();
95 Res.setBegin(Tmp.getLocation());
96 while (Tmp.isNot(K: tok::eod)) {
97 assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens");
98 ReadNextTok();
99 }
100 Res.setEnd(Tmp.getLocation());
101 return Res;
102}
103
/// Classifies a \#define / \#undef whose macro name is special in some way,
/// selecting which diagnostic (if any) the name should trigger.
enum MacroDiag {
  /// Not a reserved identifier; nothing to diagnose.
  MD_NoWarn = 0,
  /// The macro hides a keyword (warning enabled by default).
  MD_KeywordDef = 1,
  /// \#define or \#undef of a reserved identifier (warning disabled by
  /// default).
  MD_ReservedMacro = 2,
  /// The name is treated as a reserved C++ attribute identifier.
  MD_ReservedAttributeIdentifier = 3
};
111
/// The possible %select arguments for the pp_err_elif_after_else and
/// pp_err_elif_without_if diagnostics.
///
/// The enumerators are streamed into the diagnostic as integers, so their
/// numeric values are spelled out explicitly here.
enum PPElifDiag {
  PED_Elif = 0,
  PED_Elifdef = 1,
  PED_Elifndef = 2
};
119
120static bool isFeatureTestMacro(StringRef MacroName) {
121 // list from:
122 // * https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html
123 // * https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160
124 // * man 7 feature_test_macros
125 // The list must be sorted for correct binary search.
126 static constexpr StringRef ReservedMacro[] = {
127 "_ATFILE_SOURCE",
128 "_BSD_SOURCE",
129 "_CRT_NONSTDC_NO_WARNINGS",
130 "_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES",
131 "_CRT_SECURE_NO_WARNINGS",
132 "_FILE_OFFSET_BITS",
133 "_FORTIFY_SOURCE",
134 "_GLIBCXX_ASSERTIONS",
135 "_GLIBCXX_CONCEPT_CHECKS",
136 "_GLIBCXX_DEBUG",
137 "_GLIBCXX_DEBUG_PEDANTIC",
138 "_GLIBCXX_PARALLEL",
139 "_GLIBCXX_PARALLEL_ASSERTIONS",
140 "_GLIBCXX_SANITIZE_VECTOR",
141 "_GLIBCXX_USE_CXX11_ABI",
142 "_GLIBCXX_USE_DEPRECATED",
143 "_GNU_SOURCE",
144 "_ISOC11_SOURCE",
145 "_ISOC95_SOURCE",
146 "_ISOC99_SOURCE",
147 "_LARGEFILE64_SOURCE",
148 "_POSIX_C_SOURCE",
149 "_REENTRANT",
150 "_SVID_SOURCE",
151 "_THREAD_SAFE",
152 "_XOPEN_SOURCE",
153 "_XOPEN_SOURCE_EXTENDED",
154 "__STDCPP_WANT_MATH_SPEC_FUNCS__",
155 "__STDC_FORMAT_MACROS",
156 };
157 return llvm::binary_search(Range: ReservedMacro, Value&: MacroName);
158}
159
160static bool isLanguageDefinedBuiltin(const SourceManager &SourceMgr,
161 const MacroInfo *MI,
162 const StringRef MacroName) {
163 // If this is a macro with special handling (like __LINE__) then it's language
164 // defined.
165 if (MI->isBuiltinMacro())
166 return true;
167 // Builtin macros are defined in the builtin file
168 if (!SourceMgr.isWrittenInBuiltinFile(Loc: MI->getDefinitionLoc()))
169 return false;
170 // C defines macros starting with __STDC, and C++ defines macros starting with
171 // __STDCPP
172 if (MacroName.starts_with(Prefix: "__STDC"))
173 return true;
174 // C++ defines the __cplusplus macro
175 if (MacroName == "__cplusplus")
176 return true;
177 // C++ defines various feature-test macros starting with __cpp
178 if (MacroName.starts_with(Prefix: "__cpp"))
179 return true;
180 // Anything else isn't language-defined
181 return false;
182}
183
184static bool isReservedCXXAttributeName(Preprocessor &PP, IdentifierInfo *II) {
185 const LangOptions &Lang = PP.getLangOpts();
186 if (Lang.CPlusPlus &&
187 hasAttribute(Syntax: AttributeCommonInfo::AS_CXX11, /* Scope*/ nullptr, Attr: II,
188 Target: PP.getTargetInfo(), LangOpts: Lang, /*CheckPlugins*/ false) > 0) {
189 AttributeCommonInfo::AttrArgsInfo AttrArgsInfo =
190 AttributeCommonInfo::getCXX11AttrArgsInfo(Name: II);
191 if (AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Required)
192 return PP.isNextPPTokenOneOf(Ks: tok::l_paren);
193
194 return !PP.isNextPPTokenOneOf(Ks: tok::l_paren) ||
195 AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Optional;
196 }
197 return false;
198}
199
200static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
201 const LangOptions &Lang = PP.getLangOpts();
202 StringRef Text = II->getName();
203 if (isReservedInAllContexts(Status: II->isReserved(LangOpts: Lang)))
204 return isFeatureTestMacro(MacroName: Text) ? MD_NoWarn : MD_ReservedMacro;
205 if (II->isKeyword(LangOpts: Lang))
206 return MD_KeywordDef;
207 if (Lang.CPlusPlus11 && (Text == "override" || Text == "final"))
208 return MD_KeywordDef;
209 if (isReservedCXXAttributeName(PP, II))
210 return MD_ReservedAttributeIdentifier;
211 return MD_NoWarn;
212}
213
214static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
215 const LangOptions &Lang = PP.getLangOpts();
216 // Do not warn on keyword undef. It is generally harmless and widely used.
217 if (isReservedInAllContexts(Status: II->isReserved(LangOpts: Lang)))
218 return MD_ReservedMacro;
219 if (isReservedCXXAttributeName(PP, II))
220 return MD_ReservedAttributeIdentifier;
221 return MD_NoWarn;
222}
223
224// Return true if we want to issue a diagnostic by default if we
225// encounter this name in a #include with the wrong case. For now,
226// this includes the standard C and C++ headers, Posix headers,
227// and Boost headers. Improper case for these #includes is a
228// potential portability issue.
229static bool warnByDefaultOnWrongCase(StringRef Include) {
230 // If the first component of the path is "boost", treat this like a standard header
231 // for the purposes of diagnostics.
232 if (::llvm::sys::path::begin(path: Include)->equals_insensitive(RHS: "boost"))
233 return true;
234
235 // "condition_variable" is the longest standard header name at 18 characters.
236 // If the include file name is longer than that, it can't be a standard header.
237 static const size_t MaxStdHeaderNameLen = 18u;
238 if (Include.size() > MaxStdHeaderNameLen)
239 return false;
240
241 // Lowercase and normalize the search string.
242 SmallString<32> LowerInclude{Include};
243 for (char &Ch : LowerInclude) {
244 // In the ASCII range?
245 if (static_cast<unsigned char>(Ch) > 0x7f)
246 return false; // Can't be a standard header
247 // ASCII lowercase:
248 if (Ch >= 'A' && Ch <= 'Z')
249 Ch += 'a' - 'A';
250 // Normalize path separators for comparison purposes.
251 else if (::llvm::sys::path::is_separator(value: Ch))
252 Ch = '/';
253 }
254
255 // The standard C/C++ and Posix headers
256 return llvm::StringSwitch<bool>(LowerInclude)
257 // C library headers
258 .Cases(CaseStrings: {"assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h"}, Value: true)
259 .Cases(CaseStrings: {"float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h"},
260 Value: true)
261 .Cases(CaseStrings: {"math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h"}, Value: true)
262 .Cases(CaseStrings: {"stdatomic.h", "stdbool.h", "stdckdint.h", "stdcountof.h"}, Value: true)
263 .Cases(CaseStrings: {"stddef.h", "stdint.h", "stdio.h", "stdlib.h", "stdnoreturn.h"},
264 Value: true)
265 .Cases(CaseStrings: {"string.h", "tgmath.h", "threads.h", "time.h", "uchar.h"}, Value: true)
266 .Cases(CaseStrings: {"wchar.h", "wctype.h"}, Value: true)
267
268 // C++ headers for C library facilities
269 .Cases(CaseStrings: {"cassert", "ccomplex", "cctype", "cerrno", "cfenv"}, Value: true)
270 .Cases(CaseStrings: {"cfloat", "cinttypes", "ciso646", "climits", "clocale"}, Value: true)
271 .Cases(CaseStrings: {"cmath", "csetjmp", "csignal", "cstdalign", "cstdarg"}, Value: true)
272 .Cases(CaseStrings: {"cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib"}, Value: true)
273 .Cases(CaseStrings: {"cstring", "ctgmath", "ctime", "cuchar", "cwchar"}, Value: true)
274 .Case(S: "cwctype", Value: true)
275
276 // C++ library headers
277 .Cases(CaseStrings: {"algorithm", "fstream", "list", "regex", "thread"}, Value: true)
278 .Cases(CaseStrings: {"array", "functional", "locale", "scoped_allocator", "tuple"},
279 Value: true)
280 .Cases(CaseStrings: {"atomic", "future", "map", "set", "type_traits"}, Value: true)
281 .Cases(
282 CaseStrings: {"bitset", "initializer_list", "memory", "shared_mutex", "typeindex"},
283 Value: true)
284 .Cases(CaseStrings: {"chrono", "iomanip", "mutex", "sstream", "typeinfo"}, Value: true)
285 .Cases(CaseStrings: {"codecvt", "ios", "new", "stack", "unordered_map"}, Value: true)
286 .Cases(CaseStrings: {"complex", "iosfwd", "numeric", "stdexcept", "unordered_set"},
287 Value: true)
288 .Cases(
289 CaseStrings: {"condition_variable", "iostream", "ostream", "streambuf", "utility"},
290 Value: true)
291 .Cases(CaseStrings: {"deque", "istream", "queue", "string", "valarray"}, Value: true)
292 .Cases(CaseStrings: {"exception", "iterator", "random", "strstream", "vector"}, Value: true)
293 .Cases(CaseStrings: {"forward_list", "limits", "ratio", "system_error"}, Value: true)
294
295 // POSIX headers (which aren't also C headers)
296 .Cases(CaseStrings: {"aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h"}, Value: true)
297 .Cases(CaseStrings: {"fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h"}, Value: true)
298 .Cases(CaseStrings: {"grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h"}, Value: true)
299 .Cases(CaseStrings: {"mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h"},
300 Value: true)
301 .Cases(CaseStrings: {"netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h"},
302 Value: true)
303 .Cases(CaseStrings: {"regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h"}, Value: true)
304 .Cases(CaseStrings: {"strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h"},
305 Value: true)
306 .Cases(CaseStrings: {"sys/resource.h", "sys/select.h", "sys/sem.h", "sys/shm.h",
307 "sys/socket.h"},
308 Value: true)
309 .Cases(CaseStrings: {"sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h",
310 "sys/types.h"},
311 Value: true)
312 .Cases(
313 CaseStrings: {"sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h"},
314 Value: true)
315 .Cases(CaseStrings: {"tar.h", "termios.h", "trace.h", "ulimit.h"}, Value: true)
316 .Cases(CaseStrings: {"unistd.h", "utime.h", "utmpx.h", "wordexp.h"}, Value: true)
317 .Default(Value: false);
318}
319
320/// Find a similar string in `Candidates`.
321///
322/// \param LHS a string for a similar string in `Candidates`
323///
324/// \param Candidates the candidates to find a similar string.
325///
326/// \returns a similar string if exists. If no similar string exists,
327/// returns std::nullopt.
328static std::optional<StringRef>
329findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) {
330 // We need to check if `Candidates` has the exact case-insensitive string
331 // because the Levenshtein distance match does not care about it.
332 for (StringRef C : Candidates) {
333 if (LHS.equals_insensitive(RHS: C)) {
334 return C;
335 }
336 }
337
338 // Keep going with the Levenshtein distance match.
339 // If the LHS size is less than 3, use the LHS size minus 1 and if not,
340 // use the LHS size divided by 3.
341 size_t Length = LHS.size();
342 size_t MaxDist = Length < 3 ? Length - 1 : Length / 3;
343
344 std::optional<std::pair<StringRef, size_t>> SimilarStr;
345 for (StringRef C : Candidates) {
346 size_t CurDist = LHS.edit_distance(Other: C, AllowReplacements: true);
347 if (CurDist <= MaxDist) {
348 if (!SimilarStr) {
349 // The first similar string found.
350 SimilarStr = {C, CurDist};
351 } else if (CurDist < SimilarStr->second) {
352 // More similar string found.
353 SimilarStr = {C, CurDist};
354 }
355 }
356 }
357
358 if (SimilarStr) {
359 return SimilarStr->first;
360 } else {
361 return std::nullopt;
362 }
363}
364
365bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
366 bool *ShadowFlag) {
367 // Missing macro name?
368 if (MacroNameTok.is(K: tok::eod))
369 return Diag(Tok: MacroNameTok, DiagID: diag::err_pp_missing_macro_name);
370
371 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
372 if (!II)
373 return Diag(Tok: MacroNameTok, DiagID: diag::err_pp_macro_not_identifier);
374
375 if (II->isCPlusPlusOperatorKeyword()) {
376 // C++ 2.5p2: Alternative tokens behave the same as its primary token
377 // except for their spellings.
378 Diag(Tok: MacroNameTok, DiagID: getLangOpts().MicrosoftExt
379 ? diag::ext_pp_operator_used_as_macro_name
380 : diag::err_pp_operator_used_as_macro_name)
381 << II << MacroNameTok.getKind();
382 // Allow #defining |and| and friends for Microsoft compatibility or
383 // recovery when legacy C headers are included in C++.
384 }
385
386 if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {
387 // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
388 return Diag(Tok: MacroNameTok, DiagID: diag::err_defined_macro_name);
389 }
390
391 // If defining/undefining reserved identifier or a keyword, we need to issue
392 // a warning.
393 SourceLocation MacroNameLoc = MacroNameTok.getLocation();
394 if (ShadowFlag)
395 *ShadowFlag = false;
396 // Macro names with reserved identifiers are accepted if built-in or passed
397 // through the command line (the later may be present if -dD was used to
398 // generate the preprocessed file).
399 // NB: isInPredefinedFile() is relatively expensive, so keep it at the end
400 // of the condition.
401 if (!SourceMgr.isInSystemHeader(Loc: MacroNameLoc) &&
402 !SourceMgr.isInPredefinedFile(Loc: MacroNameLoc)) {
403 MacroDiag D = MD_NoWarn;
404 if (isDefineUndef == MU_Define) {
405 D = shouldWarnOnMacroDef(PP&: *this, II);
406 }
407 else if (isDefineUndef == MU_Undef)
408 D = shouldWarnOnMacroUndef(PP&: *this, II);
409 if (D == MD_KeywordDef) {
410 // We do not want to warn on some patterns widely used in configuration
411 // scripts. This requires analyzing next tokens, so do not issue warnings
412 // now, only inform caller.
413 if (ShadowFlag)
414 *ShadowFlag = true;
415 }
416 if (D == MD_ReservedMacro)
417 Diag(Tok: MacroNameTok, DiagID: diag::warn_pp_macro_is_reserved_id);
418 if (D == MD_ReservedAttributeIdentifier)
419 Diag(Tok: MacroNameTok, DiagID: diag::warn_pp_macro_is_reserved_attribute_id)
420 << II->getName();
421 }
422
423 // Okay, we got a good identifier.
424 return false;
425}
426
427/// Lex and validate a macro name, which occurs after a
428/// \#define or \#undef.
429///
430/// This sets the token kind to eod and discards the rest of the macro line if
431/// the macro name is invalid.
432///
433/// \param MacroNameTok Token that is expected to be a macro name.
434/// \param isDefineUndef Context in which macro is used.
435/// \param ShadowFlag Points to a flag that is set if macro shadows a keyword.
436void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
437 bool *ShadowFlag) {
438 // Read the token, don't allow macro expansion on it.
439 LexUnexpandedToken(Result&: MacroNameTok);
440
441 if (MacroNameTok.is(K: tok::code_completion)) {
442 if (CodeComplete)
443 CodeComplete->CodeCompleteMacroName(IsDefinition: isDefineUndef == MU_Define);
444 setCodeCompletionReached();
445 LexUnexpandedToken(Result&: MacroNameTok);
446 }
447
448 if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag))
449 return;
450
451 // Invalid macro name, read and discard the rest of the line and set the
452 // token kind to tok::eod if necessary.
453 if (MacroNameTok.isNot(K: tok::eod)) {
454 MacroNameTok.setKind(tok::eod);
455 DiscardUntilEndOfDirective();
456 }
457}
458
459/// Ensure that the next token is a tok::eod token.
460///
461/// If not, emit a diagnostic and consume up until the eod. If EnableMacros is
462/// true, then we consider macros that expand to zero tokens as being ok.
463///
464/// Returns the location of the end of the directive.
465SourceLocation
466Preprocessor::CheckEndOfDirective(StringRef DirType, bool EnableMacros,
467 SmallVectorImpl<Token> *ExtraToks) {
468 Token Tmp;
469 auto ReadNextTok = [this, ExtraToks, &Tmp](auto &&LexFn) {
470 std::invoke(LexFn, this, Tmp);
471 if (ExtraToks && Tmp.isNot(K: tok::eod))
472 ExtraToks->push_back(Elt: Tmp);
473 };
474 // Lex unexpanded tokens for most directives: macros might expand to zero
475 // tokens, causing us to miss diagnosing invalid lines. Some directives (like
476 // #line) allow empty macros.
477 if (EnableMacros)
478 ReadNextTok(&Preprocessor::Lex);
479 else
480 ReadNextTok(&Preprocessor::LexUnexpandedToken);
481
482 // There should be no tokens after the directive, but we allow them as an
483 // extension.
484 while (Tmp.is(K: tok::comment)) // Skip comments in -C mode.
485 ReadNextTok(&Preprocessor::LexUnexpandedToken);
486
487 if (Tmp.is(K: tok::eod))
488 return Tmp.getLocation();
489
490 // Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89,
491 // or if this is a macro-style preprocessing directive, because it is more
492 // trouble than it is worth to insert /**/ and check that there is no /**/
493 // in the range also.
494 FixItHint Hint;
495 if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
496 !CurTokenLexer)
497 Hint = FixItHint::CreateInsertion(InsertionLoc: Tmp.getLocation(),Code: "//");
498
499 unsigned DiagID = diag::ext_pp_extra_tokens_at_eol;
500 // C++20 import or module directive has no '#' prefix.
501 if (getLangOpts().CPlusPlusModules &&
502 (DirType == "import" || DirType == "module"))
503 DiagID = diag::warn_pp_extra_tokens_at_module_directive_eol;
504
505 Diag(Tok: Tmp, DiagID) << DirType << Hint;
506 return DiscardUntilEndOfDirective(DiscardedToks: ExtraToks).getEnd();
507}
508
509void Preprocessor::SuggestTypoedDirective(const Token &Tok,
510 StringRef Directive) const {
511 // If this is a `.S` file, treat unknown # directives as non-preprocessor
512 // directives.
513 if (getLangOpts().AsmPreprocessor) return;
514
515 std::vector<StringRef> Candidates = {
516 "if", "ifdef", "ifndef", "elif", "else", "endif"
517 };
518 if (LangOpts.C23 || LangOpts.CPlusPlus23)
519 Candidates.insert(position: Candidates.end(), l: {"elifdef", "elifndef"});
520
521 if (std::optional<StringRef> Sugg = findSimilarStr(LHS: Directive, Candidates)) {
522 // Directive cannot be coming from macro.
523 assert(Tok.getLocation().isFileID());
524 CharSourceRange DirectiveRange = CharSourceRange::getCharRange(
525 B: Tok.getLocation(),
526 E: Tok.getLocation().getLocWithOffset(Offset: Directive.size()));
527 StringRef SuggValue = *Sugg;
528
529 auto Hint = FixItHint::CreateReplacement(RemoveRange: DirectiveRange, Code: SuggValue);
530 Diag(Tok, DiagID: diag::warn_pp_invalid_directive) << 1 << SuggValue << Hint;
531 }
532}
533
/// SkipExcludedConditionalBlock - We just read a \#if or related directive and
/// decided that the subsequent tokens are in the \#if'd out portion of the
/// file.  Lex the rest of the file, until we see an \#endif.  If
/// FoundNonSkipPortion is true, then we have already emitted code for part of
/// this \#if directive, so \#else/\#elif blocks should never be entered.
/// If FoundElse is true, then a \#else has already been seen for this
/// conditional, so another \#else directive is a duplicate.  When this
/// returns, the caller can lex the first valid token.
542void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
543 SourceLocation IfTokenLoc,
544 bool FoundNonSkipPortion,
545 bool FoundElse,
546 SourceLocation ElseLoc) {
547 // In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()
548 // not getting called recursively by storing the RecordedSkippedRanges
549 // DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects
550 // that RecordedSkippedRanges won't get modified and SkipRangePtr won't be
551 // invalidated. If this changes and there is a need to call
552 // SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should
553 // change to do a second lookup in endLexPass function instead of reusing the
554 // lookup pointer.
555 assert(!SkippingExcludedConditionalBlock &&
556 "calling SkipExcludedConditionalBlock recursively");
557 llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);
558
559 ++NumSkipped;
560 assert(!CurTokenLexer && "Conditional PP block cannot appear in a macro!");
561 assert(CurPPLexer && "Conditional PP block must be in a file!");
562 assert(CurLexer && "Conditional PP block but no current lexer set!");
563
564 if (PreambleConditionalStack.reachedEOFWhileSkipping())
565 PreambleConditionalStack.clearSkipInfo();
566 else
567 CurPPLexer->pushConditionalLevel(DirectiveStart: IfTokenLoc, /*isSkipping*/ WasSkipping: false,
568 FoundNonSkip: FoundNonSkipPortion, FoundElse);
569
570 // Enter raw mode to disable identifier lookup (and thus macro expansion),
571 // disabling warnings, etc.
572 CurPPLexer->LexingRawMode = true;
573 Token Tok;
574 SourceLocation endLoc;
575
576 /// Keeps track and caches skipped ranges and also retrieves a prior skipped
577 /// range if the same block is re-visited.
578 struct SkippingRangeStateTy {
579 Preprocessor &PP;
580
581 const char *BeginPtr = nullptr;
582 unsigned *SkipRangePtr = nullptr;
583
584 SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}
585
586 void beginLexPass() {
587 if (BeginPtr)
588 return; // continue skipping a block.
589
590 // Initiate a skipping block and adjust the lexer if we already skipped it
591 // before.
592 BeginPtr = PP.CurLexer->getBufferLocation();
593 SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];
594 if (*SkipRangePtr) {
595 PP.CurLexer->seek(Offset: PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,
596 /*IsAtStartOfLine*/ true);
597 }
598 }
599
600 void endLexPass(const char *Hashptr) {
601 if (!BeginPtr) {
602 // Not doing normal lexing.
603 assert(PP.CurLexer->isDependencyDirectivesLexer());
604 return;
605 }
606
607 // Finished skipping a block, record the range if it's first time visited.
608 if (!*SkipRangePtr) {
609 *SkipRangePtr = Hashptr - BeginPtr;
610 }
611 assert(*SkipRangePtr == unsigned(Hashptr - BeginPtr));
612 BeginPtr = nullptr;
613 SkipRangePtr = nullptr;
614 }
615 } SkippingRangeState(*this);
616
617 while (true) {
618 if (CurLexer->isDependencyDirectivesLexer()) {
619 CurLexer->LexDependencyDirectiveTokenWhileSkipping(Result&: Tok);
620 } else {
621 SkippingRangeState.beginLexPass();
622 while (true) {
623 CurLexer->Lex(Result&: Tok);
624
625 if (Tok.is(K: tok::code_completion)) {
626 setCodeCompletionReached();
627 if (CodeComplete)
628 CodeComplete->CodeCompleteInConditionalExclusion();
629 continue;
630 }
631
632 // There is actually no "skipped block" in the above because the module
633 // directive is not a text-line (https://wg21.link/cpp.pre#2) nor
634 // anything else that is allowed in a group
635 // (https://eel.is/c++draft/cpp.pre#nt:group-part).
636 //
637 // A preprocessor diagnostic (effective with -E) that triggers whenever
638 // a module directive is encountered where a control-line or a text-line
639 // is required.
640 if (getLangOpts().CPlusPlusModules && Tok.isAtStartOfLine() &&
641 Tok.is(K: tok::raw_identifier) &&
642 (Tok.getRawIdentifier() == "export" ||
643 Tok.getRawIdentifier() == "module")) {
644 llvm::SaveAndRestore ModuleDirectiveSkipping(LastExportKeyword);
645 LastExportKeyword.startToken();
646 LookUpIdentifierInfo(Identifier&: Tok);
647 IdentifierInfo *II = Tok.getIdentifierInfo();
648
649 if (II->getName()[0] == 'e') { // export
650 HandleModuleContextualKeyword(Result&: Tok);
651 CurLexer->Lex(Result&: Tok);
652 if (Tok.is(K: tok::raw_identifier)) {
653 LookUpIdentifierInfo(Identifier&: Tok);
654 II = Tok.getIdentifierInfo();
655 }
656 }
657
658 if (II->getName()[0] == 'm') { // module
659 // HandleModuleContextualKeyword changes the lexer state, so we need
660 // to save RawLexingMode
661 llvm::SaveAndRestore RestoreLexingRawMode(CurPPLexer->LexingRawMode,
662 false);
663 if (HandleModuleContextualKeyword(Result&: Tok)) {
664 // We just parsed a # character at the start of a line, so we're
665 // in directive mode. Tell the lexer this so any newlines we see
666 // will be converted into an EOD token (this terminates the
667 // macro).
668 CurPPLexer->ParsingPreprocessorDirective = true;
669 SourceLocation StartLoc = Tok.getLocation();
670 SourceLocation End = DiscardUntilEndOfDirective().getEnd();
671 Diag(Loc: StartLoc, DiagID: diag::err_pp_cond_span_module_decl)
672 << SourceRange(StartLoc, End);
673 CurPPLexer->ParsingPreprocessorDirective = false;
674 // Restore comment saving mode.
675 if (CurLexer)
676 CurLexer->resetExtendedTokenMode();
677 continue;
678 }
679 }
680 }
681
682 // If this is the end of the buffer, we have an error.
683 if (Tok.is(K: tok::eof)) {
684 // We don't emit errors for unterminated conditionals here,
685 // Lexer::LexEndOfFile can do that properly.
686 // Just return and let the caller lex after this #include.
687 if (PreambleConditionalStack.isRecording())
688 PreambleConditionalStack.SkipInfo.emplace(args&: HashTokenLoc, args&: IfTokenLoc,
689 args&: FoundNonSkipPortion,
690 args&: FoundElse, args&: ElseLoc);
691 break;
692 }
693
694 // If this token is not a preprocessor directive, just skip it.
695 if (Tok.isNot(K: tok::hash) || !Tok.isAtStartOfLine())
696 continue;
697
698 break;
699 }
700 }
701 if (Tok.is(K: tok::eof))
702 break;
703
704 // We just parsed a # character at the start of a line, so we're in
705 // directive mode. Tell the lexer this so any newlines we see will be
706 // converted into an EOD token (this terminates the macro).
707 CurPPLexer->ParsingPreprocessorDirective = true;
708 if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
709
710 assert(Tok.is(tok::hash));
711 const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();
712 assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());
713
714 // Read the next token, the directive flavor.
715 LexUnexpandedToken(Result&: Tok);
716
717 // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
718 // something bogus), skip it.
719 if (Tok.isNot(K: tok::raw_identifier)) {
720 CurPPLexer->ParsingPreprocessorDirective = false;
721 // Restore comment saving mode.
722 if (CurLexer) CurLexer->resetExtendedTokenMode();
723 continue;
724 }
725
    // If the first letter isn't i or e, it isn't interesting to us.  We know that
727 // this is safe in the face of spelling differences, because there is no way
728 // to spell an i/e in a strange way that is another letter. Skipping this
729 // allows us to avoid looking up the identifier info for #define/#undef and
730 // other common directives.
731 StringRef RI = Tok.getRawIdentifier();
732
733 char FirstChar = RI[0];
734 if (FirstChar >= 'a' && FirstChar <= 'z' &&
735 FirstChar != 'i' && FirstChar != 'e') {
736 CurPPLexer->ParsingPreprocessorDirective = false;
737 // Restore comment saving mode.
738 if (CurLexer) CurLexer->resetExtendedTokenMode();
739 continue;
740 }
741
742 // Get the identifier name without trigraphs or embedded newlines. Note
743 // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
744 // when skipping.
745 char DirectiveBuf[20];
746 StringRef Directive;
747 if (!Tok.needsCleaning() && RI.size() < 20) {
748 Directive = RI;
749 } else {
750 std::string DirectiveStr = getSpelling(Tok);
751 size_t IdLen = DirectiveStr.size();
752 if (IdLen >= 20) {
753 CurPPLexer->ParsingPreprocessorDirective = false;
754 // Restore comment saving mode.
755 if (CurLexer) CurLexer->resetExtendedTokenMode();
756 continue;
757 }
758 memcpy(dest: DirectiveBuf, src: &DirectiveStr[0], n: IdLen);
759 Directive = StringRef(DirectiveBuf, IdLen);
760 }
761
762 if (Directive.starts_with(Prefix: "if")) {
763 StringRef Sub = Directive.substr(Start: 2);
764 if (Sub.empty() || // "if"
765 Sub == "def" || // "ifdef"
766 Sub == "ndef") { // "ifndef"
767 // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
768 // bother parsing the condition.
769 DiscardUntilEndOfDirective();
770 CurPPLexer->pushConditionalLevel(DirectiveStart: Tok.getLocation(), /*wasskipping*/WasSkipping: true,
771 /*foundnonskip*/FoundNonSkip: false,
772 /*foundelse*/FoundElse: false);
773 } else {
774 SuggestTypoedDirective(Tok, Directive);
775 }
776 } else if (Directive[0] == 'e') {
777 StringRef Sub = Directive.substr(Start: 1);
778 if (Sub == "ndif") { // "endif"
779 PPConditionalInfo CondInfo;
780 CondInfo.WasSkipping = true; // Silence bogus warning.
781 bool InCond = CurPPLexer->popConditionalLevel(CI&: CondInfo);
782 (void)InCond; // Silence warning in no-asserts mode.
783 assert(!InCond && "Can't be skipping if not in a conditional!");
784
785 // If we popped the outermost skipping block, we're done skipping!
786 if (!CondInfo.WasSkipping) {
787 SkippingRangeState.endLexPass(Hashptr);
788 // Restore the value of LexingRawMode so that trailing comments
789 // are handled correctly, if we've reached the outermost block.
790 CurPPLexer->LexingRawMode = false;
791 endLoc = CheckEndOfDirective(DirType: "endif");
792 CurPPLexer->LexingRawMode = true;
793 if (Callbacks)
794 Callbacks->Endif(Loc: Tok.getLocation(), IfLoc: CondInfo.IfLoc);
795 break;
796 } else {
797 DiscardUntilEndOfDirective();
798 }
799 } else if (Sub == "lse") { // "else".
800 // #else directive in a skipping conditional. If not in some other
801 // skipping conditional, and if #else hasn't already been seen, enter it
802 // as a non-skipping conditional.
803 PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
804
805 if (!CondInfo.WasSkipping)
806 SkippingRangeState.endLexPass(Hashptr);
807
808 // If this is a #else with a #else before it, report the error.
809 if (CondInfo.FoundElse)
810 Diag(Tok, DiagID: diag::pp_err_else_after_else);
811
812 // Note that we've seen a #else in this conditional.
813 CondInfo.FoundElse = true;
814
815 // If the conditional is at the top level, and the #if block wasn't
816 // entered, enter the #else block now.
817 if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
818 CondInfo.FoundNonSkip = true;
819 // Restore the value of LexingRawMode so that trailing comments
820 // are handled correctly.
821 CurPPLexer->LexingRawMode = false;
822 endLoc = CheckEndOfDirective(DirType: "else");
823 CurPPLexer->LexingRawMode = true;
824 if (Callbacks)
825 Callbacks->Else(Loc: Tok.getLocation(), IfLoc: CondInfo.IfLoc);
826 break;
827 } else {
828 DiscardUntilEndOfDirective(); // C99 6.10p4.
829 }
830 } else if (Sub == "lif") { // "elif".
831 PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
832
833 if (!CondInfo.WasSkipping)
834 SkippingRangeState.endLexPass(Hashptr);
835
836 // If this is a #elif with a #else before it, report the error.
837 if (CondInfo.FoundElse)
838 Diag(Tok, DiagID: diag::pp_err_elif_after_else) << PED_Elif;
839
840 // If this is in a skipping block or if we're already handled this #if
841 // block, don't bother parsing the condition.
842 if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
843 // FIXME: We should probably do at least some minimal parsing of the
844 // condition to verify that it is well-formed. The current state
845 // allows #elif* directives with completely malformed (or missing)
846 // conditions.
847 DiscardUntilEndOfDirective();
848 } else {
849 // Restore the value of LexingRawMode so that identifiers are
850 // looked up, etc, inside the #elif expression.
851 assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
852 CurPPLexer->LexingRawMode = false;
853 IdentifierInfo *IfNDefMacro = nullptr;
854 DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
855 // Stop if Lexer became invalid after hitting code completion token.
856 if (!CurPPLexer)
857 return;
858 const bool CondValue = DER.Conditional;
859 CurPPLexer->LexingRawMode = true;
860 if (Callbacks) {
861 Callbacks->Elif(
862 Loc: Tok.getLocation(), ConditionRange: DER.ExprRange,
863 ConditionValue: (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
864 IfLoc: CondInfo.IfLoc);
865 }
866 // If this condition is true, enter it!
867 if (CondValue) {
868 CondInfo.FoundNonSkip = true;
869 break;
870 }
871 }
872 } else if (Sub == "lifdef" || // "elifdef"
873 Sub == "lifndef") { // "elifndef"
874 bool IsElifDef = Sub == "lifdef";
875 PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
876 Token DirectiveToken = Tok;
877
878 if (!CondInfo.WasSkipping)
879 SkippingRangeState.endLexPass(Hashptr);
880
881 // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode even
882 // if this branch is in a skipping block.
883 unsigned DiagID;
884 if (LangOpts.CPlusPlus)
885 DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
886 : diag::ext_cxx23_pp_directive;
887 else
888 DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
889 : diag::ext_c23_pp_directive;
890 Diag(Tok, DiagID) << (IsElifDef ? PED_Elifdef : PED_Elifndef);
891
892 // If this is a #elif with a #else before it, report the error.
893 if (CondInfo.FoundElse)
894 Diag(Tok, DiagID: diag::pp_err_elif_after_else)
895 << (IsElifDef ? PED_Elifdef : PED_Elifndef);
896
897 // If this is in a skipping block or if we're already handled this #if
898 // block, don't bother parsing the condition.
899 if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
900 // FIXME: We should probably do at least some minimal parsing of the
901 // condition to verify that it is well-formed. The current state
902 // allows #elif* directives with completely malformed (or missing)
903 // conditions.
904 DiscardUntilEndOfDirective();
905 } else {
906 // Restore the value of LexingRawMode so that identifiers are
907 // looked up, etc, inside the #elif[n]def expression.
908 assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
909 CurPPLexer->LexingRawMode = false;
910 Token MacroNameTok;
911 ReadMacroName(MacroNameTok);
912 CurPPLexer->LexingRawMode = true;
913
914 // If the macro name token is tok::eod, there was an error that was
915 // already reported.
916 if (MacroNameTok.is(K: tok::eod)) {
917 // Skip code until we get to #endif. This helps with recovery by
918 // not emitting an error when the #endif is reached.
919 continue;
920 }
921
922 emitMacroExpansionWarnings(Identifier: MacroNameTok);
923
924 CheckEndOfDirective(DirType: IsElifDef ? "elifdef" : "elifndef");
925
926 IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
927 auto MD = getMacroDefinition(II: MII);
928 MacroInfo *MI = MD.getMacroInfo();
929
930 if (Callbacks) {
931 if (IsElifDef) {
932 Callbacks->Elifdef(Loc: DirectiveToken.getLocation(), MacroNameTok,
933 MD);
934 } else {
935 Callbacks->Elifndef(Loc: DirectiveToken.getLocation(), MacroNameTok,
936 MD);
937 }
938 }
939 // If this condition is true, enter it!
940 if (static_cast<bool>(MI) == IsElifDef) {
941 CondInfo.FoundNonSkip = true;
942 break;
943 }
944 }
945 } else {
946 SuggestTypoedDirective(Tok, Directive);
947 }
948 } else {
949 SuggestTypoedDirective(Tok, Directive);
950 }
951
952 CurPPLexer->ParsingPreprocessorDirective = false;
953 // Restore comment saving mode.
954 if (CurLexer) CurLexer->resetExtendedTokenMode();
955 }
956
957 // Finally, if we are out of the conditional (saw an #endif or ran off the end
958 // of the file, just stop skipping and return to lexing whatever came after
959 // the #if block.
960 CurPPLexer->LexingRawMode = false;
961
962 // The last skipped range isn't actually skipped yet if it's truncated
963 // by the end of the preamble; we'll resume parsing after the preamble.
964 if (Callbacks && (Tok.isNot(K: tok::eof) || !isRecordingPreamble()))
965 Callbacks->SourceRangeSkipped(
966 Range: SourceRange(HashTokenLoc, endLoc.isValid()
967 ? endLoc
968 : CurPPLexer->getSourceLocation()),
969 EndifLoc: Tok.getLocation());
970}
971
972Module *Preprocessor::getModuleForLocation(SourceLocation Loc,
973 bool AllowTextual) {
974 if (!SourceMgr.isInMainFile(Loc)) {
975 // Try to determine the module of the include directive.
976 // FIXME: Look into directly passing the FileEntry from LookupFile instead.
977 FileID IDOfIncl = SourceMgr.getFileID(SpellingLoc: SourceMgr.getExpansionLoc(Loc));
978 if (auto EntryOfIncl = SourceMgr.getFileEntryRefForID(FID: IDOfIncl)) {
979 // The include comes from an included file.
980 return HeaderInfo.getModuleMap()
981 .findModuleForHeader(File: *EntryOfIncl, AllowTextual)
982 .getModule();
983 }
984 }
985
986 // This is either in the main file or not in a file at all. It belongs
987 // to the current module, if there is one.
988 return getLangOpts().CurrentModule.empty()
989 ? nullptr
990 : HeaderInfo.lookupModule(ModuleName: getLangOpts().CurrentModule, ImportLoc: Loc);
991}
992
993OptionalFileEntryRef
994Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
995 SourceLocation Loc) {
996 Module *IncM = getModuleForLocation(
997 Loc: IncLoc, AllowTextual: LangOpts.ModulesValidateTextualHeaderIncludes);
998
999 // Walk up through the include stack, looking through textual headers of M
1000 // until we hit a non-textual header that we can #include. (We assume textual
1001 // headers of a module with non-textual headers aren't meant to be used to
1002 // import entities from the module.)
1003 auto &SM = getSourceManager();
1004 while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
1005 auto ID = SM.getFileID(SpellingLoc: SM.getExpansionLoc(Loc));
1006 auto FE = SM.getFileEntryRefForID(FID: ID);
1007 if (!FE)
1008 break;
1009
1010 // We want to find all possible modules that might contain this header, so
1011 // search all enclosing directories for module maps and load them.
1012 HeaderInfo.hasModuleMap(Filename: FE->getName(), /*Root*/ nullptr,
1013 IsSystem: SourceMgr.isInSystemHeader(Loc));
1014
1015 bool InPrivateHeader = false;
1016 for (auto Header : HeaderInfo.findAllModulesForHeader(File: *FE)) {
1017 if (!Header.isAccessibleFrom(M: IncM)) {
1018 // It's in a private header; we can't #include it.
1019 // FIXME: If there's a public header in some module that re-exports it,
1020 // then we could suggest including that, but it's not clear that's the
1021 // expected way to make this entity visible.
1022 InPrivateHeader = true;
1023 continue;
1024 }
1025
1026 // Don't suggest explicitly excluded headers.
1027 if (Header.getRole() == ModuleMap::ExcludedHeader)
1028 continue;
1029
1030 // We'll suggest including textual headers below if they're
1031 // include-guarded.
1032 if (Header.getRole() & ModuleMap::TextualHeader)
1033 continue;
1034
1035 // If we have a module import syntax, we shouldn't include a header to
1036 // make a particular module visible. Let the caller know they should
1037 // suggest an import instead.
1038 if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules)
1039 return std::nullopt;
1040
1041 // If this is an accessible, non-textual header of M's top-level module
1042 // that transitively includes the given location and makes the
1043 // corresponding module visible, this is the thing to #include.
1044 return *FE;
1045 }
1046
1047 // FIXME: If we're bailing out due to a private header, we shouldn't suggest
1048 // an import either.
1049 if (InPrivateHeader)
1050 return std::nullopt;
1051
1052 // If the header is includable and has an include guard, assume the
1053 // intended way to expose its contents is by #include, not by importing a
1054 // module that transitively includes it.
1055 if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(File: *FE))
1056 return *FE;
1057
1058 Loc = SM.getIncludeLoc(FID: ID);
1059 }
1060
1061 return std::nullopt;
1062}
1063
1064OptionalFileEntryRef Preprocessor::LookupFile(
1065 SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
1066 ConstSearchDirIterator FromDir, const FileEntry *FromFile,
1067 ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath,
1068 SmallVectorImpl<char> *RelativePath,
1069 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
1070 bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) {
1071 ConstSearchDirIterator CurDirLocal = nullptr;
1072 ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : CurDirLocal;
1073
1074 Module *RequestingModule = getModuleForLocation(
1075 Loc: FilenameLoc, AllowTextual: LangOpts.ModulesValidateTextualHeaderIncludes);
1076
1077 // If the header lookup mechanism may be relative to the current inclusion
1078 // stack, record the parent #includes.
1079 SmallVector<std::pair<OptionalFileEntryRef, DirectoryEntryRef>, 16> Includers;
1080 bool BuildSystemModule = false;
1081 if (!FromDir && !FromFile) {
1082 FileID FID = getCurrentFileLexer()->getFileID();
1083 OptionalFileEntryRef FileEnt = SourceMgr.getFileEntryRefForID(FID);
1084
1085 // If there is no file entry associated with this file, it must be the
1086 // predefines buffer or the module includes buffer. Any other file is not
1087 // lexed with a normal lexer, so it won't be scanned for preprocessor
1088 // directives.
1089 //
1090 // If we have the predefines buffer, resolve #include references (which come
1091 // from the -include command line argument) from the current working
1092 // directory instead of relative to the main file.
1093 //
1094 // If we have the module includes buffer, resolve #include references (which
1095 // come from header declarations in the module map) relative to the module
1096 // map file.
1097 if (!FileEnt) {
1098 if (FID == SourceMgr.getMainFileID() && MainFileDir) {
1099 auto IncludeDir =
1100 HeaderInfo.getModuleMap().shouldImportRelativeToBuiltinIncludeDir(
1101 FileName: Filename, Module: getCurrentModule())
1102 ? HeaderInfo.getModuleMap().getBuiltinDir()
1103 : MainFileDir;
1104 Includers.push_back(Elt: std::make_pair(x: std::nullopt, y&: *IncludeDir));
1105 BuildSystemModule = getCurrentModule()->IsSystem;
1106 } else if ((FileEnt = SourceMgr.getFileEntryRefForID(
1107 FID: SourceMgr.getMainFileID()))) {
1108 auto CWD = FileMgr.getOptionalDirectoryRef(DirName: ".");
1109 Includers.push_back(Elt: std::make_pair(x&: *FileEnt, y&: *CWD));
1110 }
1111 } else {
1112 Includers.push_back(Elt: std::make_pair(x&: *FileEnt, y: FileEnt->getDir()));
1113 }
1114
1115 // MSVC searches the current include stack from top to bottom for
1116 // headers included by quoted include directives.
1117 // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx
1118 if (LangOpts.MSVCCompat && !isAngled) {
1119 for (IncludeStackInfo &ISEntry : llvm::reverse(C&: IncludeMacroStack)) {
1120 if (IsFileLexer(I: ISEntry))
1121 if ((FileEnt = ISEntry.ThePPLexer->getFileEntry()))
1122 Includers.push_back(Elt: std::make_pair(x&: *FileEnt, y: FileEnt->getDir()));
1123 }
1124 }
1125 }
1126
1127 CurDir = CurDirLookup;
1128
1129 if (FromFile) {
1130 // We're supposed to start looking from after a particular file. Search
1131 // the include path until we find that file or run out of files.
1132 ConstSearchDirIterator TmpCurDir = CurDir;
1133 ConstSearchDirIterator TmpFromDir = nullptr;
1134 while (OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1135 Filename, IncludeLoc: FilenameLoc, isAngled, FromDir: TmpFromDir, CurDir: &TmpCurDir,
1136 Includers, SearchPath, RelativePath, RequestingModule,
1137 SuggestedModule, /*IsMapped=*/nullptr,
1138 /*IsFrameworkFound=*/nullptr, SkipCache)) {
1139 // Keep looking as if this file did a #include_next.
1140 TmpFromDir = TmpCurDir;
1141 ++TmpFromDir;
1142 if (&FE->getFileEntry() == FromFile) {
1143 // Found it.
1144 FromDir = TmpFromDir;
1145 CurDir = TmpCurDir;
1146 break;
1147 }
1148 }
1149 }
1150
1151 // Do a standard file entry lookup.
1152 OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1153 Filename, IncludeLoc: FilenameLoc, isAngled, FromDir, CurDir: &CurDir, Includers, SearchPath,
1154 RelativePath, RequestingModule, SuggestedModule, IsMapped,
1155 IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures);
1156 if (FE)
1157 return FE;
1158
1159 OptionalFileEntryRef CurFileEnt;
1160 // Otherwise, see if this is a subframework header. If so, this is relative
1161 // to one of the headers on the #include stack. Walk the list of the current
1162 // headers on the #include stack and pass them to HeaderInfo.
1163 if (IsFileLexer()) {
1164 if ((CurFileEnt = CurPPLexer->getFileEntry())) {
1165 if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1166 Filename, ContextFileEnt: *CurFileEnt, SearchPath, RelativePath, RequestingModule,
1167 SuggestedModule)) {
1168 return FE;
1169 }
1170 }
1171 }
1172
1173 for (IncludeStackInfo &ISEntry : llvm::reverse(C&: IncludeMacroStack)) {
1174 if (IsFileLexer(I: ISEntry)) {
1175 if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {
1176 if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1177 Filename, ContextFileEnt: *CurFileEnt, SearchPath, RelativePath,
1178 RequestingModule, SuggestedModule)) {
1179 return FE;
1180 }
1181 }
1182 }
1183 }
1184
1185 // Otherwise, we really couldn't find the file.
1186 return std::nullopt;
1187}
1188
1189OptionalFileEntryRef Preprocessor::LookupEmbedFile(StringRef Filename,
1190 bool isAngled,
1191 bool OpenFile) {
1192 FileManager &FM = this->getFileManager();
1193 if (llvm::sys::path::is_absolute(path: Filename)) {
1194 // lookup path or immediately fail
1195 llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1196 Filename, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1197 return llvm::expectedToOptional(E: std::move(ShouldBeEntry));
1198 }
1199
1200 auto SeparateComponents = [](SmallVectorImpl<char> &LookupPath,
1201 StringRef StartingFrom, StringRef FileName,
1202 bool RemoveInitialFileComponentFromLookupPath) {
1203 llvm::sys::path::native(path: StartingFrom, result&: LookupPath);
1204 if (RemoveInitialFileComponentFromLookupPath)
1205 llvm::sys::path::remove_filename(path&: LookupPath);
1206 if (!LookupPath.empty() &&
1207 !llvm::sys::path::is_separator(value: LookupPath.back())) {
1208 LookupPath.push_back(Elt: llvm::sys::path::get_separator().front());
1209 }
1210 LookupPath.append(in_start: FileName.begin(), in_end: FileName.end());
1211 };
1212
1213 // Otherwise, it's search time!
1214 SmallString<512> LookupPath;
1215 // Non-angled lookup
1216 if (!isAngled) {
1217 OptionalFileEntryRef LookupFromFile = getCurrentFileLexer()->getFileEntry();
1218 if (LookupFromFile) {
1219 // Use file-based lookup.
1220 SmallString<1024> TmpDir;
1221 TmpDir = LookupFromFile->getDir().getName();
1222 llvm::sys::path::append(path&: TmpDir, a: Filename);
1223 if (!TmpDir.empty()) {
1224 llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1225 Filename: TmpDir, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1226 if (ShouldBeEntry)
1227 return llvm::expectedToOptional(E: std::move(ShouldBeEntry));
1228 llvm::consumeError(Err: ShouldBeEntry.takeError());
1229 }
1230 }
1231
1232 // Otherwise, do working directory lookup.
1233 LookupPath.clear();
1234 auto MaybeWorkingDirEntry = FM.getDirectoryRef(DirName: ".");
1235 if (MaybeWorkingDirEntry) {
1236 DirectoryEntryRef WorkingDirEntry = *MaybeWorkingDirEntry;
1237 StringRef WorkingDir = WorkingDirEntry.getName();
1238 if (!WorkingDir.empty()) {
1239 SeparateComponents(LookupPath, WorkingDir, Filename, false);
1240 llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1241 Filename: LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1242 if (ShouldBeEntry)
1243 return llvm::expectedToOptional(E: std::move(ShouldBeEntry));
1244 llvm::consumeError(Err: ShouldBeEntry.takeError());
1245 }
1246 }
1247 }
1248
1249 for (const auto &Entry : PPOpts.EmbedEntries) {
1250 LookupPath.clear();
1251 SeparateComponents(LookupPath, Entry, Filename, false);
1252 llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1253 Filename: LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1254 if (ShouldBeEntry)
1255 return llvm::expectedToOptional(E: std::move(ShouldBeEntry));
1256 llvm::consumeError(Err: ShouldBeEntry.takeError());
1257 }
1258 return std::nullopt;
1259}
1260
1261//===----------------------------------------------------------------------===//
1262// Preprocessor Directive Handling.
1263//===----------------------------------------------------------------------===//
1264
1265class Preprocessor::ResetMacroExpansionHelper {
1266public:
1267 ResetMacroExpansionHelper(Preprocessor *pp)
1268 : PP(pp), save(pp->DisableMacroExpansion) {
1269 if (pp->MacroExpansionInDirectivesOverride)
1270 pp->DisableMacroExpansion = false;
1271 }
1272
1273 ~ResetMacroExpansionHelper() {
1274 PP->DisableMacroExpansion = save;
1275 }
1276
1277private:
1278 Preprocessor *PP;
1279 bool save;
1280};
1281
1282/// Process a directive while looking for the through header or a #pragma
1283/// hdrstop. The following directives are handled:
1284/// #include (to check if it is the through header)
1285/// #define (to warn about macros that don't match the PCH)
1286/// #pragma (to check for pragma hdrstop).
1287/// All other directives are completely discarded.
1288void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1289 SourceLocation HashLoc) {
1290 if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
1291 if (II->getPPKeywordID() == tok::pp_define) {
1292 return HandleDefineDirective(Tok&: Result,
1293 /*ImmediatelyAfterHeaderGuard=*/false);
1294 }
1295 if (SkippingUntilPCHThroughHeader &&
1296 II->getPPKeywordID() == tok::pp_include) {
1297 return HandleIncludeDirective(HashLoc, Tok&: Result);
1298 }
1299 if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
1300 Lex(Result);
1301 auto *II = Result.getIdentifierInfo();
1302 if (II && II->getName() == "hdrstop")
1303 return HandlePragmaHdrstop(Tok&: Result);
1304 }
1305 }
1306 DiscardUntilEndOfDirective();
1307}
1308
1309/// HandleDirective - This callback is invoked when the lexer sees a # token
1310/// at the start of a line. This consumes the directive, modifies the
1311/// lexer/preprocessor state, and advances the lexer(s) so that the next token
1312/// read is the correct one.
1313void Preprocessor::HandleDirective(Token &Result) {
1314 // FIXME: Traditional: # with whitespace before it not recognized by K&R?
1315
1316 // We just parsed a # character at the start of a line, so we're in directive
1317 // mode. Tell the lexer this so any newlines we see will be converted into an
1318 // EOD token (which terminates the directive).
1319 CurPPLexer->ParsingPreprocessorDirective = true;
1320 if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
1321
1322 bool ImmediatelyAfterTopLevelIfndef =
1323 CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();
1324 CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();
1325
1326 ++NumDirectives;
1327
1328 // We are about to read a token. For the multiple-include optimization FA to
1329 // work, we have to remember if we had read any tokens *before* this
1330 // pp-directive.
1331 bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
1332
1333 // Save the directive-introducing token('#' and import/module in C++20) in
1334 // case we need to return it later.
1335 Token Introducer = Result;
1336
1337 // Read the next token, the directive flavor. This isn't expanded due to
1338 // C99 6.10.3p8.
1339 if (Introducer.is(K: tok::hash))
1340 LexUnexpandedToken(Result);
1341
1342 // C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.:
1343 // #define A(x) #x
1344 // A(abc
1345 // #warning blah
1346 // def)
1347 // If so, the user is relying on undefined behavior, emit a diagnostic. Do
1348 // not support this for #include-like directives, since that can result in
1349 // terrible diagnostics, and does not work in GCC.
1350 if (InMacroArgs) {
1351 if (IdentifierInfo *II = Result.getIdentifierInfo()) {
1352 switch (II->getPPKeywordID()) {
1353 case tok::pp_include:
1354 case tok::pp_import:
1355 case tok::pp_include_next:
1356 case tok::pp___include_macros:
1357 case tok::pp_pragma:
1358 case tok::pp_embed:
1359 case tok::pp_module:
1360 case tok::pp___preprocessed_module:
1361 case tok::pp___preprocessed_import:
1362 Diag(Tok: Result, DiagID: diag::err_embedded_directive)
1363 << (getLangOpts().CPlusPlusModules &&
1364 Introducer.isModuleContextualKeyword(
1365 /*AllowExport=*/false))
1366 << II->getName();
1367 Diag(Tok: *ArgMacro, DiagID: diag::note_macro_expansion_here)
1368 << ArgMacro->getIdentifierInfo();
1369 DiscardUntilEndOfDirective();
1370 return;
1371 default:
1372 break;
1373 }
1374 }
1375 Diag(Tok: Result, DiagID: diag::ext_embedded_directive);
1376 }
1377
1378 // Temporarily enable macro expansion if set so
1379 // and reset to previous state when returning from this function.
1380 ResetMacroExpansionHelper helper(this);
1381
1382 if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)
1383 return HandleSkippedDirectiveWhileUsingPCH(Result,
1384 HashLoc: Introducer.getLocation());
1385
1386 switch (Result.getKind()) {
1387 case tok::eod:
1388 // Ignore the null directive with regards to the multiple-include
1389 // optimization, i.e. allow the null directive to appear outside of the
1390 // include guard and still enable the multiple-include optimization.
1391 CurPPLexer->MIOpt.SetReadToken(ReadAnyTokensBeforeDirective);
1392 return; // null directive.
1393 case tok::code_completion:
1394 setCodeCompletionReached();
1395 if (CodeComplete)
1396 CodeComplete->CodeCompleteDirective(
1397 InConditional: CurPPLexer->getConditionalStackDepth() > 0);
1398 return;
1399 case tok::numeric_constant: // # 7 GNU line marker directive.
1400 // In a .S file "# 4" may be a comment so don't treat it as a preprocessor
1401 // directive. However do permit it in the predefines file, as we use line
1402 // markers to mark the builtin macros as being in a system header.
1403 if (getLangOpts().AsmPreprocessor &&
1404 SourceMgr.getFileID(SpellingLoc: Introducer.getLocation()) != getPredefinesFileID())
1405 break;
1406 return HandleDigitDirective(Tok&: Result);
1407 default:
1408 IdentifierInfo *II = Result.getIdentifierInfo();
1409 if (!II) break; // Not an identifier.
1410
1411 // Ask what the preprocessor keyword ID is.
1412 switch (II->getPPKeywordID()) {
1413 default: break;
1414 // C99 6.10.1 - Conditional Inclusion.
1415 case tok::pp_if:
1416 return HandleIfDirective(IfToken&: Result, HashToken: Introducer,
1417 ReadAnyTokensBeforeDirective);
1418 case tok::pp_ifdef:
1419 return HandleIfdefDirective(Result, HashToken: Introducer, isIfndef: false,
1420 ReadAnyTokensBeforeDirective: true /*not valid for miopt*/);
1421 case tok::pp_ifndef:
1422 return HandleIfdefDirective(Result, HashToken: Introducer, isIfndef: true,
1423 ReadAnyTokensBeforeDirective);
1424 case tok::pp_elif:
1425 case tok::pp_elifdef:
1426 case tok::pp_elifndef:
1427 return HandleElifFamilyDirective(ElifToken&: Result, HashToken: Introducer,
1428 Kind: II->getPPKeywordID());
1429
1430 case tok::pp_else:
1431 return HandleElseDirective(Result, HashToken: Introducer);
1432 case tok::pp_endif:
1433 return HandleEndifDirective(EndifToken&: Result);
1434
1435 // C99 6.10.2 - Source File Inclusion.
1436 case tok::pp_include:
1437 // Handle #include.
1438 return HandleIncludeDirective(HashLoc: Introducer.getLocation(), Tok&: Result);
1439 case tok::pp___include_macros:
1440 // Handle -imacros.
1441 return HandleIncludeMacrosDirective(HashLoc: Introducer.getLocation(), Tok&: Result);
1442
1443 // C99 6.10.3 - Macro Replacement.
1444 case tok::pp_define:
1445 return HandleDefineDirective(Tok&: Result, ImmediatelyAfterHeaderGuard: ImmediatelyAfterTopLevelIfndef);
1446 case tok::pp_undef:
1447 return HandleUndefDirective();
1448
1449 // C99 6.10.4 - Line Control.
1450 case tok::pp_line:
1451 return HandleLineDirective();
1452
1453 // C99 6.10.5 - Error Directive.
1454 case tok::pp_error:
1455 return HandleUserDiagnosticDirective(Tok&: Result, isWarning: false);
1456
1457 // C99 6.10.6 - Pragma Directive.
1458 case tok::pp_pragma:
1459 return HandlePragmaDirective(Introducer: {.Kind: PIK_HashPragma, .Loc: Introducer.getLocation()});
1460 case tok::pp_module:
1461 case tok::pp___preprocessed_module:
1462 return HandleCXXModuleDirective(Module: Result);
1463 case tok::pp___preprocessed_import:
1464 return HandleCXXImportDirective(Import: Result);
1465 // GNU Extensions.
1466 case tok::pp_import:
1467 if (getLangOpts().CPlusPlusModules &&
1468 Introducer.isModuleContextualKeyword(
1469 /*AllowExport=*/false))
1470 return HandleCXXImportDirective(Import: Result);
1471 return HandleImportDirective(HashLoc: Introducer.getLocation(), Tok&: Result);
1472 case tok::pp_include_next:
1473 return HandleIncludeNextDirective(HashLoc: Introducer.getLocation(), Tok&: Result);
1474
1475 case tok::pp_warning:
1476 if (LangOpts.CPlusPlus)
1477 Diag(Tok: Result, DiagID: LangOpts.CPlusPlus23
1478 ? diag::warn_cxx23_compat_warning_directive
1479 : diag::ext_pp_warning_directive)
1480 << /*C++23*/ 1;
1481 else
1482 Diag(Tok: Result, DiagID: LangOpts.C23 ? diag::warn_c23_compat_warning_directive
1483 : diag::ext_pp_warning_directive)
1484 << /*C23*/ 0;
1485
1486 return HandleUserDiagnosticDirective(Tok&: Result, isWarning: true);
1487 case tok::pp_ident:
1488 return HandleIdentSCCSDirective(Tok&: Result);
1489 case tok::pp_sccs:
1490 return HandleIdentSCCSDirective(Tok&: Result);
1491 case tok::pp_embed:
1492 return HandleEmbedDirective(HashLoc: Introducer.getLocation(), Tok&: Result);
1493 case tok::pp_assert:
1494 //isExtension = true; // FIXME: implement #assert
1495 break;
1496 case tok::pp_unassert:
1497 //isExtension = true; // FIXME: implement #unassert
1498 break;
1499
1500 case tok::pp___public_macro:
1501 if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1502 return HandleMacroPublicDirective(Tok&: Result);
1503 break;
1504
1505 case tok::pp___private_macro:
1506 if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1507 return HandleMacroPrivateDirective();
1508 break;
1509 }
1510 break;
1511 }
1512
1513 // If this is a .S file, treat unknown # directives as non-preprocessor
1514 // directives. This is important because # may be a comment or introduce
1515 // various pseudo-ops. Just return the # token and push back the following
1516 // token to be lexed next time.
1517 if (getLangOpts().AsmPreprocessor) {
1518 auto Toks = std::make_unique<Token[]>(num: 2);
1519 // Return the # and the token after it.
1520 Toks[0] = Introducer;
1521 Toks[1] = Result;
1522
1523 // If the second token is a hashhash token, then we need to translate it to
1524 // unknown so the token lexer doesn't try to perform token pasting.
1525 if (Result.is(K: tok::hashhash))
1526 Toks[1].setKind(tok::unknown);
1527
1528 // Enter this token stream so that we re-lex the tokens. Make sure to
1529 // enable macro expansion, in case the token after the # is an identifier
1530 // that is expanded.
1531 EnterTokenStream(Toks: std::move(Toks), NumToks: 2, DisableMacroExpansion: false, /*IsReinject*/false);
1532 return;
1533 }
1534
1535 // If we reached here, the preprocessing token is not valid!
1536 // Start suggesting if a similar directive found.
1537 Diag(Tok: Result, DiagID: diag::err_pp_invalid_directive) << 0;
1538
1539 // Read the rest of the PP line.
1540 DiscardUntilEndOfDirective();
1541
1542 // Okay, we're done parsing the directive.
1543}
1544
1545/// GetLineValue - Convert a numeric token into an unsigned value, emitting
1546/// Diagnostic DiagID if it is invalid, and returning the value in Val.
1547static bool GetLineValue(Token &DigitTok, unsigned &Val,
1548 unsigned DiagID, Preprocessor &PP,
1549 bool IsGNULineDirective=false) {
1550 if (DigitTok.isNot(K: tok::numeric_constant)) {
1551 PP.Diag(Tok: DigitTok, DiagID);
1552
1553 if (DigitTok.isNot(K: tok::eod))
1554 PP.DiscardUntilEndOfDirective();
1555 return true;
1556 }
1557
1558 SmallString<64> IntegerBuffer;
1559 IntegerBuffer.resize(N: DigitTok.getLength());
1560 const char *DigitTokBegin = &IntegerBuffer[0];
1561 bool Invalid = false;
1562 unsigned ActualLength = PP.getSpelling(Tok: DigitTok, Buffer&: DigitTokBegin, Invalid: &Invalid);
1563 if (Invalid)
1564 return true;
1565
1566 // Verify that we have a simple digit-sequence, and compute the value. This
1567 // is always a simple digit string computed in decimal, so we do this manually
1568 // here.
1569 Val = 0;
1570 for (unsigned i = 0; i != ActualLength; ++i) {
1571 // C++1y [lex.fcon]p1:
1572 // Optional separating single quotes in a digit-sequence are ignored
1573 if (DigitTokBegin[i] == '\'')
1574 continue;
1575
1576 if (!isDigit(c: DigitTokBegin[i])) {
1577 PP.Diag(Loc: PP.AdvanceToTokenCharacter(TokStart: DigitTok.getLocation(), Char: i),
1578 DiagID: diag::err_pp_line_digit_sequence) << IsGNULineDirective;
1579 PP.DiscardUntilEndOfDirective();
1580 return true;
1581 }
1582
1583 unsigned NextVal = Val*10+(DigitTokBegin[i]-'0');
1584 if (NextVal < Val) { // overflow.
1585 PP.Diag(Tok: DigitTok, DiagID);
1586 PP.DiscardUntilEndOfDirective();
1587 return true;
1588 }
1589 Val = NextVal;
1590 }
1591
1592 if (DigitTokBegin[0] == '0' && Val)
1593 PP.Diag(Loc: DigitTok.getLocation(), DiagID: diag::warn_pp_line_decimal)
1594 << IsGNULineDirective;
1595
1596 return false;
1597}
1598
1599/// Handle a \#line directive: C99 6.10.4.
1600///
1601/// The two acceptable forms are:
1602/// \verbatim
1603/// # line digit-sequence
1604/// # line digit-sequence "s-char-sequence"
1605/// \endverbatim
1606void Preprocessor::HandleLineDirective() {
1607 // Read the line # and string argument. Per C99 6.10.4p5, these tokens are
1608 // expanded.
1609 Token DigitTok;
1610 Lex(Result&: DigitTok);
1611
1612 // Validate the number and convert it to an unsigned.
1613 unsigned LineNo;
1614 if (GetLineValue(DigitTok, Val&: LineNo, DiagID: diag::err_pp_line_requires_integer,PP&: *this))
1615 return;
1616
1617 if (LineNo == 0)
1618 Diag(Tok: DigitTok, DiagID: diag::ext_pp_line_zero);
1619
1620 // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
1621 // number greater than 2147483647". C90 requires that the line # be <= 32767.
1622 unsigned LineLimit = 32768U;
1623 if (LangOpts.C99 || LangOpts.CPlusPlus11)
1624 LineLimit = 2147483648U;
1625 if (LineNo >= LineLimit)
1626 Diag(Tok: DigitTok, DiagID: diag::ext_pp_line_too_big) << LineLimit;
1627 else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
1628 Diag(Tok: DigitTok, DiagID: diag::warn_cxx98_compat_pp_line_too_big);
1629
1630 int FilenameID = -1;
1631 Token StrTok;
1632 Lex(Result&: StrTok);
1633
1634 // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a
1635 // string followed by eod.
1636 if (StrTok.is(K: tok::eod))
1637 ; // ok
1638 else if (StrTok.isNot(K: tok::string_literal)) {
1639 Diag(Tok: StrTok, DiagID: diag::err_pp_line_invalid_filename);
1640 DiscardUntilEndOfDirective();
1641 return;
1642 } else if (StrTok.hasUDSuffix()) {
1643 Diag(Tok: StrTok, DiagID: diag::err_invalid_string_udl);
1644 DiscardUntilEndOfDirective();
1645 return;
1646 } else {
1647 // Parse and validate the string, converting it into a unique ID.
1648 StringLiteralParser Literal(StrTok, *this);
1649 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1650 if (Literal.hadError) {
1651 DiscardUntilEndOfDirective();
1652 return;
1653 }
1654 if (Literal.Pascal) {
1655 Diag(Tok: StrTok, DiagID: diag::err_pp_linemarker_invalid_filename);
1656 DiscardUntilEndOfDirective();
1657 return;
1658 }
1659 FilenameID = SourceMgr.getLineTableFilenameID(Str: Literal.GetString());
1660
1661 // Verify that there is nothing after the string, other than EOD. Because
1662 // of C99 6.10.4p5, macros that expand to empty tokens are ok.
1663 CheckEndOfDirective(DirType: "line", EnableMacros: true);
1664 }
1665
1666 // Take the file kind of the file containing the #line directive. #line
1667 // directives are often used for generated sources from the same codebase, so
1668 // the new file should generally be classified the same way as the current
1669 // file. This is visible in GCC's pre-processed output, which rewrites #line
1670 // to GNU line markers.
1671 SrcMgr::CharacteristicKind FileKind =
1672 SourceMgr.getFileCharacteristic(Loc: DigitTok.getLocation());
1673
1674 SourceMgr.AddLineNote(Loc: DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry: false,
1675 IsFileExit: false, FileKind);
1676
1677 if (Callbacks)
1678 Callbacks->FileChanged(Loc: CurPPLexer->getSourceLocation(),
1679 Reason: PPCallbacks::RenameFile, FileType: FileKind);
1680}
1681
1682/// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
1683/// marker directive.
1684static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
1685 SrcMgr::CharacteristicKind &FileKind,
1686 Preprocessor &PP) {
1687 unsigned FlagVal;
1688 Token FlagTok;
1689 PP.Lex(Result&: FlagTok);
1690 if (FlagTok.is(K: tok::eod)) return false;
1691 if (GetLineValue(DigitTok&: FlagTok, Val&: FlagVal, DiagID: diag::err_pp_linemarker_invalid_flag, PP))
1692 return true;
1693
1694 if (FlagVal == 1) {
1695 IsFileEntry = true;
1696
1697 PP.Lex(Result&: FlagTok);
1698 if (FlagTok.is(K: tok::eod)) return false;
1699 if (GetLineValue(DigitTok&: FlagTok, Val&: FlagVal, DiagID: diag::err_pp_linemarker_invalid_flag,PP))
1700 return true;
1701 } else if (FlagVal == 2) {
1702 IsFileExit = true;
1703
1704 SourceManager &SM = PP.getSourceManager();
1705 // If we are leaving the current presumed file, check to make sure the
1706 // presumed include stack isn't empty!
1707 FileID CurFileID =
1708 SM.getDecomposedExpansionLoc(Loc: FlagTok.getLocation()).first;
1709 PresumedLoc PLoc = SM.getPresumedLoc(Loc: FlagTok.getLocation());
1710 if (PLoc.isInvalid())
1711 return true;
1712
1713 // If there is no include loc (main file) or if the include loc is in a
1714 // different physical file, then we aren't in a "1" line marker flag region.
1715 SourceLocation IncLoc = PLoc.getIncludeLoc();
1716 if (IncLoc.isInvalid() ||
1717 SM.getDecomposedExpansionLoc(Loc: IncLoc).first != CurFileID) {
1718 PP.Diag(Tok: FlagTok, DiagID: diag::err_pp_linemarker_invalid_pop);
1719 PP.DiscardUntilEndOfDirective();
1720 return true;
1721 }
1722
1723 PP.Lex(Result&: FlagTok);
1724 if (FlagTok.is(K: tok::eod)) return false;
1725 if (GetLineValue(DigitTok&: FlagTok, Val&: FlagVal, DiagID: diag::err_pp_linemarker_invalid_flag,PP))
1726 return true;
1727 }
1728
1729 // We must have 3 if there are still flags.
1730 if (FlagVal != 3) {
1731 PP.Diag(Tok: FlagTok, DiagID: diag::err_pp_linemarker_invalid_flag);
1732 PP.DiscardUntilEndOfDirective();
1733 return true;
1734 }
1735
1736 FileKind = SrcMgr::C_System;
1737
1738 PP.Lex(Result&: FlagTok);
1739 if (FlagTok.is(K: tok::eod)) return false;
1740 if (GetLineValue(DigitTok&: FlagTok, Val&: FlagVal, DiagID: diag::err_pp_linemarker_invalid_flag, PP))
1741 return true;
1742
1743 // We must have 4 if there is yet another flag.
1744 if (FlagVal != 4) {
1745 PP.Diag(Tok: FlagTok, DiagID: diag::err_pp_linemarker_invalid_flag);
1746 PP.DiscardUntilEndOfDirective();
1747 return true;
1748 }
1749
1750 FileKind = SrcMgr::C_ExternCSystem;
1751
1752 PP.Lex(Result&: FlagTok);
1753 if (FlagTok.is(K: tok::eod)) return false;
1754
1755 // There are no more valid flags here.
1756 PP.Diag(Tok: FlagTok, DiagID: diag::err_pp_linemarker_invalid_flag);
1757 PP.DiscardUntilEndOfDirective();
1758 return true;
1759}
1760
1761/// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
1762/// one of the following forms:
1763///
1764/// # 42
1765/// # 42 "file" ('1' | '2')?
1766/// # 42 "file" ('1' | '2')? '3' '4'?
1767///
1768void Preprocessor::HandleDigitDirective(Token &DigitTok) {
1769 // Validate the number and convert it to an unsigned. GNU does not have a
1770 // line # limit other than it fit in 32-bits.
1771 unsigned LineNo;
1772 if (GetLineValue(DigitTok, Val&: LineNo, DiagID: diag::err_pp_linemarker_requires_integer,
1773 PP&: *this, IsGNULineDirective: true))
1774 return;
1775
1776 Token StrTok;
1777 Lex(Result&: StrTok);
1778
1779 bool IsFileEntry = false, IsFileExit = false;
1780 int FilenameID = -1;
1781 SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
1782
1783 // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a
1784 // string followed by eod.
1785 if (StrTok.is(K: tok::eod)) {
1786 Diag(Tok: StrTok, DiagID: diag::ext_pp_gnu_line_directive);
1787 // Treat this like "#line NN", which doesn't change file characteristics.
1788 FileKind = SourceMgr.getFileCharacteristic(Loc: DigitTok.getLocation());
1789 } else if (StrTok.isNot(K: tok::string_literal)) {
1790 Diag(Tok: StrTok, DiagID: diag::err_pp_linemarker_invalid_filename);
1791 DiscardUntilEndOfDirective();
1792 return;
1793 } else if (StrTok.hasUDSuffix()) {
1794 Diag(Tok: StrTok, DiagID: diag::err_invalid_string_udl);
1795 DiscardUntilEndOfDirective();
1796 return;
1797 } else {
1798 // Parse and validate the string, converting it into a unique ID.
1799 StringLiteralParser Literal(StrTok, *this);
1800 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1801 if (Literal.hadError) {
1802 DiscardUntilEndOfDirective();
1803 return;
1804 }
1805 if (Literal.Pascal) {
1806 Diag(Tok: StrTok, DiagID: diag::err_pp_linemarker_invalid_filename);
1807 DiscardUntilEndOfDirective();
1808 return;
1809 }
1810
1811 // If a filename was present, read any flags that are present.
1812 if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, PP&: *this))
1813 return;
1814 if (!SourceMgr.isInPredefinedFile(Loc: DigitTok.getLocation()))
1815 Diag(Tok: StrTok, DiagID: diag::ext_pp_gnu_line_directive);
1816
1817 // Exiting to an empty string means pop to the including file, so leave
1818 // FilenameID as -1 in that case.
1819 if (!(IsFileExit && Literal.GetString().empty()))
1820 FilenameID = SourceMgr.getLineTableFilenameID(Str: Literal.GetString());
1821 }
1822
1823 // Create a line note with this information.
1824 SourceMgr.AddLineNote(Loc: DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
1825 IsFileExit, FileKind);
1826
1827 // If the preprocessor has callbacks installed, notify them of the #line
1828 // change. This is used so that the line marker comes out in -E mode for
1829 // example.
1830 if (Callbacks) {
1831 PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
1832 if (IsFileEntry)
1833 Reason = PPCallbacks::EnterFile;
1834 else if (IsFileExit)
1835 Reason = PPCallbacks::ExitFile;
1836
1837 Callbacks->FileChanged(Loc: CurPPLexer->getSourceLocation(), Reason, FileType: FileKind);
1838 }
1839}
1840
1841/// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
1842///
1843void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
1844 bool isWarning) {
1845 // Read the rest of the line raw. We do this because we don't want macros
1846 // to be expanded and we don't require that the tokens be valid preprocessing
1847 // tokens. For example, this is allowed: "#warning ` 'foo". GCC does
1848 // collapse multiple consecutive white space between tokens, but this isn't
1849 // specified by the standard.
1850 SmallString<128> Message;
1851 CurLexer->ReadToEndOfLine(Result: &Message);
1852
1853 // Find the first non-whitespace character, so that we can make the
1854 // diagnostic more succinct.
1855 StringRef Msg = Message.str().ltrim(Char: ' ');
1856
1857 if (isWarning)
1858 Diag(Tok, DiagID: diag::pp_hash_warning) << Msg;
1859 else
1860 Diag(Tok, DiagID: diag::err_pp_hash_error) << Msg;
1861}
1862
1863/// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
1864///
1865void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
1866 // Yes, this directive is an extension.
1867 Diag(Tok, DiagID: diag::ext_pp_ident_directive);
1868
1869 // Read the string argument.
1870 Token StrTok;
1871 Lex(Result&: StrTok);
1872
1873 // If the token kind isn't a string, it's a malformed directive.
1874 if (StrTok.isNot(K: tok::string_literal) &&
1875 StrTok.isNot(K: tok::wide_string_literal)) {
1876 Diag(Tok: StrTok, DiagID: diag::err_pp_malformed_ident);
1877 if (StrTok.isNot(K: tok::eod))
1878 DiscardUntilEndOfDirective();
1879 return;
1880 }
1881
1882 if (StrTok.hasUDSuffix()) {
1883 Diag(Tok: StrTok, DiagID: diag::err_invalid_string_udl);
1884 DiscardUntilEndOfDirective();
1885 return;
1886 }
1887
1888 // Verify that there is nothing after the string, other than EOD.
1889 CheckEndOfDirective(DirType: "ident");
1890
1891 if (Callbacks) {
1892 bool Invalid = false;
1893 std::string Str = getSpelling(Tok: StrTok, Invalid: &Invalid);
1894 if (!Invalid)
1895 Callbacks->Ident(Loc: Tok.getLocation(), str: Str);
1896 }
1897}
1898
1899/// Handle a #public directive.
1900void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
1901 Token MacroNameTok;
1902 ReadMacroName(MacroNameTok, isDefineUndef: MU_Undef);
1903
1904 // Error reading macro name? If so, diagnostic already issued.
1905 if (MacroNameTok.is(K: tok::eod))
1906 return;
1907
1908 // Check to see if this is the last token on the #__public_macro line.
1909 CheckEndOfDirective(DirType: "__public_macro");
1910
1911 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1912 // Okay, we finally have a valid identifier to undef.
1913 MacroDirective *MD = getLocalMacroDirective(II);
1914
1915 // If the macro is not defined, this is an error.
1916 if (!MD) {
1917 Diag(Tok: MacroNameTok, DiagID: diag::err_pp_visibility_non_macro) << II;
1918 return;
1919 }
1920
1921 // Note that this macro has now been exported.
1922 appendMacroDirective(II, MD: AllocateVisibilityMacroDirective(
1923 Loc: MacroNameTok.getLocation(), /*isPublic=*/true));
1924}
1925
1926/// Handle a #private directive.
1927void Preprocessor::HandleMacroPrivateDirective() {
1928 Token MacroNameTok;
1929 ReadMacroName(MacroNameTok, isDefineUndef: MU_Undef);
1930
1931 // Error reading macro name? If so, diagnostic already issued.
1932 if (MacroNameTok.is(K: tok::eod))
1933 return;
1934
1935 // Check to see if this is the last token on the #__private_macro line.
1936 CheckEndOfDirective(DirType: "__private_macro");
1937
1938 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1939 // Okay, we finally have a valid identifier to undef.
1940 MacroDirective *MD = getLocalMacroDirective(II);
1941
1942 // If the macro is not defined, this is an error.
1943 if (!MD) {
1944 Diag(Tok: MacroNameTok, DiagID: diag::err_pp_visibility_non_macro) << II;
1945 return;
1946 }
1947
1948 // Note that this macro has now been marked private.
1949 appendMacroDirective(II, MD: AllocateVisibilityMacroDirective(
1950 Loc: MacroNameTok.getLocation(), /*isPublic=*/false));
1951}
1952
1953//===----------------------------------------------------------------------===//
1954// Preprocessor Include Directive Handling.
1955//===----------------------------------------------------------------------===//
1956
1957/// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
1958/// checked and spelled filename, e.g. as an operand of \#include. This returns
1959/// true if the input filename was in <>'s or false if it were in ""'s. The
1960/// caller is expected to provide a buffer that is large enough to hold the
1961/// spelling of the filename, but is also expected to handle the case when
1962/// this method decides to use a different buffer.
1963bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
1964 StringRef &Buffer) {
1965 // Get the text form of the filename.
1966 assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
1967
1968 // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and
1969 // C++20 [lex.header]/2:
1970 //
1971 // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then
1972 // in C: behavior is undefined
1973 // in C++: program is conditionally-supported with implementation-defined
1974 // semantics
1975
1976 // Make sure the filename is <x> or "x".
1977 bool isAngled;
1978 if (Buffer[0] == '<') {
1979 if (Buffer.back() != '>') {
1980 Diag(Loc, DiagID: diag::err_pp_expects_filename);
1981 Buffer = StringRef();
1982 return true;
1983 }
1984 isAngled = true;
1985 } else if (Buffer[0] == '"') {
1986 if (Buffer.back() != '"') {
1987 Diag(Loc, DiagID: diag::err_pp_expects_filename);
1988 Buffer = StringRef();
1989 return true;
1990 }
1991 isAngled = false;
1992 } else {
1993 Diag(Loc, DiagID: diag::err_pp_expects_filename);
1994 Buffer = StringRef();
1995 return true;
1996 }
1997
1998 // Diagnose #include "" as invalid.
1999 if (Buffer.size() <= 2) {
2000 Diag(Loc, DiagID: diag::err_pp_empty_filename);
2001 Buffer = StringRef();
2002 return true;
2003 }
2004
2005 // Skip the brackets.
2006 Buffer = Buffer.substr(Start: 1, N: Buffer.size()-2);
2007 return isAngled;
2008}
2009
2010/// Push a token onto the token stream containing an annotation.
2011void Preprocessor::EnterAnnotationToken(SourceRange Range,
2012 tok::TokenKind Kind,
2013 void *AnnotationVal) {
2014 // FIXME: Produce this as the current token directly, rather than
2015 // allocating a new token for it.
2016 auto Tok = std::make_unique<Token[]>(num: 1);
2017 Tok[0].startToken();
2018 Tok[0].setKind(Kind);
2019 Tok[0].setLocation(Range.getBegin());
2020 Tok[0].setAnnotationEndLoc(Range.getEnd());
2021 Tok[0].setAnnotationValue(AnnotationVal);
2022 EnterTokenStream(Toks: std::move(Tok), NumToks: 1, DisableMacroExpansion: true, /*IsReinject*/ false);
2023}
2024
2025/// Produce a diagnostic informing the user that a #include or similar
2026/// was implicitly treated as a module import.
2027static void diagnoseAutoModuleImport(Preprocessor &PP, SourceLocation HashLoc,
2028 Token &IncludeTok,
2029 ArrayRef<IdentifierLoc> Path,
2030 SourceLocation PathEnd) {
2031 SmallString<128> PathString;
2032 for (size_t I = 0, N = Path.size(); I != N; ++I) {
2033 if (I)
2034 PathString += '.';
2035 PathString += Path[I].getIdentifierInfo()->getName();
2036 }
2037
2038 int IncludeKind = 0;
2039 switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
2040 case tok::pp_include:
2041 IncludeKind = 0;
2042 break;
2043
2044 case tok::pp_import:
2045 IncludeKind = 1;
2046 break;
2047
2048 case tok::pp_include_next:
2049 IncludeKind = 2;
2050 break;
2051
2052 case tok::pp___include_macros:
2053 IncludeKind = 3;
2054 break;
2055
2056 default:
2057 llvm_unreachable("unknown include directive kind");
2058 }
2059
2060 PP.Diag(Loc: HashLoc, DiagID: diag::remark_pp_include_directive_modular_translation)
2061 << IncludeKind << PathString;
2062}
2063
2064// Given a vector of path components and a string containing the real
2065// path to the file, build a properly-cased replacement in the vector,
2066// and return true if the replacement should be suggested.
2067static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
2068 StringRef RealPathName,
2069 llvm::sys::path::Style Separator) {
2070 auto RealPathComponentIter = llvm::sys::path::rbegin(path: RealPathName);
2071 auto RealPathComponentEnd = llvm::sys::path::rend(path: RealPathName);
2072 int Cnt = 0;
2073 bool SuggestReplacement = false;
2074
2075 auto IsSep = [Separator](StringRef Component) {
2076 return Component.size() == 1 &&
2077 llvm::sys::path::is_separator(value: Component[0], style: Separator);
2078 };
2079
2080 // Below is a best-effort to handle ".." in paths. It is admittedly
2081 // not 100% correct in the presence of symlinks.
2082 for (auto &Component : llvm::reverse(C&: Components)) {
2083 if ("." == Component) {
2084 } else if (".." == Component) {
2085 ++Cnt;
2086 } else if (Cnt) {
2087 --Cnt;
2088 } else if (RealPathComponentIter != RealPathComponentEnd) {
2089 if (!IsSep(Component) && !IsSep(*RealPathComponentIter) &&
2090 Component != *RealPathComponentIter) {
2091 // If these non-separator path components differ by more than just case,
2092 // then we may be looking at symlinked paths. Bail on this diagnostic to
2093 // avoid noisy false positives.
2094 SuggestReplacement =
2095 RealPathComponentIter->equals_insensitive(RHS: Component);
2096 if (!SuggestReplacement)
2097 break;
2098 Component = *RealPathComponentIter;
2099 }
2100 ++RealPathComponentIter;
2101 }
2102 }
2103 return SuggestReplacement;
2104}
2105
2106bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
2107 const TargetInfo &TargetInfo,
2108 const Module &M,
2109 DiagnosticsEngine &Diags) {
2110 Module::Requirement Requirement;
2111 Module::UnresolvedHeaderDirective MissingHeader;
2112 Module *ShadowingModule = nullptr;
2113 if (M.isAvailable(LangOpts, Target: TargetInfo, Req&: Requirement, MissingHeader,
2114 ShadowingModule))
2115 return false;
2116
2117 if (MissingHeader.FileNameLoc.isValid()) {
2118 Diags.Report(Loc: MissingHeader.FileNameLoc, DiagID: diag::err_module_header_missing)
2119 << MissingHeader.IsUmbrella << MissingHeader.FileName;
2120 } else if (ShadowingModule) {
2121 Diags.Report(Loc: M.DefinitionLoc, DiagID: diag::err_module_shadowed) << M.Name;
2122 Diags.Report(Loc: ShadowingModule->DefinitionLoc,
2123 DiagID: diag::note_previous_definition);
2124 } else {
2125 // FIXME: Track the location at which the requirement was specified, and
2126 // use it here.
2127 Diags.Report(Loc: M.DefinitionLoc, DiagID: diag::err_module_unavailable)
2128 << M.getFullModuleName() << Requirement.RequiredState
2129 << Requirement.FeatureName;
2130 }
2131 return true;
2132}
2133
2134std::pair<ConstSearchDirIterator, const FileEntry *>
2135Preprocessor::getIncludeNextStart(const Token &IncludeNextTok) const {
2136 // #include_next is like #include, except that we start searching after
2137 // the current found directory. If we can't do this, issue a
2138 // diagnostic.
2139 ConstSearchDirIterator Lookup = CurDirLookup;
2140 const FileEntry *LookupFromFile = nullptr;
2141
2142 if (isInPrimaryFile() && LangOpts.IsHeaderFile) {
2143 // If the main file is a header, then it's either for PCH/AST generation,
2144 // or libclang opened it. Either way, handle it as a normal include below
2145 // and do not complain about include_next.
2146 } else if (isInPrimaryFile()) {
2147 Lookup = nullptr;
2148 Diag(Tok: IncludeNextTok, DiagID: diag::pp_include_next_in_primary);
2149 } else if (CurLexerSubmodule) {
2150 // Start looking up in the directory *after* the one in which the current
2151 // file would be found, if any.
2152 assert(CurPPLexer && "#include_next directive in macro?");
2153 if (auto FE = CurPPLexer->getFileEntry())
2154 LookupFromFile = *FE;
2155 Lookup = nullptr;
2156 } else if (!Lookup) {
2157 // The current file was not found by walking the include path. Either it
2158 // is the primary file (handled above), or it was found by absolute path,
2159 // or it was found relative to such a file.
2160 // FIXME: Track enough information so we know which case we're in.
2161 Diag(Tok: IncludeNextTok, DiagID: diag::pp_include_next_absolute_path);
2162 } else {
2163 // Start looking up in the next directory.
2164 ++Lookup;
2165 }
2166
2167 return {Lookup, LookupFromFile};
2168}
2169
2170/// HandleIncludeDirective - The "\#include" tokens have just been read, read
2171/// the file to be included from the lexer, then include it! This is a common
2172/// routine with functionality shared between \#include, \#include_next and
2173/// \#import. LookupFrom is set when this is a \#include_next directive, it
2174/// specifies the file to start searching from.
2175void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
2176 Token &IncludeTok,
2177 ConstSearchDirIterator LookupFrom,
2178 const FileEntry *LookupFromFile) {
2179 Token FilenameTok;
2180 if (LexHeaderName(Result&: FilenameTok))
2181 return;
2182
2183 if (FilenameTok.isNot(K: tok::header_name)) {
2184 if (FilenameTok.is(K: tok::identifier) &&
2185 (PPOpts.SingleFileParseMode || PPOpts.SingleModuleParseMode)) {
2186 // If we saw #include IDENTIFIER and lexing didn't turn in into a header
2187 // name, it was undefined. In 'single-{file,module}-parse' mode, just skip
2188 // the directive without emitting diagnostics - the identifier might be
2189 // normally defined in previously-skipped include directive.
2190 DiscardUntilEndOfDirective();
2191 return;
2192 }
2193
2194 Diag(Loc: FilenameTok.getLocation(), DiagID: diag::err_pp_expects_filename);
2195 if (FilenameTok.isNot(K: tok::eod))
2196 DiscardUntilEndOfDirective();
2197 return;
2198 }
2199
2200 // Verify that there is nothing after the filename, other than EOD. Note
2201 // that we allow macros that expand to nothing after the filename, because
2202 // this falls into the category of "#include pp-tokens new-line" specified
2203 // in C99 6.10.2p4.
2204 SourceLocation EndLoc =
2205 CheckEndOfDirective(DirType: IncludeTok.getIdentifierInfo()->getNameStart(), EnableMacros: true);
2206
2207 auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
2208 EndLoc, LookupFrom, LookupFromFile);
2209 switch (Action.Kind) {
2210 case ImportAction::None:
2211 case ImportAction::SkippedModuleImport:
2212 break;
2213 case ImportAction::ModuleBegin:
2214 EnterAnnotationToken(Range: SourceRange(HashLoc, EndLoc),
2215 Kind: tok::annot_module_begin, AnnotationVal: Action.ModuleForHeader);
2216 break;
2217 case ImportAction::HeaderUnitImport:
2218 EnterAnnotationToken(Range: SourceRange(HashLoc, EndLoc), Kind: tok::annot_header_unit,
2219 AnnotationVal: Action.ModuleForHeader);
2220 break;
2221 case ImportAction::ModuleImport:
2222 EnterAnnotationToken(Range: SourceRange(HashLoc, EndLoc),
2223 Kind: tok::annot_module_include, AnnotationVal: Action.ModuleForHeader);
2224 break;
2225 case ImportAction::Failure:
2226 assert(TheModuleLoader.HadFatalFailure &&
2227 "This should be an early exit only to a fatal error");
2228 TheModuleLoader.HadFatalFailure = true;
2229 IncludeTok.setKind(tok::eof);
2230 CurLexer->cutOffLexing();
2231 return;
2232 }
2233}
2234
2235OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(
2236 ConstSearchDirIterator *CurDir, StringRef &Filename,
2237 SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2238 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2239 bool &IsMapped, ConstSearchDirIterator LookupFrom,
2240 const FileEntry *LookupFromFile, StringRef &LookupFilename,
2241 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2242 ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
2243 auto DiagnoseHeaderInclusion = [&](FileEntryRef FE) {
2244 if (LangOpts.AsmPreprocessor)
2245 return;
2246
2247 Module *RequestingModule = getModuleForLocation(
2248 Loc: FilenameLoc, AllowTextual: LangOpts.ModulesValidateTextualHeaderIncludes);
2249 bool RequestingModuleIsModuleInterface =
2250 !SourceMgr.isInMainFile(Loc: FilenameLoc);
2251
2252 HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
2253 RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
2254 Filename, File: FE);
2255 };
2256
2257 OptionalFileEntryRef File = LookupFile(
2258 FilenameLoc, Filename: LookupFilename, isAngled, FromDir: LookupFrom, FromFile: LookupFromFile, CurDirArg: CurDir,
2259 SearchPath: Callbacks ? &SearchPath : nullptr, RelativePath: Callbacks ? &RelativePath : nullptr,
2260 SuggestedModule: &SuggestedModule, IsMapped: &IsMapped, IsFrameworkFound: &IsFrameworkFound);
2261 if (File) {
2262 DiagnoseHeaderInclusion(*File);
2263 return File;
2264 }
2265
2266 // Give the clients a chance to silently skip this include.
2267 if (Callbacks && Callbacks->FileNotFound(FileName: Filename))
2268 return std::nullopt;
2269
2270 if (SuppressIncludeNotFoundError)
2271 return std::nullopt;
2272
2273 // If the file could not be located and it was included via angle
2274 // brackets, we can attempt a lookup as though it were a quoted path to
2275 // provide the user with a possible fixit.
2276 if (isAngled) {
2277 OptionalFileEntryRef File = LookupFile(
2278 FilenameLoc, Filename: LookupFilename, isAngled: false, FromDir: LookupFrom, FromFile: LookupFromFile, CurDirArg: CurDir,
2279 SearchPath: Callbacks ? &SearchPath : nullptr, RelativePath: Callbacks ? &RelativePath : nullptr,
2280 SuggestedModule: &SuggestedModule, IsMapped: &IsMapped,
2281 /*IsFrameworkFound=*/nullptr);
2282 if (File) {
2283 DiagnoseHeaderInclusion(*File);
2284 Diag(Tok: FilenameTok, DiagID: diag::err_pp_file_not_found_angled_include_not_fatal)
2285 << Filename << IsImportDecl
2286 << FixItHint::CreateReplacement(RemoveRange: FilenameRange,
2287 Code: "\"" + Filename.str() + "\"");
2288 return File;
2289 }
2290 }
2291
2292 // Check for likely typos due to leading or trailing non-isAlphanumeric
2293 // characters
2294 StringRef OriginalFilename = Filename;
2295 if (LangOpts.SpellChecking) {
2296 // A heuristic to correct a typo file name by removing leading and
2297 // trailing non-isAlphanumeric characters.
2298 auto CorrectTypoFilename = [](llvm::StringRef Filename) {
2299 Filename = Filename.drop_until(F: isAlphanumeric);
2300 while (!Filename.empty() && !isAlphanumeric(c: Filename.back())) {
2301 Filename = Filename.drop_back();
2302 }
2303 return Filename;
2304 };
2305 StringRef TypoCorrectionName = CorrectTypoFilename(Filename);
2306 StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);
2307
2308 OptionalFileEntryRef File = LookupFile(
2309 FilenameLoc, Filename: TypoCorrectionLookupName, isAngled, FromDir: LookupFrom,
2310 FromFile: LookupFromFile, CurDirArg: CurDir, SearchPath: Callbacks ? &SearchPath : nullptr,
2311 RelativePath: Callbacks ? &RelativePath : nullptr, SuggestedModule: &SuggestedModule, IsMapped: &IsMapped,
2312 /*IsFrameworkFound=*/nullptr);
2313 if (File) {
2314 DiagnoseHeaderInclusion(*File);
2315 auto Hint =
2316 isAngled ? FixItHint::CreateReplacement(
2317 RemoveRange: FilenameRange, Code: "<" + TypoCorrectionName.str() + ">")
2318 : FixItHint::CreateReplacement(
2319 RemoveRange: FilenameRange, Code: "\"" + TypoCorrectionName.str() + "\"");
2320 Diag(Tok: FilenameTok, DiagID: diag::err_pp_file_not_found_typo_not_fatal)
2321 << OriginalFilename << TypoCorrectionName << Hint;
2322 // We found the file, so set the Filename to the name after typo
2323 // correction.
2324 Filename = TypoCorrectionName;
2325 LookupFilename = TypoCorrectionLookupName;
2326 return File;
2327 }
2328 }
2329
2330 // If the file is still not found, just go with the vanilla diagnostic
2331 assert(!File && "expected missing file");
2332 Diag(Tok: FilenameTok, DiagID: diag::err_pp_file_not_found)
2333 << OriginalFilename << FilenameRange;
2334 if (IsFrameworkFound) {
2335 size_t SlashPos = OriginalFilename.find(C: '/');
2336 assert(SlashPos != StringRef::npos &&
2337 "Include with framework name should have '/' in the filename");
2338 StringRef FrameworkName = OriginalFilename.substr(Start: 0, N: SlashPos);
2339 FrameworkCacheEntry &CacheEntry =
2340 HeaderInfo.LookupFrameworkCache(FWName: FrameworkName);
2341 assert(CacheEntry.Directory && "Found framework should be in cache");
2342 Diag(Tok: FilenameTok, DiagID: diag::note_pp_framework_without_header)
2343 << OriginalFilename.substr(Start: SlashPos + 1) << FrameworkName
2344 << CacheEntry.Directory->getName();
2345 }
2346
2347 return std::nullopt;
2348}
2349
2350/// Handle either a #include-like directive or an import declaration that names
2351/// a header file.
2352///
2353/// \param HashLoc The location of the '#' token for an include, or
2354/// SourceLocation() for an import declaration.
2355/// \param IncludeTok The include / include_next / import token.
2356/// \param FilenameTok The header-name token.
2357/// \param EndLoc The location at which any imported macros become visible.
2358/// \param LookupFrom For #include_next, the starting directory for the
2359/// directory lookup.
2360/// \param LookupFromFile For #include_next, the starting file for the directory
2361/// lookup.
2362Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
2363 SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
2364 SourceLocation EndLoc, ConstSearchDirIterator LookupFrom,
2365 const FileEntry *LookupFromFile) {
2366 SmallString<128> FilenameBuffer;
2367 StringRef Filename = getSpelling(Tok: FilenameTok, Buffer&: FilenameBuffer);
2368 SourceLocation CharEnd = FilenameTok.getEndLoc();
2369
2370 CharSourceRange FilenameRange
2371 = CharSourceRange::getCharRange(B: FilenameTok.getLocation(), E: CharEnd);
2372 StringRef OriginalFilename = Filename;
2373 bool isAngled =
2374 GetIncludeFilenameSpelling(Loc: FilenameTok.getLocation(), Buffer&: Filename);
2375
2376 // If GetIncludeFilenameSpelling set the start ptr to null, there was an
2377 // error.
2378 if (Filename.empty())
2379 return {ImportAction::None};
2380
2381 bool IsImportDecl = HashLoc.isInvalid();
2382 SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;
2383
2384 // Complain about attempts to #include files in an audit pragma.
2385 if (PragmaARCCFCodeAuditedInfo.getLoc().isValid()) {
2386 Diag(Loc: StartLoc, DiagID: diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
2387 Diag(Loc: PragmaARCCFCodeAuditedInfo.getLoc(), DiagID: diag::note_pragma_entered_here);
2388
2389 // Immediately leave the pragma.
2390 PragmaARCCFCodeAuditedInfo = IdentifierLoc();
2391 }
2392
2393 // Complain about attempts to #include files in an assume-nonnull pragma.
2394 if (PragmaAssumeNonNullLoc.isValid()) {
2395 Diag(Loc: StartLoc, DiagID: diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
2396 Diag(Loc: PragmaAssumeNonNullLoc, DiagID: diag::note_pragma_entered_here);
2397
2398 // Immediately leave the pragma.
2399 PragmaAssumeNonNullLoc = SourceLocation();
2400 }
2401
2402 if (HeaderInfo.HasIncludeAliasMap()) {
2403 // Map the filename with the brackets still attached. If the name doesn't
2404 // map to anything, fall back on the filename we've already gotten the
2405 // spelling for.
2406 StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(Source: OriginalFilename);
2407 if (!NewName.empty())
2408 Filename = NewName;
2409 }
2410
2411 // Search include directories.
2412 bool IsMapped = false;
2413 bool IsFrameworkFound = false;
2414 ConstSearchDirIterator CurDir = nullptr;
2415 SmallString<1024> SearchPath;
2416 SmallString<1024> RelativePath;
2417 // We get the raw path only if we have 'Callbacks' to which we later pass
2418 // the path.
2419 ModuleMap::KnownHeader SuggestedModule;
2420 SourceLocation FilenameLoc = FilenameTok.getLocation();
2421 StringRef LookupFilename = Filename;
2422
2423 // Normalize slashes when compiling with -fms-extensions on non-Windows. This
2424 // is unnecessary on Windows since the filesystem there handles backslashes.
2425 SmallString<128> NormalizedPath;
2426 llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
2427 if (is_style_posix(S: BackslashStyle) && LangOpts.MicrosoftExt) {
2428 NormalizedPath = Filename.str();
2429 llvm::sys::path::native(path&: NormalizedPath);
2430 LookupFilename = NormalizedPath;
2431 BackslashStyle = llvm::sys::path::Style::windows;
2432 }
2433
2434 OptionalFileEntryRef File = LookupHeaderIncludeOrImport(
2435 CurDir: &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
2436 IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
2437 LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
2438
2439 if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
2440 if (File && isPCHThroughHeader(FE: &File->getFileEntry()))
2441 SkippingUntilPCHThroughHeader = false;
2442 return {ImportAction::None};
2443 }
2444
2445 // Should we enter the source file? Set to Skip if either the source file is
2446 // known to have no effect beyond its effect on module visibility -- that is,
2447 // if it's got an include guard that is already defined, set to Import if it
2448 // is a modular header we've already built and should import.
2449
2450 // For C++20 Modules
2451 // [cpp.include]/7 If the header identified by the header-name denotes an
2452 // importable header, it is implementation-defined whether the #include
2453 // preprocessing directive is instead replaced by an import directive.
2454 // For this implementation, the translation is permitted when we are parsing
2455 // the Global Module Fragment, and not otherwise (the cases where it would be
2456 // valid to replace an include with an import are highly constrained once in
2457 // named module purview; this choice avoids considerable complexity in
2458 // determining valid cases).
2459
2460 enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
2461
2462 if (PPOpts.SingleFileParseMode)
2463 Action = IncludeLimitReached;
2464
2465 // If we've reached the max allowed include depth, it is usually due to an
2466 // include cycle. Don't enter already processed files again as it can lead to
2467 // reaching the max allowed include depth again.
2468 if (Action == Enter && HasReachedMaxIncludeDepth && File &&
2469 alreadyIncluded(File: *File))
2470 Action = IncludeLimitReached;
2471
2472 // FIXME: We do not have a good way to disambiguate C++ clang modules from
2473 // C++ standard modules (other than use/non-use of Header Units).
2474
2475 Module *ModuleToImport = SuggestedModule.getModule();
2476
2477 bool MaybeTranslateInclude = Action == Enter && File && ModuleToImport &&
2478 !ModuleToImport->isForBuilding(LangOpts: getLangOpts());
2479
2480 // Maybe a usable Header Unit
2481 bool UsableHeaderUnit = false;
2482 if (getLangOpts().CPlusPlusModules && ModuleToImport &&
2483 ModuleToImport->isHeaderUnit()) {
2484 if (TrackGMFState.inGMF() || IsImportDecl)
2485 UsableHeaderUnit = true;
2486 else if (!IsImportDecl) {
2487 // This is a Header Unit that we do not include-translate
2488 ModuleToImport = nullptr;
2489 }
2490 }
2491 // Maybe a usable clang header module.
2492 bool UsableClangHeaderModule =
2493 (getLangOpts().CPlusPlusModules || getLangOpts().Modules) &&
2494 ModuleToImport && !ModuleToImport->isHeaderUnit();
2495
2496 // Determine whether we should try to import the module for this #include, if
2497 // there is one. Don't do so if precompiled module support is disabled or we
2498 // are processing this module textually (because we're building the module).
2499 if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) {
2500 // If this include corresponds to a module but that module is
2501 // unavailable, diagnose the situation and bail out.
2502 // FIXME: Remove this; loadModule does the same check (but produces
2503 // slightly worse diagnostics).
2504 if (checkModuleIsAvailable(LangOpts: getLangOpts(), TargetInfo: getTargetInfo(), M: *ModuleToImport,
2505 Diags&: getDiagnostics())) {
2506 Diag(Loc: FilenameTok.getLocation(),
2507 DiagID: diag::note_implicit_top_level_module_import_here)
2508 << ModuleToImport->getTopLevelModuleName();
2509 return {ImportAction::None};
2510 }
2511
2512 // Compute the module access path corresponding to this module.
2513 // FIXME: Should we have a second loadModule() overload to avoid this
2514 // extra lookup step?
2515 SmallVector<IdentifierLoc, 2> Path;
2516 for (Module *Mod = ModuleToImport; Mod; Mod = Mod->Parent)
2517 Path.emplace_back(Args: FilenameTok.getLocation(),
2518 Args: getIdentifierInfo(Name: Mod->Name));
2519 std::reverse(first: Path.begin(), last: Path.end());
2520
2521 // Warn that we're replacing the include/import with a module import.
2522 if (!IsImportDecl)
2523 diagnoseAutoModuleImport(PP&: *this, HashLoc: StartLoc, IncludeTok, Path, PathEnd: CharEnd);
2524
2525 // Load the module to import its macros. We'll make the declarations
2526 // visible when the parser gets here.
2527 // FIXME: Pass ModuleToImport in here rather than converting it to a path
2528 // and making the module loader convert it back again.
2529 ModuleLoadResult Imported = TheModuleLoader.loadModule(
2530 ImportLoc: IncludeTok.getLocation(), Path, Visibility: Module::Hidden,
2531 /*IsInclusionDirective=*/true);
2532 assert((Imported == nullptr || Imported == ModuleToImport) &&
2533 "the imported module is different than the suggested one");
2534
2535 if (Imported) {
2536 Action = Import;
2537 } else if (Imported.isMissingExpected()) {
2538 markClangModuleAsAffecting(
2539 M: static_cast<Module *>(Imported)->getTopLevelModule());
2540 // We failed to find a submodule that we assumed would exist (because it
2541 // was in the directory of an umbrella header, for instance), but no
2542 // actual module containing it exists (because the umbrella header is
2543 // incomplete). Treat this as a textual inclusion.
2544 ModuleToImport = nullptr;
2545 } else if (Imported.isConfigMismatch()) {
2546 // On a configuration mismatch, enter the header textually. We still know
2547 // that it's part of the corresponding module.
2548 } else {
2549 // We hit an error processing the import. Bail out.
2550 if (hadModuleLoaderFatalFailure()) {
2551 // With a fatal failure in the module loader, we abort parsing.
2552 Token &Result = IncludeTok;
2553 assert(CurLexer && "#include but no current lexer set!");
2554 Result.startToken();
2555 CurLexer->FormTokenWithChars(Result, TokEnd: CurLexer->BufferEnd, Kind: tok::eof);
2556 CurLexer->cutOffLexing();
2557 }
2558 return {ImportAction::None};
2559 }
2560 }
2561
2562 // The #included file will be considered to be a system header if either it is
2563 // in a system include directory, or if the #includer is a system include
2564 // header.
2565 SrcMgr::CharacteristicKind FileCharacter =
2566 SourceMgr.getFileCharacteristic(Loc: FilenameTok.getLocation());
2567 if (File)
2568 FileCharacter = std::max(a: HeaderInfo.getFileDirFlavor(File: *File), b: FileCharacter);
2569
2570 // If this is a '#import' or an import-declaration, don't re-enter the file.
2571 //
2572 // FIXME: If we have a suggested module for a '#include', and we've already
2573 // visited this file, don't bother entering it again. We know it has no
2574 // further effect.
2575 bool EnterOnce =
2576 IsImportDecl ||
2577 IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;
2578
2579 bool IsFirstIncludeOfFile = false;
2580
2581 // Ask HeaderInfo if we should enter this #include file. If not, #including
2582 // this file will have no effect.
2583 if (Action == Enter && File &&
2584 !HeaderInfo.ShouldEnterIncludeFile(PP&: *this, File: *File, isImport: EnterOnce,
2585 ModulesEnabled: getLangOpts().Modules, M: ModuleToImport,
2586 IsFirstIncludeOfFile)) {
2587 // C++ standard modules:
2588 // If we are not in the GMF, then we textually include only
2589 // clang modules:
2590 // Even if we've already preprocessed this header once and know that we
2591 // don't need to see its contents again, we still need to import it if it's
2592 // modular because we might not have imported it from this submodule before.
2593 //
2594 // FIXME: We don't do this when compiling a PCH because the AST
2595 // serialization layer can't cope with it. This means we get local
2596 // submodule visibility semantics wrong in that case.
2597 if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
2598 Action = TrackGMFState.inGMF() ? Import : Skip;
2599 else
2600 Action = (ModuleToImport && !getLangOpts().CompilingPCH) ? Import : Skip;
2601 }
2602
2603 // Check for circular inclusion of the main file.
2604 // We can't generate a consistent preamble with regard to the conditional
2605 // stack if the main file is included again as due to the preamble bounds
2606 // some directives (e.g. #endif of a header guard) will never be seen.
2607 // Since this will lead to confusing errors, avoid the inclusion.
2608 if (Action == Enter && File && PreambleConditionalStack.isRecording() &&
2609 SourceMgr.isMainFile(SourceFile: File->getFileEntry())) {
2610 Diag(Loc: FilenameTok.getLocation(),
2611 DiagID: diag::err_pp_including_mainfile_in_preamble);
2612 return {ImportAction::None};
2613 }
2614
2615 if (Callbacks && !IsImportDecl) {
2616 // Notify the callback object that we've seen an inclusion directive.
2617 // FIXME: Use a different callback for a pp-import?
2618 Callbacks->InclusionDirective(HashLoc, IncludeTok, FileName: LookupFilename, IsAngled: isAngled,
2619 FilenameRange, File, SearchPath, RelativePath,
2620 SuggestedModule: SuggestedModule.getModule(), ModuleImported: Action == Import,
2621 FileType: FileCharacter);
2622 if (Action == Skip && File)
2623 Callbacks->FileSkipped(SkippedFile: *File, FilenameTok, FileType: FileCharacter);
2624 }
2625
2626 if (!File)
2627 return {ImportAction::None};
2628
2629 // If this is a C++20 pp-import declaration, diagnose if we didn't find any
2630 // module corresponding to the named header.
2631 if (IsImportDecl && !ModuleToImport) {
2632 Diag(Tok: FilenameTok, DiagID: diag::err_header_import_not_header_unit)
2633 << OriginalFilename << File->getName();
2634 return {ImportAction::None};
2635 }
2636
2637 // Issue a diagnostic if the name of the file on disk has a different case
2638 // than the one we're about to open.
2639 const bool CheckIncludePathPortability =
2640 !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();
2641
2642 if (CheckIncludePathPortability) {
2643 StringRef Name = LookupFilename;
2644 StringRef NameWithoriginalSlashes = Filename;
2645#if defined(_WIN32)
2646 // Skip UNC prefix if present. (tryGetRealPathName() always
2647 // returns a path with the prefix skipped.)
2648 bool NameWasUNC = Name.consume_front("\\\\?\\");
2649 NameWithoriginalSlashes.consume_front("\\\\?\\");
2650#endif
2651 StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
2652 SmallVector<StringRef, 16> Components(llvm::sys::path::begin(path: Name),
2653 llvm::sys::path::end(path: Name));
2654#if defined(_WIN32)
2655 // -Wnonportable-include-path is designed to diagnose includes using
2656 // case even on systems with a case-insensitive file system.
2657 // On Windows, RealPathName always starts with an upper-case drive
2658 // letter for absolute paths, but Name might start with either
2659 // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
2660 // ("foo" will always have on-disk case, no matter which case was
2661 // used in the cd command). To not emit this warning solely for
2662 // the drive letter, whose case is dependent on if `cd` is used
2663 // with upper- or lower-case drive letters, always consider the
2664 // given drive letter case as correct for the purpose of this warning.
2665 SmallString<128> FixedDriveRealPath;
2666 if (llvm::sys::path::is_absolute(Name) &&
2667 llvm::sys::path::is_absolute(RealPathName) &&
2668 toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
2669 isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
2670 assert(Components.size() >= 3 && "should have drive, backslash, name");
2671 assert(Components[0].size() == 2 && "should start with drive");
2672 assert(Components[0][1] == ':' && "should have colon");
2673 FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
2674 RealPathName = FixedDriveRealPath;
2675 }
2676#endif
2677
2678 if (trySimplifyPath(Components, RealPathName, Separator: BackslashStyle)) {
2679 SmallString<128> Path;
2680 Path.reserve(N: Name.size()+2);
2681 Path.push_back(Elt: isAngled ? '<' : '"');
2682
2683 const auto IsSep = [BackslashStyle](char c) {
2684 return llvm::sys::path::is_separator(value: c, style: BackslashStyle);
2685 };
2686
2687 for (auto Component : Components) {
2688 // On POSIX, Components will contain a single '/' as first element
2689 // exactly if Name is an absolute path.
2690 // On Windows, it will contain "C:" followed by '\' for absolute paths.
2691 // The drive letter is optional for absolute paths on Windows, but
2692 // clang currently cannot process absolute paths in #include lines that
2693 // don't have a drive.
2694 // If the first entry in Components is a directory separator,
2695 // then the code at the bottom of this loop that keeps the original
2696 // directory separator style copies it. If the second entry is
2697 // a directory separator (the C:\ case), then that separator already
2698 // got copied when the C: was processed and we want to skip that entry.
2699 if (!(Component.size() == 1 && IsSep(Component[0])))
2700 Path.append(RHS: Component);
2701 else if (Path.size() != 1)
2702 continue;
2703
2704 // Append the separator(s) the user used, or the close quote
2705 if (Path.size() > NameWithoriginalSlashes.size()) {
2706 Path.push_back(Elt: isAngled ? '>' : '"');
2707 continue;
2708 }
2709 assert(IsSep(NameWithoriginalSlashes[Path.size()-1]));
2710 do
2711 Path.push_back(Elt: NameWithoriginalSlashes[Path.size()-1]);
2712 while (Path.size() <= NameWithoriginalSlashes.size() &&
2713 IsSep(NameWithoriginalSlashes[Path.size()-1]));
2714 }
2715
2716#if defined(_WIN32)
2717 // Restore UNC prefix if it was there.
2718 if (NameWasUNC)
2719 Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
2720#endif
2721
2722 // For user files and known standard headers, issue a diagnostic.
2723 // For other system headers, don't. They can be controlled separately.
2724 auto DiagId =
2725 (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Include: Name))
2726 ? diag::pp_nonportable_path
2727 : diag::pp_nonportable_system_path;
2728 Diag(Tok: FilenameTok, DiagID: DiagId) << Path <<
2729 FixItHint::CreateReplacement(RemoveRange: FilenameRange, Code: Path);
2730 }
2731 }
2732
2733 switch (Action) {
2734 case Skip:
2735 // If we don't need to enter the file, stop now.
2736 if (ModuleToImport)
2737 return {ImportAction::SkippedModuleImport, ModuleToImport};
2738 return {ImportAction::None};
2739
2740 case IncludeLimitReached:
2741 // If we reached our include limit and don't want to enter any more files,
2742 // don't go any further.
2743 return {ImportAction::None};
2744
2745 case Import: {
2746 // If this is a module import, make it visible if needed.
2747 assert(ModuleToImport && "no module to import");
2748
2749 makeModuleVisible(M: ModuleToImport, Loc: EndLoc);
2750
2751 if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
2752 tok::pp___include_macros)
2753 return {ImportAction::None};
2754
2755 return {ImportAction::ModuleImport, ModuleToImport};
2756 }
2757
2758 case Enter:
2759 break;
2760 }
2761
2762 // Check that we don't have infinite #include recursion.
2763 if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
2764 Diag(Tok: FilenameTok, DiagID: diag::err_pp_include_too_deep);
2765 HasReachedMaxIncludeDepth = true;
2766 return {ImportAction::None};
2767 }
2768
2769 if (isAngled && isInNamedModule())
2770 Diag(Tok: FilenameTok, DiagID: diag::warn_pp_include_angled_in_module_purview)
2771 << getNamedModuleName();
2772
2773 // Look up the file, create a File ID for it.
2774 SourceLocation IncludePos = FilenameTok.getLocation();
2775 // If the filename string was the result of macro expansions, set the include
2776 // position on the file where it will be included and after the expansions.
2777 if (IncludePos.isMacroID())
2778 IncludePos = SourceMgr.getExpansionRange(Loc: IncludePos).getEnd();
2779 FileID FID = SourceMgr.createFileID(SourceFile: *File, IncludePos, FileCharacter);
2780 if (!FID.isValid()) {
2781 TheModuleLoader.HadFatalFailure = true;
2782 return ImportAction::Failure;
2783 }
2784
2785 // If all is good, enter the new file!
2786 if (EnterSourceFile(FID, Dir: CurDir, Loc: FilenameTok.getLocation(),
2787 IsFirstIncludeOfFile))
2788 return {ImportAction::None};
2789
2790 // Determine if we're switching to building a new submodule, and which one.
2791 // This does not apply for C++20 modules header units.
2792 if (ModuleToImport && !ModuleToImport->isHeaderUnit()) {
2793 if (ModuleToImport->getTopLevelModule()->ShadowingModule) {
2794 // We are building a submodule that belongs to a shadowed module. This
2795 // means we find header files in the shadowed module.
2796 Diag(Loc: ModuleToImport->DefinitionLoc,
2797 DiagID: diag::err_module_build_shadowed_submodule)
2798 << ModuleToImport->getFullModuleName();
2799 Diag(Loc: ModuleToImport->getTopLevelModule()->ShadowingModule->DefinitionLoc,
2800 DiagID: diag::note_previous_definition);
2801 return {ImportAction::None};
2802 }
2803 // When building a pch, -fmodule-name tells the compiler to textually
2804 // include headers in the specified module. We are not building the
2805 // specified module.
2806 //
2807 // FIXME: This is the wrong way to handle this. We should produce a PCH
2808 // that behaves the same as the header would behave in a compilation using
2809 // that PCH, which means we should enter the submodule. We need to teach
2810 // the AST serialization layer to deal with the resulting AST.
2811 if (getLangOpts().CompilingPCH &&
2812 ModuleToImport->isForBuilding(LangOpts: getLangOpts()))
2813 return {ImportAction::None};
2814
2815 assert(!CurLexerSubmodule && "should not have marked this as a module yet");
2816 CurLexerSubmodule = ModuleToImport;
2817
2818 // Let the macro handling code know that any future macros are within
2819 // the new submodule.
2820 EnterSubmodule(M: ModuleToImport, ImportLoc: EndLoc, /*ForPragma*/ false);
2821
2822 // Let the parser know that any future declarations are within the new
2823 // submodule.
2824 // FIXME: There's no point doing this if we're handling a #__include_macros
2825 // directive.
2826 return {ImportAction::ModuleBegin, ModuleToImport};
2827 }
2828
2829 assert(!IsImportDecl && "failed to diagnose missing module for import decl");
2830 return {ImportAction::None};
2831}
2832
2833/// HandleIncludeNextDirective - Implements \#include_next.
2834///
2835void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
2836 Token &IncludeNextTok) {
2837 Diag(Tok: IncludeNextTok, DiagID: diag::ext_pp_include_next_directive);
2838
2839 ConstSearchDirIterator Lookup = nullptr;
2840 const FileEntry *LookupFromFile;
2841 std::tie(args&: Lookup, args&: LookupFromFile) = getIncludeNextStart(IncludeNextTok);
2842
2843 return HandleIncludeDirective(HashLoc, IncludeTok&: IncludeNextTok, LookupFrom: Lookup,
2844 LookupFromFile);
2845}
2846
2847/// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
2848void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
2849 // The Microsoft #import directive takes a type library and generates header
2850 // files from it, and includes those. This is beyond the scope of what clang
2851 // does, so we ignore it and error out. However, #import can optionally have
2852 // trailing attributes that span multiple lines. We're going to eat those
2853 // so we can continue processing from there.
2854 Diag(Tok, DiagID: diag::err_pp_import_directive_ms );
2855
2856 // Read tokens until we get to the end of the directive. Note that the
2857 // directive can be split over multiple lines using the backslash character.
2858 DiscardUntilEndOfDirective();
2859}
2860
2861/// HandleImportDirective - Implements \#import.
2862///
2863void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
2864 Token &ImportTok) {
2865 if (!LangOpts.ObjC) { // #import is standard for ObjC.
2866 if (LangOpts.MSVCCompat)
2867 return HandleMicrosoftImportDirective(Tok&: ImportTok);
2868 Diag(Tok: ImportTok, DiagID: diag::ext_pp_import_directive);
2869 }
2870 return HandleIncludeDirective(HashLoc, IncludeTok&: ImportTok);
2871}
2872
2873/// HandleIncludeMacrosDirective - The -imacros command line option turns into a
2874/// pseudo directive in the predefines buffer. This handles it by sucking all
2875/// tokens through the preprocessor and discarding them (only keeping the side
2876/// effects on the preprocessor).
2877void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
2878 Token &IncludeMacrosTok) {
2879 // This directive should only occur in the predefines buffer. If not, emit an
2880 // error and reject it.
2881 SourceLocation Loc = IncludeMacrosTok.getLocation();
2882 if (SourceMgr.getBufferName(Loc) != "<built-in>") {
2883 Diag(Loc: IncludeMacrosTok.getLocation(),
2884 DiagID: diag::pp_include_macros_out_of_predefines);
2885 DiscardUntilEndOfDirective();
2886 return;
2887 }
2888
2889 // Treat this as a normal #include for checking purposes. If this is
2890 // successful, it will push a new lexer onto the include stack.
2891 HandleIncludeDirective(HashLoc, IncludeTok&: IncludeMacrosTok);
2892
2893 Token TmpTok;
2894 do {
2895 Lex(Result&: TmpTok);
2896 assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
2897 } while (TmpTok.isNot(K: tok::hashhash));
2898}
2899
2900//===----------------------------------------------------------------------===//
2901// Preprocessor Macro Directive Handling.
2902//===----------------------------------------------------------------------===//
2903
2904/// ReadMacroParameterList - The ( starting a parameter list of a macro
2905/// definition has just been read. Lex the rest of the parameters and the
2906/// closing ), updating MI with what we learn. Return true if an error occurs
2907/// parsing the param list.
2908bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
2909 SmallVector<IdentifierInfo*, 32> Parameters;
2910
2911 while (true) {
2912 LexUnexpandedNonComment(Result&: Tok);
2913 switch (Tok.getKind()) {
2914 case tok::r_paren:
2915 // Found the end of the parameter list.
2916 if (Parameters.empty()) // #define FOO()
2917 return false;
2918 // Otherwise we have #define FOO(A,)
2919 Diag(Tok, DiagID: diag::err_pp_expected_ident_in_arg_list);
2920 return true;
2921 case tok::ellipsis: // #define X(... -> C99 varargs
2922 if (!LangOpts.C99)
2923 Diag(Tok, DiagID: LangOpts.CPlusPlus11 ?
2924 diag::warn_cxx98_compat_variadic_macro :
2925 diag::ext_variadic_macro);
2926
2927 // OpenCL v1.2 s6.9.e: variadic macros are not supported.
2928 if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus) {
2929 Diag(Tok, DiagID: diag::ext_pp_opencl_variadic_macros);
2930 }
2931
2932 // Lex the token after the identifier.
2933 LexUnexpandedNonComment(Result&: Tok);
2934 if (Tok.isNot(K: tok::r_paren)) {
2935 Diag(Tok, DiagID: diag::err_pp_missing_rparen_in_macro_def);
2936 return true;
2937 }
2938 // Add the __VA_ARGS__ identifier as a parameter.
2939 Parameters.push_back(Elt: Ident__VA_ARGS__);
2940 MI->setIsC99Varargs();
2941 MI->setParameterList(List: Parameters, PPAllocator&: BP);
2942 return false;
2943 case tok::eod: // #define X(
2944 Diag(Tok, DiagID: diag::err_pp_missing_rparen_in_macro_def);
2945 return true;
2946 default:
2947 // Handle keywords and identifiers here to accept things like
2948 // #define Foo(for) for.
2949 IdentifierInfo *II = Tok.getIdentifierInfo();
2950 if (!II) {
2951 // #define X(1
2952 Diag(Tok, DiagID: diag::err_pp_invalid_tok_in_arg_list);
2953 return true;
2954 }
2955
2956 // If this is already used as a parameter, it is used multiple times (e.g.
2957 // #define X(A,A.
2958 if (llvm::is_contained(Range&: Parameters, Element: II)) { // C99 6.10.3p6
2959 Diag(Tok, DiagID: diag::err_pp_duplicate_name_in_arg_list) << II;
2960 return true;
2961 }
2962
2963 // Add the parameter to the macro info.
2964 Parameters.push_back(Elt: II);
2965
2966 // Lex the token after the identifier.
2967 LexUnexpandedNonComment(Result&: Tok);
2968
2969 switch (Tok.getKind()) {
2970 default: // #define X(A B
2971 Diag(Tok, DiagID: diag::err_pp_expected_comma_in_arg_list);
2972 return true;
2973 case tok::r_paren: // #define X(A)
2974 MI->setParameterList(List: Parameters, PPAllocator&: BP);
2975 return false;
2976 case tok::comma: // #define X(A,
2977 break;
2978 case tok::ellipsis: // #define X(A... -> GCC extension
2979 // Diagnose extension.
2980 Diag(Tok, DiagID: diag::ext_named_variadic_macro);
2981
2982 // Lex the token after the identifier.
2983 LexUnexpandedNonComment(Result&: Tok);
2984 if (Tok.isNot(K: tok::r_paren)) {
2985 Diag(Tok, DiagID: diag::err_pp_missing_rparen_in_macro_def);
2986 return true;
2987 }
2988
2989 MI->setIsGNUVarargs();
2990 MI->setParameterList(List: Parameters, PPAllocator&: BP);
2991 return false;
2992 }
2993 }
2994 }
2995}
2996
2997static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
2998 const LangOptions &LOptions) {
2999 if (MI->getNumTokens() == 1) {
3000 const Token &Value = MI->getReplacementToken(Tok: 0);
3001
3002 // Macro that is identity, like '#define inline inline' is a valid pattern.
3003 if (MacroName.getKind() == Value.getKind())
3004 return true;
3005
3006 // Macro that maps a keyword to the same keyword decorated with leading/
3007 // trailing underscores is a valid pattern:
3008 // #define inline __inline
3009 // #define inline __inline__
3010 // #define inline _inline (in MS compatibility mode)
3011 StringRef MacroText = MacroName.getIdentifierInfo()->getName();
3012 if (IdentifierInfo *II = Value.getIdentifierInfo()) {
3013 if (!II->isKeyword(LangOpts: LOptions))
3014 return false;
3015 StringRef ValueText = II->getName();
3016 StringRef TrimmedValue = ValueText;
3017 if (!ValueText.starts_with(Prefix: "__")) {
3018 if (ValueText.starts_with(Prefix: "_"))
3019 TrimmedValue = TrimmedValue.drop_front(N: 1);
3020 else
3021 return false;
3022 } else {
3023 TrimmedValue = TrimmedValue.drop_front(N: 2);
3024 if (TrimmedValue.ends_with(Suffix: "__"))
3025 TrimmedValue = TrimmedValue.drop_back(N: 2);
3026 }
3027 return TrimmedValue == MacroText;
3028 } else {
3029 return false;
3030 }
3031 }
3032
3033 // #define inline
3034 return MacroName.isOneOf(Ks: tok::kw_extern, Ks: tok::kw_inline, Ks: tok::kw_static,
3035 Ks: tok::kw_const) &&
3036 MI->getNumTokens() == 0;
3037}
3038
3039// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
3040// entire line) of the macro's tokens and adds them to MacroInfo, and while
3041// doing so performs certain validity checks including (but not limited to):
3042// - # (stringization) is followed by a macro parameter
3043//
3044// Returns a nullptr if an invalid sequence of tokens is encountered or returns
3045// a pointer to a MacroInfo object.
3046
3047MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
3048 const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {
3049
3050 Token LastTok = MacroNameTok;
3051 // Create the new macro.
3052 MacroInfo *const MI = AllocateMacroInfo(L: MacroNameTok.getLocation());
3053
3054 Token Tok;
3055 LexUnexpandedToken(Result&: Tok);
3056
3057 // Ensure we consume the rest of the macro body if errors occur.
3058 llvm::scope_exit _([&]() {
3059 // The flag indicates if we are still waiting for 'eod'.
3060 if (CurLexer->ParsingPreprocessorDirective)
3061 DiscardUntilEndOfDirective();
3062 });
3063
3064 // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
3065 // within their appropriate context.
3066 VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);
3067
3068 // If this is a function-like macro definition, parse the argument list,
3069 // marking each of the identifiers as being used as macro arguments. Also,
3070 // check other constraints on the first token of the macro body.
3071 if (Tok.is(K: tok::eod)) {
3072 if (ImmediatelyAfterHeaderGuard) {
3073 // Save this macro information since it may part of a header guard.
3074 CurPPLexer->MIOpt.SetDefinedMacro(M: MacroNameTok.getIdentifierInfo(),
3075 Loc: MacroNameTok.getLocation());
3076 }
3077 // If there is no body to this macro, we have no special handling here.
3078 } else if (Tok.hasLeadingSpace()) {
3079 // This is a normal token with leading space. Clear the leading space
3080 // marker on the first token to get proper expansion.
3081 Tok.clearFlag(Flag: Token::LeadingSpace);
3082 } else if (Tok.is(K: tok::l_paren)) {
3083 // This is a function-like macro definition. Read the argument list.
3084 MI->setIsFunctionLike();
3085 if (ReadMacroParameterList(MI, Tok&: LastTok))
3086 return nullptr;
3087
3088 // If this is a definition of an ISO C/C++ variadic function-like macro (not
3089 // using the GNU named varargs extension) inform our variadic scope guard
3090 // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
3091 // allowed only within the definition of a variadic macro.
3092
3093 if (MI->isC99Varargs()) {
3094 VariadicMacroScopeGuard.enterScope();
3095 }
3096
3097 // Read the first token after the arg list for down below.
3098 LexUnexpandedToken(Result&: Tok);
3099 } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
3100 // C99 requires whitespace between the macro definition and the body. Emit
3101 // a diagnostic for something like "#define X+".
3102 Diag(Tok, DiagID: diag::ext_c99_whitespace_required_after_macro_name);
3103 } else {
3104 // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
3105 // first character of a replacement list is not a character required by
3106 // subclause 5.2.1, then there shall be white-space separation between the
3107 // identifier and the replacement list.". 5.2.1 lists this set:
3108 // "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
3109 // is irrelevant here.
3110 bool isInvalid = false;
3111 if (Tok.is(K: tok::at)) // @ is not in the list above.
3112 isInvalid = true;
3113 else if (Tok.is(K: tok::unknown)) {
3114 // If we have an unknown token, it is something strange like "`". Since
3115 // all of valid characters would have lexed into a single character
3116 // token of some sort, we know this is not a valid case.
3117 isInvalid = true;
3118 }
3119 if (isInvalid)
3120 Diag(Tok, DiagID: diag::ext_missing_whitespace_after_macro_name);
3121 else
3122 Diag(Tok, DiagID: diag::warn_missing_whitespace_after_macro_name);
3123 }
3124
3125 if (!Tok.is(K: tok::eod))
3126 LastTok = Tok;
3127
3128 SmallVector<Token, 16> Tokens;
3129
3130 // Read the rest of the macro body.
3131 if (MI->isObjectLike()) {
3132 // Object-like macros are very simple, just read their body.
3133 while (Tok.isNot(K: tok::eod)) {
3134 LastTok = Tok;
3135 Tokens.push_back(Elt: Tok);
3136 // Get the next token of the macro.
3137 LexUnexpandedToken(Result&: Tok);
3138 }
3139 } else {
3140 // Otherwise, read the body of a function-like macro. While we are at it,
3141 // check C99 6.10.3.2p1: ensure that # operators are followed by macro
3142 // parameters in function-like macro expansions.
3143
3144 VAOptDefinitionContext VAOCtx(*this);
3145
3146 while (Tok.isNot(K: tok::eod)) {
3147 LastTok = Tok;
3148
3149 if (!Tok.isOneOf(Ks: tok::hash, Ks: tok::hashat, Ks: tok::hashhash)) {
3150 Tokens.push_back(Elt: Tok);
3151
3152 if (VAOCtx.isVAOptToken(T: Tok)) {
3153 // If we're already within a VAOPT, emit an error.
3154 if (VAOCtx.isInVAOpt()) {
3155 Diag(Tok, DiagID: diag::err_pp_vaopt_nested_use);
3156 return nullptr;
3157 }
3158 // Ensure VAOPT is followed by a '(' .
3159 LexUnexpandedToken(Result&: Tok);
3160 if (Tok.isNot(K: tok::l_paren)) {
3161 Diag(Tok, DiagID: diag::err_pp_missing_lparen_in_vaopt_use);
3162 return nullptr;
3163 }
3164 Tokens.push_back(Elt: Tok);
3165 VAOCtx.sawVAOptFollowedByOpeningParens(LParenLoc: Tok.getLocation());
3166 LexUnexpandedToken(Result&: Tok);
3167 if (Tok.is(K: tok::hashhash)) {
3168 Diag(Tok, DiagID: diag::err_vaopt_paste_at_start);
3169 return nullptr;
3170 }
3171 continue;
3172 } else if (VAOCtx.isInVAOpt()) {
3173 if (Tok.is(K: tok::r_paren)) {
3174 if (VAOCtx.sawClosingParen()) {
3175 assert(Tokens.size() >= 3 &&
3176 "Must have seen at least __VA_OPT__( "
3177 "and a subsequent tok::r_paren");
3178 if (Tokens[Tokens.size() - 2].is(K: tok::hashhash)) {
3179 Diag(Tok, DiagID: diag::err_vaopt_paste_at_end);
3180 return nullptr;
3181 }
3182 }
3183 } else if (Tok.is(K: tok::l_paren)) {
3184 VAOCtx.sawOpeningParen(LParenLoc: Tok.getLocation());
3185 }
3186 }
3187 // Get the next token of the macro.
3188 LexUnexpandedToken(Result&: Tok);
3189 continue;
3190 }
3191
3192 // If we're in -traditional mode, then we should ignore stringification
3193 // and token pasting. Mark the tokens as unknown so as not to confuse
3194 // things.
3195 if (getLangOpts().TraditionalCPP) {
3196 Tok.setKind(tok::unknown);
3197 Tokens.push_back(Elt: Tok);
3198
3199 // Get the next token of the macro.
3200 LexUnexpandedToken(Result&: Tok);
3201 continue;
3202 }
3203
3204 if (Tok.is(K: tok::hashhash)) {
3205 // If we see token pasting, check if it looks like the gcc comma
3206 // pasting extension. We'll use this information to suppress
3207 // diagnostics later on.
3208
3209 // Get the next token of the macro.
3210 LexUnexpandedToken(Result&: Tok);
3211
3212 if (Tok.is(K: tok::eod)) {
3213 Tokens.push_back(Elt: LastTok);
3214 break;
3215 }
3216
3217 if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
3218 Tokens[Tokens.size() - 1].is(K: tok::comma))
3219 MI->setHasCommaPasting();
3220
3221 // Things look ok, add the '##' token to the macro.
3222 Tokens.push_back(Elt: LastTok);
3223 continue;
3224 }
3225
3226 // Our Token is a stringization operator.
3227 // Get the next token of the macro.
3228 LexUnexpandedToken(Result&: Tok);
3229
3230 // Check for a valid macro arg identifier or __VA_OPT__.
3231 if (!VAOCtx.isVAOptToken(T: Tok) &&
3232 (Tok.getIdentifierInfo() == nullptr ||
3233 MI->getParameterNum(Arg: Tok.getIdentifierInfo()) == -1)) {
3234
3235 // If this is assembler-with-cpp mode, we accept random gibberish after
3236 // the '#' because '#' is often a comment character. However, change
3237 // the kind of the token to tok::unknown so that the preprocessor isn't
3238 // confused.
3239 if (getLangOpts().AsmPreprocessor && Tok.isNot(K: tok::eod)) {
3240 LastTok.setKind(tok::unknown);
3241 Tokens.push_back(Elt: LastTok);
3242 continue;
3243 } else {
3244 Diag(Tok, DiagID: diag::err_pp_stringize_not_parameter)
3245 << LastTok.is(K: tok::hashat);
3246 return nullptr;
3247 }
3248 }
3249
3250 // Things look ok, add the '#' and param name tokens to the macro.
3251 Tokens.push_back(Elt: LastTok);
3252
3253 // If the token following '#' is VAOPT, let the next iteration handle it
3254 // and check it for correctness, otherwise add the token and prime the
3255 // loop with the next one.
3256 if (!VAOCtx.isVAOptToken(T: Tok)) {
3257 Tokens.push_back(Elt: Tok);
3258 LastTok = Tok;
3259
3260 // Get the next token of the macro.
3261 LexUnexpandedToken(Result&: Tok);
3262 }
3263 }
3264 if (VAOCtx.isInVAOpt()) {
3265 assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
3266 Diag(Tok, DiagID: diag::err_pp_expected_after)
3267 << LastTok.getKind() << tok::r_paren;
3268 Diag(Loc: VAOCtx.getUnmatchedOpeningParenLoc(), DiagID: diag::note_matching) << tok::l_paren;
3269 return nullptr;
3270 }
3271 }
3272 MI->setDefinitionEndLoc(LastTok.getLocation());
3273
3274 MI->setTokens(Tokens, PPAllocator&: BP);
3275 return MI;
3276}
3277
3278static bool isObjCProtectedMacro(const IdentifierInfo *II) {
3279 return II->isStr(Str: "__strong") || II->isStr(Str: "__weak") ||
3280 II->isStr(Str: "__unsafe_unretained") || II->isStr(Str: "__autoreleasing");
3281}
3282
3283/// HandleDefineDirective - Implements \#define. This consumes the entire macro
3284/// line then lets the caller lex the next real token.
3285void Preprocessor::HandleDefineDirective(
3286 Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
3287 ++NumDefined;
3288
3289 Token MacroNameTok;
3290 bool MacroShadowsKeyword;
3291 ReadMacroName(MacroNameTok, isDefineUndef: MU_Define, ShadowFlag: &MacroShadowsKeyword);
3292
3293 // Error reading macro name? If so, diagnostic already issued.
3294 if (MacroNameTok.is(K: tok::eod))
3295 return;
3296
3297 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
3298 // Issue a final pragma warning if we're defining a macro that was has been
3299 // undefined and is being redefined.
3300 if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
3301 emitFinalMacroWarning(Identifier: MacroNameTok, /*IsUndef=*/false);
3302
3303 // If we are supposed to keep comments in #defines, reenable comment saving
3304 // mode.
3305 if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
3306
3307 MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
3308 MacroNameTok, ImmediatelyAfterHeaderGuard);
3309
3310 if (!MI) return;
3311
3312 if (MacroShadowsKeyword &&
3313 !isConfigurationPattern(MacroName&: MacroNameTok, MI, LOptions: getLangOpts())) {
3314 Diag(Tok: MacroNameTok, DiagID: diag::warn_pp_macro_hides_keyword);
3315 }
3316 // Check that there is no paste (##) operator at the beginning or end of the
3317 // replacement list.
3318 unsigned NumTokens = MI->getNumTokens();
3319 if (NumTokens != 0) {
3320 if (MI->getReplacementToken(Tok: 0).is(K: tok::hashhash)) {
3321 Diag(Tok: MI->getReplacementToken(Tok: 0), DiagID: diag::err_paste_at_start);
3322 return;
3323 }
3324 if (MI->getReplacementToken(Tok: NumTokens-1).is(K: tok::hashhash)) {
3325 Diag(Tok: MI->getReplacementToken(Tok: NumTokens-1), DiagID: diag::err_paste_at_end);
3326 return;
3327 }
3328 }
3329
3330 // When skipping just warn about macros that do not match.
3331 if (SkippingUntilPCHThroughHeader) {
3332 const MacroInfo *OtherMI = getMacroInfo(II: MacroNameTok.getIdentifierInfo());
3333 if (!OtherMI || !MI->isIdenticalTo(Other: *OtherMI, PP&: *this,
3334 /*Syntactic=*/Syntactically: LangOpts.MicrosoftExt))
3335 Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::warn_pp_macro_def_mismatch_with_pch)
3336 << MacroNameTok.getIdentifierInfo();
3337 // Issue the diagnostic but allow the change if msvc extensions are enabled
3338 if (!LangOpts.MicrosoftExt)
3339 return;
3340 }
3341
3342 // Finally, if this identifier already had a macro defined for it, verify that
3343 // the macro bodies are identical, and issue diagnostics if they are not.
3344 if (const MacroInfo *OtherMI=getMacroInfo(II: MacroNameTok.getIdentifierInfo())) {
3345 // Final macros are hard-mode: they always warn. Even if the bodies are
3346 // identical. Even if they are in system headers. Even if they are things we
3347 // would silently allow in the past.
3348 if (MacroNameTok.getIdentifierInfo()->isFinal())
3349 emitFinalMacroWarning(Identifier: MacroNameTok, /*IsUndef=*/false);
3350
3351 // In Objective-C, ignore attempts to directly redefine the builtin
3352 // definitions of the ownership qualifiers. It's still possible to
3353 // #undef them.
3354 if (getLangOpts().ObjC &&
3355 SourceMgr.getFileID(SpellingLoc: OtherMI->getDefinitionLoc()) ==
3356 getPredefinesFileID() &&
3357 isObjCProtectedMacro(II: MacroNameTok.getIdentifierInfo())) {
3358 // Warn if it changes the tokens.
3359 if ((!getDiagnostics().getSuppressSystemWarnings() ||
3360 !SourceMgr.isInSystemHeader(Loc: DefineTok.getLocation())) &&
3361 !MI->isIdenticalTo(Other: *OtherMI, PP&: *this,
3362 /*Syntactic=*/Syntactically: LangOpts.MicrosoftExt)) {
3363 Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::warn_pp_objc_macro_redef_ignored);
3364 }
3365 assert(!OtherMI->isWarnIfUnused());
3366 return;
3367 }
3368
3369 // It is very common for system headers to have tons of macro redefinitions
3370 // and for warnings to be disabled in system headers. If this is the case,
3371 // then don't bother calling MacroInfo::isIdenticalTo.
3372 if (!getDiagnostics().getSuppressSystemWarnings() ||
3373 !SourceMgr.isInSystemHeader(Loc: DefineTok.getLocation())) {
3374
3375 if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
3376 Diag(Loc: OtherMI->getDefinitionLoc(), DiagID: diag::pp_macro_not_used);
3377
3378 // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
3379 // C++ [cpp.predefined]p4, but allow it as an extension.
3380 if (isLanguageDefinedBuiltin(SourceMgr, MI: OtherMI, MacroName: II->getName()))
3381 Diag(Tok: MacroNameTok, DiagID: diag::ext_pp_redef_builtin_macro);
3382 // Macros must be identical. This means all tokens and whitespace
3383 // separation must be the same. C99 6.10.3p2.
3384 else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
3385 !MI->isIdenticalTo(Other: *OtherMI, PP&: *this, /*Syntactic=*/Syntactically: LangOpts.MicrosoftExt)) {
3386 Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::ext_pp_macro_redef)
3387 << MacroNameTok.getIdentifierInfo();
3388 Diag(Loc: OtherMI->getDefinitionLoc(), DiagID: diag::note_previous_definition);
3389 }
3390 }
3391 if (OtherMI->isWarnIfUnused())
3392 WarnUnusedMacroLocs.erase(V: OtherMI->getDefinitionLoc());
3393 }
3394
3395 DefMacroDirective *MD =
3396 appendDefMacroDirective(II: MacroNameTok.getIdentifierInfo(), MI);
3397
3398 assert(!MI->isUsed());
3399 // If we need warning for not using the macro, add its location in the
3400 // warn-because-unused-macro set. If it gets used it will be removed from set.
3401 if (getSourceManager().isInMainFile(Loc: MI->getDefinitionLoc()) &&
3402 !Diags->isIgnored(DiagID: diag::pp_macro_not_used, Loc: MI->getDefinitionLoc()) &&
3403 !MacroExpansionInDirectivesOverride &&
3404 getSourceManager().getFileID(SpellingLoc: MI->getDefinitionLoc()) !=
3405 getPredefinesFileID()) {
3406 MI->setIsWarnIfUnused(true);
3407 WarnUnusedMacroLocs.insert(V: MI->getDefinitionLoc());
3408 }
3409
3410 // If the callbacks want to know, tell them about the macro definition.
3411 if (Callbacks)
3412 Callbacks->MacroDefined(MacroNameTok, MD);
3413}
3414
3415/// HandleUndefDirective - Implements \#undef.
3416///
3417void Preprocessor::HandleUndefDirective() {
3418 ++NumUndefined;
3419
3420 Token MacroNameTok;
3421 ReadMacroName(MacroNameTok, isDefineUndef: MU_Undef);
3422
3423 // Error reading macro name? If so, diagnostic already issued.
3424 if (MacroNameTok.is(K: tok::eod))
3425 return;
3426
3427 // Check to see if this is the last token on the #undef line.
3428 CheckEndOfDirective(DirType: "undef");
3429
3430 // Okay, we have a valid identifier to undef.
3431 auto *II = MacroNameTok.getIdentifierInfo();
3432 auto MD = getMacroDefinition(II);
3433 UndefMacroDirective *Undef = nullptr;
3434
3435 if (II->isFinal())
3436 emitFinalMacroWarning(Identifier: MacroNameTok, /*IsUndef=*/true);
3437
3438 // If the macro is not defined, this is a noop undef.
3439 if (const MacroInfo *MI = MD.getMacroInfo()) {
3440 if (!MI->isUsed() && MI->isWarnIfUnused())
3441 Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::pp_macro_not_used);
3442
3443 // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4 and
3444 // C++ [cpp.predefined]p4, but allow it as an extension.
3445 if (isLanguageDefinedBuiltin(SourceMgr, MI, MacroName: II->getName()))
3446 Diag(Tok: MacroNameTok, DiagID: diag::ext_pp_undef_builtin_macro);
3447
3448 if (MI->isWarnIfUnused())
3449 WarnUnusedMacroLocs.erase(V: MI->getDefinitionLoc());
3450
3451 Undef = AllocateUndefMacroDirective(UndefLoc: MacroNameTok.getLocation());
3452 }
3453
3454 // If the callbacks want to know, tell them about the macro #undef.
3455 // Note: no matter if the macro was defined or not.
3456 if (Callbacks)
3457 Callbacks->MacroUndefined(MacroNameTok, MD, Undef);
3458
3459 if (Undef)
3460 appendMacroDirective(II, MD: Undef);
3461}
3462
3463//===----------------------------------------------------------------------===//
3464// Preprocessor Conditional Directive Handling.
3465//===----------------------------------------------------------------------===//
3466
3467/// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive. isIfndef
3468/// is true when this is a \#ifndef directive. ReadAnyTokensBeforeDirective is
3469/// true if any tokens have been returned or pp-directives activated before this
3470/// \#ifndef has been lexed.
3471///
3472void Preprocessor::HandleIfdefDirective(Token &Result,
3473 const Token &HashToken,
3474 bool isIfndef,
3475 bool ReadAnyTokensBeforeDirective) {
3476 ++NumIf;
3477 Token DirectiveTok = Result;
3478
3479 Token MacroNameTok;
3480 ReadMacroName(MacroNameTok);
3481
3482 // Error reading macro name? If so, diagnostic already issued.
3483 if (MacroNameTok.is(K: tok::eod)) {
3484 // Skip code until we get to #endif. This helps with recovery by not
3485 // emitting an error when the #endif is reached.
3486 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(),
3487 IfTokenLoc: DirectiveTok.getLocation(),
3488 /*Foundnonskip*/ FoundNonSkipPortion: false, /*FoundElse*/ false);
3489 return;
3490 }
3491
3492 emitMacroExpansionWarnings(Identifier: MacroNameTok, /*IsIfnDef=*/true);
3493
3494 // Check to see if this is the last token on the #if[n]def line.
3495 CheckEndOfDirective(DirType: isIfndef ? "ifndef" : "ifdef");
3496
3497 IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
3498 auto MD = getMacroDefinition(II: MII);
3499 MacroInfo *MI = MD.getMacroInfo();
3500
3501 if (CurPPLexer->getConditionalStackDepth() == 0) {
3502 // If the start of a top-level #ifdef and if the macro is not defined,
3503 // inform MIOpt that this might be the start of a proper include guard.
3504 // Otherwise it is some other form of unknown conditional which we can't
3505 // handle.
3506 if (!ReadAnyTokensBeforeDirective && !MI) {
3507 assert(isIfndef && "#ifdef shouldn't reach here");
3508 CurPPLexer->MIOpt.EnterTopLevelIfndef(M: MII, Loc: MacroNameTok.getLocation());
3509 } else
3510 CurPPLexer->MIOpt.EnterTopLevelConditional();
3511 }
3512
3513 // If there is a macro, process it.
3514 if (MI) // Mark it used.
3515 markMacroAsUsed(MI);
3516
3517 if (Callbacks) {
3518 if (isIfndef)
3519 Callbacks->Ifndef(Loc: DirectiveTok.getLocation(), MacroNameTok, MD);
3520 else
3521 Callbacks->Ifdef(Loc: DirectiveTok.getLocation(), MacroNameTok, MD);
3522 }
3523
3524 bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3525 getSourceManager().isInMainFile(Loc: DirectiveTok.getLocation());
3526
3527 // Should we include the stuff contained by this directive?
3528 if (PPOpts.SingleFileParseMode && !MI) {
3529 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3530 // the directive blocks.
3531 CurPPLexer->pushConditionalLevel(DirectiveStart: DirectiveTok.getLocation(),
3532 /*wasskip*/WasSkipping: false, /*foundnonskip*/FoundNonSkip: false,
3533 /*foundelse*/FoundElse: false);
3534 } else if (PPOpts.SingleModuleParseMode && !MI) {
3535 // In 'single-module-parse mode' undefined identifiers trigger skipping of
3536 // all the directive blocks. We lie here and set FoundNonSkipPortion so that
3537 // even any \#else blocks get skipped.
3538 SkipExcludedConditionalBlock(
3539 HashTokenLoc: HashToken.getLocation(), IfTokenLoc: DirectiveTok.getLocation(),
3540 /*FoundNonSkipPortion=*/true, /*FoundElse=*/false);
3541 } else if (!MI == isIfndef || RetainExcludedCB) {
3542 // Yes, remember that we are inside a conditional, then lex the next token.
3543 CurPPLexer->pushConditionalLevel(DirectiveStart: DirectiveTok.getLocation(),
3544 /*wasskip*/WasSkipping: false, /*foundnonskip*/FoundNonSkip: true,
3545 /*foundelse*/FoundElse: false);
3546 } else {
3547 // No, skip the contents of this block.
3548 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(),
3549 IfTokenLoc: DirectiveTok.getLocation(),
3550 /*Foundnonskip*/ FoundNonSkipPortion: false,
3551 /*FoundElse*/ false);
3552 }
3553}
3554
3555/// HandleIfDirective - Implements the \#if directive.
3556///
3557void Preprocessor::HandleIfDirective(Token &IfToken,
3558 const Token &HashToken,
3559 bool ReadAnyTokensBeforeDirective) {
3560 ++NumIf;
3561
3562 // Parse and evaluate the conditional expression.
3563 IdentifierInfo *IfNDefMacro = nullptr;
3564 const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
3565 const bool ConditionalTrue = DER.Conditional;
3566 // Lexer might become invalid if we hit code completion point while evaluating
3567 // expression.
3568 if (!CurPPLexer)
3569 return;
3570
3571 // If this condition is equivalent to #ifndef X, and if this is the first
3572 // directive seen, handle it for the multiple-include optimization.
3573 if (CurPPLexer->getConditionalStackDepth() == 0) {
3574 if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)
3575 // FIXME: Pass in the location of the macro name, not the 'if' token.
3576 CurPPLexer->MIOpt.EnterTopLevelIfndef(M: IfNDefMacro, Loc: IfToken.getLocation());
3577 else
3578 CurPPLexer->MIOpt.EnterTopLevelConditional();
3579 }
3580
3581 if (Callbacks)
3582 Callbacks->If(
3583 Loc: IfToken.getLocation(), ConditionRange: DER.ExprRange,
3584 ConditionValue: (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));
3585
3586 bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3587 getSourceManager().isInMainFile(Loc: IfToken.getLocation());
3588
3589 // Should we include the stuff contained by this directive?
3590 if (PPOpts.SingleFileParseMode && DER.IncludedUndefinedIds) {
3591 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3592 // the directive blocks.
3593 CurPPLexer->pushConditionalLevel(DirectiveStart: IfToken.getLocation(), /*wasskip*/WasSkipping: false,
3594 /*foundnonskip*/FoundNonSkip: false, /*foundelse*/FoundElse: false);
3595 } else if (PPOpts.SingleModuleParseMode && DER.IncludedUndefinedIds) {
3596 // In 'single-module-parse mode' undefined identifiers trigger skipping of
3597 // all the directive blocks. We lie here and set FoundNonSkipPortion so that
3598 // even any \#else blocks get skipped.
3599 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(), IfTokenLoc: IfToken.getLocation(),
3600 /*FoundNonSkipPortion=*/true,
3601 /*FoundElse=*/false);
3602 } else if (ConditionalTrue || RetainExcludedCB) {
3603 // Yes, remember that we are inside a conditional, then lex the next token.
3604 CurPPLexer->pushConditionalLevel(DirectiveStart: IfToken.getLocation(), /*wasskip*/WasSkipping: false,
3605 /*foundnonskip*/FoundNonSkip: true, /*foundelse*/FoundElse: false);
3606 } else {
3607 // No, skip the contents of this block.
3608 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(), IfTokenLoc: IfToken.getLocation(),
3609 /*Foundnonskip*/ FoundNonSkipPortion: false,
3610 /*FoundElse*/ false);
3611 }
3612}
3613
3614/// HandleEndifDirective - Implements the \#endif directive.
3615///
3616void Preprocessor::HandleEndifDirective(Token &EndifToken) {
3617 ++NumEndif;
3618
3619 // Check that this is the whole directive.
3620 CheckEndOfDirective(DirType: "endif");
3621
3622 PPConditionalInfo CondInfo;
3623 if (CurPPLexer->popConditionalLevel(CI&: CondInfo)) {
3624 // No conditionals on the stack: this is an #endif without an #if.
3625 Diag(Tok: EndifToken, DiagID: diag::err_pp_endif_without_if);
3626 return;
3627 }
3628
3629 // If this the end of a top-level #endif, inform MIOpt.
3630 if (CurPPLexer->getConditionalStackDepth() == 0)
3631 CurPPLexer->MIOpt.ExitTopLevelConditional();
3632
3633 assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
3634 "This code should only be reachable in the non-skipping case!");
3635
3636 if (Callbacks)
3637 Callbacks->Endif(Loc: EndifToken.getLocation(), IfLoc: CondInfo.IfLoc);
3638}
3639
3640/// HandleElseDirective - Implements the \#else directive.
3641///
3642void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) {
3643 ++NumElse;
3644
3645 // #else directive in a non-skipping conditional... start skipping.
3646 CheckEndOfDirective(DirType: "else");
3647
3648 PPConditionalInfo CI;
3649 if (CurPPLexer->popConditionalLevel(CI)) {
3650 Diag(Tok: Result, DiagID: diag::pp_err_else_without_if);
3651 return;
3652 }
3653
3654 // If this is a top-level #else, inform the MIOpt.
3655 if (CurPPLexer->getConditionalStackDepth() == 0)
3656 CurPPLexer->MIOpt.EnterTopLevelConditional();
3657
3658 // If this is a #else with a #else before it, report the error.
3659 if (CI.FoundElse) Diag(Tok: Result, DiagID: diag::pp_err_else_after_else);
3660
3661 if (Callbacks)
3662 Callbacks->Else(Loc: Result.getLocation(), IfLoc: CI.IfLoc);
3663
3664 bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3665 getSourceManager().isInMainFile(Loc: Result.getLocation());
3666
3667 if ((PPOpts.SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3668 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3669 // the directive blocks.
3670 CurPPLexer->pushConditionalLevel(DirectiveStart: CI.IfLoc, /*wasskip*/WasSkipping: false,
3671 /*foundnonskip*/FoundNonSkip: false, /*foundelse*/FoundElse: true);
3672 return;
3673 }
3674
3675 // Finally, skip the rest of the contents of this block.
3676 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(), IfTokenLoc: CI.IfLoc,
3677 /*Foundnonskip*/ FoundNonSkipPortion: true,
3678 /*FoundElse*/ true, ElseLoc: Result.getLocation());
3679}
3680
3681/// Implements the \#elif, \#elifdef, and \#elifndef directives.
3682void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
3683 const Token &HashToken,
3684 tok::PPKeywordKind Kind) {
3685 PPElifDiag DirKind = Kind == tok::pp_elif ? PED_Elif
3686 : Kind == tok::pp_elifdef ? PED_Elifdef
3687 : PED_Elifndef;
3688 ++NumElse;
3689
3690 // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode.
3691 switch (DirKind) {
3692 case PED_Elifdef:
3693 case PED_Elifndef:
3694 unsigned DiagID;
3695 if (LangOpts.CPlusPlus)
3696 DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
3697 : diag::ext_cxx23_pp_directive;
3698 else
3699 DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
3700 : diag::ext_c23_pp_directive;
3701 Diag(Tok: ElifToken, DiagID) << DirKind;
3702 break;
3703 default:
3704 break;
3705 }
3706
3707 // #elif directive in a non-skipping conditional... start skipping.
3708 // We don't care what the condition is, because we will always skip it (since
3709 // the block immediately before it was included).
3710 SourceRange ConditionRange = DiscardUntilEndOfDirective();
3711
3712 PPConditionalInfo CI;
3713 if (CurPPLexer->popConditionalLevel(CI)) {
3714 Diag(Tok: ElifToken, DiagID: diag::pp_err_elif_without_if) << DirKind;
3715 return;
3716 }
3717
3718 // If this is a top-level #elif, inform the MIOpt.
3719 if (CurPPLexer->getConditionalStackDepth() == 0)
3720 CurPPLexer->MIOpt.EnterTopLevelConditional();
3721
3722 // If this is a #elif with a #else before it, report the error.
3723 if (CI.FoundElse)
3724 Diag(Tok: ElifToken, DiagID: diag::pp_err_elif_after_else) << DirKind;
3725
3726 if (Callbacks) {
3727 switch (Kind) {
3728 case tok::pp_elif:
3729 Callbacks->Elif(Loc: ElifToken.getLocation(), ConditionRange,
3730 ConditionValue: PPCallbacks::CVK_NotEvaluated, IfLoc: CI.IfLoc);
3731 break;
3732 case tok::pp_elifdef:
3733 Callbacks->Elifdef(Loc: ElifToken.getLocation(), ConditionRange, IfLoc: CI.IfLoc);
3734 break;
3735 case tok::pp_elifndef:
3736 Callbacks->Elifndef(Loc: ElifToken.getLocation(), ConditionRange, IfLoc: CI.IfLoc);
3737 break;
3738 default:
3739 assert(false && "unexpected directive kind");
3740 break;
3741 }
3742 }
3743
3744 bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3745 getSourceManager().isInMainFile(Loc: ElifToken.getLocation());
3746
3747 if ((PPOpts.SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3748 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3749 // the directive blocks.
3750 CurPPLexer->pushConditionalLevel(DirectiveStart: ElifToken.getLocation(), /*wasskip*/WasSkipping: false,
3751 /*foundnonskip*/FoundNonSkip: false, /*foundelse*/FoundElse: false);
3752 return;
3753 }
3754
3755 // Finally, skip the rest of the contents of this block.
3756 SkipExcludedConditionalBlock(
3757 HashTokenLoc: HashToken.getLocation(), IfTokenLoc: CI.IfLoc, /*Foundnonskip*/ FoundNonSkipPortion: true,
3758 /*FoundElse*/ CI.FoundElse, ElseLoc: ElifToken.getLocation());
3759}
3760
3761std::optional<LexEmbedParametersResult>
3762Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
3763 LexEmbedParametersResult Result{};
3764 tok::TokenKind EndTokenKind = ForHasEmbed ? tok::r_paren : tok::eod;
3765
3766 auto DiagMismatchedBracesAndSkipToEOD =
3767 [&](tok::TokenKind Expected,
3768 std::pair<tok::TokenKind, SourceLocation> Matches) {
3769 Diag(Tok: CurTok, DiagID: diag::err_expected) << Expected;
3770 Diag(Loc: Matches.second, DiagID: diag::note_matching) << Matches.first;
3771 if (CurTok.isNot(K: tok::eod))
3772 DiscardUntilEndOfDirective(Tmp&: CurTok);
3773 };
3774
3775 auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) {
3776 if (CurTok.isNot(K: Kind)) {
3777 Diag(Tok: CurTok, DiagID: diag::err_expected) << Kind;
3778 if (CurTok.isNot(K: tok::eod))
3779 DiscardUntilEndOfDirective(Tmp&: CurTok);
3780 return false;
3781 }
3782 return true;
3783 };
3784
3785 // C23 6.10:
3786 // pp-parameter-name:
3787 // pp-standard-parameter
3788 // pp-prefixed-parameter
3789 //
3790 // pp-standard-parameter:
3791 // identifier
3792 //
3793 // pp-prefixed-parameter:
3794 // identifier :: identifier
3795 auto LexPPParameterName = [&]() -> std::optional<std::string> {
3796 // We expect the current token to be an identifier; if it's not, things
3797 // have gone wrong.
3798 if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3799 return std::nullopt;
3800
3801 const IdentifierInfo *Prefix = CurTok.getIdentifierInfo();
3802
3803 // Lex another token; it is either a :: or we're done with the parameter
3804 // name.
3805 LexNonComment(Result&: CurTok);
3806 if (CurTok.is(K: tok::coloncolon)) {
3807 // We found a ::, so lex another identifier token.
3808 LexNonComment(Result&: CurTok);
3809 if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3810 return std::nullopt;
3811
3812 const IdentifierInfo *Suffix = CurTok.getIdentifierInfo();
3813
3814 // Lex another token so we're past the name.
3815 LexNonComment(Result&: CurTok);
3816 return (llvm::Twine(Prefix->getName()) + "::" + Suffix->getName()).str();
3817 }
3818 return Prefix->getName().str();
3819 };
3820
3821 // C23 6.10p5: In all aspects, a preprocessor standard parameter specified by
3822 // this document as an identifier pp_param and an identifier of the form
3823 // __pp_param__ shall behave the same when used as a preprocessor parameter,
3824 // except for the spelling.
3825 auto NormalizeParameterName = [](StringRef Name) {
3826 if (Name.size() > 4 && Name.starts_with(Prefix: "__") && Name.ends_with(Suffix: "__"))
3827 return Name.substr(Start: 2, N: Name.size() - 4);
3828 return Name;
3829 };
3830
3831 auto LexParenthesizedIntegerExpr = [&]() -> std::optional<size_t> {
3832 // we have a limit parameter and its internals are processed using
3833 // evaluation rules from #if.
3834 if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3835 return std::nullopt;
3836
3837 // We do not consume the ( because EvaluateDirectiveExpression will lex
3838 // the next token for us.
3839 IdentifierInfo *ParameterIfNDef = nullptr;
3840 bool EvaluatedDefined;
3841 DirectiveEvalResult LimitEvalResult = EvaluateDirectiveExpression(
3842 IfNDefMacro&: ParameterIfNDef, Tok&: CurTok, EvaluatedDefined, /*CheckForEOD=*/CheckForEoD: false);
3843
3844 if (!LimitEvalResult.Value) {
3845 // If there was an error evaluating the directive expression, we expect
3846 // to be at the end of directive token.
3847 assert(CurTok.is(tok::eod) && "expect to be at the end of directive");
3848 return std::nullopt;
3849 }
3850
3851 if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3852 return std::nullopt;
3853
3854 // Eat the ).
3855 LexNonComment(Result&: CurTok);
3856
3857 // C23 6.10.3.2p2: The token defined shall not appear within the constant
3858 // expression.
3859 if (EvaluatedDefined) {
3860 Diag(Tok: CurTok, DiagID: diag::err_defined_in_pp_embed);
3861 return std::nullopt;
3862 }
3863
3864 if (LimitEvalResult.Value) {
3865 const llvm::APSInt &Result = *LimitEvalResult.Value;
3866 if (Result.isNegative()) {
3867 Diag(Tok: CurTok, DiagID: diag::err_requires_positive_value)
3868 << toString(I: Result, Radix: 10) << /*positive*/ 0;
3869 if (CurTok.isNot(K: EndTokenKind))
3870 DiscardUntilEndOfDirective(Tmp&: CurTok);
3871 return std::nullopt;
3872 }
3873 return Result.getLimitedValue();
3874 }
3875 return std::nullopt;
3876 };
3877
3878 auto GetMatchingCloseBracket = [](tok::TokenKind Kind) {
3879 switch (Kind) {
3880 case tok::l_paren:
3881 return tok::r_paren;
3882 case tok::l_brace:
3883 return tok::r_brace;
3884 case tok::l_square:
3885 return tok::r_square;
3886 default:
3887 llvm_unreachable("should not get here");
3888 }
3889 };
3890
3891 auto LexParenthesizedBalancedTokenSoup =
3892 [&](llvm::SmallVectorImpl<Token> &Tokens) {
3893 std::vector<std::pair<tok::TokenKind, SourceLocation>> BracketStack;
3894
3895 // We expect the current token to be a left paren.
3896 if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3897 return false;
3898 LexNonComment(Result&: CurTok); // Eat the (
3899
3900 bool WaitingForInnerCloseParen = false;
3901 while (CurTok.isNot(K: tok::eod) &&
3902 (WaitingForInnerCloseParen || CurTok.isNot(K: tok::r_paren))) {
3903 switch (CurTok.getKind()) {
3904 default: // Shutting up diagnostics about not fully-covered switch.
3905 break;
3906 case tok::l_paren:
3907 WaitingForInnerCloseParen = true;
3908 [[fallthrough]];
3909 case tok::l_brace:
3910 case tok::l_square:
3911 BracketStack.push_back(x: {CurTok.getKind(), CurTok.getLocation()});
3912 break;
3913 case tok::r_paren:
3914 WaitingForInnerCloseParen = false;
3915 [[fallthrough]];
3916 case tok::r_brace:
3917 case tok::r_square: {
3918 if (BracketStack.empty()) {
3919 ExpectOrDiagAndSkipToEOD(tok::r_paren);
3920 return false;
3921 }
3922 tok::TokenKind Matching =
3923 GetMatchingCloseBracket(BracketStack.back().first);
3924 if (CurTok.getKind() != Matching) {
3925 DiagMismatchedBracesAndSkipToEOD(Matching, BracketStack.back());
3926 return false;
3927 }
3928 BracketStack.pop_back();
3929 } break;
3930 }
3931 Tokens.push_back(Elt: CurTok);
3932 LexNonComment(Result&: CurTok);
3933 }
3934
3935 // When we're done, we want to eat the closing paren.
3936 if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3937 return false;
3938
3939 LexNonComment(Result&: CurTok); // Eat the )
3940 return true;
3941 };
3942
3943 LexNonComment(Result&: CurTok); // Prime the pump.
3944 while (!CurTok.isOneOf(Ks: EndTokenKind, Ks: tok::eod)) {
3945 SourceLocation ParamStartLoc = CurTok.getLocation();
3946 std::optional<std::string> ParamName = LexPPParameterName();
3947 if (!ParamName)
3948 return std::nullopt;
3949 StringRef Parameter = NormalizeParameterName(*ParamName);
3950
3951 // Lex the parameters (dependent on the parameter type we want!).
3952 //
3953 // C23 6.10.3.Xp1: The X standard embed parameter may appear zero times or
3954 // one time in the embed parameter sequence.
3955 if (Parameter == "limit") {
3956 if (Result.MaybeLimitParam)
3957 Diag(Tok: CurTok, DiagID: diag::err_pp_embed_dup_params) << Parameter;
3958
3959 std::optional<size_t> Limit = LexParenthesizedIntegerExpr();
3960 if (!Limit)
3961 return std::nullopt;
3962 Result.MaybeLimitParam =
3963 PPEmbedParameterLimit{*Limit, {ParamStartLoc, CurTok.getLocation()}};
3964 } else if (Parameter == "clang::offset") {
3965 if (Result.MaybeOffsetParam)
3966 Diag(Tok: CurTok, DiagID: diag::err_pp_embed_dup_params) << Parameter;
3967
3968 std::optional<size_t> Offset = LexParenthesizedIntegerExpr();
3969 if (!Offset)
3970 return std::nullopt;
3971 Result.MaybeOffsetParam = PPEmbedParameterOffset{
3972 *Offset, {ParamStartLoc, CurTok.getLocation()}};
3973 } else if (Parameter == "prefix") {
3974 if (Result.MaybePrefixParam)
3975 Diag(Tok: CurTok, DiagID: diag::err_pp_embed_dup_params) << Parameter;
3976
3977 SmallVector<Token, 4> Soup;
3978 if (!LexParenthesizedBalancedTokenSoup(Soup))
3979 return std::nullopt;
3980 Result.MaybePrefixParam = PPEmbedParameterPrefix{
3981 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3982 } else if (Parameter == "suffix") {
3983 if (Result.MaybeSuffixParam)
3984 Diag(Tok: CurTok, DiagID: diag::err_pp_embed_dup_params) << Parameter;
3985
3986 SmallVector<Token, 4> Soup;
3987 if (!LexParenthesizedBalancedTokenSoup(Soup))
3988 return std::nullopt;
3989 Result.MaybeSuffixParam = PPEmbedParameterSuffix{
3990 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3991 } else if (Parameter == "if_empty") {
3992 if (Result.MaybeIfEmptyParam)
3993 Diag(Tok: CurTok, DiagID: diag::err_pp_embed_dup_params) << Parameter;
3994
3995 SmallVector<Token, 4> Soup;
3996 if (!LexParenthesizedBalancedTokenSoup(Soup))
3997 return std::nullopt;
3998 Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{
3999 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
4000 } else {
4001 ++Result.UnrecognizedParams;
4002
4003 // If there's a left paren, we need to parse a balanced token sequence
4004 // and just eat those tokens.
4005 if (CurTok.is(K: tok::l_paren)) {
4006 SmallVector<Token, 4> Soup;
4007 if (!LexParenthesizedBalancedTokenSoup(Soup))
4008 return std::nullopt;
4009 }
4010 if (!ForHasEmbed) {
4011 Diag(Loc: ParamStartLoc, DiagID: diag::err_pp_unknown_parameter) << 1 << Parameter;
4012 if (CurTok.isNot(K: EndTokenKind))
4013 DiscardUntilEndOfDirective(Tmp&: CurTok);
4014 return std::nullopt;
4015 }
4016 }
4017 }
4018 return Result;
4019}
4020
4021void Preprocessor::HandleEmbedDirectiveImpl(
4022 SourceLocation HashLoc, const LexEmbedParametersResult &Params,
4023 StringRef BinaryContents, StringRef FileName) {
4024 if (BinaryContents.empty()) {
4025 // If we have no binary contents, the only thing we need to emit are the
4026 // if_empty tokens, if any.
4027 // FIXME: this loses AST fidelity; nothing in the compiler will see that
4028 // these tokens came from #embed. We have to hack around this when printing
4029 // preprocessed output. The same is true for prefix and suffix tokens.
4030 if (Params.MaybeIfEmptyParam) {
4031 ArrayRef<Token> Toks = Params.MaybeIfEmptyParam->Tokens;
4032 size_t TokCount = Toks.size();
4033 auto NewToks = std::make_unique<Token[]>(num: TokCount);
4034 llvm::copy(Range&: Toks, Out: NewToks.get());
4035 EnterTokenStream(Toks: std::move(NewToks), NumToks: TokCount, DisableMacroExpansion: true, IsReinject: true);
4036 }
4037 return;
4038 }
4039
4040 size_t NumPrefixToks = Params.PrefixTokenCount(),
4041 NumSuffixToks = Params.SuffixTokenCount();
4042 size_t TotalNumToks = 1 + NumPrefixToks + NumSuffixToks;
4043 size_t CurIdx = 0;
4044 auto Toks = std::make_unique<Token[]>(num: TotalNumToks);
4045
4046 // Add the prefix tokens, if any.
4047 if (Params.MaybePrefixParam) {
4048 llvm::copy(Range: Params.MaybePrefixParam->Tokens, Out: &Toks[CurIdx]);
4049 CurIdx += NumPrefixToks;
4050 }
4051
4052 EmbedAnnotationData *Data = new (BP) EmbedAnnotationData;
4053 Data->BinaryData = BinaryContents;
4054 Data->FileName = FileName;
4055
4056 Toks[CurIdx].startToken();
4057 Toks[CurIdx].setKind(tok::annot_embed);
4058 Toks[CurIdx].setAnnotationRange(HashLoc);
4059 Toks[CurIdx++].setAnnotationValue(Data);
4060
4061 // Now add the suffix tokens, if any.
4062 if (Params.MaybeSuffixParam) {
4063 llvm::copy(Range: Params.MaybeSuffixParam->Tokens, Out: &Toks[CurIdx]);
4064 CurIdx += NumSuffixToks;
4065 }
4066
4067 assert(CurIdx == TotalNumToks && "Calculated the incorrect number of tokens");
4068 EnterTokenStream(Toks: std::move(Toks), NumToks: TotalNumToks, DisableMacroExpansion: true, IsReinject: true);
4069}
4070
4071void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc,
4072 Token &EmbedTok) {
4073 // Give the usual extension/compatibility warnings.
4074 if (LangOpts.C23)
4075 Diag(Tok: EmbedTok, DiagID: diag::warn_compat_pp_embed_directive);
4076 else
4077 Diag(Tok: EmbedTok, DiagID: diag::ext_pp_embed_directive)
4078 << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0);
4079
4080 // Parse the filename header
4081 Token FilenameTok;
4082 if (LexHeaderName(Result&: FilenameTok))
4083 return;
4084
4085 if (FilenameTok.isNot(K: tok::header_name)) {
4086 Diag(Loc: FilenameTok.getLocation(), DiagID: diag::err_pp_expects_filename);
4087 if (FilenameTok.isNot(K: tok::eod))
4088 DiscardUntilEndOfDirective();
4089 return;
4090 }
4091
4092 // Parse the optional sequence of
4093 // directive-parameters:
4094 // identifier parameter-name-list[opt] directive-argument-list[opt]
4095 // directive-argument-list:
4096 // '(' balanced-token-sequence ')'
4097 // parameter-name-list:
4098 // '::' identifier parameter-name-list[opt]
4099 Token CurTok;
4100 std::optional<LexEmbedParametersResult> Params =
4101 LexEmbedParameters(CurTok, /*ForHasEmbed=*/false);
4102
4103 assert((Params || CurTok.is(tok::eod)) &&
4104 "expected success or to be at the end of the directive");
4105 if (!Params)
4106 return;
4107
4108 // Now, splat the data out!
4109 SmallString<128> FilenameBuffer;
4110 StringRef Filename = getSpelling(Tok: FilenameTok, Buffer&: FilenameBuffer);
4111 StringRef OriginalFilename = Filename;
4112 bool isAngled =
4113 GetIncludeFilenameSpelling(Loc: FilenameTok.getLocation(), Buffer&: Filename);
4114
4115 // If GetIncludeFilenameSpelling set the start ptr to null, there was an
4116 // error.
4117 if (Filename.empty())
4118 return;
4119
4120 OptionalFileEntryRef MaybeFileRef =
4121 this->LookupEmbedFile(Filename, isAngled, /*OpenFile=*/true);
4122 if (!MaybeFileRef) {
4123 // could not find file
4124 if (Callbacks && Callbacks->EmbedFileNotFound(FileName: Filename)) {
4125 return;
4126 }
4127 Diag(Tok: FilenameTok, DiagID: diag::err_pp_file_not_found) << Filename;
4128 return;
4129 }
4130
4131 if (MaybeFileRef->isDeviceFile()) {
4132 Diag(Tok: FilenameTok, DiagID: diag::err_pp_embed_device_file) << Filename;
4133 return;
4134 }
4135
4136 std::optional<llvm::MemoryBufferRef> MaybeFile =
4137 getSourceManager().getMemoryBufferForFileOrNone(File: *MaybeFileRef);
4138 if (!MaybeFile) {
4139 // could not find file
4140 Diag(Tok: FilenameTok, DiagID: diag::err_cannot_open_file)
4141 << Filename << "a buffer to the contents could not be created";
4142 return;
4143 }
4144 StringRef BinaryContents = MaybeFile->getBuffer();
4145
4146 // The order is important between 'offset' and 'limit'; we want to offset
4147 // first and then limit second; otherwise we may reduce the notional resource
4148 // size to something too small to offset into.
4149 if (Params->MaybeOffsetParam) {
4150 // FIXME: just like with the limit() and if_empty() parameters, this loses
4151 // source fidelity in the AST; it has no idea that there was an offset
4152 // involved.
4153 // offsets all the way to the end of the file make for an empty file.
4154 BinaryContents = BinaryContents.substr(Start: Params->MaybeOffsetParam->Offset);
4155 }
4156
4157 if (Params->MaybeLimitParam) {
4158 // FIXME: just like with the clang::offset() and if_empty() parameters,
4159 // this loses source fidelity in the AST; it has no idea there was a limit
4160 // involved.
4161 BinaryContents = BinaryContents.substr(Start: 0, N: Params->MaybeLimitParam->Limit);
4162 }
4163
4164 if (Callbacks)
4165 Callbacks->EmbedDirective(HashLoc, FileName: Filename, IsAngled: isAngled, File: MaybeFileRef,
4166 Params: *Params);
4167 // getSpelling() may return a buffer from the token itself or it may use the
4168 // SmallString buffer we provided. getSpelling() may also return a string that
4169 // is actually longer than FilenameTok.getLength(), so we first pass a
4170 // locally created buffer to getSpelling() to get the string of real length
4171 // and then we allocate a long living buffer because the buffer we used
4172 // previously will only live till the end of this function and we need
4173 // filename info to live longer.
4174 void *Mem = BP.Allocate(Size: OriginalFilename.size(), Alignment: alignof(char *));
4175 memcpy(dest: Mem, src: OriginalFilename.data(), n: OriginalFilename.size());
4176 StringRef FilenameToGo =
4177 StringRef(static_cast<char *>(Mem), OriginalFilename.size());
4178 HandleEmbedDirectiveImpl(HashLoc, Params: *Params, BinaryContents, FileName: FilenameToGo);
4179}
4180
4181/// HandleCXXImportDirective - Handle the C++ modules import directives
4182///
4183/// pp-import:
4184/// export[opt] import header-name pp-tokens[opt] ; new-line
4185/// export[opt] import header-name-tokens pp-tokens[opt] ; new-line
4186/// export[opt] import pp-tokens ; new-line
4187///
4188/// The header importing are replaced by annot_header_unit token, and the
4189/// lexed module name are replaced by annot_module_name token.
4190void Preprocessor::HandleCXXImportDirective(Token ImportTok) {
4191 assert(getLangOpts().CPlusPlusModules && ImportTok.is(tok::kw_import));
4192 llvm::SaveAndRestore<bool> SaveImportingCXXModules(
4193 this->ImportingCXXNamedModules, true);
4194
4195 if (LastExportKeyword.is(K: tok::kw_export))
4196 LastExportKeyword.startToken();
4197
4198 Token Tok;
4199 if (LexHeaderName(Result&: Tok)) {
4200 if (Tok.isNot(K: tok::eod))
4201 CheckEndOfDirective(DirType: ImportTok.getIdentifierInfo()->getName());
4202 return;
4203 }
4204
4205 SourceLocation UseLoc = ImportTok.getLocation();
4206 SmallVector<Token, 4> DirToks{ImportTok};
4207 SmallVector<IdentifierLoc, 2> Path;
4208 bool ImportingHeader = false;
4209 bool IsPartition = false;
4210 std::string FlatName;
4211 switch (Tok.getKind()) {
4212 case tok::header_name:
4213 ImportingHeader = true;
4214 DirToks.push_back(Elt: Tok);
4215 Lex(Result&: DirToks.emplace_back());
4216 break;
4217 case tok::colon:
4218 IsPartition = true;
4219 DirToks.push_back(Elt: Tok);
4220 UseLoc = Tok.getLocation();
4221 Lex(Result&: Tok);
4222 [[fallthrough]];
4223 case tok::identifier: {
4224 bool LeadingSpace = Tok.hasLeadingSpace();
4225 unsigned NumToksInDirective = DirToks.size();
4226 if (LexModuleNameContinue(Tok, UseLoc, Suffix&: DirToks, Path)) {
4227 if (Tok.isNot(K: tok::eod))
4228 CheckEndOfDirective(DirType: ImportTok.getIdentifierInfo()->getName(),
4229 /*EnableMacros=*/false, ExtraToks: &DirToks);
4230 EnterModuleSuffixTokenStream(Toks: DirToks);
4231 return;
4232 }
4233
4234 // Clean the module-name tokens and replace these tokens with
4235 // annot_module_name.
4236 DirToks.resize(N: NumToksInDirective);
4237 ModuleNameLoc *NameLoc = ModuleNameLoc::Create(PP&: *this, Path);
4238 DirToks.emplace_back();
4239 DirToks.back().setKind(tok::annot_module_name);
4240 DirToks.back().setAnnotationRange(NameLoc->getRange());
4241 DirToks.back().setAnnotationValue(static_cast<void *>(NameLoc));
4242 DirToks.back().setFlagValue(Flag: Token::LeadingSpace, Val: LeadingSpace);
4243 DirToks.push_back(Elt: Tok);
4244
4245 bool IsValid =
4246 (IsPartition && ModuleDeclState.isNamedModule()) || !IsPartition;
4247 if (Callbacks && IsValid) {
4248 if (IsPartition && ModuleDeclState.isNamedModule()) {
4249 FlatName += ModuleDeclState.getPrimaryName();
4250 FlatName += ":";
4251 }
4252
4253 FlatName += ModuleLoader::getFlatNameFromPath(Path);
4254 SourceLocation StartLoc = IsPartition ? UseLoc : Path[0].getLoc();
4255 IdentifierLoc FlatNameLoc(StartLoc, getIdentifierInfo(Name: FlatName));
4256
4257 // We don't/shouldn't load the standard c++20 modules when preprocessing.
4258 // so the imported module is nullptr.
4259 Callbacks->moduleImport(ImportLoc: ImportTok.getLocation(),
4260 Path: ModuleIdPath(FlatNameLoc),
4261 /*Imported=*/nullptr);
4262 }
4263 break;
4264 }
4265 default:
4266 DirToks.push_back(Elt: Tok);
4267 break;
4268 }
4269
4270 // Consume the pp-import-suffix and expand any macros in it now, if we're not
4271 // at the semicolon already.
4272 if (!DirToks.back().isOneOf(Ks: tok::semi, Ks: tok::eod))
4273 CollectPPImportSuffix(Toks&: DirToks);
4274
4275 if (DirToks.back().isNot(K: tok::eod))
4276 CheckEndOfDirective(DirType: ImportTok.getIdentifierInfo()->getName());
4277 else
4278 DirToks.pop_back();
4279
4280 // This is not a pp-import after all.
4281 if (DirToks.back().isNot(K: tok::semi)) {
4282 EnterModuleSuffixTokenStream(Toks: DirToks);
4283 return;
4284 }
4285
4286 if (ImportingHeader) {
4287 // C++2a [cpp.module]p1:
4288 // The ';' preprocessing-token terminating a pp-import shall not have
4289 // been produced by macro replacement.
4290 SourceLocation SemiLoc = DirToks.back().getLocation();
4291 if (SemiLoc.isMacroID())
4292 Diag(Loc: SemiLoc, DiagID: diag::err_header_import_semi_in_macro);
4293
4294 auto Action = HandleHeaderIncludeOrImport(
4295 /*HashLoc*/ SourceLocation(), IncludeTok&: ImportTok, FilenameTok&: Tok, EndLoc: SemiLoc);
4296 switch (Action.Kind) {
4297 case ImportAction::None:
4298 break;
4299
4300 case ImportAction::ModuleBegin:
4301 // Let the parser know we're textually entering the module.
4302 DirToks.emplace_back();
4303 DirToks.back().startToken();
4304 DirToks.back().setKind(tok::annot_module_begin);
4305 DirToks.back().setLocation(SemiLoc);
4306 DirToks.back().setAnnotationEndLoc(SemiLoc);
4307 DirToks.back().setAnnotationValue(Action.ModuleForHeader);
4308 [[fallthrough]];
4309
4310 case ImportAction::ModuleImport:
4311 case ImportAction::HeaderUnitImport:
4312 case ImportAction::SkippedModuleImport:
4313 // We chose to import (or textually enter) the file. Convert the
4314 // header-name token into a header unit annotation token.
4315 DirToks[1].setKind(tok::annot_header_unit);
4316 DirToks[1].setAnnotationEndLoc(DirToks[0].getLocation());
4317 DirToks[1].setAnnotationValue(Action.ModuleForHeader);
4318 // FIXME: Call the moduleImport callback?
4319 break;
4320 case ImportAction::Failure:
4321 assert(TheModuleLoader.HadFatalFailure &&
4322 "This should be an early exit only to a fatal error");
4323 CurLexer->cutOffLexing();
4324 return;
4325 }
4326 }
4327
4328 EnterModuleSuffixTokenStream(Toks: DirToks);
4329}
4330
4331/// HandleCXXModuleDirective - Handle C++ module declaration directives.
4332///
4333/// pp-module:
4334/// export[opt] module pp-tokens[opt] ; new-line
4335///
4336/// pp-module-name:
4337/// pp-module-name-qualifier[opt] identifier
4338/// pp-module-partition:
4339/// : pp-module-name-qualifier[opt] identifier
4340/// pp-module-name-qualifier:
4341/// identifier .
4342/// pp-module-name-qualifier identifier .
4343///
4344/// global-module-fragment:
4345/// module-keyword ; declaration-seq[opt]
4346///
4347/// private-module-fragment:
4348/// module-keyword : private ; declaration-seq[opt]
4349///
4350/// The lexed module name are replaced by annot_module_name token.
4351void Preprocessor::HandleCXXModuleDirective(Token ModuleTok) {
4352 assert(getLangOpts().CPlusPlusModules && ModuleTok.is(tok::kw_module));
4353 Token Introducer = ModuleTok;
4354 if (LastExportKeyword.is(K: tok::kw_export)) {
4355 Introducer = LastExportKeyword;
4356 LastExportKeyword.startToken();
4357 }
4358
4359 SourceLocation StartLoc = Introducer.getLocation();
4360
4361 Token Tok;
4362 SourceLocation UseLoc = ModuleTok.getLocation();
4363 SmallVector<Token, 4> DirToks{ModuleTok};
4364 SmallVector<IdentifierLoc, 2> Path, Partition;
4365 LexUnexpandedToken(Result&: Tok);
4366
4367 switch (Tok.getKind()) {
4368 // Global Module Fragment.
4369 case tok::semi:
4370 DirToks.push_back(Elt: Tok);
4371 break;
4372 case tok::colon:
4373 DirToks.push_back(Elt: Tok);
4374 LexUnexpandedToken(Result&: Tok);
4375 if (Tok.isNot(K: tok::kw_private)) {
4376 if (Tok.isNot(K: tok::eod))
4377 CheckEndOfDirective(DirType: ModuleTok.getIdentifierInfo()->getName(),
4378 /*EnableMacros=*/false, ExtraToks: &DirToks);
4379 EnterModuleSuffixTokenStream(Toks: DirToks);
4380 return;
4381 }
4382 DirToks.push_back(Elt: Tok);
4383 break;
4384 case tok::identifier: {
4385 bool LeadingSpace = Tok.hasLeadingSpace();
4386 unsigned NumToksInDirective = DirToks.size();
4387
4388 // C++ [cpp.module]p3: Any preprocessing tokens after the module
4389 // preprocessing token in the module directive are processed just as in
4390 // normal text.
4391 //
4392 // P3034R1 Module Declarations Shouldn’t be Macros.
4393 if (LexModuleNameContinue(Tok, UseLoc, Suffix&: DirToks, Path,
4394 /*AllowMacroExpansion=*/false)) {
4395 if (Tok.isNot(K: tok::eod))
4396 CheckEndOfDirective(DirType: ModuleTok.getIdentifierInfo()->getName(),
4397 /*EnableMacros=*/false, ExtraToks: &DirToks);
4398 EnterModuleSuffixTokenStream(Toks: DirToks);
4399 return;
4400 }
4401
4402 ModuleNameLoc *NameLoc = ModuleNameLoc::Create(PP&: *this, Path);
4403 DirToks.resize(N: NumToksInDirective);
4404 DirToks.emplace_back();
4405 DirToks.back().setKind(tok::annot_module_name);
4406 DirToks.back().setAnnotationRange(NameLoc->getRange());
4407 DirToks.back().setAnnotationValue(static_cast<void *>(NameLoc));
4408 DirToks.back().setFlagValue(Flag: Token::LeadingSpace, Val: LeadingSpace);
4409 DirToks.push_back(Elt: Tok);
4410
4411 // C++20 [cpp.module]p
4412 // The pp-tokens, if any, of a pp-module shall be of the form:
4413 // pp-module-name pp-module-partition[opt] pp-tokens[opt]
4414 if (Tok.is(K: tok::colon)) {
4415 NumToksInDirective = DirToks.size();
4416 LexUnexpandedToken(Result&: Tok);
4417 LeadingSpace = Tok.hasLeadingSpace();
4418 if (LexModuleNameContinue(Tok, UseLoc, Suffix&: DirToks, Path&: Partition,
4419 /*AllowMacroExpansion=*/false,
4420 /*IsPartition=*/true)) {
4421 if (Tok.isNot(K: tok::eod))
4422 CheckEndOfDirective(DirType: ModuleTok.getIdentifierInfo()->getName(),
4423 /*EnableMacros=*/false, ExtraToks: &DirToks);
4424 EnterModuleSuffixTokenStream(Toks: DirToks);
4425 return;
4426 }
4427
4428 ModuleNameLoc *PartitionLoc = ModuleNameLoc::Create(PP&: *this, Path: Partition);
4429 DirToks.resize(N: NumToksInDirective);
4430 DirToks.emplace_back();
4431 DirToks.back().setKind(tok::annot_module_name);
4432 DirToks.back().setAnnotationRange(NameLoc->getRange());
4433 DirToks.back().setAnnotationValue(static_cast<void *>(PartitionLoc));
4434 DirToks.back().setFlagValue(Flag: Token::LeadingSpace, Val: LeadingSpace);
4435 DirToks.push_back(Elt: Tok);
4436 }
4437
4438 // If the current token is a macro definition, put it back to token stream
4439 // and expand any macros in it later.
4440 //
4441 // export module M ATTR(some_attr); // -D'ATTR(x)=[[x]]'
4442 //
4443 // Current token is `ATTR`.
4444 if (Tok.is(K: tok::identifier) &&
4445 getMacroDefinition(II: Tok.getIdentifierInfo())) {
4446 std::unique_ptr<Token[]> TokCopy = std::make_unique<Token[]>(num: 1);
4447 TokCopy[0] = Tok;
4448 EnterTokenStream(Toks: std::move(TokCopy), /*NumToks=*/1,
4449 /*DisableMacroExpansion=*/false, /*IsReinject=*/false);
4450 Lex(Result&: Tok);
4451 DirToks.back() = Tok;
4452 }
4453 break;
4454 }
4455 default:
4456 DirToks.push_back(Elt: Tok);
4457 break;
4458 }
4459
4460 // Consume the pp-import-suffix and expand any macros in it now, if we're not
4461 // at the semicolon already.
4462 SourceLocation End = DirToks.back().getLocation();
4463 std::optional<Token> NextPPTok = DirToks.back();
4464 if (DirToks.back().is(K: tok::eod)) {
4465 NextPPTok = peekNextPPToken();
4466 if (NextPPTok && NextPPTok->is(K: tok::raw_identifier))
4467 LookUpIdentifierInfo(Identifier&: *NextPPTok);
4468 }
4469
4470 // Only ';' and '[' are allowed after module name.
4471 // We also check 'private' because the previous is not a module name.
4472 if (!NextPPTok->isOneOf(Ks: tok::semi, Ks: tok::eod, Ks: tok::l_square, Ks: tok::kw_private))
4473 Diag(Tok: *NextPPTok, DiagID: diag::err_pp_unexpected_tok_after_module_name)
4474 << getSpelling(Tok: *NextPPTok);
4475
4476 if (!DirToks.back().isOneOf(Ks: tok::semi, Ks: tok::eod)) {
4477 // Consume the pp-import-suffix and expand any macros in it now. We'll add
4478 // it back into the token stream later.
4479 CollectPPImportSuffix(Toks&: DirToks);
4480 End = DirToks.back().getLocation();
4481 }
4482
4483 if (DirToks.back().isNot(K: tok::eod))
4484 End = CheckEndOfDirective(DirType: ModuleTok.getIdentifierInfo()->getName(),
4485 /*EnableMacros=*/false, ExtraToks: &DirToks);
4486 else
4487 End = DirToks.pop_back_val().getLocation();
4488
4489 if (!IncludeMacroStack.empty()) {
4490 Diag(Loc: StartLoc, DiagID: diag::err_pp_module_decl_in_header)
4491 << SourceRange(StartLoc, End);
4492 }
4493
4494 if (CurPPLexer->getConditionalStackDepth() != 0) {
4495 Diag(Loc: StartLoc, DiagID: diag::err_pp_cond_span_module_decl)
4496 << SourceRange(StartLoc, End);
4497 }
4498 EnterModuleSuffixTokenStream(Toks: DirToks);
4499}
4500