1//===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Implements # directive processing for the Preprocessor.
11///
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/AttributeCommonInfo.h"
15#include "clang/Basic/Attributes.h"
16#include "clang/Basic/CharInfo.h"
17#include "clang/Basic/DirectoryEntry.h"
18#include "clang/Basic/FileManager.h"
19#include "clang/Basic/IdentifierTable.h"
20#include "clang/Basic/LangOptions.h"
21#include "clang/Basic/Module.h"
22#include "clang/Basic/SourceLocation.h"
23#include "clang/Basic/SourceManager.h"
24#include "clang/Basic/TargetInfo.h"
25#include "clang/Basic/TokenKinds.h"
26#include "clang/Lex/CodeCompletionHandler.h"
27#include "clang/Lex/HeaderSearch.h"
28#include "clang/Lex/LexDiagnostic.h"
29#include "clang/Lex/LiteralSupport.h"
30#include "clang/Lex/MacroInfo.h"
31#include "clang/Lex/ModuleLoader.h"
32#include "clang/Lex/ModuleMap.h"
33#include "clang/Lex/PPCallbacks.h"
34#include "clang/Lex/Pragma.h"
35#include "clang/Lex/Preprocessor.h"
36#include "clang/Lex/PreprocessorOptions.h"
37#include "clang/Lex/Token.h"
38#include "clang/Lex/VariadicMacroSupport.h"
39#include "llvm/ADT/ArrayRef.h"
40#include "llvm/ADT/STLExtras.h"
41#include "llvm/ADT/ScopeExit.h"
42#include "llvm/ADT/SmallVector.h"
43#include "llvm/ADT/StringExtras.h"
44#include "llvm/ADT/StringRef.h"
45#include "llvm/ADT/StringSwitch.h"
46#include "llvm/Support/ErrorHandling.h"
47#include "llvm/Support/Path.h"
48#include "llvm/Support/SaveAndRestore.h"
49#include <algorithm>
50#include <cassert>
51#include <cstddef>
52#include <cstring>
53#include <optional>
54#include <string>
55#include <utility>
56
57using namespace clang;
58
59//===----------------------------------------------------------------------===//
60// Utility Methods for Preprocessor Directive Handling.
61//===----------------------------------------------------------------------===//
62
63MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
64 static_assert(std::is_trivially_destructible_v<MacroInfo>, "");
65 return new (BP) MacroInfo(L);
66}
67
68DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
69 SourceLocation Loc) {
70 return new (BP) DefMacroDirective(MI, Loc);
71}
72
73UndefMacroDirective *
74Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
75 return new (BP) UndefMacroDirective(UndefLoc);
76}
77
78VisibilityMacroDirective *
79Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
80 bool isPublic) {
81 return new (BP) VisibilityMacroDirective(Loc, isPublic);
82}
83
84/// Read and discard all tokens remaining on the current line until
85/// the tok::eod token is found.
86SourceRange Preprocessor::DiscardUntilEndOfDirective(
87 Token &Tmp, SmallVectorImpl<Token> *DiscardedToks) {
88 SourceRange Res;
89 auto ReadNextTok = [&]() {
90 LexUnexpandedToken(Result&: Tmp);
91 if (DiscardedToks && Tmp.isNot(K: tok::eod))
92 DiscardedToks->push_back(Elt: Tmp);
93 };
94 ReadNextTok();
95 Res.setBegin(Tmp.getLocation());
96 while (Tmp.isNot(K: tok::eod)) {
97 assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens");
98 ReadNextTok();
99 }
100 Res.setEnd(Tmp.getLocation());
101 return Res;
102}
103
/// Classifies a #define/#undef of a name by the kind of warning it deserves.
enum MacroDiag {
  /// Not a reserved identifier.
  MD_NoWarn,
  /// Macro hides keyword, enabled by default.
  MD_KeywordDef,
  /// #define or #undef of a reserved id, disabled by default.
  MD_ReservedMacro,
  /// The name is classified as a reserved C++ attribute name.
  MD_ReservedAttributeIdentifier
};
111
/// Possible %select values for the pp_err_elif_after_else and
/// pp_err_elif_without_if diagnostics. The numeric values are spelled out
/// because they are streamed into those diagnostics as %select indices.
enum PPElifDiag {
  PED_Elif = 0,
  PED_Elifdef = 1,
  PED_Elifndef = 2
};
119
120static bool isFeatureTestMacro(StringRef MacroName) {
121 // list from:
122 // * https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html
123 // * https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160
124 // * man 7 feature_test_macros
125 // The list must be sorted for correct binary search.
126 static constexpr StringRef ReservedMacro[] = {
127 "_ATFILE_SOURCE",
128 "_BSD_SOURCE",
129 "_CRT_NONSTDC_NO_WARNINGS",
130 "_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES",
131 "_CRT_SECURE_NO_WARNINGS",
132 "_FILE_OFFSET_BITS",
133 "_FORTIFY_SOURCE",
134 "_GLIBCXX_ASSERTIONS",
135 "_GLIBCXX_CONCEPT_CHECKS",
136 "_GLIBCXX_DEBUG",
137 "_GLIBCXX_DEBUG_PEDANTIC",
138 "_GLIBCXX_PARALLEL",
139 "_GLIBCXX_PARALLEL_ASSERTIONS",
140 "_GLIBCXX_SANITIZE_VECTOR",
141 "_GLIBCXX_USE_CXX11_ABI",
142 "_GLIBCXX_USE_DEPRECATED",
143 "_GNU_SOURCE",
144 "_ISOC11_SOURCE",
145 "_ISOC95_SOURCE",
146 "_ISOC99_SOURCE",
147 "_LARGEFILE64_SOURCE",
148 "_POSIX_C_SOURCE",
149 "_REENTRANT",
150 "_SVID_SOURCE",
151 "_THREAD_SAFE",
152 "_XOPEN_SOURCE",
153 "_XOPEN_SOURCE_EXTENDED",
154 "__STDCPP_WANT_MATH_SPEC_FUNCS__",
155 "__STDC_FORMAT_MACROS",
156 };
157 return llvm::binary_search(Range: ReservedMacro, Value&: MacroName);
158}
159
160static bool isLanguageDefinedBuiltin(const SourceManager &SourceMgr,
161 const MacroInfo *MI,
162 const StringRef MacroName) {
163 // If this is a macro with special handling (like __LINE__) then it's language
164 // defined.
165 if (MI->isBuiltinMacro())
166 return true;
167 // Builtin macros are defined in the builtin file
168 if (!SourceMgr.isWrittenInBuiltinFile(Loc: MI->getDefinitionLoc()))
169 return false;
170 // C defines macros starting with __STDC, and C++ defines macros starting with
171 // __STDCPP
172 if (MacroName.starts_with(Prefix: "__STDC"))
173 return true;
174 // C++ defines the __cplusplus macro
175 if (MacroName == "__cplusplus")
176 return true;
177 // C++ defines various feature-test macros starting with __cpp
178 if (MacroName.starts_with(Prefix: "__cpp"))
179 return true;
180 // Anything else isn't language-defined
181 return false;
182}
183
184static bool isReservedCXXAttributeName(Preprocessor &PP, IdentifierInfo *II) {
185 const LangOptions &Lang = PP.getLangOpts();
186 if (Lang.CPlusPlus &&
187 hasAttribute(Syntax: AttributeCommonInfo::AS_CXX11, /* Scope*/ nullptr, Attr: II,
188 Target: PP.getTargetInfo(), LangOpts: Lang, /*CheckPlugins*/ false) > 0) {
189 AttributeCommonInfo::AttrArgsInfo AttrArgsInfo =
190 AttributeCommonInfo::getCXX11AttrArgsInfo(Name: II);
191 if (AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Required)
192 return PP.isNextPPTokenOneOf(Ks: tok::l_paren);
193
194 return !PP.isNextPPTokenOneOf(Ks: tok::l_paren) ||
195 AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Optional;
196 }
197 return false;
198}
199
200static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
201 const LangOptions &Lang = PP.getLangOpts();
202 StringRef Text = II->getName();
203 if (isReservedInAllContexts(Status: II->isReserved(LangOpts: Lang)))
204 return isFeatureTestMacro(MacroName: Text) ? MD_NoWarn : MD_ReservedMacro;
205 if (II->isKeyword(LangOpts: Lang))
206 return MD_KeywordDef;
207 if (Lang.CPlusPlus11 && (Text == "override" || Text == "final"))
208 return MD_KeywordDef;
209 if (isReservedCXXAttributeName(PP, II))
210 return MD_ReservedAttributeIdentifier;
211 return MD_NoWarn;
212}
213
214static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
215 const LangOptions &Lang = PP.getLangOpts();
216 // Do not warn on keyword undef. It is generally harmless and widely used.
217 if (isReservedInAllContexts(Status: II->isReserved(LangOpts: Lang)))
218 return MD_ReservedMacro;
219 if (isReservedCXXAttributeName(PP, II))
220 return MD_ReservedAttributeIdentifier;
221 return MD_NoWarn;
222}
223
224// Return true if we want to issue a diagnostic by default if we
225// encounter this name in a #include with the wrong case. For now,
226// this includes the standard C and C++ headers, Posix headers,
227// and Boost headers. Improper case for these #includes is a
228// potential portability issue.
229static bool warnByDefaultOnWrongCase(StringRef Include) {
230 // If the first component of the path is "boost", treat this like a standard header
231 // for the purposes of diagnostics.
232 if (::llvm::sys::path::begin(path: Include)->equals_insensitive(RHS: "boost"))
233 return true;
234
235 // "condition_variable" is the longest standard header name at 18 characters.
236 // If the include file name is longer than that, it can't be a standard header.
237 static const size_t MaxStdHeaderNameLen = 18u;
238 if (Include.size() > MaxStdHeaderNameLen)
239 return false;
240
241 // Lowercase and normalize the search string.
242 SmallString<32> LowerInclude{Include};
243 for (char &Ch : LowerInclude) {
244 // In the ASCII range?
245 if (static_cast<unsigned char>(Ch) > 0x7f)
246 return false; // Can't be a standard header
247 // ASCII lowercase:
248 if (Ch >= 'A' && Ch <= 'Z')
249 Ch += 'a' - 'A';
250 // Normalize path separators for comparison purposes.
251 else if (::llvm::sys::path::is_separator(value: Ch))
252 Ch = '/';
253 }
254
255 // The standard C/C++ and Posix headers
256 return llvm::StringSwitch<bool>(LowerInclude)
257 // C library headers
258 .Cases(CaseStrings: {"assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h"}, Value: true)
259 .Cases(CaseStrings: {"float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h"},
260 Value: true)
261 .Cases(CaseStrings: {"math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h"}, Value: true)
262 .Cases(CaseStrings: {"stdatomic.h", "stdbool.h", "stdckdint.h", "stdcountof.h"}, Value: true)
263 .Cases(CaseStrings: {"stddef.h", "stdint.h", "stdio.h", "stdlib.h", "stdnoreturn.h"},
264 Value: true)
265 .Cases(CaseStrings: {"string.h", "tgmath.h", "threads.h", "time.h", "uchar.h"}, Value: true)
266 .Cases(CaseStrings: {"wchar.h", "wctype.h"}, Value: true)
267
268 // C++ headers for C library facilities
269 .Cases(CaseStrings: {"cassert", "ccomplex", "cctype", "cerrno", "cfenv"}, Value: true)
270 .Cases(CaseStrings: {"cfloat", "cinttypes", "ciso646", "climits", "clocale"}, Value: true)
271 .Cases(CaseStrings: {"cmath", "csetjmp", "csignal", "cstdalign", "cstdarg"}, Value: true)
272 .Cases(CaseStrings: {"cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib"}, Value: true)
273 .Cases(CaseStrings: {"cstring", "ctgmath", "ctime", "cuchar", "cwchar"}, Value: true)
274 .Case(S: "cwctype", Value: true)
275
276 // C++ library headers
277 .Cases(CaseStrings: {"algorithm", "fstream", "list", "regex", "thread"}, Value: true)
278 .Cases(CaseStrings: {"array", "functional", "locale", "scoped_allocator", "tuple"},
279 Value: true)
280 .Cases(CaseStrings: {"atomic", "future", "map", "set", "type_traits"}, Value: true)
281 .Cases(
282 CaseStrings: {"bitset", "initializer_list", "memory", "shared_mutex", "typeindex"},
283 Value: true)
284 .Cases(CaseStrings: {"chrono", "iomanip", "mutex", "sstream", "typeinfo"}, Value: true)
285 .Cases(CaseStrings: {"codecvt", "ios", "new", "stack", "unordered_map"}, Value: true)
286 .Cases(CaseStrings: {"complex", "iosfwd", "numeric", "stdexcept", "unordered_set"},
287 Value: true)
288 .Cases(
289 CaseStrings: {"condition_variable", "iostream", "ostream", "streambuf", "utility"},
290 Value: true)
291 .Cases(CaseStrings: {"deque", "istream", "queue", "string", "valarray"}, Value: true)
292 .Cases(CaseStrings: {"exception", "iterator", "random", "strstream", "vector"}, Value: true)
293 .Cases(CaseStrings: {"forward_list", "limits", "ratio", "system_error"}, Value: true)
294
295 // POSIX headers (which aren't also C headers)
296 .Cases(CaseStrings: {"aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h"}, Value: true)
297 .Cases(CaseStrings: {"fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h"}, Value: true)
298 .Cases(CaseStrings: {"grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h"}, Value: true)
299 .Cases(CaseStrings: {"mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h"},
300 Value: true)
301 .Cases(CaseStrings: {"netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h"},
302 Value: true)
303 .Cases(CaseStrings: {"regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h"}, Value: true)
304 .Cases(CaseStrings: {"strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h"},
305 Value: true)
306 .Cases(CaseStrings: {"sys/resource.h", "sys/select.h", "sys/sem.h", "sys/shm.h",
307 "sys/socket.h"},
308 Value: true)
309 .Cases(CaseStrings: {"sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h",
310 "sys/types.h"},
311 Value: true)
312 .Cases(
313 CaseStrings: {"sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h"},
314 Value: true)
315 .Cases(CaseStrings: {"tar.h", "termios.h", "trace.h", "ulimit.h"}, Value: true)
316 .Cases(CaseStrings: {"unistd.h", "utime.h", "utmpx.h", "wordexp.h"}, Value: true)
317 .Default(Value: false);
318}
319
320/// Find a similar string in `Candidates`.
321///
322/// \param LHS a string for a similar string in `Candidates`
323///
324/// \param Candidates the candidates to find a similar string.
325///
326/// \returns a similar string if exists. If no similar string exists,
327/// returns std::nullopt.
328static std::optional<StringRef>
329findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) {
330 // We need to check if `Candidates` has the exact case-insensitive string
331 // because the Levenshtein distance match does not care about it.
332 for (StringRef C : Candidates) {
333 if (LHS.equals_insensitive(RHS: C)) {
334 return C;
335 }
336 }
337
338 // Keep going with the Levenshtein distance match.
339 // If the LHS size is less than 3, use the LHS size minus 1 and if not,
340 // use the LHS size divided by 3.
341 size_t Length = LHS.size();
342 size_t MaxDist = Length < 3 ? Length - 1 : Length / 3;
343
344 std::optional<std::pair<StringRef, size_t>> SimilarStr;
345 for (StringRef C : Candidates) {
346 size_t CurDist = LHS.edit_distance(Other: C, AllowReplacements: true);
347 if (CurDist <= MaxDist) {
348 if (!SimilarStr) {
349 // The first similar string found.
350 SimilarStr = {C, CurDist};
351 } else if (CurDist < SimilarStr->second) {
352 // More similar string found.
353 SimilarStr = {C, CurDist};
354 }
355 }
356 }
357
358 if (SimilarStr) {
359 return SimilarStr->first;
360 } else {
361 return std::nullopt;
362 }
363}
364
365bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
366 bool *ShadowFlag) {
367 // Missing macro name?
368 if (MacroNameTok.is(K: tok::eod))
369 return Diag(Tok: MacroNameTok, DiagID: diag::err_pp_missing_macro_name);
370
371 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
372 if (!II)
373 return Diag(Tok: MacroNameTok, DiagID: diag::err_pp_macro_not_identifier);
374
375 if (II->isCPlusPlusOperatorKeyword()) {
376 // C++ 2.5p2: Alternative tokens behave the same as its primary token
377 // except for their spellings.
378 Diag(Tok: MacroNameTok, DiagID: getLangOpts().MicrosoftExt
379 ? diag::ext_pp_operator_used_as_macro_name
380 : diag::err_pp_operator_used_as_macro_name)
381 << II << MacroNameTok.getKind();
382 // Allow #defining |and| and friends for Microsoft compatibility or
383 // recovery when legacy C headers are included in C++.
384 }
385
386 if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {
387 // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
388 return Diag(Tok: MacroNameTok, DiagID: diag::err_defined_macro_name);
389 }
390
391 // If defining/undefining reserved identifier or a keyword, we need to issue
392 // a warning.
393 SourceLocation MacroNameLoc = MacroNameTok.getLocation();
394 if (ShadowFlag)
395 *ShadowFlag = false;
396 // Macro names with reserved identifiers are accepted if built-in or passed
397 // through the command line (the later may be present if -dD was used to
398 // generate the preprocessed file).
399 // NB: isInPredefinedFile() is relatively expensive, so keep it at the end
400 // of the condition.
401 if (!SourceMgr.isInSystemHeader(Loc: MacroNameLoc) &&
402 !SourceMgr.isInPredefinedFile(Loc: MacroNameLoc)) {
403 MacroDiag D = MD_NoWarn;
404 if (isDefineUndef == MU_Define) {
405 D = shouldWarnOnMacroDef(PP&: *this, II);
406 }
407 else if (isDefineUndef == MU_Undef)
408 D = shouldWarnOnMacroUndef(PP&: *this, II);
409 if (D == MD_KeywordDef) {
410 // We do not want to warn on some patterns widely used in configuration
411 // scripts. This requires analyzing next tokens, so do not issue warnings
412 // now, only inform caller.
413 if (ShadowFlag)
414 *ShadowFlag = true;
415 }
416 if (D == MD_ReservedMacro)
417 Diag(Tok: MacroNameTok, DiagID: diag::warn_pp_macro_is_reserved_id);
418 if (D == MD_ReservedAttributeIdentifier)
419 Diag(Tok: MacroNameTok, DiagID: diag::warn_pp_macro_is_reserved_attribute_id)
420 << II->getName();
421 }
422
423 // Okay, we got a good identifier.
424 return false;
425}
426
427/// Lex and validate a macro name, which occurs after a
428/// \#define or \#undef.
429///
430/// This sets the token kind to eod and discards the rest of the macro line if
431/// the macro name is invalid.
432///
433/// \param MacroNameTok Token that is expected to be a macro name.
434/// \param isDefineUndef Context in which macro is used.
435/// \param ShadowFlag Points to a flag that is set if macro shadows a keyword.
436void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
437 bool *ShadowFlag) {
438 // Read the token, don't allow macro expansion on it.
439 LexUnexpandedToken(Result&: MacroNameTok);
440
441 if (MacroNameTok.is(K: tok::code_completion)) {
442 if (CodeComplete)
443 CodeComplete->CodeCompleteMacroName(IsDefinition: isDefineUndef == MU_Define);
444 setCodeCompletionReached();
445 LexUnexpandedToken(Result&: MacroNameTok);
446 }
447
448 if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag))
449 return;
450
451 // Invalid macro name, read and discard the rest of the line and set the
452 // token kind to tok::eod if necessary.
453 if (MacroNameTok.isNot(K: tok::eod)) {
454 MacroNameTok.setKind(tok::eod);
455 DiscardUntilEndOfDirective();
456 }
457}
458
459/// Ensure that the next token is a tok::eod token.
460///
461/// If not, emit a diagnostic and consume up until the eod. If EnableMacros is
462/// true, then we consider macros that expand to zero tokens as being ok.
463///
464/// Returns the location of the end of the directive.
465SourceLocation
466Preprocessor::CheckEndOfDirective(StringRef DirType, bool EnableMacros,
467 SmallVectorImpl<Token> *ExtraToks) {
468 Token Tmp;
469 // Avoid use-of-uninitialized-memory for edge case(s) where there is no extra
470 // token to be parsed.
471 Tmp.startToken();
472 auto ReadNextTok = [this, ExtraToks, &Tmp](auto &&LexFn) {
473 std::invoke(LexFn, this, Tmp);
474 if (ExtraToks && Tmp.isNot(K: tok::eod))
475 ExtraToks->push_back(Elt: Tmp);
476 };
477 // Lex unexpanded tokens for most directives: macros might expand to zero
478 // tokens, causing us to miss diagnosing invalid lines. Some directives (like
479 // #line) allow empty macros.
480 if (EnableMacros)
481 ReadNextTok(&Preprocessor::Lex);
482 else
483 ReadNextTok(&Preprocessor::LexUnexpandedToken);
484
485 // There should be no tokens after the directive, but we allow them as an
486 // extension.
487 while (Tmp.is(K: tok::comment)) // Skip comments in -C mode.
488 ReadNextTok(&Preprocessor::LexUnexpandedToken);
489
490 if (Tmp.is(K: tok::eod))
491 return Tmp.getLocation();
492
493 // Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89,
494 // or if this is a macro-style preprocessing directive, because it is more
495 // trouble than it is worth to insert /**/ and check that there is no /**/
496 // in the range also.
497 FixItHint Hint;
498 if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
499 !CurTokenLexer)
500 Hint = FixItHint::CreateInsertion(InsertionLoc: Tmp.getLocation(),Code: "//");
501
502 unsigned DiagID = diag::ext_pp_extra_tokens_at_eol;
503 // C++20 import or module directive has no '#' prefix.
504 if (getLangOpts().CPlusPlusModules &&
505 (DirType == "import" || DirType == "module"))
506 DiagID = diag::warn_pp_extra_tokens_at_module_directive_eol;
507
508 Diag(Tok: Tmp, DiagID) << DirType << Hint;
509 return DiscardUntilEndOfDirective(DiscardedToks: ExtraToks).getEnd();
510}
511
512void Preprocessor::SuggestTypoedDirective(const Token &Tok,
513 StringRef Directive) const {
514 // If this is a `.S` file, treat unknown # directives as non-preprocessor
515 // directives.
516 if (getLangOpts().AsmPreprocessor) return;
517
518 std::vector<StringRef> Candidates = {
519 "if", "ifdef", "ifndef", "elif", "else", "endif"
520 };
521 if (LangOpts.C23 || LangOpts.CPlusPlus23)
522 Candidates.insert(position: Candidates.end(), l: {"elifdef", "elifndef"});
523
524 if (std::optional<StringRef> Sugg = findSimilarStr(LHS: Directive, Candidates)) {
525 // Directive cannot be coming from macro.
526 assert(Tok.getLocation().isFileID());
527 CharSourceRange DirectiveRange = CharSourceRange::getCharRange(
528 B: Tok.getLocation(),
529 E: Tok.getLocation().getLocWithOffset(Offset: Directive.size()));
530 StringRef SuggValue = *Sugg;
531
532 auto Hint = FixItHint::CreateReplacement(RemoveRange: DirectiveRange, Code: SuggValue);
533 Diag(Tok, DiagID: diag::warn_pp_invalid_directive) << 1 << SuggValue << Hint;
534 }
535}
536
537/// SkipExcludedConditionalBlock - We just read a \#if or related directive and
538/// decided that the subsequent tokens are in the \#if'd out portion of the
539/// file. Lex the rest of the file, until we see an \#endif. If
540/// FoundNonSkipPortion is true, then we have already emitted code for part of
541/// this \#if directive, so \#else/\#elif blocks should never be entered.
542/// If ElseOk is true, then \#else directives are ok, if not, then we have
543/// already seen one so a \#else directive is a duplicate. When this returns,
544/// the caller can lex the first valid token.
545void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
546 SourceLocation IfTokenLoc,
547 bool FoundNonSkipPortion,
548 bool FoundElse,
549 SourceLocation ElseLoc) {
550 // In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()
551 // not getting called recursively by storing the RecordedSkippedRanges
552 // DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects
553 // that RecordedSkippedRanges won't get modified and SkipRangePtr won't be
554 // invalidated. If this changes and there is a need to call
555 // SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should
556 // change to do a second lookup in endLexPass function instead of reusing the
557 // lookup pointer.
558 assert(!SkippingExcludedConditionalBlock &&
559 "calling SkipExcludedConditionalBlock recursively");
560 llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);
561
562 ++NumSkipped;
563 assert(!CurTokenLexer && "Conditional PP block cannot appear in a macro!");
564 assert(CurPPLexer && "Conditional PP block must be in a file!");
565 assert(CurLexer && "Conditional PP block but no current lexer set!");
566
567 if (PreambleConditionalStack.reachedEOFWhileSkipping())
568 PreambleConditionalStack.clearSkipInfo();
569 else
570 CurPPLexer->pushConditionalLevel(DirectiveStart: IfTokenLoc, /*isSkipping*/ WasSkipping: false,
571 FoundNonSkip: FoundNonSkipPortion, FoundElse);
572
573 // Enter raw mode to disable identifier lookup (and thus macro expansion),
574 // disabling warnings, etc.
575 CurPPLexer->LexingRawMode = true;
576 Token Tok;
577 SourceLocation endLoc;
578
579 /// Keeps track and caches skipped ranges and also retrieves a prior skipped
580 /// range if the same block is re-visited.
581 struct SkippingRangeStateTy {
582 Preprocessor &PP;
583
584 const char *BeginPtr = nullptr;
585 unsigned *SkipRangePtr = nullptr;
586
587 SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}
588
589 void beginLexPass() {
590 if (BeginPtr)
591 return; // continue skipping a block.
592
593 // Initiate a skipping block and adjust the lexer if we already skipped it
594 // before.
595 BeginPtr = PP.CurLexer->getBufferLocation();
596 SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];
597 if (*SkipRangePtr) {
598 PP.CurLexer->seek(Offset: PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,
599 /*IsAtStartOfLine*/ true);
600 }
601 }
602
603 void endLexPass(const char *Hashptr) {
604 if (!BeginPtr) {
605 // Not doing normal lexing.
606 assert(PP.CurLexer->isDependencyDirectivesLexer());
607 return;
608 }
609
610 // Finished skipping a block, record the range if it's first time visited.
611 if (!*SkipRangePtr) {
612 *SkipRangePtr = Hashptr - BeginPtr;
613 }
614 assert(*SkipRangePtr == unsigned(Hashptr - BeginPtr));
615 BeginPtr = nullptr;
616 SkipRangePtr = nullptr;
617 }
618 } SkippingRangeState(*this);
619
620 while (true) {
621 if (CurLexer->isDependencyDirectivesLexer()) {
622 CurLexer->LexDependencyDirectiveTokenWhileSkipping(Result&: Tok);
623 } else {
624 SkippingRangeState.beginLexPass();
625 while (true) {
626 CurLexer->Lex(Result&: Tok);
627
628 if (Tok.is(K: tok::code_completion)) {
629 setCodeCompletionReached();
630 if (CodeComplete)
631 CodeComplete->CodeCompleteInConditionalExclusion();
632 continue;
633 }
634
635 // There is actually no "skipped block" in the above because the module
636 // directive is not a text-line (https://wg21.link/cpp.pre#2) nor
637 // anything else that is allowed in a group
638 // (https://eel.is/c++draft/cpp.pre#nt:group-part).
639 //
640 // A preprocessor diagnostic (effective with -E) that triggers whenever
641 // a module directive is encountered where a control-line or a text-line
642 // is required.
643 if (getLangOpts().CPlusPlusModules && Tok.isAtStartOfLine() &&
644 Tok.is(K: tok::raw_identifier) &&
645 (Tok.getRawIdentifier() == "export" ||
646 Tok.getRawIdentifier() == "module")) {
647 llvm::SaveAndRestore ModuleDirectiveSkipping(LastExportKeyword);
648 LastExportKeyword.startToken();
649 LookUpIdentifierInfo(Identifier&: Tok);
650 IdentifierInfo *II = Tok.getIdentifierInfo();
651
652 if (II->getName()[0] == 'e') { // export
653 HandleModuleContextualKeyword(Result&: Tok);
654 CurLexer->Lex(Result&: Tok);
655 if (Tok.is(K: tok::raw_identifier)) {
656 LookUpIdentifierInfo(Identifier&: Tok);
657 II = Tok.getIdentifierInfo();
658 }
659 }
660
661 if (II->getName()[0] == 'm') { // module
662 // HandleModuleContextualKeyword changes the lexer state, so we need
663 // to save RawLexingMode
664 llvm::SaveAndRestore RestoreLexingRawMode(CurPPLexer->LexingRawMode,
665 false);
666 if (HandleModuleContextualKeyword(Result&: Tok)) {
667 // We just parsed a # character at the start of a line, so we're
668 // in directive mode. Tell the lexer this so any newlines we see
669 // will be converted into an EOD token (this terminates the
670 // macro).
671 CurPPLexer->ParsingPreprocessorDirective = true;
672 SourceLocation StartLoc = Tok.getLocation();
673 SourceLocation End = DiscardUntilEndOfDirective().getEnd();
674 Diag(Loc: StartLoc, DiagID: diag::err_pp_cond_span_module_decl)
675 << SourceRange(StartLoc, End);
676 CurPPLexer->ParsingPreprocessorDirective = false;
677 // Restore comment saving mode.
678 if (CurLexer)
679 CurLexer->resetExtendedTokenMode();
680 continue;
681 }
682 }
683 }
684
685 // If this is the end of the buffer, we have an error.
686 if (Tok.is(K: tok::eof)) {
687 // We don't emit errors for unterminated conditionals here,
688 // Lexer::LexEndOfFile can do that properly.
689 // Just return and let the caller lex after this #include.
690 if (PreambleConditionalStack.isRecording())
691 PreambleConditionalStack.SkipInfo.emplace(args&: HashTokenLoc, args&: IfTokenLoc,
692 args&: FoundNonSkipPortion,
693 args&: FoundElse, args&: ElseLoc);
694 break;
695 }
696
697 // If this token is not a preprocessor directive, just skip it.
698 if (Tok.isNot(K: tok::hash) || !Tok.isAtStartOfLine())
699 continue;
700
701 break;
702 }
703 }
704 if (Tok.is(K: tok::eof))
705 break;
706
707 // We just parsed a # character at the start of a line, so we're in
708 // directive mode. Tell the lexer this so any newlines we see will be
709 // converted into an EOD token (this terminates the macro).
710 CurPPLexer->ParsingPreprocessorDirective = true;
711 if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
712
713 assert(Tok.is(tok::hash));
714 const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();
715 assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());
716
717 // Read the next token, the directive flavor.
718 LexUnexpandedToken(Result&: Tok);
719
720 // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
721 // something bogus), skip it.
722 if (Tok.isNot(K: tok::raw_identifier)) {
723 CurPPLexer->ParsingPreprocessorDirective = false;
724 // Restore comment saving mode.
725 if (CurLexer) CurLexer->resetExtendedTokenMode();
726 continue;
727 }
728
729 // If the first letter isn't i or e, it isn't interesting to us. We know that
730 // this is safe in the face of spelling differences, because there is no way
731 // to spell an i/e in a strange way that is another letter. Skipping this
732 // allows us to avoid looking up the identifier info for #define/#undef and
733 // other common directives.
734 StringRef RI = Tok.getRawIdentifier();
735
736 char FirstChar = RI[0];
737 if (FirstChar >= 'a' && FirstChar <= 'z' &&
738 FirstChar != 'i' && FirstChar != 'e') {
739 CurPPLexer->ParsingPreprocessorDirective = false;
740 // Restore comment saving mode.
741 if (CurLexer) CurLexer->resetExtendedTokenMode();
742 continue;
743 }
744
745 // Get the identifier name without trigraphs or embedded newlines. Note
746 // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
747 // when skipping.
748 char DirectiveBuf[20];
749 StringRef Directive;
750 if (!Tok.needsCleaning() && RI.size() < 20) {
751 Directive = RI;
752 } else {
753 std::string DirectiveStr = getSpelling(Tok);
754 size_t IdLen = DirectiveStr.size();
755 if (IdLen >= 20) {
756 CurPPLexer->ParsingPreprocessorDirective = false;
757 // Restore comment saving mode.
758 if (CurLexer) CurLexer->resetExtendedTokenMode();
759 continue;
760 }
761 memcpy(dest: DirectiveBuf, src: &DirectiveStr[0], n: IdLen);
762 Directive = StringRef(DirectiveBuf, IdLen);
763 }
764
765 if (Directive.starts_with(Prefix: "if")) {
766 StringRef Sub = Directive.substr(Start: 2);
767 if (Sub.empty() || // "if"
768 Sub == "def" || // "ifdef"
769 Sub == "ndef") { // "ifndef"
770 // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
771 // bother parsing the condition.
772 DiscardUntilEndOfDirective();
773 CurPPLexer->pushConditionalLevel(DirectiveStart: Tok.getLocation(), /*wasskipping*/WasSkipping: true,
774 /*foundnonskip*/FoundNonSkip: false,
775 /*foundelse*/FoundElse: false);
776 } else {
777 SuggestTypoedDirective(Tok, Directive);
778 }
779 } else if (Directive[0] == 'e') {
780 StringRef Sub = Directive.substr(Start: 1);
781 if (Sub == "ndif") { // "endif"
782 PPConditionalInfo CondInfo;
783 CondInfo.WasSkipping = true; // Silence bogus warning.
784 bool InCond = CurPPLexer->popConditionalLevel(CI&: CondInfo);
785 (void)InCond; // Silence warning in no-asserts mode.
786 assert(!InCond && "Can't be skipping if not in a conditional!");
787
788 // If we popped the outermost skipping block, we're done skipping!
789 if (!CondInfo.WasSkipping) {
790 SkippingRangeState.endLexPass(Hashptr);
791 // Restore the value of LexingRawMode so that trailing comments
792 // are handled correctly, if we've reached the outermost block.
793 CurPPLexer->LexingRawMode = false;
794 endLoc = CheckEndOfDirective(DirType: "endif");
795 CurPPLexer->LexingRawMode = true;
796 if (Callbacks)
797 Callbacks->Endif(Loc: Tok.getLocation(), IfLoc: CondInfo.IfLoc);
798 break;
799 } else {
800 DiscardUntilEndOfDirective();
801 }
802 } else if (Sub == "lse") { // "else".
803 // #else directive in a skipping conditional. If not in some other
804 // skipping conditional, and if #else hasn't already been seen, enter it
805 // as a non-skipping conditional.
806 PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
807
808 if (!CondInfo.WasSkipping)
809 SkippingRangeState.endLexPass(Hashptr);
810
811 // If this is a #else with a #else before it, report the error.
812 if (CondInfo.FoundElse)
813 Diag(Tok, DiagID: diag::pp_err_else_after_else);
814
815 // Note that we've seen a #else in this conditional.
816 CondInfo.FoundElse = true;
817
818 // If the conditional is at the top level, and the #if block wasn't
819 // entered, enter the #else block now.
820 if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
821 CondInfo.FoundNonSkip = true;
822 // Restore the value of LexingRawMode so that trailing comments
823 // are handled correctly.
824 CurPPLexer->LexingRawMode = false;
825 endLoc = CheckEndOfDirective(DirType: "else");
826 CurPPLexer->LexingRawMode = true;
827 if (Callbacks)
828 Callbacks->Else(Loc: Tok.getLocation(), IfLoc: CondInfo.IfLoc);
829 break;
830 } else {
831 DiscardUntilEndOfDirective(); // C99 6.10p4.
832 }
833 } else if (Sub == "lif") { // "elif".
834 PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
835
836 if (!CondInfo.WasSkipping)
837 SkippingRangeState.endLexPass(Hashptr);
838
839 // If this is a #elif with a #else before it, report the error.
840 if (CondInfo.FoundElse)
841 Diag(Tok, DiagID: diag::pp_err_elif_after_else) << PED_Elif;
842
843 // If this is in a skipping block or if we're already handled this #if
844 // block, don't bother parsing the condition.
845 if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
846 // FIXME: We should probably do at least some minimal parsing of the
847 // condition to verify that it is well-formed. The current state
848 // allows #elif* directives with completely malformed (or missing)
849 // conditions.
850 DiscardUntilEndOfDirective();
851 } else {
852 // Restore the value of LexingRawMode so that identifiers are
853 // looked up, etc, inside the #elif expression.
854 assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
855 CurPPLexer->LexingRawMode = false;
856 IdentifierInfo *IfNDefMacro = nullptr;
857 DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
858 // Stop if Lexer became invalid after hitting code completion token.
859 if (!CurPPLexer)
860 return;
861 const bool CondValue = DER.Conditional;
862 CurPPLexer->LexingRawMode = true;
863 if (Callbacks) {
864 Callbacks->Elif(
865 Loc: Tok.getLocation(), ConditionRange: DER.ExprRange,
866 ConditionValue: (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
867 IfLoc: CondInfo.IfLoc);
868 }
869 // If this condition is true, enter it!
870 if (CondValue) {
871 CondInfo.FoundNonSkip = true;
872 break;
873 }
874 }
875 } else if (Sub == "lifdef" || // "elifdef"
876 Sub == "lifndef") { // "elifndef"
877 bool IsElifDef = Sub == "lifdef";
878 PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
879 Token DirectiveToken = Tok;
880
881 if (!CondInfo.WasSkipping)
882 SkippingRangeState.endLexPass(Hashptr);
883
884 // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode even
885 // if this branch is in a skipping block.
886 unsigned DiagID;
887 if (LangOpts.CPlusPlus)
888 DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
889 : diag::ext_cxx23_pp_directive;
890 else
891 DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
892 : diag::ext_c23_pp_directive;
893 Diag(Tok, DiagID) << (IsElifDef ? PED_Elifdef : PED_Elifndef);
894
895 // If this is a #elif with a #else before it, report the error.
896 if (CondInfo.FoundElse)
897 Diag(Tok, DiagID: diag::pp_err_elif_after_else)
898 << (IsElifDef ? PED_Elifdef : PED_Elifndef);
899
900 // If this is in a skipping block or if we're already handled this #if
901 // block, don't bother parsing the condition.
902 if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
903 // FIXME: We should probably do at least some minimal parsing of the
904 // condition to verify that it is well-formed. The current state
905 // allows #elif* directives with completely malformed (or missing)
906 // conditions.
907 DiscardUntilEndOfDirective();
908 } else {
909 // Restore the value of LexingRawMode so that identifiers are
910 // looked up, etc, inside the #elif[n]def expression.
911 assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
912 CurPPLexer->LexingRawMode = false;
913 Token MacroNameTok;
914 ReadMacroName(MacroNameTok);
915 CurPPLexer->LexingRawMode = true;
916
917 // If the macro name token is tok::eod, there was an error that was
918 // already reported.
919 if (MacroNameTok.is(K: tok::eod)) {
920 // Skip code until we get to #endif. This helps with recovery by
921 // not emitting an error when the #endif is reached.
922 continue;
923 }
924
925 emitMacroExpansionWarnings(Identifier: MacroNameTok);
926
927 CheckEndOfDirective(DirType: IsElifDef ? "elifdef" : "elifndef");
928
929 IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
930 auto MD = getMacroDefinition(II: MII);
931 MacroInfo *MI = MD.getMacroInfo();
932
933 if (Callbacks) {
934 if (IsElifDef) {
935 Callbacks->Elifdef(Loc: DirectiveToken.getLocation(), MacroNameTok,
936 MD);
937 } else {
938 Callbacks->Elifndef(Loc: DirectiveToken.getLocation(), MacroNameTok,
939 MD);
940 }
941 }
942 // If this condition is true, enter it!
943 if (static_cast<bool>(MI) == IsElifDef) {
944 CondInfo.FoundNonSkip = true;
945 break;
946 }
947 }
948 } else {
949 SuggestTypoedDirective(Tok, Directive);
950 }
951 } else {
952 SuggestTypoedDirective(Tok, Directive);
953 }
954
955 CurPPLexer->ParsingPreprocessorDirective = false;
956 // Restore comment saving mode.
957 if (CurLexer) CurLexer->resetExtendedTokenMode();
958 }
959
960 // Finally, if we are out of the conditional (saw an #endif or ran off the end
961 // of the file, just stop skipping and return to lexing whatever came after
962 // the #if block.
963 CurPPLexer->LexingRawMode = false;
964
965 // The last skipped range isn't actually skipped yet if it's truncated
966 // by the end of the preamble; we'll resume parsing after the preamble.
967 if (Callbacks && (Tok.isNot(K: tok::eof) || !isRecordingPreamble()))
968 Callbacks->SourceRangeSkipped(
969 Range: SourceRange(HashTokenLoc, endLoc.isValid()
970 ? endLoc
971 : CurPPLexer->getSourceLocation()),
972 EndifLoc: Tok.getLocation());
973}
974
975Module *Preprocessor::getModuleForLocation(SourceLocation Loc,
976 bool AllowTextual) {
977 if (!SourceMgr.isInMainFile(Loc)) {
978 // Try to determine the module of the include directive.
979 // FIXME: Look into directly passing the FileEntry from LookupFile instead.
980 FileID IDOfIncl = SourceMgr.getFileID(SpellingLoc: SourceMgr.getExpansionLoc(Loc));
981 if (auto EntryOfIncl = SourceMgr.getFileEntryRefForID(FID: IDOfIncl)) {
982 // The include comes from an included file.
983 return HeaderInfo.getModuleMap()
984 .findModuleForHeader(File: *EntryOfIncl, AllowTextual)
985 .getModule();
986 }
987 }
988
989 // This is either in the main file or not in a file at all. It belongs
990 // to the current module, if there is one.
991 return getLangOpts().CurrentModule.empty()
992 ? nullptr
993 : HeaderInfo.lookupModule(ModuleName: getLangOpts().CurrentModule, ImportLoc: Loc);
994}
995
996OptionalFileEntryRef
997Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
998 SourceLocation Loc) {
999 Module *IncM = getModuleForLocation(
1000 Loc: IncLoc, AllowTextual: LangOpts.ModulesValidateTextualHeaderIncludes);
1001
1002 // Walk up through the include stack, looking through textual headers of M
1003 // until we hit a non-textual header that we can #include. (We assume textual
1004 // headers of a module with non-textual headers aren't meant to be used to
1005 // import entities from the module.)
1006 auto &SM = getSourceManager();
1007 while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
1008 auto ID = SM.getFileID(SpellingLoc: SM.getExpansionLoc(Loc));
1009 auto FE = SM.getFileEntryRefForID(FID: ID);
1010 if (!FE)
1011 break;
1012
1013 // We want to find all possible modules that might contain this header, so
1014 // search all enclosing directories for module maps and load them.
1015 HeaderInfo.hasModuleMap(Filename: FE->getName(), /*Root*/ nullptr,
1016 IsSystem: SourceMgr.isInSystemHeader(Loc));
1017
1018 bool InPrivateHeader = false;
1019 for (auto Header : HeaderInfo.findAllModulesForHeader(File: *FE)) {
1020 if (!Header.isAccessibleFrom(M: IncM)) {
1021 // It's in a private header; we can't #include it.
1022 // FIXME: If there's a public header in some module that re-exports it,
1023 // then we could suggest including that, but it's not clear that's the
1024 // expected way to make this entity visible.
1025 InPrivateHeader = true;
1026 continue;
1027 }
1028
1029 // Don't suggest explicitly excluded headers.
1030 if (Header.getRole() == ModuleMap::ExcludedHeader)
1031 continue;
1032
1033 // We'll suggest including textual headers below if they're
1034 // include-guarded.
1035 if (Header.getRole() & ModuleMap::TextualHeader)
1036 continue;
1037
1038 // If we have a module import syntax, we shouldn't include a header to
1039 // make a particular module visible. Let the caller know they should
1040 // suggest an import instead.
1041 if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules)
1042 return std::nullopt;
1043
1044 // If this is an accessible, non-textual header of M's top-level module
1045 // that transitively includes the given location and makes the
1046 // corresponding module visible, this is the thing to #include.
1047 return *FE;
1048 }
1049
1050 // FIXME: If we're bailing out due to a private header, we shouldn't suggest
1051 // an import either.
1052 if (InPrivateHeader)
1053 return std::nullopt;
1054
1055 // If the header is includable and has an include guard, assume the
1056 // intended way to expose its contents is by #include, not by importing a
1057 // module that transitively includes it.
1058 if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(File: *FE))
1059 return *FE;
1060
1061 Loc = SM.getIncludeLoc(FID: ID);
1062 }
1063
1064 return std::nullopt;
1065}
1066
1067OptionalFileEntryRef Preprocessor::LookupFile(
1068 SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
1069 ConstSearchDirIterator FromDir, const FileEntry *FromFile,
1070 ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath,
1071 SmallVectorImpl<char> *RelativePath,
1072 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
1073 bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) {
1074 ConstSearchDirIterator CurDirLocal = nullptr;
1075 ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : CurDirLocal;
1076
1077 Module *RequestingModule = getModuleForLocation(
1078 Loc: FilenameLoc, AllowTextual: LangOpts.ModulesValidateTextualHeaderIncludes);
1079
1080 // If the header lookup mechanism may be relative to the current inclusion
1081 // stack, record the parent #includes.
1082 SmallVector<std::pair<OptionalFileEntryRef, DirectoryEntryRef>, 16> Includers;
1083 bool BuildSystemModule = false;
1084 if (!FromDir && !FromFile) {
1085 FileID FID = getCurrentFileLexer()->getFileID();
1086 OptionalFileEntryRef FileEnt = SourceMgr.getFileEntryRefForID(FID);
1087
1088 // If there is no file entry associated with this file, it must be the
1089 // predefines buffer or the module includes buffer. Any other file is not
1090 // lexed with a normal lexer, so it won't be scanned for preprocessor
1091 // directives.
1092 //
1093 // If we have the predefines buffer, resolve #include references (which come
1094 // from the -include command line argument) from the current working
1095 // directory instead of relative to the main file.
1096 //
1097 // If we have the module includes buffer, resolve #include references (which
1098 // come from header declarations in the module map) relative to the module
1099 // map file.
1100 if (!FileEnt) {
1101 if (FID == SourceMgr.getMainFileID() && MainFileDir) {
1102 auto IncludeDir =
1103 HeaderInfo.getModuleMap().shouldImportRelativeToBuiltinIncludeDir(
1104 FileName: Filename, Module: getCurrentModule())
1105 ? HeaderInfo.getModuleMap().getBuiltinDir()
1106 : MainFileDir;
1107 Includers.push_back(Elt: std::make_pair(x: std::nullopt, y&: *IncludeDir));
1108 BuildSystemModule = getCurrentModule()->IsSystem;
1109 } else if ((FileEnt = SourceMgr.getFileEntryRefForID(
1110 FID: SourceMgr.getMainFileID()))) {
1111 auto CWD = FileMgr.getOptionalDirectoryRef(DirName: ".");
1112 Includers.push_back(Elt: std::make_pair(x&: *FileEnt, y&: *CWD));
1113 }
1114 } else {
1115 Includers.push_back(Elt: std::make_pair(x&: *FileEnt, y: FileEnt->getDir()));
1116 }
1117
1118 // MSVC searches the current include stack from top to bottom for
1119 // headers included by quoted include directives.
1120 // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx
1121 if (LangOpts.MSVCCompat && !isAngled) {
1122 for (IncludeStackInfo &ISEntry : llvm::reverse(C&: IncludeMacroStack)) {
1123 if (IsFileLexer(I: ISEntry))
1124 if ((FileEnt = ISEntry.ThePPLexer->getFileEntry()))
1125 Includers.push_back(Elt: std::make_pair(x&: *FileEnt, y: FileEnt->getDir()));
1126 }
1127 }
1128 }
1129
1130 CurDir = CurDirLookup;
1131
1132 if (FromFile) {
1133 // We're supposed to start looking from after a particular file. Search
1134 // the include path until we find that file or run out of files.
1135 ConstSearchDirIterator TmpCurDir = CurDir;
1136 ConstSearchDirIterator TmpFromDir = nullptr;
1137 while (OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1138 Filename, IncludeLoc: FilenameLoc, isAngled, FromDir: TmpFromDir, CurDir: &TmpCurDir,
1139 Includers, SearchPath, RelativePath, RequestingModule,
1140 SuggestedModule, /*IsMapped=*/nullptr,
1141 /*IsFrameworkFound=*/nullptr, SkipCache)) {
1142 // Keep looking as if this file did a #include_next.
1143 TmpFromDir = TmpCurDir;
1144 ++TmpFromDir;
1145 if (&FE->getFileEntry() == FromFile) {
1146 // Found it.
1147 FromDir = TmpFromDir;
1148 CurDir = TmpCurDir;
1149 break;
1150 }
1151 }
1152 }
1153
1154 // Do a standard file entry lookup.
1155 OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1156 Filename, IncludeLoc: FilenameLoc, isAngled, FromDir, CurDir: &CurDir, Includers, SearchPath,
1157 RelativePath, RequestingModule, SuggestedModule, IsMapped,
1158 IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures);
1159 if (FE)
1160 return FE;
1161
1162 OptionalFileEntryRef CurFileEnt;
1163 // Otherwise, see if this is a subframework header. If so, this is relative
1164 // to one of the headers on the #include stack. Walk the list of the current
1165 // headers on the #include stack and pass them to HeaderInfo.
1166 if (IsFileLexer()) {
1167 if ((CurFileEnt = CurPPLexer->getFileEntry())) {
1168 if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1169 Filename, ContextFileEnt: *CurFileEnt, SearchPath, RelativePath, RequestingModule,
1170 SuggestedModule)) {
1171 return FE;
1172 }
1173 }
1174 }
1175
1176 for (IncludeStackInfo &ISEntry : llvm::reverse(C&: IncludeMacroStack)) {
1177 if (IsFileLexer(I: ISEntry)) {
1178 if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {
1179 if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1180 Filename, ContextFileEnt: *CurFileEnt, SearchPath, RelativePath,
1181 RequestingModule, SuggestedModule)) {
1182 return FE;
1183 }
1184 }
1185 }
1186 }
1187
1188 // Otherwise, we really couldn't find the file.
1189 return std::nullopt;
1190}
1191
1192OptionalFileEntryRef Preprocessor::LookupEmbedFile(StringRef Filename,
1193 bool isAngled,
1194 bool OpenFile) {
1195 FileManager &FM = this->getFileManager();
1196 if (llvm::sys::path::is_absolute(path: Filename)) {
1197 // lookup path or immediately fail
1198 return FM.getOptionalFileRef(Filename, OpenFile, /*CacheFailure=*/true,
1199 /*IsText=*/false);
1200 }
1201
1202 auto SeparateComponents = [](SmallVectorImpl<char> &LookupPath,
1203 StringRef StartingFrom, StringRef FileName,
1204 bool RemoveInitialFileComponentFromLookupPath) {
1205 llvm::sys::path::native(path: StartingFrom, result&: LookupPath);
1206 if (RemoveInitialFileComponentFromLookupPath)
1207 llvm::sys::path::remove_filename(path&: LookupPath);
1208 if (!LookupPath.empty() &&
1209 !llvm::sys::path::is_separator(value: LookupPath.back())) {
1210 LookupPath.push_back(Elt: llvm::sys::path::get_separator().front());
1211 }
1212 LookupPath.append(in_start: FileName.begin(), in_end: FileName.end());
1213 };
1214
1215 // Otherwise, it's search time!
1216 SmallString<512> LookupPath;
1217 // Non-angled lookup
1218 if (!isAngled) {
1219 OptionalFileEntryRef LookupFromFile = getCurrentFileLexer()->getFileEntry();
1220 if (LookupFromFile) {
1221 // Use file-based lookup.
1222 SmallString<1024> TmpDir;
1223 TmpDir = LookupFromFile->getDir().getName();
1224 llvm::sys::path::append(path&: TmpDir, a: Filename);
1225 if (!TmpDir.empty()) {
1226 OptionalFileEntryRef ShouldBeEntry = FM.getOptionalFileRef(
1227 Filename: TmpDir, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1228 if (ShouldBeEntry)
1229 return ShouldBeEntry;
1230 }
1231 }
1232
1233 // Otherwise, do working directory lookup.
1234 LookupPath.clear();
1235 auto MaybeWorkingDirEntry = FM.getOptionalDirectoryRef(DirName: ".");
1236 if (MaybeWorkingDirEntry) {
1237 DirectoryEntryRef WorkingDirEntry = *MaybeWorkingDirEntry;
1238 StringRef WorkingDir = WorkingDirEntry.getName();
1239 if (!WorkingDir.empty()) {
1240 SeparateComponents(LookupPath, WorkingDir, Filename, false);
1241 OptionalFileEntryRef ShouldBeEntry = FM.getOptionalFileRef(
1242 Filename: LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1243 if (ShouldBeEntry)
1244 return ShouldBeEntry;
1245 }
1246 }
1247 }
1248
1249 for (const auto &Entry : PPOpts.EmbedEntries) {
1250 LookupPath.clear();
1251 SeparateComponents(LookupPath, Entry, Filename, false);
1252 OptionalFileEntryRef ShouldBeEntry = FM.getOptionalFileRef(
1253 Filename: LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1254 if (ShouldBeEntry)
1255 return ShouldBeEntry;
1256 }
1257 return std::nullopt;
1258}
1259
1260//===----------------------------------------------------------------------===//
1261// Preprocessor Directive Handling.
1262//===----------------------------------------------------------------------===//
1263
1264class Preprocessor::ResetMacroExpansionHelper {
1265public:
1266 ResetMacroExpansionHelper(Preprocessor *pp)
1267 : PP(pp), save(pp->DisableMacroExpansion) {
1268 if (pp->MacroExpansionInDirectivesOverride)
1269 pp->DisableMacroExpansion = false;
1270 }
1271
1272 ~ResetMacroExpansionHelper() {
1273 PP->DisableMacroExpansion = save;
1274 }
1275
1276private:
1277 Preprocessor *PP;
1278 bool save;
1279};
1280
1281/// Process a directive while looking for the through header or a #pragma
1282/// hdrstop. The following directives are handled:
1283/// #include (to check if it is the through header)
1284/// #define (to warn about macros that don't match the PCH)
1285/// #pragma (to check for pragma hdrstop).
1286/// All other directives are completely discarded.
1287void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1288 SourceLocation HashLoc) {
1289 if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
1290 if (II->getPPKeywordID() == tok::pp_define) {
1291 return HandleDefineDirective(Tok&: Result,
1292 /*ImmediatelyAfterHeaderGuard=*/false);
1293 }
1294 if (SkippingUntilPCHThroughHeader &&
1295 II->getPPKeywordID() == tok::pp_include) {
1296 return HandleIncludeDirective(HashLoc, Tok&: Result);
1297 }
1298 if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
1299 Lex(Result);
1300 auto *II = Result.getIdentifierInfo();
1301 if (II && II->getName() == "hdrstop")
1302 return HandlePragmaHdrstop(Tok&: Result);
1303 }
1304 }
1305 DiscardUntilEndOfDirective();
1306}
1307
1308/// HandleDirective - This callback is invoked when the lexer sees a # token
1309/// at the start of a line. This consumes the directive, modifies the
1310/// lexer/preprocessor state, and advances the lexer(s) so that the next token
1311/// read is the correct one.
1312void Preprocessor::HandleDirective(Token &Result) {
1313 // FIXME: Traditional: # with whitespace before it not recognized by K&R?
1314
1315 // We just parsed a # or @ character at the start of a line, so we're in
1316 // directive mode. Tell the lexer this so any newlines we see will be
1317 // converted into an EOD token (which terminates the directive).
1318 CurPPLexer->ParsingPreprocessorDirective = true;
1319 if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
1320
1321 bool ImmediatelyAfterTopLevelIfndef =
1322 CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();
1323 CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();
1324
1325 ++NumDirectives;
1326
1327 // We are about to read a token. For the multiple-include optimization FA to
1328 // work, we have to remember if we had read any tokens *before* this
1329 // pp-directive.
1330 bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
1331
1332 // Save the directive-introducing token ('#', '@', or import/module in C++20)
1333 // in case we need to return it later.
1334 Token Introducer = Result;
1335
1336 // Read the next token, the directive flavor. This isn't expanded due to
1337 // C99 6.10.3p8.
1338 if (Introducer.isOneOf(Ks: tok::hash, Ks: tok::at))
1339 LexUnexpandedToken(Result);
1340
1341 // C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.:
1342 // #define A(x) #x
1343 // A(abc
1344 // #warning blah
1345 // def)
1346 // If so, the user is relying on undefined behavior, emit a diagnostic. Do
1347 // not support this for #include-like directives, since that can result in
1348 // terrible diagnostics, and does not work in GCC.
1349 if (InMacroArgs) {
1350 if (IdentifierInfo *II = Result.getIdentifierInfo()) {
1351 switch (II->getPPKeywordID()) {
1352 case tok::pp_include:
1353 case tok::pp_import:
1354 case tok::pp_include_next:
1355 case tok::pp___include_macros:
1356 case tok::pp_pragma:
1357 case tok::pp_embed:
1358 case tok::pp_module:
1359 case tok::pp___preprocessed_module:
1360 case tok::pp___preprocessed_import:
1361 Diag(Tok: Result, DiagID: diag::err_embedded_directive)
1362 << Introducer.is(K: tok::hash) << II->getName();
1363 Diag(Tok: *ArgMacro, DiagID: diag::note_macro_expansion_here)
1364 << ArgMacro->getIdentifierInfo();
1365 DiscardUntilEndOfDirective();
1366 return;
1367 default:
1368 break;
1369 }
1370 }
1371 Diag(Tok: Result, DiagID: diag::ext_embedded_directive);
1372 }
1373
1374 // Temporarily enable macro expansion if set so
1375 // and reset to previous state when returning from this function.
1376 ResetMacroExpansionHelper helper(this);
1377
1378 if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)
1379 return HandleSkippedDirectiveWhileUsingPCH(Result,
1380 HashLoc: Introducer.getLocation());
1381
1382 switch (Result.getKind()) {
1383 case tok::eod:
1384 // Ignore the null directive with regards to the multiple-include
1385 // optimization, i.e. allow the null directive to appear outside of the
1386 // include guard and still enable the multiple-include optimization.
1387 CurPPLexer->MIOpt.SetReadToken(ReadAnyTokensBeforeDirective);
1388 return; // null directive.
1389 case tok::code_completion:
1390 setCodeCompletionReached();
1391 if (CodeComplete)
1392 CodeComplete->CodeCompleteDirective(
1393 InConditional: CurPPLexer->getConditionalStackDepth() > 0);
1394 return;
1395 case tok::numeric_constant: // # 7 GNU line marker directive.
1396 // In a .S file "# 4" may be a comment so don't treat it as a preprocessor
1397 // directive. However do permit it in the predefines file, as we use line
1398 // markers to mark the builtin macros as being in a system header.
1399 if (getLangOpts().AsmPreprocessor &&
1400 SourceMgr.getFileID(SpellingLoc: Introducer.getLocation()) != getPredefinesFileID())
1401 break;
1402 return HandleDigitDirective(Tok&: Result);
1403 default:
1404 IdentifierInfo *II = Result.getIdentifierInfo();
1405 if (!II) break; // Not an identifier.
1406
1407 // Ask what the preprocessor keyword ID is.
1408 switch (II->getPPKeywordID()) {
1409 default: break;
1410 // C99 6.10.1 - Conditional Inclusion.
1411 case tok::pp_if:
1412 return HandleIfDirective(IfToken&: Result, HashToken: Introducer,
1413 ReadAnyTokensBeforeDirective);
1414 case tok::pp_ifdef:
1415 return HandleIfdefDirective(Result, HashToken: Introducer, isIfndef: false,
1416 ReadAnyTokensBeforeDirective: true /*not valid for miopt*/);
1417 case tok::pp_ifndef:
1418 return HandleIfdefDirective(Result, HashToken: Introducer, isIfndef: true,
1419 ReadAnyTokensBeforeDirective);
1420 case tok::pp_elif:
1421 case tok::pp_elifdef:
1422 case tok::pp_elifndef:
1423 return HandleElifFamilyDirective(ElifToken&: Result, HashToken: Introducer,
1424 Kind: II->getPPKeywordID());
1425
1426 case tok::pp_else:
1427 return HandleElseDirective(Result, HashToken: Introducer);
1428 case tok::pp_endif:
1429 return HandleEndifDirective(EndifToken&: Result);
1430
1431 // C99 6.10.2 - Source File Inclusion.
1432 case tok::pp_include:
1433 // Handle #include.
1434 return HandleIncludeDirective(HashLoc: Introducer.getLocation(), Tok&: Result);
1435 case tok::pp___include_macros:
1436 // Handle -imacros.
1437 return HandleIncludeMacrosDirective(HashLoc: Introducer.getLocation(), Tok&: Result);
1438
1439 // C99 6.10.3 - Macro Replacement.
1440 case tok::pp_define:
1441 return HandleDefineDirective(Tok&: Result, ImmediatelyAfterHeaderGuard: ImmediatelyAfterTopLevelIfndef);
1442 case tok::pp_undef:
1443 return HandleUndefDirective();
1444
1445 // C99 6.10.4 - Line Control.
1446 case tok::pp_line:
1447 return HandleLineDirective();
1448
1449 // C99 6.10.5 - Error Directive.
1450 case tok::pp_error:
1451 return HandleUserDiagnosticDirective(Tok&: Result, isWarning: false);
1452
1453 // C99 6.10.6 - Pragma Directive.
1454 case tok::pp_pragma:
1455 return HandlePragmaDirective(Introducer: {.Kind: PIK_HashPragma, .Loc: Introducer.getLocation()});
1456 case tok::pp_module:
1457 case tok::pp___preprocessed_module:
1458 return HandleCXXModuleDirective(Module: Result);
1459 case tok::pp___preprocessed_import:
1460 return HandleCXXImportDirective(Import: Result);
1461 // GNU Extensions.
1462 case tok::pp_import:
1463 switch (Introducer.getKind()) {
1464 case tok::hash:
1465 return HandleImportDirective(HashLoc: Introducer.getLocation(), Tok&: Result);
1466 case tok::at:
1467 return HandleObjCImportDirective(AtTok&: Introducer, ImportTok&: Result);
1468 case tok::kw_import:
1469 return HandleCXXImportDirective(Import: Result);
1470 default:
1471 llvm_unreachable("not a valid import directive");
1472 }
1473 case tok::pp_include_next:
1474 return HandleIncludeNextDirective(HashLoc: Introducer.getLocation(), Tok&: Result);
1475
1476 case tok::pp_warning:
1477 if (LangOpts.CPlusPlus)
1478 Diag(Tok: Result, DiagID: LangOpts.CPlusPlus23
1479 ? diag::warn_cxx23_compat_warning_directive
1480 : diag::ext_pp_warning_directive)
1481 << /*C++23*/ 1;
1482 else
1483 Diag(Tok: Result, DiagID: LangOpts.C23 ? diag::warn_c23_compat_warning_directive
1484 : diag::ext_pp_warning_directive)
1485 << /*C23*/ 0;
1486
1487 return HandleUserDiagnosticDirective(Tok&: Result, isWarning: true);
1488 case tok::pp_ident:
1489 return HandleIdentSCCSDirective(Tok&: Result);
1490 case tok::pp_sccs:
1491 return HandleIdentSCCSDirective(Tok&: Result);
1492 case tok::pp_embed:
1493 return HandleEmbedDirective(HashLoc: Introducer.getLocation(), Tok&: Result);
1494 case tok::pp_assert:
1495 //isExtension = true; // FIXME: implement #assert
1496 break;
1497 case tok::pp_unassert:
1498 //isExtension = true; // FIXME: implement #unassert
1499 break;
1500
1501 case tok::pp___public_macro:
1502 if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1503 return HandleMacroPublicDirective(Tok&: Result);
1504 break;
1505
1506 case tok::pp___private_macro:
1507 if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1508 return HandleMacroPrivateDirective();
1509 break;
1510 }
1511 break;
1512 }
1513
1514 // If this is a .S file, treat unknown # directives as non-preprocessor
1515 // directives. This is important because # may be a comment or introduce
1516 // various pseudo-ops. Just return the # token and push back the following
1517 // token to be lexed next time.
1518 if (getLangOpts().AsmPreprocessor) {
1519 auto Toks = std::make_unique<Token[]>(num: 2);
1520 // Return the # and the token after it.
1521 Toks[0] = Introducer;
1522 Toks[1] = Result;
1523
1524 // If the second token is a hashhash token, then we need to translate it to
1525 // unknown so the token lexer doesn't try to perform token pasting.
1526 if (Result.is(K: tok::hashhash))
1527 Toks[1].setKind(tok::unknown);
1528
1529 // Enter this token stream so that we re-lex the tokens. Make sure to
1530 // enable macro expansion, in case the token after the # is an identifier
1531 // that is expanded.
1532 EnterTokenStream(Toks: std::move(Toks), NumToks: 2, DisableMacroExpansion: false, /*IsReinject*/false);
1533 return;
1534 }
1535
1536 // If we reached here, the preprocessing token is not valid!
1537 // Start suggesting if a similar directive found.
1538 Diag(Tok: Result, DiagID: diag::err_pp_invalid_directive) << 0;
1539
1540 // Read the rest of the PP line.
1541 DiscardUntilEndOfDirective();
1542
1543 // Okay, we're done parsing the directive.
1544}
1545
1546/// GetLineValue - Convert a numeric token into an unsigned value, emitting
1547/// Diagnostic DiagID if it is invalid, and returning the value in Val.
1548static bool GetLineValue(Token &DigitTok, unsigned &Val,
1549 unsigned DiagID, Preprocessor &PP,
1550 bool IsGNULineDirective=false) {
1551 if (DigitTok.isNot(K: tok::numeric_constant)) {
1552 PP.Diag(Tok: DigitTok, DiagID);
1553
1554 if (DigitTok.isNot(K: tok::eod))
1555 PP.DiscardUntilEndOfDirective();
1556 return true;
1557 }
1558
1559 SmallString<64> IntegerBuffer;
1560 IntegerBuffer.resize(N: DigitTok.getLength());
1561 const char *DigitTokBegin = &IntegerBuffer[0];
1562 bool Invalid = false;
1563 unsigned ActualLength = PP.getSpelling(Tok: DigitTok, Buffer&: DigitTokBegin, Invalid: &Invalid);
1564 if (Invalid)
1565 return true;
1566
1567 // Verify that we have a simple digit-sequence, and compute the value. This
1568 // is always a simple digit string computed in decimal, so we do this manually
1569 // here.
1570 Val = 0;
1571 for (unsigned i = 0; i != ActualLength; ++i) {
1572 // C++1y [lex.fcon]p1:
1573 // Optional separating single quotes in a digit-sequence are ignored
1574 if (DigitTokBegin[i] == '\'')
1575 continue;
1576
1577 if (!isDigit(c: DigitTokBegin[i])) {
1578 PP.Diag(Loc: PP.AdvanceToTokenCharacter(TokStart: DigitTok.getLocation(), Char: i),
1579 DiagID: diag::err_pp_line_digit_sequence) << IsGNULineDirective;
1580 PP.DiscardUntilEndOfDirective();
1581 return true;
1582 }
1583
1584 unsigned NextVal = Val*10+(DigitTokBegin[i]-'0');
1585 if (NextVal < Val) { // overflow.
1586 PP.Diag(Tok: DigitTok, DiagID);
1587 PP.DiscardUntilEndOfDirective();
1588 return true;
1589 }
1590 Val = NextVal;
1591 }
1592
1593 if (DigitTokBegin[0] == '0' && Val)
1594 PP.Diag(Loc: DigitTok.getLocation(), DiagID: diag::warn_pp_line_decimal)
1595 << IsGNULineDirective;
1596
1597 return false;
1598}
1599
1600/// Handle a \#line directive: C99 6.10.4.
1601///
1602/// The two acceptable forms are:
1603/// \verbatim
1604/// # line digit-sequence
1605/// # line digit-sequence "s-char-sequence"
1606/// \endverbatim
1607void Preprocessor::HandleLineDirective() {
1608 // Read the line # and string argument. Per C99 6.10.4p5, these tokens are
1609 // expanded.
1610 Token DigitTok;
1611 Lex(Result&: DigitTok);
1612
1613 // Validate the number and convert it to an unsigned.
1614 unsigned LineNo;
1615 if (GetLineValue(DigitTok, Val&: LineNo, DiagID: diag::err_pp_line_requires_integer,PP&: *this))
1616 return;
1617
1618 if (LineNo == 0)
1619 Diag(Tok: DigitTok, DiagID: diag::ext_pp_line_zero);
1620
1621 // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
1622 // number greater than 2147483647". C90 requires that the line # be <= 32767.
1623 unsigned LineLimit = 32768U;
1624 if (LangOpts.C99 || LangOpts.CPlusPlus11)
1625 LineLimit = 2147483648U;
1626 if (LineNo >= LineLimit)
1627 Diag(Tok: DigitTok, DiagID: diag::ext_pp_line_too_big) << LineLimit;
1628 else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
1629 Diag(Tok: DigitTok, DiagID: diag::warn_cxx98_compat_pp_line_too_big);
1630
1631 int FilenameID = -1;
1632 Token StrTok;
1633 Lex(Result&: StrTok);
1634
1635 // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a
1636 // string followed by eod.
1637 if (StrTok.is(K: tok::eod))
1638 ; // ok
1639 else if (StrTok.isNot(K: tok::string_literal)) {
1640 Diag(Tok: StrTok, DiagID: diag::err_pp_line_invalid_filename);
1641 DiscardUntilEndOfDirective();
1642 return;
1643 } else if (StrTok.hasUDSuffix()) {
1644 Diag(Tok: StrTok, DiagID: diag::err_invalid_string_udl);
1645 DiscardUntilEndOfDirective();
1646 return;
1647 } else {
1648 // Parse and validate the string, converting it into a unique ID.
1649 StringLiteralParser Literal(StrTok, *this,
1650 StringLiteralEvalMethod::Unevaluated);
1651 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1652 if (Literal.hadError) {
1653 DiscardUntilEndOfDirective();
1654 return;
1655 }
1656 if (Literal.Pascal) {
1657 Diag(Tok: StrTok, DiagID: diag::err_pp_linemarker_invalid_filename);
1658 DiscardUntilEndOfDirective();
1659 return;
1660 }
1661 FilenameID = SourceMgr.getLineTableFilenameID(Str: Literal.GetString());
1662
1663 // Verify that there is nothing after the string, other than EOD. Because
1664 // of C99 6.10.4p5, macros that expand to empty tokens are ok.
1665 CheckEndOfDirective(DirType: "line", EnableMacros: true);
1666 }
1667
1668 // Take the file kind of the file containing the #line directive. #line
1669 // directives are often used for generated sources from the same codebase, so
1670 // the new file should generally be classified the same way as the current
1671 // file. This is visible in GCC's pre-processed output, which rewrites #line
1672 // to GNU line markers.
1673 SrcMgr::CharacteristicKind FileKind =
1674 SourceMgr.getFileCharacteristic(Loc: DigitTok.getLocation());
1675
1676 SourceMgr.AddLineNote(Loc: DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry: false,
1677 IsFileExit: false, FileKind);
1678
1679 if (Callbacks)
1680 Callbacks->FileChanged(Loc: CurPPLexer->getSourceLocation(),
1681 Reason: PPCallbacks::RenameFile, FileType: FileKind);
1682}
1683
1684/// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
1685/// marker directive.
1686static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
1687 SrcMgr::CharacteristicKind &FileKind,
1688 Preprocessor &PP) {
1689 unsigned FlagVal;
1690 Token FlagTok;
1691 PP.Lex(Result&: FlagTok);
1692 if (FlagTok.is(K: tok::eod)) return false;
1693 if (GetLineValue(DigitTok&: FlagTok, Val&: FlagVal, DiagID: diag::err_pp_linemarker_invalid_flag, PP))
1694 return true;
1695
1696 if (FlagVal == 1) {
1697 IsFileEntry = true;
1698
1699 PP.Lex(Result&: FlagTok);
1700 if (FlagTok.is(K: tok::eod)) return false;
1701 if (GetLineValue(DigitTok&: FlagTok, Val&: FlagVal, DiagID: diag::err_pp_linemarker_invalid_flag,PP))
1702 return true;
1703 } else if (FlagVal == 2) {
1704 IsFileExit = true;
1705
1706 SourceManager &SM = PP.getSourceManager();
1707 // If we are leaving the current presumed file, check to make sure the
1708 // presumed include stack isn't empty!
1709 FileID CurFileID =
1710 SM.getDecomposedExpansionLoc(Loc: FlagTok.getLocation()).first;
1711 PresumedLoc PLoc = SM.getPresumedLoc(Loc: FlagTok.getLocation());
1712 if (PLoc.isInvalid())
1713 return true;
1714
1715 // If there is no include loc (main file) or if the include loc is in a
1716 // different physical file, then we aren't in a "1" line marker flag region.
1717 SourceLocation IncLoc = PLoc.getIncludeLoc();
1718 if (IncLoc.isInvalid() ||
1719 SM.getDecomposedExpansionLoc(Loc: IncLoc).first != CurFileID) {
1720 PP.Diag(Tok: FlagTok, DiagID: diag::err_pp_linemarker_invalid_pop);
1721 PP.DiscardUntilEndOfDirective();
1722 return true;
1723 }
1724
1725 PP.Lex(Result&: FlagTok);
1726 if (FlagTok.is(K: tok::eod)) return false;
1727 if (GetLineValue(DigitTok&: FlagTok, Val&: FlagVal, DiagID: diag::err_pp_linemarker_invalid_flag,PP))
1728 return true;
1729 }
1730
1731 // We must have 3 if there are still flags.
1732 if (FlagVal != 3) {
1733 PP.Diag(Tok: FlagTok, DiagID: diag::err_pp_linemarker_invalid_flag);
1734 PP.DiscardUntilEndOfDirective();
1735 return true;
1736 }
1737
1738 FileKind = SrcMgr::C_System;
1739
1740 PP.Lex(Result&: FlagTok);
1741 if (FlagTok.is(K: tok::eod)) return false;
1742 if (GetLineValue(DigitTok&: FlagTok, Val&: FlagVal, DiagID: diag::err_pp_linemarker_invalid_flag, PP))
1743 return true;
1744
1745 // We must have 4 if there is yet another flag.
1746 if (FlagVal != 4) {
1747 PP.Diag(Tok: FlagTok, DiagID: diag::err_pp_linemarker_invalid_flag);
1748 PP.DiscardUntilEndOfDirective();
1749 return true;
1750 }
1751
1752 FileKind = SrcMgr::C_ExternCSystem;
1753
1754 PP.Lex(Result&: FlagTok);
1755 if (FlagTok.is(K: tok::eod)) return false;
1756
1757 // There are no more valid flags here.
1758 PP.Diag(Tok: FlagTok, DiagID: diag::err_pp_linemarker_invalid_flag);
1759 PP.DiscardUntilEndOfDirective();
1760 return true;
1761}
1762
1763/// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
1764/// one of the following forms:
1765///
1766/// # 42
1767/// # 42 "file" ('1' | '2')?
1768/// # 42 "file" ('1' | '2')? '3' '4'?
1769///
1770void Preprocessor::HandleDigitDirective(Token &DigitTok) {
1771 // Validate the number and convert it to an unsigned. GNU does not have a
1772 // line # limit other than it fit in 32-bits.
1773 unsigned LineNo;
1774 if (GetLineValue(DigitTok, Val&: LineNo, DiagID: diag::err_pp_linemarker_requires_integer,
1775 PP&: *this, IsGNULineDirective: true))
1776 return;
1777
1778 Token StrTok;
1779 Lex(Result&: StrTok);
1780
1781 bool IsFileEntry = false, IsFileExit = false;
1782 int FilenameID = -1;
1783 SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
1784
1785 // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a
1786 // string followed by eod.
1787 if (StrTok.is(K: tok::eod)) {
1788 Diag(Tok: StrTok, DiagID: diag::ext_pp_gnu_line_directive);
1789 // Treat this like "#line NN", which doesn't change file characteristics.
1790 FileKind = SourceMgr.getFileCharacteristic(Loc: DigitTok.getLocation());
1791 } else if (StrTok.isNot(K: tok::string_literal)) {
1792 Diag(Tok: StrTok, DiagID: diag::err_pp_linemarker_invalid_filename);
1793 DiscardUntilEndOfDirective();
1794 return;
1795 } else if (StrTok.hasUDSuffix()) {
1796 Diag(Tok: StrTok, DiagID: diag::err_invalid_string_udl);
1797 DiscardUntilEndOfDirective();
1798 return;
1799 } else {
1800 // Parse and validate the string, converting it into a unique ID.
1801 StringLiteralParser Literal(StrTok, *this,
1802 StringLiteralEvalMethod::Unevaluated);
1803 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1804 if (Literal.hadError) {
1805 DiscardUntilEndOfDirective();
1806 return;
1807 }
1808 if (Literal.Pascal) {
1809 Diag(Tok: StrTok, DiagID: diag::err_pp_linemarker_invalid_filename);
1810 DiscardUntilEndOfDirective();
1811 return;
1812 }
1813
1814 // If a filename was present, read any flags that are present.
1815 if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, PP&: *this))
1816 return;
1817 if (!SourceMgr.isInPredefinedFile(Loc: DigitTok.getLocation()))
1818 Diag(Tok: StrTok, DiagID: diag::ext_pp_gnu_line_directive);
1819
1820 // Exiting to an empty string means pop to the including file, so leave
1821 // FilenameID as -1 in that case.
1822 if (!(IsFileExit && Literal.GetString().empty()))
1823 FilenameID = SourceMgr.getLineTableFilenameID(Str: Literal.GetString());
1824 }
1825
1826 // Create a line note with this information.
1827 SourceMgr.AddLineNote(Loc: DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
1828 IsFileExit, FileKind);
1829
1830 // If the preprocessor has callbacks installed, notify them of the #line
1831 // change. This is used so that the line marker comes out in -E mode for
1832 // example.
1833 if (Callbacks) {
1834 PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
1835 if (IsFileEntry)
1836 Reason = PPCallbacks::EnterFile;
1837 else if (IsFileExit)
1838 Reason = PPCallbacks::ExitFile;
1839
1840 Callbacks->FileChanged(Loc: CurPPLexer->getSourceLocation(), Reason, FileType: FileKind);
1841 }
1842}
1843
1844/// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
1845///
1846void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
1847 bool isWarning) {
1848 // Read the rest of the line raw. We do this because we don't want macros
1849 // to be expanded and we don't require that the tokens be valid preprocessing
1850 // tokens. For example, this is allowed: "#warning ` 'foo". GCC does
1851 // collapse multiple consecutive white space between tokens, but this isn't
1852 // specified by the standard.
1853 SmallString<128> Message;
1854 CurLexer->ReadToEndOfLine(Result: &Message);
1855
1856 // Find the first non-whitespace character, so that we can make the
1857 // diagnostic more succinct.
1858 StringRef Msg = Message.str().ltrim(Char: ' ');
1859
1860 if (isWarning)
1861 Diag(Tok, DiagID: diag::pp_hash_warning) << Msg;
1862 else
1863 Diag(Tok, DiagID: diag::err_pp_hash_error) << Msg;
1864}
1865
1866/// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
1867///
1868void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
1869 // Yes, this directive is an extension.
1870 Diag(Tok, DiagID: diag::ext_pp_ident_directive);
1871
1872 // Read the string argument.
1873 Token StrTok;
1874 Lex(Result&: StrTok);
1875
1876 // If the token kind isn't a string, it's a malformed directive.
1877 if (StrTok.isNot(K: tok::string_literal) &&
1878 StrTok.isNot(K: tok::wide_string_literal)) {
1879 Diag(Tok: StrTok, DiagID: diag::err_pp_malformed_ident);
1880 if (StrTok.isNot(K: tok::eod))
1881 DiscardUntilEndOfDirective();
1882 return;
1883 }
1884
1885 if (StrTok.hasUDSuffix()) {
1886 Diag(Tok: StrTok, DiagID: diag::err_invalid_string_udl);
1887 DiscardUntilEndOfDirective();
1888 return;
1889 }
1890
1891 // Verify that there is nothing after the string, other than EOD.
1892 CheckEndOfDirective(DirType: "ident");
1893
1894 if (Callbacks) {
1895 bool Invalid = false;
1896 std::string Str = getSpelling(Tok: StrTok, Invalid: &Invalid);
1897 if (!Invalid)
1898 Callbacks->Ident(Loc: Tok.getLocation(), str: Str);
1899 }
1900}
1901
1902/// Handle a #public directive.
1903void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
1904 Token MacroNameTok;
1905 ReadMacroName(MacroNameTok, isDefineUndef: MU_Undef);
1906
1907 // Error reading macro name? If so, diagnostic already issued.
1908 if (MacroNameTok.is(K: tok::eod))
1909 return;
1910
1911 // Check to see if this is the last token on the #__public_macro line.
1912 CheckEndOfDirective(DirType: "__public_macro");
1913
1914 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1915 // Okay, we finally have a valid identifier to undef.
1916 MacroDirective *MD = getLocalMacroDirective(II);
1917
1918 // If the macro is not defined, this is an error.
1919 if (!MD) {
1920 Diag(Tok: MacroNameTok, DiagID: diag::err_pp_visibility_non_macro) << II;
1921 return;
1922 }
1923
1924 // Note that this macro has now been exported.
1925 appendMacroDirective(II, MD: AllocateVisibilityMacroDirective(
1926 Loc: MacroNameTok.getLocation(), /*isPublic=*/true));
1927}
1928
1929/// Handle a #private directive.
1930void Preprocessor::HandleMacroPrivateDirective() {
1931 Token MacroNameTok;
1932 ReadMacroName(MacroNameTok, isDefineUndef: MU_Undef);
1933
1934 // Error reading macro name? If so, diagnostic already issued.
1935 if (MacroNameTok.is(K: tok::eod))
1936 return;
1937
1938 // Check to see if this is the last token on the #__private_macro line.
1939 CheckEndOfDirective(DirType: "__private_macro");
1940
1941 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1942 // Okay, we finally have a valid identifier to undef.
1943 MacroDirective *MD = getLocalMacroDirective(II);
1944
1945 // If the macro is not defined, this is an error.
1946 if (!MD) {
1947 Diag(Tok: MacroNameTok, DiagID: diag::err_pp_visibility_non_macro) << II;
1948 return;
1949 }
1950
1951 // Note that this macro has now been marked private.
1952 appendMacroDirective(II, MD: AllocateVisibilityMacroDirective(
1953 Loc: MacroNameTok.getLocation(), /*isPublic=*/false));
1954}
1955
1956//===----------------------------------------------------------------------===//
1957// Preprocessor Include Directive Handling.
1958//===----------------------------------------------------------------------===//
1959
1960/// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
1961/// checked and spelled filename, e.g. as an operand of \#include. This returns
1962/// true if the input filename was in <>'s or false if it were in ""'s. The
1963/// caller is expected to provide a buffer that is large enough to hold the
1964/// spelling of the filename, but is also expected to handle the case when
1965/// this method decides to use a different buffer.
1966bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
1967 StringRef &Buffer) {
1968 // Get the text form of the filename.
1969 assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
1970
1971 // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and
1972 // C++20 [lex.header]/2:
1973 //
1974 // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then
1975 // in C: behavior is undefined
1976 // in C++: program is conditionally-supported with implementation-defined
1977 // semantics
1978
1979 // Make sure the filename is <x> or "x".
1980 bool isAngled;
1981 if (Buffer[0] == '<') {
1982 if (Buffer.back() != '>') {
1983 Diag(Loc, DiagID: diag::err_pp_expects_filename);
1984 Buffer = StringRef();
1985 return true;
1986 }
1987 isAngled = true;
1988 } else if (Buffer[0] == '"') {
1989 if (Buffer.back() != '"') {
1990 Diag(Loc, DiagID: diag::err_pp_expects_filename);
1991 Buffer = StringRef();
1992 return true;
1993 }
1994 isAngled = false;
1995 } else {
1996 Diag(Loc, DiagID: diag::err_pp_expects_filename);
1997 Buffer = StringRef();
1998 return true;
1999 }
2000
2001 // Diagnose #include "" as invalid.
2002 if (Buffer.size() <= 2) {
2003 Diag(Loc, DiagID: diag::err_pp_empty_filename);
2004 Buffer = StringRef();
2005 return true;
2006 }
2007
2008 // Skip the brackets.
2009 Buffer = Buffer.substr(Start: 1, N: Buffer.size()-2);
2010 return isAngled;
2011}
2012
2013/// Push a token onto the token stream containing an annotation.
2014void Preprocessor::EnterAnnotationToken(SourceRange Range,
2015 tok::TokenKind Kind,
2016 void *AnnotationVal) {
2017 // FIXME: Produce this as the current token directly, rather than
2018 // allocating a new token for it.
2019 auto Tok = std::make_unique<Token[]>(num: 1);
2020 Tok[0].startToken();
2021 Tok[0].setKind(Kind);
2022 Tok[0].setLocation(Range.getBegin());
2023 Tok[0].setAnnotationEndLoc(Range.getEnd());
2024 Tok[0].setAnnotationValue(AnnotationVal);
2025 EnterTokenStream(Toks: std::move(Tok), NumToks: 1, DisableMacroExpansion: true, /*IsReinject*/ false);
2026}
2027
2028/// Produce a diagnostic informing the user that a #include or similar
2029/// was implicitly treated as a module import.
2030static void diagnoseAutoModuleImport(Preprocessor &PP, SourceLocation HashLoc,
2031 Token &IncludeTok,
2032 ArrayRef<IdentifierLoc> Path,
2033 SourceLocation PathEnd) {
2034 SmallString<128> PathString;
2035 for (size_t I = 0, N = Path.size(); I != N; ++I) {
2036 if (I)
2037 PathString += '.';
2038 PathString += Path[I].getIdentifierInfo()->getName();
2039 }
2040
2041 int IncludeKind = 0;
2042 switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
2043 case tok::pp_include:
2044 IncludeKind = 0;
2045 break;
2046
2047 case tok::pp_import:
2048 IncludeKind = 1;
2049 break;
2050
2051 case tok::pp_include_next:
2052 IncludeKind = 2;
2053 break;
2054
2055 case tok::pp___include_macros:
2056 IncludeKind = 3;
2057 break;
2058
2059 default:
2060 llvm_unreachable("unknown include directive kind");
2061 }
2062
2063 PP.Diag(Loc: HashLoc, DiagID: diag::remark_pp_include_directive_modular_translation)
2064 << IncludeKind << PathString;
2065}
2066
2067// Given a vector of path components and a string containing the real
2068// path to the file, build a properly-cased replacement in the vector,
2069// and return true if the replacement should be suggested.
2070static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
2071 StringRef RealPathName,
2072 llvm::sys::path::Style Separator) {
2073 auto RealPathComponentIter = llvm::sys::path::rbegin(path: RealPathName);
2074 auto RealPathComponentEnd = llvm::sys::path::rend(path: RealPathName);
2075 int Cnt = 0;
2076 bool SuggestReplacement = false;
2077
2078 auto IsSep = [Separator](StringRef Component) {
2079 return Component.size() == 1 &&
2080 llvm::sys::path::is_separator(value: Component[0], style: Separator);
2081 };
2082
2083 // Below is a best-effort to handle ".." in paths. It is admittedly
2084 // not 100% correct in the presence of symlinks.
2085 for (auto &Component : llvm::reverse(C&: Components)) {
2086 if ("." == Component) {
2087 } else if (".." == Component) {
2088 ++Cnt;
2089 } else if (Cnt) {
2090 --Cnt;
2091 } else if (RealPathComponentIter != RealPathComponentEnd) {
2092 if (!IsSep(Component) && !IsSep(*RealPathComponentIter) &&
2093 Component != *RealPathComponentIter) {
2094 // If these non-separator path components differ by more than just case,
2095 // then we may be looking at symlinked paths. Bail on this diagnostic to
2096 // avoid noisy false positives.
2097 SuggestReplacement =
2098 RealPathComponentIter->equals_insensitive(RHS: Component);
2099 if (!SuggestReplacement)
2100 break;
2101 Component = *RealPathComponentIter;
2102 }
2103 ++RealPathComponentIter;
2104 }
2105 }
2106 return SuggestReplacement;
2107}
2108
2109bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
2110 const TargetInfo &TargetInfo,
2111 const Module &M,
2112 DiagnosticsEngine &Diags) {
2113 Module::Requirement Requirement;
2114 Module::UnresolvedHeaderDirective MissingHeader;
2115 Module *ShadowingModule = nullptr;
2116 if (M.isAvailable(LangOpts, Target: TargetInfo, Req&: Requirement, MissingHeader,
2117 ShadowingModule))
2118 return false;
2119
2120 if (MissingHeader.FileNameLoc.isValid()) {
2121 Diags.Report(Loc: MissingHeader.FileNameLoc, DiagID: diag::err_module_header_missing)
2122 << MissingHeader.IsUmbrella << MissingHeader.FileName;
2123 } else if (ShadowingModule) {
2124 Diags.Report(Loc: M.DefinitionLoc, DiagID: diag::err_module_shadowed) << M.Name;
2125 Diags.Report(Loc: ShadowingModule->DefinitionLoc,
2126 DiagID: diag::note_previous_definition);
2127 } else {
2128 // FIXME: Track the location at which the requirement was specified, and
2129 // use it here.
2130 Diags.Report(Loc: M.DefinitionLoc, DiagID: diag::err_module_unavailable)
2131 << M.getFullModuleName() << Requirement.RequiredState
2132 << Requirement.FeatureName;
2133 }
2134 return true;
2135}
2136
2137std::pair<ConstSearchDirIterator, const FileEntry *>
2138Preprocessor::getIncludeNextStart(const Token &IncludeNextTok) const {
2139 // #include_next is like #include, except that we start searching after
2140 // the current found directory. If we can't do this, issue a
2141 // diagnostic.
2142 ConstSearchDirIterator Lookup = CurDirLookup;
2143 const FileEntry *LookupFromFile = nullptr;
2144
2145 if (isInPrimaryFile() && LangOpts.IsHeaderFile) {
2146 // If the main file is a header, then it's either for PCH/AST generation,
2147 // or libclang opened it. Either way, handle it as a normal include below
2148 // and do not complain about include_next.
2149 } else if (isInPrimaryFile()) {
2150 Lookup = nullptr;
2151 Diag(Tok: IncludeNextTok, DiagID: diag::pp_include_next_in_primary);
2152 } else if (CurLexerSubmodule) {
2153 // Start looking up in the directory *after* the one in which the current
2154 // file would be found, if any.
2155 assert(CurPPLexer && "#include_next directive in macro?");
2156 if (auto FE = CurPPLexer->getFileEntry())
2157 LookupFromFile = *FE;
2158 Lookup = nullptr;
2159 } else if (!Lookup) {
2160 // The current file was not found by walking the include path. Either it
2161 // is the primary file (handled above), or it was found by absolute path,
2162 // or it was found relative to such a file.
2163 // FIXME: Track enough information so we know which case we're in.
2164 Diag(Tok: IncludeNextTok, DiagID: diag::pp_include_next_absolute_path);
2165 } else {
2166 // Start looking up in the next directory.
2167 ++Lookup;
2168 }
2169
2170 return {Lookup, LookupFromFile};
2171}
2172
2173/// HandleIncludeDirective - The "\#include" tokens have just been read, read
2174/// the file to be included from the lexer, then include it! This is a common
2175/// routine with functionality shared between \#include, \#include_next and
2176/// \#import. LookupFrom is set when this is a \#include_next directive, it
2177/// specifies the file to start searching from.
2178void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
2179 Token &IncludeTok,
2180 ConstSearchDirIterator LookupFrom,
2181 const FileEntry *LookupFromFile) {
2182 Token FilenameTok;
2183 if (LexHeaderName(Result&: FilenameTok))
2184 return;
2185
2186 if (FilenameTok.isNot(K: tok::header_name)) {
2187 if (FilenameTok.is(K: tok::identifier) &&
2188 (PPOpts.SingleFileParseMode || PPOpts.SingleModuleParseMode)) {
2189 // If we saw #include IDENTIFIER and lexing didn't turn in into a header
2190 // name, it was undefined. In 'single-{file,module}-parse' mode, just skip
2191 // the directive without emitting diagnostics - the identifier might be
2192 // normally defined in previously-skipped include directive.
2193 DiscardUntilEndOfDirective();
2194 return;
2195 }
2196
2197 Diag(Loc: FilenameTok.getLocation(), DiagID: diag::err_pp_expects_filename);
2198 if (FilenameTok.isNot(K: tok::eod))
2199 DiscardUntilEndOfDirective();
2200 return;
2201 }
2202
2203 // Verify that there is nothing after the filename, other than EOD. Note
2204 // that we allow macros that expand to nothing after the filename, because
2205 // this falls into the category of "#include pp-tokens new-line" specified
2206 // in C99 6.10.2p4.
2207 SourceLocation EndLoc =
2208 CheckEndOfDirective(DirType: IncludeTok.getIdentifierInfo()->getNameStart(), EnableMacros: true);
2209
2210 auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
2211 EndLoc, LookupFrom, LookupFromFile);
2212 switch (Action.Kind) {
2213 case ImportAction::None:
2214 case ImportAction::SkippedModuleImport:
2215 break;
2216 case ImportAction::ModuleBegin:
2217 EnterAnnotationToken(Range: SourceRange(HashLoc, EndLoc),
2218 Kind: tok::annot_module_begin, AnnotationVal: Action.ModuleForHeader);
2219 break;
2220 case ImportAction::HeaderUnitImport:
2221 EnterAnnotationToken(Range: SourceRange(HashLoc, EndLoc), Kind: tok::annot_header_unit,
2222 AnnotationVal: Action.ModuleForHeader);
2223 break;
2224 case ImportAction::ModuleImport:
2225 EnterAnnotationToken(Range: SourceRange(HashLoc, EndLoc),
2226 Kind: tok::annot_module_include, AnnotationVal: Action.ModuleForHeader);
2227 break;
2228 case ImportAction::Failure:
2229 assert(TheModuleLoader.HadFatalFailure &&
2230 "This should be an early exit only to a fatal error");
2231 TheModuleLoader.HadFatalFailure = true;
2232 IncludeTok.setKind(tok::eof);
2233 CurLexer->cutOffLexing();
2234 return;
2235 }
2236}
2237
2238OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(
2239 ConstSearchDirIterator *CurDir, StringRef &Filename,
2240 SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2241 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2242 bool &IsMapped, ConstSearchDirIterator LookupFrom,
2243 const FileEntry *LookupFromFile, StringRef &LookupFilename,
2244 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2245 ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
2246 auto DiagnoseHeaderInclusion = [&](FileEntryRef FE) {
2247 if (LangOpts.AsmPreprocessor)
2248 return;
2249
2250 Module *RequestingModule = getModuleForLocation(
2251 Loc: FilenameLoc, AllowTextual: LangOpts.ModulesValidateTextualHeaderIncludes);
2252 bool RequestingModuleIsModuleInterface =
2253 !SourceMgr.isInMainFile(Loc: FilenameLoc);
2254
2255 HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
2256 RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
2257 Filename, File: FE);
2258 };
2259
2260 OptionalFileEntryRef File = LookupFile(
2261 FilenameLoc, Filename: LookupFilename, isAngled, FromDir: LookupFrom, FromFile: LookupFromFile, CurDirArg: CurDir,
2262 SearchPath: Callbacks ? &SearchPath : nullptr, RelativePath: Callbacks ? &RelativePath : nullptr,
2263 SuggestedModule: &SuggestedModule, IsMapped: &IsMapped, IsFrameworkFound: &IsFrameworkFound);
2264 if (File) {
2265 DiagnoseHeaderInclusion(*File);
2266 return File;
2267 }
2268
2269 // Give the clients a chance to silently skip this include.
2270 if (Callbacks && Callbacks->FileNotFound(FileName: Filename))
2271 return std::nullopt;
2272
2273 if (SuppressIncludeNotFoundError)
2274 return std::nullopt;
2275
2276 // If the file could not be located and it was included via angle
2277 // brackets, we can attempt a lookup as though it were a quoted path to
2278 // provide the user with a possible fixit.
2279 if (isAngled) {
2280 OptionalFileEntryRef File = LookupFile(
2281 FilenameLoc, Filename: LookupFilename, isAngled: false, FromDir: LookupFrom, FromFile: LookupFromFile, CurDirArg: CurDir,
2282 SearchPath: Callbacks ? &SearchPath : nullptr, RelativePath: Callbacks ? &RelativePath : nullptr,
2283 SuggestedModule: &SuggestedModule, IsMapped: &IsMapped,
2284 /*IsFrameworkFound=*/nullptr);
2285 if (File) {
2286 DiagnoseHeaderInclusion(*File);
2287 Diag(Tok: FilenameTok, DiagID: diag::err_pp_file_not_found_angled_include_not_fatal)
2288 << Filename << IsImportDecl
2289 << FixItHint::CreateReplacement(RemoveRange: FilenameRange,
2290 Code: "\"" + Filename.str() + "\"");
2291 return File;
2292 }
2293 }
2294
2295 // Check for likely typos due to leading or trailing non-isAlphanumeric
2296 // characters
2297 StringRef OriginalFilename = Filename;
2298 if (LangOpts.SpellChecking) {
2299 // A heuristic to correct a typo file name by removing leading and
2300 // trailing non-isAlphanumeric characters.
2301 auto CorrectTypoFilename = [](llvm::StringRef Filename) {
2302 Filename = Filename.drop_until(F: isAlphanumeric);
2303 while (!Filename.empty() && !isAlphanumeric(c: Filename.back())) {
2304 Filename = Filename.drop_back();
2305 }
2306 return Filename;
2307 };
2308 StringRef TypoCorrectionName = CorrectTypoFilename(Filename);
2309 StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);
2310
2311 OptionalFileEntryRef File = LookupFile(
2312 FilenameLoc, Filename: TypoCorrectionLookupName, isAngled, FromDir: LookupFrom,
2313 FromFile: LookupFromFile, CurDirArg: CurDir, SearchPath: Callbacks ? &SearchPath : nullptr,
2314 RelativePath: Callbacks ? &RelativePath : nullptr, SuggestedModule: &SuggestedModule, IsMapped: &IsMapped,
2315 /*IsFrameworkFound=*/nullptr);
2316 if (File) {
2317 DiagnoseHeaderInclusion(*File);
2318 auto Hint =
2319 isAngled ? FixItHint::CreateReplacement(
2320 RemoveRange: FilenameRange, Code: "<" + TypoCorrectionName.str() + ">")
2321 : FixItHint::CreateReplacement(
2322 RemoveRange: FilenameRange, Code: "\"" + TypoCorrectionName.str() + "\"");
2323 Diag(Tok: FilenameTok, DiagID: diag::err_pp_file_not_found_typo_not_fatal)
2324 << OriginalFilename << TypoCorrectionName << Hint;
2325 // We found the file, so set the Filename to the name after typo
2326 // correction.
2327 Filename = TypoCorrectionName;
2328 LookupFilename = TypoCorrectionLookupName;
2329 return File;
2330 }
2331 }
2332
2333 // If the file is still not found, just go with the vanilla diagnostic
2334 assert(!File && "expected missing file");
2335 Diag(Tok: FilenameTok, DiagID: diag::err_pp_file_not_found)
2336 << OriginalFilename << FilenameRange;
2337 if (IsFrameworkFound) {
2338 size_t SlashPos = OriginalFilename.find(C: '/');
2339 assert(SlashPos != StringRef::npos &&
2340 "Include with framework name should have '/' in the filename");
2341 StringRef FrameworkName = OriginalFilename.substr(Start: 0, N: SlashPos);
2342 FrameworkCacheEntry &CacheEntry =
2343 HeaderInfo.LookupFrameworkCache(FWName: FrameworkName);
2344 assert(CacheEntry.Directory && "Found framework should be in cache");
2345 Diag(Tok: FilenameTok, DiagID: diag::note_pp_framework_without_header)
2346 << OriginalFilename.substr(Start: SlashPos + 1) << FrameworkName
2347 << CacheEntry.Directory->getName();
2348 }
2349
2350 return std::nullopt;
2351}
2352
2353/// Handle either a #include-like directive or an import declaration that names
2354/// a header file.
2355///
2356/// \param HashLoc The location of the '#' token for an include, or
2357/// SourceLocation() for an import declaration.
2358/// \param IncludeTok The include / include_next / import token.
2359/// \param FilenameTok The header-name token.
2360/// \param EndLoc The location at which any imported macros become visible.
2361/// \param LookupFrom For #include_next, the starting directory for the
2362/// directory lookup.
2363/// \param LookupFromFile For #include_next, the starting file for the directory
2364/// lookup.
2365Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
2366 SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
2367 SourceLocation EndLoc, ConstSearchDirIterator LookupFrom,
2368 const FileEntry *LookupFromFile) {
 // Overview: resolve the header named by FilenameTok, then decide between
 // entering it textually (Enter), importing its module (Import), skipping it
 // (Skip), or refusing to enter any file (IncludeLimitReached). The result is
 // reported to the caller as an ImportAction: None, SkippedModuleImport,
 // ModuleImport, ModuleBegin or Failure.
2369 SmallString<128> FilenameBuffer;
2370 StringRef Filename = getSpelling(Tok: FilenameTok, Buffer&: FilenameBuffer);
2371 SourceLocation CharEnd = FilenameTok.getEndLoc();
2372
2373 CharSourceRange FilenameRange
2374 = CharSourceRange::getCharRange(B: FilenameTok.getLocation(), E: CharEnd);
 // Keep the spelling as written (delimiters included); it is used later for
 // the include-alias map and for diagnostics.
2375 StringRef OriginalFilename = Filename;
2376 bool isAngled =
2377 GetIncludeFilenameSpelling(Loc: FilenameTok.getLocation(), Buffer&: Filename);
2378
2379 // If GetIncludeFilenameSpelling set the start ptr to null, there was an
2380 // error.
2381 if (Filename.empty())
2382 return {ImportAction::None};
 // Warn about names that end in a space or a dot. Selection chooses the
 // diagnostic wording: 1 for a trailing '.', 0 for a trailing ' '.
2383 if (Filename.ends_with(Suffix: ' ') || Filename.ends_with(Suffix: '.')) {
2384 unsigned Selection = Filename.ends_with(Suffix: '.') ? 1 : 0;
2385 Diag(Tok: FilenameTok, DiagID: diag::pp_nonportable_path_trailing)
2386 << Filename << Selection;
2387 }
2388
 // An invalid HashLoc identifies an import declaration (there is no '#');
 // in that case diagnostics are anchored at IncludeTok instead.
2389 bool IsImportDecl = HashLoc.isInvalid();
2390 SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;
2391
2392 // Complain about attempts to #include files in an audit pragma.
2393 if (PragmaARCCFCodeAuditedInfo.getLoc().isValid()) {
2394 Diag(Loc: StartLoc, DiagID: diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
2395 Diag(Loc: PragmaARCCFCodeAuditedInfo.getLoc(), DiagID: diag::note_pragma_entered_here);
2396
2397 // Immediately leave the pragma.
2398 PragmaARCCFCodeAuditedInfo = IdentifierLoc();
2399 }
2400
2401 // Complain about attempts to #include files in an assume-nonnull pragma.
2402 if (PragmaAssumeNonNullLoc.isValid()) {
2403 Diag(Loc: StartLoc, DiagID: diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
2404 Diag(Loc: PragmaAssumeNonNullLoc, DiagID: diag::note_pragma_entered_here);
2405
2406 // Immediately leave the pragma.
2407 PragmaAssumeNonNullLoc = SourceLocation();
2408 }
2409
2410 if (HeaderInfo.HasIncludeAliasMap()) {
2411 // Map the filename with the brackets still attached. If the name doesn't
2412 // map to anything, fall back on the filename we've already gotten the
2413 // spelling for.
2414 StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(Source: OriginalFilename);
2415 if (!NewName.empty())
2416 Filename = NewName;
2417 }
2418
2419 // Search include directories.
2420 bool IsMapped = false;
2421 bool IsFrameworkFound = false;
2422 ConstSearchDirIterator CurDir = nullptr;
2423 SmallString<1024> SearchPath;
2424 SmallString<1024> RelativePath;
 // NOTE(review): SearchPath and RelativePath are passed to the lookup below
 // unconditionally; the "only if we have 'Callbacks'" condition described in
 // the next comment is not visible in this function.
2425 // We get the raw path only if we have 'Callbacks' to which we later pass
2426 // the path.
2427 ModuleMap::KnownHeader SuggestedModule;
2428 SourceLocation FilenameLoc = FilenameTok.getLocation();
2429 StringRef LookupFilename = Filename;
2430
2431 // Normalize slashes when compiling with -fms-extensions on non-Windows. This
2432 // is unnecessary on Windows since the filesystem there handles backslashes.
2433 SmallString<128> NormalizedPath;
2434 llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
2435 if (is_style_posix(S: BackslashStyle) && LangOpts.MicrosoftExt) {
2436 NormalizedPath = Filename.str();
2437 llvm::sys::path::native(path&: NormalizedPath);
2438 LookupFilename = NormalizedPath;
 // From here on BackslashStyle is used for separator handling in the
 // portability diagnostics further down.
2439 BackslashStyle = llvm::sys::path::Style::windows;
2440 }
2441
2442 OptionalFileEntryRef File = LookupHeaderIncludeOrImport(
2443 CurDir: &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
2444 IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
2445 LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
2446
 // While still skipping towards the PCH through header, nothing is entered:
 // this only stops the skipping once the through header itself is found.
2447 if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
2448 if (File && isPCHThroughHeader(FE: &File->getFileEntry()))
2449 SkippingUntilPCHThroughHeader = false;
2450 return {ImportAction::None};
2451 }
2452
2453 // Should we enter the source file? Set to Skip if either the source file is
2454 // known to have no effect beyond its effect on module visibility -- that is,
2455 // if it's got an include guard that is already defined, set to Import if it
2456 // is a modular header we've already built and should import.
2457
2458 // For C++20 Modules
2459 // [cpp.include]/7 If the header identified by the header-name denotes an
2460 // importable header, it is implementation-defined whether the #include
2461 // preprocessing directive is instead replaced by an import directive.
2462 // For this implementation, the translation is permitted when we are parsing
2463 // the Global Module Fragment, and not otherwise (the cases where it would be
2464 // valid to replace an include with an import are highly constrained once in
2465 // named module purview; this choice avoids considerable complexity in
2466 // determining valid cases).
2467
2468 enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
2469
 // Single-file parse mode reuses IncludeLimitReached so that no included file
 // is entered (see the switch on Action below).
2470 if (PPOpts.SingleFileParseMode)
2471 Action = IncludeLimitReached;
2472
2473 // If we've reached the max allowed include depth, it is usually due to an
2474 // include cycle. Don't enter already processed files again as it can lead to
2475 // reaching the max allowed include depth again.
2476 if (Action == Enter && HasReachedMaxIncludeDepth && File &&
2477 alreadyIncluded(File: *File))
2478 Action = IncludeLimitReached;
2479
2480 // FIXME: We do not have a good way to disambiguate C++ clang modules from
2481 // C++ standard modules (other than use/non-use of Header Units).
2482
2483 Module *ModuleToImport = SuggestedModule.getModule();
2484
 // Include translation is only considered when a file and a module were found
 // and that module is not the one currently being built.
2485 bool MaybeTranslateInclude = Action == Enter && File && ModuleToImport &&
2486 !ModuleToImport->isForBuilding(LangOpts: getLangOpts());
2487
2488 // Maybe a usable Header Unit
2489 bool UsableHeaderUnit = false;
2490 if (getLangOpts().CPlusPlusModules && ModuleToImport &&
2491 ModuleToImport->isHeaderUnit()) {
2492 if (TrackGMFState.inGMF() || IsImportDecl)
2493 UsableHeaderUnit = true;
 // !IsImportDecl always holds on this path: the branch above already
 // handled the IsImportDecl case.
2494 else if (!IsImportDecl) {
2495 // This is a Header Unit that we do not include-translate
2496 ModuleToImport = nullptr;
2497 }
2498 }
2499 // Maybe a usable clang header module.
2500 bool UsableClangHeaderModule =
2501 (getLangOpts().CPlusPlusModules || getLangOpts().Modules) &&
2502 ModuleToImport && !ModuleToImport->isHeaderUnit();
2503
2504 // Determine whether we should try to import the module for this #include, if
2505 // there is one. Don't do so if precompiled module support is disabled or we
2506 // are processing this module textually (because we're building the module).
2507 if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) {
2508 // If this include corresponds to a module but that module is
2509 // unavailable, diagnose the situation and bail out.
2510 // FIXME: Remove this; loadModule does the same check (but produces
2511 // slightly worse diagnostics).
2512 if (checkModuleIsAvailable(LangOpts: getLangOpts(), TargetInfo: getTargetInfo(), M: *ModuleToImport,
2513 Diags&: getDiagnostics())) {
2514 Diag(Loc: FilenameTok.getLocation(),
2515 DiagID: diag::note_implicit_top_level_module_import_here)
2516 << ModuleToImport->getTopLevelModuleName();
2517 return {ImportAction::None};
2518 }
2519
2520 // Compute the module access path corresponding to this module.
2521 // FIXME: Should we have a second loadModule() overload to avoid this
2522 // extra lookup step?
 // The loop collects names from the module up through its parents; the
 // reverse afterwards puts the outermost module first.
2523 SmallVector<IdentifierLoc, 2> Path;
2524 for (Module *Mod = ModuleToImport; Mod; Mod = Mod->Parent)
2525 Path.emplace_back(Args: FilenameTok.getLocation(),
2526 Args: getIdentifierInfo(Name: Mod->Name));
2527 std::reverse(first: Path.begin(), last: Path.end());
2528
2529 // Warn that we're replacing the include/import with a module import.
2530 if (!IsImportDecl)
2531 diagnoseAutoModuleImport(PP&: *this, HashLoc: StartLoc, IncludeTok, Path, PathEnd: CharEnd);
2532
2533 // Load the module to import its macros. We'll make the declarations
2534 // visible when the parser gets here.
2535 // FIXME: Pass ModuleToImport in here rather than converting it to a path
2536 // and making the module loader convert it back again.
2537 ModuleLoadResult Imported = TheModuleLoader.loadModule(
2538 ImportLoc: IncludeTok.getLocation(), Path, Visibility: Module::Hidden,
2539 /*IsInclusionDirective=*/true);
2540 assert((Imported == nullptr || Imported == ModuleToImport) &&
2541 "the imported module is different than the suggested one");
2542
2543 if (Imported) {
2544 Action = Import;
2545 } else if (Imported.isMissingExpected()) {
 // NOTE(review): Imported tested false just above, yet its Module*
 // conversion is dereferenced here. This presumably relies on
 // ModuleLoadResult still carrying a non-null module pointer in the
 // "missing expected" state -- confirm against ModuleLoadResult.
2546 markClangModuleAsAffecting(
2547 M: static_cast<Module *>(Imported)->getTopLevelModule());
2548 // We failed to find a submodule that we assumed would exist (because it
2549 // was in the directory of an umbrella header, for instance), but no
2550 // actual module containing it exists (because the umbrella header is
2551 // incomplete). Treat this as a textual inclusion.
2552 ModuleToImport = nullptr;
2553 } else if (Imported.isConfigMismatch()) {
2554 // On a configuration mismatch, enter the header textually. We still know
2555 // that it's part of the corresponding module.
2556 } else {
2557 // We hit an error processing the import. Bail out.
2558 if (hadModuleLoaderFatalFailure()) {
2559 // With a fatal failure in the module loader, we abort parsing.
 // IncludeTok is overwritten in place with an eof token positioned at
 // the end of the current lexer's buffer, and lexing is cut off.
2560 Token &Result = IncludeTok;
2561 assert(CurLexer && "#include but no current lexer set!");
2562 Result.startToken();
2563 CurLexer->FormTokenWithChars(Result, TokEnd: CurLexer->BufferEnd, Kind: tok::eof);
2564 CurLexer->cutOffLexing();
2565 }
2566 return {ImportAction::None};
2567 }
2568 }
2569
2570 // The #included file will be considered to be a system header if either it is
2571 // in a system include directory, or if the #includer is a system include
2572 // header.
2573 SrcMgr::CharacteristicKind FileCharacter =
2574 SourceMgr.getFileCharacteristic(Loc: FilenameTok.getLocation());
2575 if (File)
2576 FileCharacter = std::max(a: HeaderInfo.getFileDirFlavor(File: *File), b: FileCharacter);
2577
2578 // If this is a '#import' or an import-declaration, don't re-enter the file.
2579 //
2580 // FIXME: If we have a suggested module for a '#include', and we've already
2581 // visited this file, don't bother entering it again. We know it has no
2582 // further effect.
2583 bool EnterOnce =
2584 IsImportDecl ||
2585 IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;
2586
2587 bool IsFirstIncludeOfFile = false;
2588
2589 // Ask HeaderInfo if we should enter this #include file. If not, #including
2590 // this file will have no effect.
2591 if (Action == Enter && File &&
2592 !HeaderInfo.ShouldEnterIncludeFile(PP&: *this, File: *File, isImport: EnterOnce,
2593 ModulesEnabled: getLangOpts().Modules, M: ModuleToImport,
2594 IsFirstIncludeOfFile)) {
 // Summary of the choice made below (PCH compilation always ends in Skip):
 //  - usable header unit: Import while in the GMF, otherwise Skip;
 //  - any other remaining module: Import;
 //  - no module: Skip.
2595 // C++ standard modules:
2596 // If we are not in the GMF, then we textually include only
2597 // clang modules:
2598 // Even if we've already preprocessed this header once and know that we
2599 // don't need to see its contents again, we still need to import it if it's
2600 // modular because we might not have imported it from this submodule before.
2601 //
2602 // FIXME: We don't do this when compiling a PCH because the AST
2603 // serialization layer can't cope with it. This means we get local
2604 // submodule visibility semantics wrong in that case.
2605 if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
2606 Action = TrackGMFState.inGMF() ? Import : Skip;
2607 else
2608 Action = (ModuleToImport && !getLangOpts().CompilingPCH) ? Import : Skip;
2609 }
2610
2611 // Check for circular inclusion of the main file.
2612 // We can't generate a consistent preamble with regard to the conditional
2613 // stack if the main file is included again as due to the preamble bounds
2614 // some directives (e.g. #endif of a header guard) will never be seen.
2615 // Since this will lead to confusing errors, avoid the inclusion.
2616 if (Action == Enter && File && PreambleConditionalStack.isRecording() &&
2617 SourceMgr.isMainFile(SourceFile: File->getFileEntry())) {
2618 Diag(Loc: FilenameTok.getLocation(),
2619 DiagID: diag::err_pp_including_mainfile_in_preamble);
2620 return {ImportAction::None};
2621 }
2622
 // Callbacks are not invoked for import declarations. They are invoked even
 // when the lookup found no file; the !File bail-out comes after this block.
2623 if (Callbacks && !IsImportDecl) {
2624 // Notify the callback object that we've seen an inclusion directive.
2625 // FIXME: Use a different callback for a pp-import?
2626 Callbacks->InclusionDirective(HashLoc, IncludeTok, FileName: LookupFilename, IsAngled: isAngled,
2627 FilenameRange, File, SearchPath, RelativePath,
2628 SuggestedModule: SuggestedModule.getModule(), ModuleImported: Action == Import,
2629 FileType: FileCharacter);
2630 if (Action == Skip && File)
2631 Callbacks->FileSkipped(SkippedFile: *File, FilenameTok, FileType: FileCharacter);
2632 }
2633
2634 if (!File)
2635 return {ImportAction::None};
2636
2637 // If this is a C++20 pp-import declaration, diagnose if we didn't find any
2638 // module corresponding to the named header.
2639 if (IsImportDecl && !ModuleToImport) {
2640 Diag(Tok: FilenameTok, DiagID: diag::err_header_import_not_header_unit)
2641 << OriginalFilename << File->getName();
2642 return {ImportAction::None};
2643 }
2644
2645 // Issue a diagnostic if the name of the file on disk has a different case
2646 // than the one we're about to open.
 // The check is skipped when the lookup reported the name as mapped
 // (IsMapped) or when no real path name is available for the file.
2647 const bool CheckIncludePathPortability =
2648 !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();
2649
2650 if (CheckIncludePathPortability) {
2651 StringRef Name = LookupFilename;
2652 StringRef NameWithoriginalSlashes = Filename;
2653#if defined(_WIN32)
2654 // Skip UNC prefix if present. (tryGetRealPathName() always
2655 // returns a path with the prefix skipped.)
2656 bool NameWasUNC = Name.consume_front("\\\\?\\");
2657 NameWithoriginalSlashes.consume_front("\\\\?\\");
2658#endif
2659 StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
2660 SmallVector<StringRef, 16> Components(llvm::sys::path::begin(path: Name),
2661 llvm::sys::path::end(path: Name));
2662#if defined(_WIN32)
2663 // -Wnonportable-include-path is designed to diagnose includes using
2664 // case even on systems with a case-insensitive file system.
2665 // On Windows, RealPathName always starts with an upper-case drive
2666 // letter for absolute paths, but Name might start with either
2667 // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
2668 // ("foo" will always have on-disk case, no matter which case was
2669 // used in the cd command). To not emit this warning solely for
2670 // the drive letter, whose case is dependent on if `cd` is used
2671 // with upper- or lower-case drive letters, always consider the
2672 // given drive letter case as correct for the purpose of this warning.
2673 SmallString<128> FixedDriveRealPath;
2674 if (llvm::sys::path::is_absolute(Name) &&
2675 llvm::sys::path::is_absolute(RealPathName) &&
2676 toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
2677 isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
2678 assert(Components.size() >= 3 && "should have drive, backslash, name");
2679 assert(Components[0].size() == 2 && "should start with drive");
2680 assert(Components[0][1] == ':' && "should have colon");
2681 FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
2682 RealPathName = FixedDriveRealPath;
2683 }
2684#endif
2685
 // When trySimplifyPath returns true, the (possibly updated) Components
 // are reassembled below into a suggested spelling, delimiters included,
 // which is offered as a fix-it.
2686 if (trySimplifyPath(Components, RealPathName, Separator: BackslashStyle)) {
2687 SmallString<128> Path;
2688 Path.reserve(N: Name.size()+2);
2689 Path.push_back(Elt: isAngled ? '<' : '"');
2690
2691 const auto IsSep = [BackslashStyle](char c) {
2692 return llvm::sys::path::is_separator(value: c, style: BackslashStyle);
2693 };
2694
2695 for (auto Component : Components) {
2696 // On POSIX, Components will contain a single '/' as first element
2697 // exactly if Name is an absolute path.
2698 // On Windows, it will contain "C:" followed by '\' for absolute paths.
2699 // The drive letter is optional for absolute paths on Windows, but
2700 // clang currently cannot process absolute paths in #include lines that
2701 // don't have a drive.
2702 // If the first entry in Components is a directory separator,
2703 // then the code at the bottom of this loop that keeps the original
2704 // directory separator style copies it. If the second entry is
2705 // a directory separator (the C:\ case), then that separator already
2706 // got copied when the C: was processed and we want to skip that entry.
2707 if (!(Component.size() == 1 && IsSep(Component[0])))
2708 Path.append(RHS: Component);
2709 else if (Path.size() != 1)
2710 continue;
2711
2712 // Append the separator(s) the user used, or the close quote
 // Path holds one extra leading delimiter character, so Path.size()-1
 // is the matching index into NameWithoriginalSlashes.
2713 if (Path.size() > NameWithoriginalSlashes.size()) {
2714 Path.push_back(Elt: isAngled ? '>' : '"');
2715 continue;
2716 }
2717 assert(IsSep(NameWithoriginalSlashes[Path.size()-1]));
2718 do
2719 Path.push_back(Elt: NameWithoriginalSlashes[Path.size()-1]);
2720 while (Path.size() <= NameWithoriginalSlashes.size() &&
2721 IsSep(NameWithoriginalSlashes[Path.size()-1]));
2722 }
2723
2724#if defined(_WIN32)
2725 // Restore UNC prefix if it was there.
2726 if (NameWasUNC)
2727 Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
2728#endif
2729
2730 // For user files and known standard headers, issue a diagnostic.
2731 // For other system headers, don't. They can be controlled separately.
2732 auto DiagId =
2733 (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Include: Name))
2734 ? diag::pp_nonportable_path
2735 : diag::pp_nonportable_system_path;
2736 Diag(Tok: FilenameTok, DiagID: DiagId) << Path <<
2737 FixItHint::CreateReplacement(RemoveRange: FilenameRange, Code: Path);
2738 }
2739
2740 bool SuppressBackslashDiag =
2741 // The diagnostic logic is expensive, so only run it if it's enabled...
2742 Diags->isIgnored(DiagID: diag::pp_nonportable_path_separator, Loc: FilenameLoc) ||
2743 // ...and try to only trigger on paths that appear in source.
2744 FilenameLoc.isMacroID() ||
2745 SourceMgr.isWrittenInBuiltinFile(Loc: FilenameLoc) ||
2746 SourceMgr.isWrittenInModuleIncludes(Loc: FilenameLoc);
 // The fix-it replaces every backslash of the spelling as written with a
 // forward slash.
2747 if (!SuppressBackslashDiag && OriginalFilename.contains(C: '\\')) {
2748 std::string SuggestedPath = OriginalFilename.str();
2749 llvm::replace(Range&: SuggestedPath, OldValue: '\\', NewValue: '/');
2750 Diag(Tok: FilenameTok, DiagID: diag::pp_nonportable_path_separator)
2751 << Name << FixItHint::CreateReplacement(RemoveRange: FilenameRange, Code: SuggestedPath);
2752 }
2753 }
2754
 // Every action except Enter returns from inside this switch.
2755 switch (Action) {
2756 case Skip:
2757 // If we don't need to enter the file, stop now.
2758 if (ModuleToImport)
2759 return {ImportAction::SkippedModuleImport, ModuleToImport};
2760 return {ImportAction::None};
2761
2762 case IncludeLimitReached:
2763 // If we reached our include limit and don't want to enter any more files,
2764 // don't go any further.
2765 return {ImportAction::None};
2766
2767 case Import: {
2768 // If this is a module import, make it visible if needed.
2769 assert(ModuleToImport && "no module to import");
2770
2771 makeModuleVisible(M: ModuleToImport, Loc: EndLoc);
2772
 // For the __include_macros pseudo directive the module is still made
 // visible above, but no import action is reported to the caller.
2773 if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
2774 tok::pp___include_macros)
2775 return {ImportAction::None};
2776
2777 return {ImportAction::ModuleImport, ModuleToImport};
2778 }
2779
2780 case Enter:
2781 break;
2782 }
2783
2784 // Check that we don't have infinite #include recursion.
 // HasReachedMaxIncludeDepth is remembered so that the check near the top of
 // this function can refuse to re-enter files that were already included.
2785 if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
2786 Diag(Tok: FilenameTok, DiagID: diag::err_pp_include_too_deep);
2787 HasReachedMaxIncludeDepth = true;
2788 return {ImportAction::None};
2789 }
2790
2791 if (isAngled && isInNamedModule())
2792 Diag(Tok: FilenameTok, DiagID: diag::warn_pp_include_angled_in_module_purview)
2793 << getNamedModuleName();
2794
2795 // Look up the file, create a File ID for it.
2796 SourceLocation IncludePos = FilenameTok.getLocation();
2797 // If the filename string was the result of macro expansions, set the include
2798 // position on the file where it will be included and after the expansions.
2799 if (IncludePos.isMacroID())
2800 IncludePos = SourceMgr.getExpansionRange(Loc: IncludePos).getEnd();
2801 FileID FID = SourceMgr.createFileID(SourceFile: *File, IncludePos, FileCharacter);
 // An invalid FileID is recorded as a fatal module-loader failure and is the
 // only path in this function that returns ImportAction::Failure.
2802 if (!FID.isValid()) {
2803 TheModuleLoader.HadFatalFailure = true;
2804 return ImportAction::Failure;
2805 }
2806
2807 // If all is good, enter the new file!
 // A true result from EnterSourceFile ends handling with no action --
 // presumably it signals that the file could not be entered; confirm against
 // EnterSourceFile.
2808 if (EnterSourceFile(FID, Dir: CurDir, Loc: FilenameTok.getLocation(),
2809 IsFirstIncludeOfFile))
2810 return {ImportAction::None};
2811
2812 // Determine if we're switching to building a new submodule, and which one.
2813 // This does not apply for C++20 modules header units.
2814 if (ModuleToImport && !ModuleToImport->isHeaderUnit()) {
2815 if (ModuleToImport->getTopLevelModule()->ShadowingModule) {
2816 // We are building a submodule that belongs to a shadowed module. This
2817 // means we find header files in the shadowed module.
2818 Diag(Loc: ModuleToImport->DefinitionLoc,
2819 DiagID: diag::err_module_build_shadowed_submodule)
2820 << ModuleToImport->getFullModuleName();
2821 Diag(Loc: ModuleToImport->getTopLevelModule()->ShadowingModule->DefinitionLoc,
2822 DiagID: diag::note_previous_definition);
2823 return {ImportAction::None};
2824 }
2825 // When building a pch, -fmodule-name tells the compiler to textually
2826 // include headers in the specified module. We are not building the
2827 // specified module.
2828 //
2829 // FIXME: This is the wrong way to handle this. We should produce a PCH
2830 // that behaves the same as the header would behave in a compilation using
2831 // that PCH, which means we should enter the submodule. We need to teach
2832 // the AST serialization layer to deal with the resulting AST.
2833 if (getLangOpts().CompilingPCH &&
2834 ModuleToImport->isForBuilding(LangOpts: getLangOpts()))
2835 return {ImportAction::None};
2836
2837 assert(!CurLexerSubmodule && "should not have marked this as a module yet");
2838 CurLexerSubmodule = ModuleToImport;
2839
2840 // Let the macro handling code know that any future macros are within
2841 // the new submodule.
2842 EnterSubmodule(M: ModuleToImport, ImportLoc: EndLoc, /*ForPragma*/ false);
2843
2844 // Let the parser know that any future declarations are within the new
2845 // submodule.
2846 // FIXME: There's no point doing this if we're handling a #__include_macros
2847 // directive.
2848 return {ImportAction::ModuleBegin, ModuleToImport};
2849 }
2850
 // Textual entry with no submodule to begin. Import declarations must not
 // reach this point: a missing module was diagnosed earlier.
2851 assert(!IsImportDecl && "failed to diagnose missing module for import decl");
2852 return {ImportAction::None};
2853}
2854
2855/// HandleIncludeNextDirective - Implements \#include_next.
2856///
2857void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
2858 Token &IncludeNextTok) {
2859 Diag(Tok: IncludeNextTok, DiagID: diag::ext_pp_include_next_directive);
2860
2861 ConstSearchDirIterator Lookup = nullptr;
2862 const FileEntry *LookupFromFile;
2863 std::tie(args&: Lookup, args&: LookupFromFile) = getIncludeNextStart(IncludeNextTok);
2864
2865 return HandleIncludeDirective(HashLoc, IncludeTok&: IncludeNextTok, LookupFrom: Lookup,
2866 LookupFromFile);
2867}
2868
2869/// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
2870void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
2871 // The Microsoft #import directive takes a type library and generates header
2872 // files from it, and includes those. This is beyond the scope of what clang
2873 // does, so we ignore it and error out. However, #import can optionally have
2874 // trailing attributes that span multiple lines. We're going to eat those
2875 // so we can continue processing from there.
2876 Diag(Tok, DiagID: diag::err_pp_import_directive_ms );
2877
2878 // Read tokens until we get to the end of the directive. Note that the
2879 // directive can be split over multiple lines using the backslash character.
2880 DiscardUntilEndOfDirective();
2881}
2882
2883/// HandleImportDirective - Implements \#import.
2884///
2885void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
2886 Token &ImportTok) {
2887 if (!LangOpts.ObjC) { // #import is standard for ObjC.
2888 if (LangOpts.MSVCCompat)
2889 return HandleMicrosoftImportDirective(Tok&: ImportTok);
2890 Diag(Tok: ImportTok, DiagID: diag::ext_pp_import_directive);
2891 }
2892 return HandleIncludeDirective(HashLoc, IncludeTok&: ImportTok);
2893}
2894
2895/// HandleIncludeMacrosDirective - The -imacros command line option turns into a
2896/// pseudo directive in the predefines buffer. This handles it by sucking all
2897/// tokens through the preprocessor and discarding them (only keeping the side
2898/// effects on the preprocessor).
2899void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
2900 Token &IncludeMacrosTok) {
2901 // This directive should only occur in the predefines buffer. If not, emit an
2902 // error and reject it.
2903 SourceLocation Loc = IncludeMacrosTok.getLocation();
2904 if (SourceMgr.getBufferName(Loc) != "<built-in>") {
2905 Diag(Loc: IncludeMacrosTok.getLocation(),
2906 DiagID: diag::pp_include_macros_out_of_predefines);
2907 DiscardUntilEndOfDirective();
2908 return;
2909 }
2910
2911 // Treat this as a normal #include for checking purposes. If this is
2912 // successful, it will push a new lexer onto the include stack.
2913 HandleIncludeDirective(HashLoc, IncludeTok&: IncludeMacrosTok);
2914
2915 Token TmpTok;
2916 do {
2917 Lex(Result&: TmpTok);
2918 assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
2919 } while (TmpTok.isNot(K: tok::hashhash));
2920}
2921
2922//===----------------------------------------------------------------------===//
2923// Preprocessor Macro Directive Handling.
2924//===----------------------------------------------------------------------===//
2925
2926/// ReadMacroParameterList - The ( starting a parameter list of a macro
2927/// definition has just been read. Lex the rest of the parameters and the
2928/// closing ), updating MI with what we learn. Return true if an error occurs
2929/// parsing the param list.
2930bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
2931 SmallVector<IdentifierInfo*, 32> Parameters;
2932
2933 while (true) {
2934 LexUnexpandedNonComment(Result&: Tok);
2935 switch (Tok.getKind()) {
2936 case tok::r_paren:
2937 // Found the end of the parameter list.
2938 if (Parameters.empty()) // #define FOO()
2939 return false;
2940 // Otherwise we have #define FOO(A,)
2941 Diag(Tok, DiagID: diag::err_pp_expected_ident_in_arg_list);
2942 return true;
2943 case tok::ellipsis: // #define X(... -> C99 varargs
2944 if (!LangOpts.C99)
2945 Diag(Tok, DiagID: LangOpts.CPlusPlus11 ?
2946 diag::warn_cxx98_compat_variadic_macro :
2947 diag::ext_variadic_macro);
2948
2949 // OpenCL v1.2 s6.9.e: variadic macros are not supported.
2950 if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus) {
2951 Diag(Tok, DiagID: diag::ext_pp_opencl_variadic_macros);
2952 }
2953
2954 // Lex the token after the identifier.
2955 LexUnexpandedNonComment(Result&: Tok);
2956 if (Tok.isNot(K: tok::r_paren)) {
2957 Diag(Tok, DiagID: diag::err_pp_missing_rparen_in_macro_def);
2958 return true;
2959 }
2960 // Add the __VA_ARGS__ identifier as a parameter.
2961 Parameters.push_back(Elt: Ident__VA_ARGS__);
2962 MI->setIsC99Varargs();
2963 MI->setParameterList(List: Parameters, PPAllocator&: BP);
2964 return false;
2965 case tok::eod: // #define X(
2966 Diag(Tok, DiagID: diag::err_pp_missing_rparen_in_macro_def);
2967 return true;
2968 default:
2969 // Handle keywords and identifiers here to accept things like
2970 // #define Foo(for) for.
2971 IdentifierInfo *II = Tok.getIdentifierInfo();
2972 if (!II) {
2973 // #define X(1
2974 Diag(Tok, DiagID: diag::err_pp_invalid_tok_in_arg_list);
2975 return true;
2976 }
2977
2978 // If this is already used as a parameter, it is used multiple times (e.g.
2979 // #define X(A,A.
2980 if (llvm::is_contained(Range&: Parameters, Element: II)) { // C99 6.10.3p6
2981 Diag(Tok, DiagID: diag::err_pp_duplicate_name_in_arg_list) << II;
2982 return true;
2983 }
2984
2985 // Add the parameter to the macro info.
2986 Parameters.push_back(Elt: II);
2987
2988 // Lex the token after the identifier.
2989 LexUnexpandedNonComment(Result&: Tok);
2990
2991 switch (Tok.getKind()) {
2992 default: // #define X(A B
2993 Diag(Tok, DiagID: diag::err_pp_expected_comma_in_arg_list);
2994 return true;
2995 case tok::r_paren: // #define X(A)
2996 MI->setParameterList(List: Parameters, PPAllocator&: BP);
2997 return false;
2998 case tok::comma: // #define X(A,
2999 break;
3000 case tok::ellipsis: // #define X(A... -> GCC extension
3001 // Diagnose extension.
3002 Diag(Tok, DiagID: diag::ext_named_variadic_macro);
3003
3004 // Lex the token after the identifier.
3005 LexUnexpandedNonComment(Result&: Tok);
3006 if (Tok.isNot(K: tok::r_paren)) {
3007 Diag(Tok, DiagID: diag::err_pp_missing_rparen_in_macro_def);
3008 return true;
3009 }
3010
3011 MI->setIsGNUVarargs();
3012 MI->setParameterList(List: Parameters, PPAllocator&: BP);
3013 return false;
3014 }
3015 }
3016 }
3017}
3018
3019static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
3020 const LangOptions &LOptions) {
3021 if (MI->getNumTokens() == 1) {
3022 const Token &Value = MI->getReplacementToken(Tok: 0);
3023
3024 // Macro that is identity, like '#define inline inline' is a valid pattern.
3025 if (MacroName.getKind() == Value.getKind())
3026 return true;
3027
3028 // Macro that maps a keyword to the same keyword decorated with leading/
3029 // trailing underscores is a valid pattern:
3030 // #define inline __inline
3031 // #define inline __inline__
3032 // #define inline _inline (in MS compatibility mode)
3033 StringRef MacroText = MacroName.getIdentifierInfo()->getName();
3034 if (IdentifierInfo *II = Value.getIdentifierInfo()) {
3035 if (!II->isKeyword(LangOpts: LOptions))
3036 return false;
3037 StringRef ValueText = II->getName();
3038 StringRef TrimmedValue = ValueText;
3039 if (!ValueText.starts_with(Prefix: "__")) {
3040 if (ValueText.starts_with(Prefix: "_"))
3041 TrimmedValue = TrimmedValue.drop_front(N: 1);
3042 else
3043 return false;
3044 } else {
3045 TrimmedValue = TrimmedValue.drop_front(N: 2);
3046 if (TrimmedValue.ends_with(Suffix: "__"))
3047 TrimmedValue = TrimmedValue.drop_back(N: 2);
3048 }
3049 return TrimmedValue == MacroText;
3050 } else {
3051 return false;
3052 }
3053 }
3054
3055 // #define inline
3056 return MacroName.isOneOf(Ks: tok::kw_extern, Ks: tok::kw_inline, Ks: tok::kw_static,
3057 Ks: tok::kw_const) &&
3058 MI->getNumTokens() == 0;
3059}
3060
3061// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
3062// entire line) of the macro's tokens and adds them to MacroInfo, and while
3063// doing so performs certain validity checks including (but not limited to):
3064// - # (stringization) is followed by a macro parameter
3065//
3066// Returns a nullptr if an invalid sequence of tokens is encountered or returns
3067// a pointer to a MacroInfo object.
3068
3069MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
3070 const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {
3071
3072 Token LastTok = MacroNameTok;
3073 // Create the new macro.
3074 MacroInfo *const MI = AllocateMacroInfo(L: MacroNameTok.getLocation());
3075
3076 Token Tok;
3077 LexUnexpandedToken(Result&: Tok);
3078
3079 // Ensure we consume the rest of the macro body if errors occur.
3080 llvm::scope_exit _([&]() {
3081 // The flag indicates if we are still waiting for 'eod'.
3082 if (CurLexer->ParsingPreprocessorDirective)
3083 DiscardUntilEndOfDirective();
3084 });
3085
3086 // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
3087 // within their appropriate context.
3088 VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);
3089
3090 // If this is a function-like macro definition, parse the argument list,
3091 // marking each of the identifiers as being used as macro arguments. Also,
3092 // check other constraints on the first token of the macro body.
3093 if (Tok.is(K: tok::eod)) {
3094 if (ImmediatelyAfterHeaderGuard) {
3095 // Save this macro information since it may part of a header guard.
3096 CurPPLexer->MIOpt.SetDefinedMacro(M: MacroNameTok.getIdentifierInfo(),
3097 Loc: MacroNameTok.getLocation());
3098 }
3099 // If there is no body to this macro, we have no special handling here.
3100 } else if (Tok.hasLeadingSpace()) {
3101 // This is a normal token with leading space. Clear the leading space
3102 // marker on the first token to get proper expansion.
3103 Tok.clearFlag(Flag: Token::LeadingSpace);
3104 } else if (Tok.is(K: tok::l_paren)) {
3105 // This is a function-like macro definition. Read the argument list.
3106 MI->setIsFunctionLike();
3107 if (ReadMacroParameterList(MI, Tok&: LastTok))
3108 return nullptr;
3109
3110 // If this is a definition of an ISO C/C++ variadic function-like macro (not
3111 // using the GNU named varargs extension) inform our variadic scope guard
3112 // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
3113 // allowed only within the definition of a variadic macro.
3114
3115 if (MI->isC99Varargs()) {
3116 VariadicMacroScopeGuard.enterScope();
3117 }
3118
3119 // Read the first token after the arg list for down below.
3120 LexUnexpandedToken(Result&: Tok);
3121 } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
3122 // C99 requires whitespace between the macro definition and the body. Emit
3123 // a diagnostic for something like "#define X+".
3124 Diag(Tok, DiagID: diag::ext_c99_whitespace_required_after_macro_name);
3125 } else {
3126 // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
3127 // first character of a replacement list is not a character required by
3128 // subclause 5.2.1, then there shall be white-space separation between the
3129 // identifier and the replacement list.". 5.2.1 lists this set:
3130 // "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
3131 // is irrelevant here.
3132 bool isInvalid = false;
3133 if (Tok.is(K: tok::at)) // @ is not in the list above.
3134 isInvalid = true;
3135 else if (Tok.is(K: tok::unknown)) {
3136 // If we have an unknown token, it is something strange like "`". Since
3137 // all of valid characters would have lexed into a single character
3138 // token of some sort, we know this is not a valid case.
3139 isInvalid = true;
3140 }
3141 if (isInvalid)
3142 Diag(Tok, DiagID: diag::ext_missing_whitespace_after_macro_name);
3143 else
3144 Diag(Tok, DiagID: diag::warn_missing_whitespace_after_macro_name);
3145 }
3146
3147 if (!Tok.is(K: tok::eod))
3148 LastTok = Tok;
3149
3150 SmallVector<Token, 16> Tokens;
3151
3152 // Read the rest of the macro body.
3153 if (MI->isObjectLike()) {
3154 // Object-like macros are very simple, just read their body.
3155 while (Tok.isNot(K: tok::eod)) {
3156 LastTok = Tok;
3157 Tokens.push_back(Elt: Tok);
3158 // Get the next token of the macro.
3159 LexUnexpandedToken(Result&: Tok);
3160 }
3161 } else {
3162 // Otherwise, read the body of a function-like macro. While we are at it,
3163 // check C99 6.10.3.2p1: ensure that # operators are followed by macro
3164 // parameters in function-like macro expansions.
3165
3166 VAOptDefinitionContext VAOCtx(*this);
3167
3168 while (Tok.isNot(K: tok::eod)) {
3169 LastTok = Tok;
3170
3171 if (!Tok.isOneOf(Ks: tok::hash, Ks: tok::hashat, Ks: tok::hashhash)) {
3172 Tokens.push_back(Elt: Tok);
3173
3174 if (VAOCtx.isVAOptToken(T: Tok)) {
3175 // If we're already within a VAOPT, emit an error.
3176 if (VAOCtx.isInVAOpt()) {
3177 Diag(Tok, DiagID: diag::err_pp_vaopt_nested_use);
3178 return nullptr;
3179 }
3180 // Ensure VAOPT is followed by a '(' .
3181 LexUnexpandedToken(Result&: Tok);
3182 if (Tok.isNot(K: tok::l_paren)) {
3183 Diag(Tok, DiagID: diag::err_pp_missing_lparen_in_vaopt_use);
3184 return nullptr;
3185 }
3186 Tokens.push_back(Elt: Tok);
3187 VAOCtx.sawVAOptFollowedByOpeningParens(LParenLoc: Tok.getLocation());
3188 LexUnexpandedToken(Result&: Tok);
3189 if (Tok.is(K: tok::hashhash)) {
3190 Diag(Tok, DiagID: diag::err_vaopt_paste_at_start);
3191 return nullptr;
3192 }
3193 continue;
3194 } else if (VAOCtx.isInVAOpt()) {
3195 if (Tok.is(K: tok::r_paren)) {
3196 if (VAOCtx.sawClosingParen()) {
3197 assert(Tokens.size() >= 3 &&
3198 "Must have seen at least __VA_OPT__( "
3199 "and a subsequent tok::r_paren");
3200 if (Tokens[Tokens.size() - 2].is(K: tok::hashhash)) {
3201 Diag(Tok, DiagID: diag::err_vaopt_paste_at_end);
3202 return nullptr;
3203 }
3204 }
3205 } else if (Tok.is(K: tok::l_paren)) {
3206 VAOCtx.sawOpeningParen(LParenLoc: Tok.getLocation());
3207 }
3208 }
3209 // Get the next token of the macro.
3210 LexUnexpandedToken(Result&: Tok);
3211 continue;
3212 }
3213
3214 // If we're in -traditional mode, then we should ignore stringification
3215 // and token pasting. Mark the tokens as unknown so as not to confuse
3216 // things.
3217 if (getLangOpts().TraditionalCPP) {
3218 Tok.setKind(tok::unknown);
3219 Tokens.push_back(Elt: Tok);
3220
3221 // Get the next token of the macro.
3222 LexUnexpandedToken(Result&: Tok);
3223 continue;
3224 }
3225
3226 if (Tok.is(K: tok::hashhash)) {
3227 // If we see token pasting, check if it looks like the gcc comma
3228 // pasting extension. We'll use this information to suppress
3229 // diagnostics later on.
3230
3231 // Get the next token of the macro.
3232 LexUnexpandedToken(Result&: Tok);
3233
3234 if (Tok.is(K: tok::eod)) {
3235 Tokens.push_back(Elt: LastTok);
3236 break;
3237 }
3238
3239 if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
3240 Tokens[Tokens.size() - 1].is(K: tok::comma))
3241 MI->setHasCommaPasting();
3242
3243 // Things look ok, add the '##' token to the macro.
3244 Tokens.push_back(Elt: LastTok);
3245 continue;
3246 }
3247
3248 // Our Token is a stringization operator.
3249 // Get the next token of the macro.
3250 LexUnexpandedToken(Result&: Tok);
3251
3252 // Check for a valid macro arg identifier or __VA_OPT__.
3253 if (!VAOCtx.isVAOptToken(T: Tok) &&
3254 (Tok.getIdentifierInfo() == nullptr ||
3255 MI->getParameterNum(Arg: Tok.getIdentifierInfo()) == -1)) {
3256
3257 // If this is assembler-with-cpp mode, we accept random gibberish after
3258 // the '#' because '#' is often a comment character. However, change
3259 // the kind of the token to tok::unknown so that the preprocessor isn't
3260 // confused.
3261 if (getLangOpts().AsmPreprocessor && Tok.isNot(K: tok::eod)) {
3262 LastTok.setKind(tok::unknown);
3263 Tokens.push_back(Elt: LastTok);
3264 continue;
3265 } else {
3266 Diag(Tok, DiagID: diag::err_pp_stringize_not_parameter)
3267 << LastTok.is(K: tok::hashat);
3268 return nullptr;
3269 }
3270 }
3271
3272 // Things look ok, add the '#' and param name tokens to the macro.
3273 Tokens.push_back(Elt: LastTok);
3274
3275 // If the token following '#' is VAOPT, let the next iteration handle it
3276 // and check it for correctness, otherwise add the token and prime the
3277 // loop with the next one.
3278 if (!VAOCtx.isVAOptToken(T: Tok)) {
3279 Tokens.push_back(Elt: Tok);
3280 LastTok = Tok;
3281
3282 // Get the next token of the macro.
3283 LexUnexpandedToken(Result&: Tok);
3284 }
3285 }
3286 if (VAOCtx.isInVAOpt()) {
3287 assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
3288 Diag(Tok, DiagID: diag::err_pp_expected_after)
3289 << LastTok.getKind() << tok::r_paren;
3290 Diag(Loc: VAOCtx.getUnmatchedOpeningParenLoc(), DiagID: diag::note_matching) << tok::l_paren;
3291 return nullptr;
3292 }
3293 }
3294 MI->setDefinitionEndLoc(LastTok.getLocation());
3295
3296 MI->setTokens(Tokens, PPAllocator&: BP);
3297 return MI;
3298}
3299
3300static bool isObjCProtectedMacro(const IdentifierInfo *II) {
3301 return II->isStr(Str: "__strong") || II->isStr(Str: "__weak") ||
3302 II->isStr(Str: "__unsafe_unretained") || II->isStr(Str: "__autoreleasing");
3303}
3304
3305/// HandleDefineDirective - Implements \#define. This consumes the entire macro
3306/// line then lets the caller lex the next real token.
3307void Preprocessor::HandleDefineDirective(
3308 Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
3309 ++NumDefined;
3310
3311 Token MacroNameTok;
3312 bool MacroShadowsKeyword;
3313 ReadMacroName(MacroNameTok, isDefineUndef: MU_Define, ShadowFlag: &MacroShadowsKeyword);
3314
3315 // Error reading macro name? If so, diagnostic already issued.
3316 if (MacroNameTok.is(K: tok::eod))
3317 return;
3318
3319 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
3320 // Issue a final pragma warning if we're defining a macro that was has been
3321 // undefined and is being redefined.
3322 if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
3323 emitFinalMacroWarning(Identifier: MacroNameTok, /*IsUndef=*/false);
3324
3325 // If we are supposed to keep comments in #defines, reenable comment saving
3326 // mode.
3327 if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
3328
3329 MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
3330 MacroNameTok, ImmediatelyAfterHeaderGuard);
3331
3332 if (!MI) return;
3333
3334 if (MacroShadowsKeyword &&
3335 !isConfigurationPattern(MacroName&: MacroNameTok, MI, LOptions: getLangOpts())) {
3336 Diag(Tok: MacroNameTok, DiagID: diag::warn_pp_macro_hides_keyword);
3337 }
3338 // Check that there is no paste (##) operator at the beginning or end of the
3339 // replacement list.
3340 unsigned NumTokens = MI->getNumTokens();
3341 if (NumTokens != 0) {
3342 if (MI->getReplacementToken(Tok: 0).is(K: tok::hashhash)) {
3343 Diag(Tok: MI->getReplacementToken(Tok: 0), DiagID: diag::err_paste_at_start);
3344 return;
3345 }
3346 if (MI->getReplacementToken(Tok: NumTokens-1).is(K: tok::hashhash)) {
3347 Diag(Tok: MI->getReplacementToken(Tok: NumTokens-1), DiagID: diag::err_paste_at_end);
3348 return;
3349 }
3350 }
3351
3352 // When skipping just warn about macros that do not match.
3353 if (SkippingUntilPCHThroughHeader) {
3354 const MacroInfo *OtherMI = getMacroInfo(II: MacroNameTok.getIdentifierInfo());
3355 if (!OtherMI || !MI->isIdenticalTo(Other: *OtherMI, PP&: *this,
3356 /*Syntactic=*/Syntactically: LangOpts.MicrosoftExt))
3357 Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::warn_pp_macro_def_mismatch_with_pch)
3358 << MacroNameTok.getIdentifierInfo();
3359 // Issue the diagnostic but allow the change if msvc extensions are enabled
3360 if (!LangOpts.MicrosoftExt)
3361 return;
3362 }
3363
3364 // Finally, if this identifier already had a macro defined for it, verify that
3365 // the macro bodies are identical, and issue diagnostics if they are not.
3366 if (const MacroInfo *OtherMI=getMacroInfo(II: MacroNameTok.getIdentifierInfo())) {
3367 // Final macros are hard-mode: they always warn. Even if the bodies are
3368 // identical. Even if they are in system headers. Even if they are things we
3369 // would silently allow in the past.
3370 if (MacroNameTok.getIdentifierInfo()->isFinal())
3371 emitFinalMacroWarning(Identifier: MacroNameTok, /*IsUndef=*/false);
3372
3373 // In Objective-C, ignore attempts to directly redefine the builtin
3374 // definitions of the ownership qualifiers. It's still possible to
3375 // #undef them.
3376 if (getLangOpts().ObjC &&
3377 SourceMgr.getFileID(SpellingLoc: OtherMI->getDefinitionLoc()) ==
3378 getPredefinesFileID() &&
3379 isObjCProtectedMacro(II: MacroNameTok.getIdentifierInfo())) {
3380 // Warn if it changes the tokens.
3381 if ((!getDiagnostics().getSuppressSystemWarnings() ||
3382 !SourceMgr.isInSystemHeader(Loc: DefineTok.getLocation())) &&
3383 !MI->isIdenticalTo(Other: *OtherMI, PP&: *this,
3384 /*Syntactic=*/Syntactically: LangOpts.MicrosoftExt)) {
3385 Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::warn_pp_objc_macro_redef_ignored);
3386 }
3387 assert(!OtherMI->isWarnIfUnused());
3388 return;
3389 }
3390
3391 // It is very common for system headers to have tons of macro redefinitions
3392 // and for warnings to be disabled in system headers. If this is the case,
3393 // then don't bother calling MacroInfo::isIdenticalTo.
3394 if (!getDiagnostics().getSuppressSystemWarnings() ||
3395 !SourceMgr.isInSystemHeader(Loc: DefineTok.getLocation())) {
3396
3397 if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
3398 Diag(Loc: OtherMI->getDefinitionLoc(), DiagID: diag::pp_macro_not_used);
3399
3400 // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
3401 // C++ [cpp.predefined]p4, but allow it as an extension.
3402 if (isLanguageDefinedBuiltin(SourceMgr, MI: OtherMI, MacroName: II->getName()))
3403 Diag(Tok: MacroNameTok, DiagID: diag::ext_pp_redef_builtin_macro);
3404 // Macros must be identical. This means all tokens and whitespace
3405 // separation must be the same. C99 6.10.3p2.
3406 else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
3407 !MI->isIdenticalTo(Other: *OtherMI, PP&: *this, /*Syntactic=*/Syntactically: LangOpts.MicrosoftExt)) {
3408 Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::ext_pp_macro_redef)
3409 << MacroNameTok.getIdentifierInfo();
3410 Diag(Loc: OtherMI->getDefinitionLoc(), DiagID: diag::note_previous_definition);
3411 }
3412 }
3413 if (OtherMI->isWarnIfUnused())
3414 WarnUnusedMacroLocs.erase(V: OtherMI->getDefinitionLoc());
3415 }
3416
3417 DefMacroDirective *MD =
3418 appendDefMacroDirective(II: MacroNameTok.getIdentifierInfo(), MI);
3419
3420 assert(!MI->isUsed());
3421 // If we need warning for not using the macro, add its location in the
3422 // warn-because-unused-macro set. If it gets used it will be removed from set.
3423 if (getSourceManager().isInMainFile(Loc: MI->getDefinitionLoc()) &&
3424 !Diags->isIgnored(DiagID: diag::pp_macro_not_used, Loc: MI->getDefinitionLoc()) &&
3425 !MacroExpansionInDirectivesOverride &&
3426 getSourceManager().getFileID(SpellingLoc: MI->getDefinitionLoc()) !=
3427 getPredefinesFileID()) {
3428 MI->setIsWarnIfUnused(true);
3429 WarnUnusedMacroLocs.insert(V: MI->getDefinitionLoc());
3430 }
3431
3432 // If the callbacks want to know, tell them about the macro definition.
3433 if (Callbacks)
3434 Callbacks->MacroDefined(MacroNameTok, MD);
3435}
3436
3437/// HandleUndefDirective - Implements \#undef.
3438///
3439void Preprocessor::HandleUndefDirective() {
3440 ++NumUndefined;
3441
3442 Token MacroNameTok;
3443 ReadMacroName(MacroNameTok, isDefineUndef: MU_Undef);
3444
3445 // Error reading macro name? If so, diagnostic already issued.
3446 if (MacroNameTok.is(K: tok::eod))
3447 return;
3448
3449 // Check to see if this is the last token on the #undef line.
3450 CheckEndOfDirective(DirType: "undef");
3451
3452 // Okay, we have a valid identifier to undef.
3453 auto *II = MacroNameTok.getIdentifierInfo();
3454 auto MD = getMacroDefinition(II);
3455 UndefMacroDirective *Undef = nullptr;
3456
3457 if (II->isFinal())
3458 emitFinalMacroWarning(Identifier: MacroNameTok, /*IsUndef=*/true);
3459
3460 // If the macro is not defined, this is a noop undef.
3461 if (const MacroInfo *MI = MD.getMacroInfo()) {
3462 if (!MI->isUsed() && MI->isWarnIfUnused())
3463 Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::pp_macro_not_used);
3464
3465 // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4 and
3466 // C++ [cpp.predefined]p4, but allow it as an extension.
3467 if (isLanguageDefinedBuiltin(SourceMgr, MI, MacroName: II->getName()))
3468 Diag(Tok: MacroNameTok, DiagID: diag::ext_pp_undef_builtin_macro);
3469
3470 if (MI->isWarnIfUnused())
3471 WarnUnusedMacroLocs.erase(V: MI->getDefinitionLoc());
3472
3473 Undef = AllocateUndefMacroDirective(UndefLoc: MacroNameTok.getLocation());
3474 }
3475
3476 // If the callbacks want to know, tell them about the macro #undef.
3477 // Note: no matter if the macro was defined or not.
3478 if (Callbacks)
3479 Callbacks->MacroUndefined(MacroNameTok, MD, Undef);
3480
3481 if (Undef)
3482 appendMacroDirective(II, MD: Undef);
3483}
3484
3485//===----------------------------------------------------------------------===//
3486// Preprocessor Conditional Directive Handling.
3487//===----------------------------------------------------------------------===//
3488
3489/// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive. isIfndef
3490/// is true when this is a \#ifndef directive. ReadAnyTokensBeforeDirective is
3491/// true if any tokens have been returned or pp-directives activated before this
3492/// \#ifndef has been lexed.
3493///
3494void Preprocessor::HandleIfdefDirective(Token &Result,
3495 const Token &HashToken,
3496 bool isIfndef,
3497 bool ReadAnyTokensBeforeDirective) {
3498 ++NumIf;
3499 Token DirectiveTok = Result;
3500
3501 Token MacroNameTok;
3502 ReadMacroName(MacroNameTok);
3503
3504 // Error reading macro name? If so, diagnostic already issued.
3505 if (MacroNameTok.is(K: tok::eod)) {
3506 // Skip code until we get to #endif. This helps with recovery by not
3507 // emitting an error when the #endif is reached.
3508 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(),
3509 IfTokenLoc: DirectiveTok.getLocation(),
3510 /*Foundnonskip*/ FoundNonSkipPortion: false, /*FoundElse*/ false);
3511 return;
3512 }
3513
3514 emitMacroExpansionWarnings(Identifier: MacroNameTok, /*IsIfnDef=*/true);
3515
3516 // Check to see if this is the last token on the #if[n]def line.
3517 CheckEndOfDirective(DirType: isIfndef ? "ifndef" : "ifdef");
3518
3519 IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
3520 auto MD = getMacroDefinition(II: MII);
3521 MacroInfo *MI = MD.getMacroInfo();
3522
3523 if (CurPPLexer->getConditionalStackDepth() == 0) {
3524 // If the start of a top-level #ifdef and if the macro is not defined,
3525 // inform MIOpt that this might be the start of a proper include guard.
3526 // Otherwise it is some other form of unknown conditional which we can't
3527 // handle.
3528 if (!ReadAnyTokensBeforeDirective && !MI) {
3529 assert(isIfndef && "#ifdef shouldn't reach here");
3530 CurPPLexer->MIOpt.EnterTopLevelIfndef(M: MII, Loc: MacroNameTok.getLocation());
3531 } else
3532 CurPPLexer->MIOpt.EnterTopLevelConditional();
3533 }
3534
3535 // If there is a macro, process it.
3536 if (MI) // Mark it used.
3537 markMacroAsUsed(MI);
3538
3539 if (Callbacks) {
3540 if (isIfndef)
3541 Callbacks->Ifndef(Loc: DirectiveTok.getLocation(), MacroNameTok, MD);
3542 else
3543 Callbacks->Ifdef(Loc: DirectiveTok.getLocation(), MacroNameTok, MD);
3544 }
3545
3546 bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3547 getSourceManager().isInMainFile(Loc: DirectiveTok.getLocation());
3548
3549 // Should we include the stuff contained by this directive?
3550 if (PPOpts.SingleFileParseMode && !MI) {
3551 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3552 // the directive blocks.
3553 CurPPLexer->pushConditionalLevel(DirectiveStart: DirectiveTok.getLocation(),
3554 /*wasskip*/WasSkipping: false, /*foundnonskip*/FoundNonSkip: false,
3555 /*foundelse*/FoundElse: false);
3556 } else if (PPOpts.SingleModuleParseMode && !MI) {
3557 // In 'single-module-parse mode' undefined identifiers trigger skipping of
3558 // all the directive blocks. We lie here and set FoundNonSkipPortion so that
3559 // even any \#else blocks get skipped.
3560 SkipExcludedConditionalBlock(
3561 HashTokenLoc: HashToken.getLocation(), IfTokenLoc: DirectiveTok.getLocation(),
3562 /*FoundNonSkipPortion=*/true, /*FoundElse=*/false);
3563 } else if (!MI == isIfndef || RetainExcludedCB) {
3564 // Yes, remember that we are inside a conditional, then lex the next token.
3565 CurPPLexer->pushConditionalLevel(DirectiveStart: DirectiveTok.getLocation(),
3566 /*wasskip*/WasSkipping: false, /*foundnonskip*/FoundNonSkip: true,
3567 /*foundelse*/FoundElse: false);
3568 } else {
3569 // No, skip the contents of this block.
3570 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(),
3571 IfTokenLoc: DirectiveTok.getLocation(),
3572 /*Foundnonskip*/ FoundNonSkipPortion: false,
3573 /*FoundElse*/ false);
3574 }
3575}
3576
3577/// HandleIfDirective - Implements the \#if directive.
3578///
3579void Preprocessor::HandleIfDirective(Token &IfToken,
3580 const Token &HashToken,
3581 bool ReadAnyTokensBeforeDirective) {
3582 ++NumIf;
3583
3584 // Parse and evaluate the conditional expression.
3585 IdentifierInfo *IfNDefMacro = nullptr;
3586 const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
3587 const bool ConditionalTrue = DER.Conditional;
3588 // Lexer might become invalid if we hit code completion point while evaluating
3589 // expression.
3590 if (!CurPPLexer)
3591 return;
3592
3593 // If this condition is equivalent to #ifndef X, and if this is the first
3594 // directive seen, handle it for the multiple-include optimization.
3595 if (CurPPLexer->getConditionalStackDepth() == 0) {
3596 if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)
3597 // FIXME: Pass in the location of the macro name, not the 'if' token.
3598 CurPPLexer->MIOpt.EnterTopLevelIfndef(M: IfNDefMacro, Loc: IfToken.getLocation());
3599 else
3600 CurPPLexer->MIOpt.EnterTopLevelConditional();
3601 }
3602
3603 if (Callbacks)
3604 Callbacks->If(
3605 Loc: IfToken.getLocation(), ConditionRange: DER.ExprRange,
3606 ConditionValue: (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));
3607
3608 bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3609 getSourceManager().isInMainFile(Loc: IfToken.getLocation());
3610
3611 // Should we include the stuff contained by this directive?
3612 if (PPOpts.SingleFileParseMode && DER.IncludedUndefinedIds) {
3613 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3614 // the directive blocks.
3615 CurPPLexer->pushConditionalLevel(DirectiveStart: IfToken.getLocation(), /*wasskip*/WasSkipping: false,
3616 /*foundnonskip*/FoundNonSkip: false, /*foundelse*/FoundElse: false);
3617 } else if (PPOpts.SingleModuleParseMode && DER.IncludedUndefinedIds) {
3618 // In 'single-module-parse mode' undefined identifiers trigger skipping of
3619 // all the directive blocks. We lie here and set FoundNonSkipPortion so that
3620 // even any \#else blocks get skipped.
3621 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(), IfTokenLoc: IfToken.getLocation(),
3622 /*FoundNonSkipPortion=*/true,
3623 /*FoundElse=*/false);
3624 } else if (ConditionalTrue || RetainExcludedCB) {
3625 // Yes, remember that we are inside a conditional, then lex the next token.
3626 CurPPLexer->pushConditionalLevel(DirectiveStart: IfToken.getLocation(), /*wasskip*/WasSkipping: false,
3627 /*foundnonskip*/FoundNonSkip: true, /*foundelse*/FoundElse: false);
3628 } else {
3629 // No, skip the contents of this block.
3630 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(), IfTokenLoc: IfToken.getLocation(),
3631 /*Foundnonskip*/ FoundNonSkipPortion: false,
3632 /*FoundElse*/ false);
3633 }
3634}
3635
3636/// HandleEndifDirective - Implements the \#endif directive.
3637///
3638void Preprocessor::HandleEndifDirective(Token &EndifToken) {
3639 ++NumEndif;
3640
3641 // Check that this is the whole directive.
3642 CheckEndOfDirective(DirType: "endif");
3643
3644 PPConditionalInfo CondInfo;
3645 if (CurPPLexer->popConditionalLevel(CI&: CondInfo)) {
3646 // No conditionals on the stack: this is an #endif without an #if.
3647 Diag(Tok: EndifToken, DiagID: diag::err_pp_endif_without_if);
3648 return;
3649 }
3650
3651 // If this the end of a top-level #endif, inform MIOpt.
3652 if (CurPPLexer->getConditionalStackDepth() == 0)
3653 CurPPLexer->MIOpt.ExitTopLevelConditional();
3654
3655 assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
3656 "This code should only be reachable in the non-skipping case!");
3657
3658 if (Callbacks)
3659 Callbacks->Endif(Loc: EndifToken.getLocation(), IfLoc: CondInfo.IfLoc);
3660}
3661
3662/// HandleElseDirective - Implements the \#else directive.
3663///
3664void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) {
3665 ++NumElse;
3666
3667 // #else directive in a non-skipping conditional... start skipping.
3668 CheckEndOfDirective(DirType: "else");
3669
3670 PPConditionalInfo CI;
3671 if (CurPPLexer->popConditionalLevel(CI)) {
3672 Diag(Tok: Result, DiagID: diag::pp_err_else_without_if);
3673 return;
3674 }
3675
3676 // If this is a top-level #else, inform the MIOpt.
3677 if (CurPPLexer->getConditionalStackDepth() == 0)
3678 CurPPLexer->MIOpt.EnterTopLevelConditional();
3679
3680 // If this is a #else with a #else before it, report the error.
3681 if (CI.FoundElse) Diag(Tok: Result, DiagID: diag::pp_err_else_after_else);
3682
3683 if (Callbacks)
3684 Callbacks->Else(Loc: Result.getLocation(), IfLoc: CI.IfLoc);
3685
3686 bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3687 getSourceManager().isInMainFile(Loc: Result.getLocation());
3688
3689 if ((PPOpts.SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3690 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3691 // the directive blocks.
3692 CurPPLexer->pushConditionalLevel(DirectiveStart: CI.IfLoc, /*wasskip*/WasSkipping: false,
3693 /*foundnonskip*/FoundNonSkip: false, /*foundelse*/FoundElse: true);
3694 return;
3695 }
3696
3697 // Finally, skip the rest of the contents of this block.
3698 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(), IfTokenLoc: CI.IfLoc,
3699 /*Foundnonskip*/ FoundNonSkipPortion: true,
3700 /*FoundElse*/ true, ElseLoc: Result.getLocation());
3701}
3702
3703/// Implements the \#elif, \#elifdef, and \#elifndef directives.
3704void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
3705 const Token &HashToken,
3706 tok::PPKeywordKind Kind) {
3707 PPElifDiag DirKind = Kind == tok::pp_elif ? PED_Elif
3708 : Kind == tok::pp_elifdef ? PED_Elifdef
3709 : PED_Elifndef;
3710 ++NumElse;
3711
3712 // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode.
3713 switch (DirKind) {
3714 case PED_Elifdef:
3715 case PED_Elifndef:
3716 unsigned DiagID;
3717 if (LangOpts.CPlusPlus)
3718 DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
3719 : diag::ext_cxx23_pp_directive;
3720 else
3721 DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
3722 : diag::ext_c23_pp_directive;
3723 Diag(Tok: ElifToken, DiagID) << DirKind;
3724 break;
3725 default:
3726 break;
3727 }
3728
3729 // #elif directive in a non-skipping conditional... start skipping.
3730 // We don't care what the condition is, because we will always skip it (since
3731 // the block immediately before it was included).
3732 SourceRange ConditionRange = DiscardUntilEndOfDirective();
3733
3734 PPConditionalInfo CI;
3735 if (CurPPLexer->popConditionalLevel(CI)) {
3736 Diag(Tok: ElifToken, DiagID: diag::pp_err_elif_without_if) << DirKind;
3737 return;
3738 }
3739
3740 // If this is a top-level #elif, inform the MIOpt.
3741 if (CurPPLexer->getConditionalStackDepth() == 0)
3742 CurPPLexer->MIOpt.EnterTopLevelConditional();
3743
3744 // If this is a #elif with a #else before it, report the error.
3745 if (CI.FoundElse)
3746 Diag(Tok: ElifToken, DiagID: diag::pp_err_elif_after_else) << DirKind;
3747
3748 if (Callbacks) {
3749 switch (Kind) {
3750 case tok::pp_elif:
3751 Callbacks->Elif(Loc: ElifToken.getLocation(), ConditionRange,
3752 ConditionValue: PPCallbacks::CVK_NotEvaluated, IfLoc: CI.IfLoc);
3753 break;
3754 case tok::pp_elifdef:
3755 Callbacks->Elifdef(Loc: ElifToken.getLocation(), ConditionRange, IfLoc: CI.IfLoc);
3756 break;
3757 case tok::pp_elifndef:
3758 Callbacks->Elifndef(Loc: ElifToken.getLocation(), ConditionRange, IfLoc: CI.IfLoc);
3759 break;
3760 default:
3761 assert(false && "unexpected directive kind");
3762 break;
3763 }
3764 }
3765
3766 bool RetainExcludedCB = PPOpts.RetainExcludedConditionalBlocks &&
3767 getSourceManager().isInMainFile(Loc: ElifToken.getLocation());
3768
3769 if ((PPOpts.SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3770 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3771 // the directive blocks.
3772 CurPPLexer->pushConditionalLevel(DirectiveStart: ElifToken.getLocation(), /*wasskip*/WasSkipping: false,
3773 /*foundnonskip*/FoundNonSkip: false, /*foundelse*/FoundElse: false);
3774 return;
3775 }
3776
3777 // Finally, skip the rest of the contents of this block.
3778 SkipExcludedConditionalBlock(
3779 HashTokenLoc: HashToken.getLocation(), IfTokenLoc: CI.IfLoc, /*Foundnonskip*/ FoundNonSkipPortion: true,
3780 /*FoundElse*/ CI.FoundElse, ElseLoc: ElifToken.getLocation());
3781}
3782
3783std::optional<LexEmbedParametersResult>
3784Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
3785 LexEmbedParametersResult Result{};
3786 tok::TokenKind EndTokenKind = ForHasEmbed ? tok::r_paren : tok::eod;
3787
3788 auto DiagMismatchedBracesAndSkipToEOD =
3789 [&](tok::TokenKind Expected,
3790 std::pair<tok::TokenKind, SourceLocation> Matches) {
3791 Diag(Tok: CurTok, DiagID: diag::err_expected) << Expected;
3792 Diag(Loc: Matches.second, DiagID: diag::note_matching) << Matches.first;
3793 if (CurTok.isNot(K: tok::eod))
3794 DiscardUntilEndOfDirective(Tmp&: CurTok);
3795 };
3796
3797 auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) {
3798 if (CurTok.isNot(K: Kind)) {
3799 Diag(Tok: CurTok, DiagID: diag::err_expected) << Kind;
3800 if (CurTok.isNot(K: tok::eod))
3801 DiscardUntilEndOfDirective(Tmp&: CurTok);
3802 return false;
3803 }
3804 return true;
3805 };
3806
3807 // C23 6.10:
3808 // pp-parameter-name:
3809 // pp-standard-parameter
3810 // pp-prefixed-parameter
3811 //
3812 // pp-standard-parameter:
3813 // identifier
3814 //
3815 // pp-prefixed-parameter:
3816 // identifier :: identifier
3817 auto LexPPParameterName = [&]() -> std::optional<std::string> {
3818 // We expect the current token to be an identifier; if it's not, things
3819 // have gone wrong.
3820 if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3821 return std::nullopt;
3822
3823 const IdentifierInfo *Prefix = CurTok.getIdentifierInfo();
3824
3825 // Lex another token; it is either a :: or we're done with the parameter
3826 // name.
3827 LexNonComment(Result&: CurTok);
3828 if (CurTok.is(K: tok::coloncolon)) {
3829 // We found a ::, so lex another identifier token.
3830 LexNonComment(Result&: CurTok);
3831 if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3832 return std::nullopt;
3833
3834 const IdentifierInfo *Suffix = CurTok.getIdentifierInfo();
3835
3836 // Lex another token so we're past the name.
3837 LexNonComment(Result&: CurTok);
3838 return (llvm::Twine(Prefix->getName()) + "::" + Suffix->getName()).str();
3839 }
3840 return Prefix->getName().str();
3841 };
3842
3843 // C23 6.10p5: In all aspects, a preprocessor standard parameter specified by
3844 // this document as an identifier pp_param and an identifier of the form
3845 // __pp_param__ shall behave the same when used as a preprocessor parameter,
3846 // except for the spelling.
3847 auto NormalizeParameterName = [](StringRef Name) {
3848 if (Name.size() > 4 && Name.starts_with(Prefix: "__") && Name.ends_with(Suffix: "__"))
3849 return Name.substr(Start: 2, N: Name.size() - 4);
3850 return Name;
3851 };
3852
3853 auto LexParenthesizedIntegerExpr = [&]() -> std::optional<size_t> {
3854 // we have a limit parameter and its internals are processed using
3855 // evaluation rules from #if.
3856 if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3857 return std::nullopt;
3858
3859 // We do not consume the ( because EvaluateDirectiveExpression will lex
3860 // the next token for us.
3861 IdentifierInfo *ParameterIfNDef = nullptr;
3862 bool EvaluatedDefined;
3863 DirectiveEvalResult LimitEvalResult = EvaluateDirectiveExpression(
3864 IfNDefMacro&: ParameterIfNDef, Tok&: CurTok, EvaluatedDefined, /*CheckForEOD=*/CheckForEoD: false);
3865
3866 if (!LimitEvalResult.Value) {
3867 // If there was an error evaluating the directive expression, we expect
3868 // to be at the end of directive token.
3869 assert(CurTok.is(tok::eod) && "expect to be at the end of directive");
3870 return std::nullopt;
3871 }
3872
3873 if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3874 return std::nullopt;
3875
3876 // Eat the ).
3877 LexNonComment(Result&: CurTok);
3878
3879 // C23 6.10.3.2p2: The token defined shall not appear within the constant
3880 // expression.
3881 if (EvaluatedDefined) {
3882 Diag(Tok: CurTok, DiagID: diag::err_defined_in_pp_embed);
3883 return std::nullopt;
3884 }
3885
3886 if (LimitEvalResult.Value) {
3887 const llvm::APSInt &Result = *LimitEvalResult.Value;
3888 if (Result.isNegative()) {
3889 Diag(Tok: CurTok, DiagID: diag::err_requires_positive_value)
3890 << toString(I: Result, Radix: 10) << /*positive*/ 0;
3891 if (CurTok.isNot(K: EndTokenKind))
3892 DiscardUntilEndOfDirective(Tmp&: CurTok);
3893 return std::nullopt;
3894 }
3895 return Result.getLimitedValue();
3896 }
3897 return std::nullopt;
3898 };
3899
3900 auto GetMatchingCloseBracket = [](tok::TokenKind Kind) {
3901 switch (Kind) {
3902 case tok::l_paren:
3903 return tok::r_paren;
3904 case tok::l_brace:
3905 return tok::r_brace;
3906 case tok::l_square:
3907 return tok::r_square;
3908 default:
3909 llvm_unreachable("should not get here");
3910 }
3911 };
3912
3913 auto LexParenthesizedBalancedTokenSoup =
3914 [&](llvm::SmallVectorImpl<Token> &Tokens) {
3915 std::vector<std::pair<tok::TokenKind, SourceLocation>> BracketStack;
3916
3917 // We expect the current token to be a left paren.
3918 if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3919 return false;
3920 LexNonComment(Result&: CurTok); // Eat the (
3921
3922 bool WaitingForInnerCloseParen = false;
3923 while (CurTok.isNot(K: tok::eod) &&
3924 (WaitingForInnerCloseParen || CurTok.isNot(K: tok::r_paren))) {
3925 switch (CurTok.getKind()) {
3926 default: // Shutting up diagnostics about not fully-covered switch.
3927 break;
3928 case tok::l_paren:
3929 WaitingForInnerCloseParen = true;
3930 [[fallthrough]];
3931 case tok::l_brace:
3932 case tok::l_square:
3933 BracketStack.push_back(x: {CurTok.getKind(), CurTok.getLocation()});
3934 break;
3935 case tok::r_paren:
3936 WaitingForInnerCloseParen = false;
3937 [[fallthrough]];
3938 case tok::r_brace:
3939 case tok::r_square: {
3940 if (BracketStack.empty()) {
3941 ExpectOrDiagAndSkipToEOD(tok::r_paren);
3942 return false;
3943 }
3944 tok::TokenKind Matching =
3945 GetMatchingCloseBracket(BracketStack.back().first);
3946 if (CurTok.getKind() != Matching) {
3947 DiagMismatchedBracesAndSkipToEOD(Matching, BracketStack.back());
3948 return false;
3949 }
3950 BracketStack.pop_back();
3951 } break;
3952 }
3953 Tokens.push_back(Elt: CurTok);
3954 LexNonComment(Result&: CurTok);
3955 }
3956
3957 // When we're done, we want to eat the closing paren.
3958 if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3959 return false;
3960
3961 LexNonComment(Result&: CurTok); // Eat the )
3962 return true;
3963 };
3964
3965 LexNonComment(Result&: CurTok); // Prime the pump.
3966 while (!CurTok.isOneOf(Ks: EndTokenKind, Ks: tok::eod)) {
3967 SourceLocation ParamStartLoc = CurTok.getLocation();
3968 std::optional<std::string> ParamName = LexPPParameterName();
3969 if (!ParamName)
3970 return std::nullopt;
3971 StringRef Parameter = NormalizeParameterName(*ParamName);
3972
3973 // Lex the parameters (dependent on the parameter type we want!).
3974 //
3975 // C23 6.10.3.Xp1: The X standard embed parameter may appear zero times or
3976 // one time in the embed parameter sequence.
3977 if (Parameter == "limit") {
3978 if (Result.MaybeLimitParam)
3979 Diag(Tok: CurTok, DiagID: diag::err_pp_embed_dup_params) << Parameter;
3980
3981 std::optional<size_t> Limit = LexParenthesizedIntegerExpr();
3982 if (!Limit)
3983 return std::nullopt;
3984 Result.MaybeLimitParam =
3985 PPEmbedParameterLimit{*Limit, {ParamStartLoc, CurTok.getLocation()}};
3986 } else if (Parameter == "clang::offset") {
3987 if (Result.MaybeOffsetParam)
3988 Diag(Tok: CurTok, DiagID: diag::err_pp_embed_dup_params) << Parameter;
3989
3990 std::optional<size_t> Offset = LexParenthesizedIntegerExpr();
3991 if (!Offset)
3992 return std::nullopt;
3993 Result.MaybeOffsetParam = PPEmbedParameterOffset{
3994 *Offset, {ParamStartLoc, CurTok.getLocation()}};
3995 } else if (Parameter == "prefix") {
3996 if (Result.MaybePrefixParam)
3997 Diag(Tok: CurTok, DiagID: diag::err_pp_embed_dup_params) << Parameter;
3998
3999 SmallVector<Token, 4> Soup;
4000 if (!LexParenthesizedBalancedTokenSoup(Soup))
4001 return std::nullopt;
4002 Result.MaybePrefixParam = PPEmbedParameterPrefix{
4003 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
4004 } else if (Parameter == "suffix") {
4005 if (Result.MaybeSuffixParam)
4006 Diag(Tok: CurTok, DiagID: diag::err_pp_embed_dup_params) << Parameter;
4007
4008 SmallVector<Token, 4> Soup;
4009 if (!LexParenthesizedBalancedTokenSoup(Soup))
4010 return std::nullopt;
4011 Result.MaybeSuffixParam = PPEmbedParameterSuffix{
4012 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
4013 } else if (Parameter == "if_empty") {
4014 if (Result.MaybeIfEmptyParam)
4015 Diag(Tok: CurTok, DiagID: diag::err_pp_embed_dup_params) << Parameter;
4016
4017 SmallVector<Token, 4> Soup;
4018 if (!LexParenthesizedBalancedTokenSoup(Soup))
4019 return std::nullopt;
4020 Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{
4021 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
4022 } else {
4023 ++Result.UnrecognizedParams;
4024
4025 // If there's a left paren, we need to parse a balanced token sequence
4026 // and just eat those tokens.
4027 if (CurTok.is(K: tok::l_paren)) {
4028 SmallVector<Token, 4> Soup;
4029 if (!LexParenthesizedBalancedTokenSoup(Soup))
4030 return std::nullopt;
4031 }
4032 if (!ForHasEmbed) {
4033 Diag(Loc: ParamStartLoc, DiagID: diag::err_pp_unknown_parameter) << 1 << Parameter;
4034 if (CurTok.isNot(K: EndTokenKind))
4035 DiscardUntilEndOfDirective(Tmp&: CurTok);
4036 return std::nullopt;
4037 }
4038 }
4039 }
4040 return Result;
4041}
4042
4043void Preprocessor::HandleEmbedDirectiveImpl(
4044 SourceLocation HashLoc, const LexEmbedParametersResult &Params,
4045 StringRef BinaryContents, StringRef FileName) {
4046 if (BinaryContents.empty()) {
4047 // If we have no binary contents, the only thing we need to emit are the
4048 // if_empty tokens, if any.
4049 // FIXME: this loses AST fidelity; nothing in the compiler will see that
4050 // these tokens came from #embed. We have to hack around this when printing
4051 // preprocessed output. The same is true for prefix and suffix tokens.
4052 if (Params.MaybeIfEmptyParam) {
4053 ArrayRef<Token> Toks = Params.MaybeIfEmptyParam->Tokens;
4054 size_t TokCount = Toks.size();
4055 auto NewToks = std::make_unique<Token[]>(num: TokCount);
4056 llvm::copy(Range&: Toks, Out: NewToks.get());
4057 EnterTokenStream(Toks: std::move(NewToks), NumToks: TokCount, DisableMacroExpansion: true, IsReinject: true);
4058 }
4059 return;
4060 }
4061
4062 size_t NumPrefixToks = Params.PrefixTokenCount(),
4063 NumSuffixToks = Params.SuffixTokenCount();
4064 size_t TotalNumToks = 1 + NumPrefixToks + NumSuffixToks;
4065 size_t CurIdx = 0;
4066 auto Toks = std::make_unique<Token[]>(num: TotalNumToks);
4067
4068 // Add the prefix tokens, if any.
4069 if (Params.MaybePrefixParam) {
4070 llvm::copy(Range: Params.MaybePrefixParam->Tokens, Out: &Toks[CurIdx]);
4071 CurIdx += NumPrefixToks;
4072 }
4073
4074 EmbedAnnotationData *Data = new (BP) EmbedAnnotationData;
4075 Data->BinaryData = BinaryContents;
4076 Data->FileName = FileName;
4077
4078 Toks[CurIdx].startToken();
4079 Toks[CurIdx].setKind(tok::annot_embed);
4080 Toks[CurIdx].setAnnotationRange(HashLoc);
4081 Toks[CurIdx++].setAnnotationValue(Data);
4082
4083 // Now add the suffix tokens, if any.
4084 if (Params.MaybeSuffixParam) {
4085 llvm::copy(Range: Params.MaybeSuffixParam->Tokens, Out: &Toks[CurIdx]);
4086 CurIdx += NumSuffixToks;
4087 }
4088
4089 assert(CurIdx == TotalNumToks && "Calculated the incorrect number of tokens");
4090 EnterTokenStream(Toks: std::move(Toks), NumToks: TotalNumToks, DisableMacroExpansion: true, IsReinject: true);
4091}
4092
4093void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc,
4094 Token &EmbedTok) {
4095 // Give the usual extension/compatibility warnings.
4096 if (LangOpts.C23)
4097 Diag(Tok: EmbedTok, DiagID: diag::warn_compat_pp_embed_directive);
4098 else
4099 Diag(Tok: EmbedTok, DiagID: diag::ext_pp_embed_directive)
4100 << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0);
4101
4102 // Parse the filename header
4103 Token FilenameTok;
4104 if (LexHeaderName(Result&: FilenameTok))
4105 return;
4106
4107 if (FilenameTok.isNot(K: tok::header_name)) {
4108 Diag(Loc: FilenameTok.getLocation(), DiagID: diag::err_pp_expects_filename);
4109 if (FilenameTok.isNot(K: tok::eod))
4110 DiscardUntilEndOfDirective();
4111 return;
4112 }
4113
4114 // Parse the optional sequence of
4115 // directive-parameters:
4116 // identifier parameter-name-list[opt] directive-argument-list[opt]
4117 // directive-argument-list:
4118 // '(' balanced-token-sequence ')'
4119 // parameter-name-list:
4120 // '::' identifier parameter-name-list[opt]
4121 Token CurTok;
4122 std::optional<LexEmbedParametersResult> Params =
4123 LexEmbedParameters(CurTok, /*ForHasEmbed=*/false);
4124
4125 assert((Params || CurTok.is(tok::eod)) &&
4126 "expected success or to be at the end of the directive");
4127 if (!Params)
4128 return;
4129
4130 // Now, splat the data out!
4131 SmallString<128> FilenameBuffer;
4132 StringRef Filename = getSpelling(Tok: FilenameTok, Buffer&: FilenameBuffer);
4133 StringRef OriginalFilename = Filename;
4134 bool isAngled =
4135 GetIncludeFilenameSpelling(Loc: FilenameTok.getLocation(), Buffer&: Filename);
4136
4137 // If GetIncludeFilenameSpelling set the start ptr to null, there was an
4138 // error.
4139 if (Filename.empty())
4140 return;
4141
4142 OptionalFileEntryRef MaybeFileRef =
4143 this->LookupEmbedFile(Filename, isAngled, /*OpenFile=*/true);
4144 if (!MaybeFileRef) {
4145 // could not find file
4146 if (Callbacks && Callbacks->EmbedFileNotFound(FileName: Filename)) {
4147 return;
4148 }
4149 Diag(Tok: FilenameTok, DiagID: diag::err_pp_file_not_found) << Filename;
4150 return;
4151 }
4152
4153 if (MaybeFileRef->isDeviceFile()) {
4154 Diag(Tok: FilenameTok, DiagID: diag::err_pp_embed_device_file) << Filename;
4155 return;
4156 }
4157
4158 std::optional<llvm::MemoryBufferRef> MaybeFile =
4159 getSourceManager().getMemoryBufferForFileOrNone(File: *MaybeFileRef);
4160 if (!MaybeFile) {
4161 // could not find file
4162 Diag(Tok: FilenameTok, DiagID: diag::err_cannot_open_file)
4163 << Filename << "a buffer to the contents could not be created";
4164 return;
4165 }
4166 StringRef BinaryContents = MaybeFile->getBuffer();
4167
4168 // The order is important between 'offset' and 'limit'; we want to offset
4169 // first and then limit second; otherwise we may reduce the notional resource
4170 // size to something too small to offset into.
4171 if (Params->MaybeOffsetParam) {
4172 // FIXME: just like with the limit() and if_empty() parameters, this loses
4173 // source fidelity in the AST; it has no idea that there was an offset
4174 // involved.
4175 // offsets all the way to the end of the file make for an empty file.
4176 BinaryContents = BinaryContents.substr(Start: Params->MaybeOffsetParam->Offset);
4177 }
4178
4179 if (Params->MaybeLimitParam) {
4180 // FIXME: just like with the clang::offset() and if_empty() parameters,
4181 // this loses source fidelity in the AST; it has no idea there was a limit
4182 // involved.
4183 BinaryContents = BinaryContents.substr(Start: 0, N: Params->MaybeLimitParam->Limit);
4184 }
4185
4186 if (Callbacks)
4187 Callbacks->EmbedDirective(HashLoc, FileName: Filename, IsAngled: isAngled, File: MaybeFileRef,
4188 Params: *Params);
4189 // getSpelling() may return a buffer from the token itself or it may use the
4190 // SmallString buffer we provided. getSpelling() may also return a string that
4191 // is actually longer than FilenameTok.getLength(), so we first pass a
4192 // locally created buffer to getSpelling() to get the string of real length
4193 // and then we allocate a long living buffer because the buffer we used
4194 // previously will only live till the end of this function and we need
4195 // filename info to live longer.
4196 void *Mem = BP.Allocate(Size: OriginalFilename.size(), Alignment: alignof(char *));
4197 memcpy(dest: Mem, src: OriginalFilename.data(), n: OriginalFilename.size());
4198 StringRef FilenameToGo =
4199 StringRef(static_cast<char *>(Mem), OriginalFilename.size());
4200 HandleEmbedDirectiveImpl(HashLoc, Params: *Params, BinaryContents, FileName: FilenameToGo);
4201}
4202
4203/// HandleCXXImportDirective - Handle the C++ modules import directives
4204///
4205/// pp-import:
4206/// export[opt] import header-name pp-tokens[opt] ; new-line
4207/// export[opt] import header-name-tokens pp-tokens[opt] ; new-line
4208/// export[opt] import pp-tokens ; new-line
4209///
4210/// The header importing are replaced by annot_header_unit token, and the
4211/// lexed module name are replaced by annot_module_name token.
4212void Preprocessor::HandleCXXImportDirective(Token ImportTok) {
4213 assert(getLangOpts().CPlusPlusModules && ImportTok.is(tok::kw_import));
4214 llvm::SaveAndRestore<bool> SaveImportingCXXModules(
4215 this->ImportingCXXNamedModules, true);
4216
4217 Token Tok;
4218 if (LexHeaderName(Result&: Tok)) {
4219 if (Tok.isNot(K: tok::eod))
4220 CheckEndOfDirective(DirType: ImportTok.getIdentifierInfo()->getName());
4221 return;
4222 }
4223
4224 SourceLocation UseLoc = ImportTok.getLocation();
4225 SmallVector<Token, 4> DirToks{ImportTok};
4226 SmallVector<IdentifierLoc, 2> Path;
4227 bool ImportingHeader = false;
4228 bool IsPartition = false;
4229
4230 switch (Tok.getKind()) {
4231 case tok::header_name:
4232 ImportingHeader = true;
4233 DirToks.push_back(Elt: Tok);
4234 Lex(Result&: DirToks.emplace_back());
4235 break;
4236 case tok::colon:
4237 IsPartition = true;
4238 DirToks.push_back(Elt: Tok);
4239 UseLoc = Tok.getLocation();
4240 Lex(Result&: Tok);
4241 [[fallthrough]];
4242 case tok::identifier: {
4243 if (HandleModuleName(DirType: ImportTok.getIdentifierInfo()->getName(), UseLoc, Tok,
4244 Path, DirToks, /*AllowMacroExpansion=*/true,
4245 IsPartition))
4246 return;
4247
4248 std::string FlatName;
4249 bool IsValid =
4250 (IsPartition && ModuleDeclState.isNamedModule()) || !IsPartition;
4251 if (Callbacks && IsValid) {
4252 if (IsPartition && ModuleDeclState.isNamedModule()) {
4253 FlatName += ModuleDeclState.getPrimaryName();
4254 FlatName += ":";
4255 }
4256
4257 FlatName += ModuleLoader::getFlatNameFromPath(Path);
4258 SourceLocation StartLoc = IsPartition ? UseLoc : Path[0].getLoc();
4259 IdentifierLoc FlatNameLoc(StartLoc, getIdentifierInfo(Name: FlatName));
4260
4261 // We don't/shouldn't load the standard c++20 modules when preprocessing.
4262 // so the imported module is nullptr.
4263 Callbacks->moduleImport(ImportLoc: ImportTok.getLocation(),
4264 Path: ModuleIdPath(FlatNameLoc),
4265 /*Imported=*/nullptr);
4266 }
4267 break;
4268 }
4269 default:
4270 DirToks.push_back(Elt: Tok);
4271 break;
4272 }
4273
4274 // Consume the pp-import-suffix and expand any macros in it now, if we're not
4275 // at the semicolon already.
4276 if (!DirToks.back().isOneOf(Ks: tok::semi, Ks: tok::eod))
4277 CollectPPImportSuffix(Toks&: DirToks);
4278
4279 if (DirToks.back().isNot(K: tok::eod))
4280 CheckEndOfDirective(DirType: ImportTok.getIdentifierInfo()->getName());
4281 else
4282 DirToks.pop_back();
4283
4284 // This is not a pp-import after all.
4285 if (DirToks.back().isNot(K: tok::semi)) {
4286 EnterModuleSuffixTokenStream(Toks: DirToks);
4287 return;
4288 }
4289
4290 if (ImportingHeader) {
4291 // C++2a [cpp.module]p1:
4292 // The ';' preprocessing-token terminating a pp-import shall not have
4293 // been produced by macro replacement.
4294 SourceLocation SemiLoc = DirToks.back().getLocation();
4295 if (SemiLoc.isMacroID())
4296 Diag(Loc: SemiLoc, DiagID: diag::err_header_import_semi_in_macro);
4297
4298 auto Action = HandleHeaderIncludeOrImport(
4299 /*HashLoc*/ SourceLocation(), IncludeTok&: ImportTok, FilenameTok&: Tok, EndLoc: SemiLoc);
4300 switch (Action.Kind) {
4301 case ImportAction::None:
4302 break;
4303
4304 case ImportAction::ModuleBegin:
4305 // Let the parser know we're textually entering the module.
4306 DirToks.emplace_back();
4307 DirToks.back().startToken();
4308 DirToks.back().setKind(tok::annot_module_begin);
4309 DirToks.back().setLocation(SemiLoc);
4310 DirToks.back().setAnnotationEndLoc(SemiLoc);
4311 DirToks.back().setAnnotationValue(Action.ModuleForHeader);
4312 [[fallthrough]];
4313
4314 case ImportAction::ModuleImport:
4315 case ImportAction::HeaderUnitImport:
4316 case ImportAction::SkippedModuleImport:
4317 // We chose to import (or textually enter) the file. Convert the
4318 // header-name token into a header unit annotation token.
4319 DirToks[1].setKind(tok::annot_header_unit);
4320 DirToks[1].setAnnotationEndLoc(DirToks[0].getLocation());
4321 DirToks[1].setAnnotationValue(Action.ModuleForHeader);
4322 // FIXME: Call the moduleImport callback?
4323 break;
4324 case ImportAction::Failure:
4325 assert(TheModuleLoader.HadFatalFailure &&
4326 "This should be an early exit only to a fatal error");
4327 CurLexer->cutOffLexing();
4328 return;
4329 }
4330 }
4331
4332 EnterModuleSuffixTokenStream(Toks: DirToks);
4333}
4334
4335/// HandleCXXModuleDirective - Handle C++ module declaration directives.
4336///
4337/// pp-module:
4338/// export[opt] module pp-tokens[opt] ; new-line
4339///
4340/// pp-module-name:
4341/// pp-module-name-qualifier[opt] identifier
4342/// pp-module-partition:
4343/// : pp-module-name-qualifier[opt] identifier
4344/// pp-module-name-qualifier:
4345/// identifier .
4346/// pp-module-name-qualifier identifier .
4347///
4348/// global-module-fragment:
4349/// module-keyword ; declaration-seq[opt]
4350///
4351/// private-module-fragment:
4352/// module-keyword : private ; declaration-seq[opt]
4353///
4354/// The lexed module name are replaced by annot_module_name token.
4355void Preprocessor::HandleCXXModuleDirective(Token ModuleTok) {
4356 assert(getLangOpts().CPlusPlusModules && ModuleTok.is(tok::kw_module));
4357 SourceLocation StartLoc = ModuleTok.getLocation();
4358
4359 Token Tok;
4360 SourceLocation UseLoc = ModuleTok.getLocation();
4361 SmallVector<Token, 4> DirToks{ModuleTok};
4362 SmallVector<IdentifierLoc, 2> Path, Partition;
4363 LexUnexpandedToken(Result&: Tok);
4364
4365 switch (Tok.getKind()) {
4366 // Global Module Fragment.
4367 case tok::semi:
4368 DirToks.push_back(Elt: Tok);
4369 break;
4370 case tok::colon:
4371 DirToks.push_back(Elt: Tok);
4372 LexUnexpandedToken(Result&: Tok);
4373 if (Tok.isNot(K: tok::kw_private)) {
4374 if (Tok.isNot(K: tok::eod))
4375 CheckEndOfDirective(DirType: ModuleTok.getIdentifierInfo()->getName(),
4376 /*EnableMacros=*/false, ExtraToks: &DirToks);
4377 EnterModuleSuffixTokenStream(Toks: DirToks);
4378 return;
4379 }
4380 DirToks.push_back(Elt: Tok);
4381 break;
4382 case tok::identifier: {
4383 if (HandleModuleName(DirType: ModuleTok.getIdentifierInfo()->getName(), UseLoc, Tok,
4384 Path, DirToks, /*AllowMacroExpansion=*/false,
4385 /*IsPartition=*/false))
4386 return;
4387
4388 // C++20 [cpp.module]p
4389 // The pp-tokens, if any, of a pp-module shall be of the form:
4390 // pp-module-name pp-module-partition[opt] pp-tokens[opt]
4391 if (Tok.is(K: tok::colon)) {
4392 LexUnexpandedToken(Result&: Tok);
4393 if (HandleModuleName(DirType: ModuleTok.getIdentifierInfo()->getName(), UseLoc,
4394 Tok, Path&: Partition, DirToks,
4395 /*AllowMacroExpansion=*/false, /*IsPartition=*/true))
4396 return;
4397 }
4398
4399 // If the current token is a macro definition, put it back to token stream
4400 // and expand any macros in it later.
4401 //
4402 // export module M ATTR(some_attr); // -D'ATTR(x)=[[x]]'
4403 //
4404 // Current token is `ATTR`.
4405 if (Tok.is(K: tok::identifier) &&
4406 getMacroDefinition(II: Tok.getIdentifierInfo())) {
4407 std::unique_ptr<Token[]> TokCopy = std::make_unique<Token[]>(num: 1);
4408 TokCopy[0] = Tok;
4409 EnterTokenStream(Toks: std::move(TokCopy), /*NumToks=*/1,
4410 /*DisableMacroExpansion=*/false, /*IsReinject=*/false);
4411 Lex(Result&: Tok);
4412 DirToks.back() = Tok;
4413 }
4414 break;
4415 }
4416 default:
4417 DirToks.push_back(Elt: Tok);
4418 break;
4419 }
4420
4421 // Consume the pp-import-suffix and expand any macros in it now, if we're not
4422 // at the semicolon already.
4423 std::optional<Token> NextPPTok =
4424 DirToks.back().is(K: tok::eod) ? peekNextPPToken() : DirToks.back();
4425
4426 // Only ';' and '[' are allowed after module name.
4427 // We also check 'private' because the previous is not a module name.
4428 if (NextPPTok) {
4429 if (NextPPTok->is(K: tok::raw_identifier))
4430 LookUpIdentifierInfo(Identifier&: *NextPPTok);
4431 if (!NextPPTok->isOneOf(Ks: tok::semi, Ks: tok::eod, Ks: tok::l_square,
4432 Ks: tok::kw_private))
4433 Diag(Tok: *NextPPTok, DiagID: diag::err_pp_unexpected_tok_after_module_name)
4434 << getSpelling(Tok: *NextPPTok);
4435 }
4436
4437 if (!DirToks.back().isOneOf(Ks: tok::semi, Ks: tok::eod)) {
4438 // Consume the pp-import-suffix and expand any macros in it now. We'll add
4439 // it back into the token stream later.
4440 CollectPPImportSuffix(Toks&: DirToks);
4441 }
4442
4443 SourceLocation End =
4444 DirToks.back().isNot(K: tok::eod)
4445 ? CheckEndOfDirective(DirType: ModuleTok.getIdentifierInfo()->getName(),
4446 /*EnableMacros=*/false, ExtraToks: &DirToks)
4447
4448 : DirToks.pop_back_val().getLocation();
4449
4450 if (!IncludeMacroStack.empty()) {
4451 Diag(Loc: StartLoc, DiagID: diag::err_pp_module_decl_in_header)
4452 << SourceRange(StartLoc, End);
4453 }
4454
4455 if (CurPPLexer->getConditionalStackDepth() != 0) {
4456 Diag(Loc: StartLoc, DiagID: diag::err_pp_cond_span_module_decl)
4457 << SourceRange(StartLoc, End);
4458 }
4459 EnterModuleSuffixTokenStream(Toks: DirToks);
4460}
4461
4462/// Lex a token following the 'import' contextual keyword.
4463///
4464/// pp-import:
4465/// [ObjC] @ import module-name ;
4466///
4467/// module-name:
4468/// module-name-qualifier[opt] identifier
4469///
4470/// module-name-qualifier
4471/// module-name-qualifier[opt] identifier .
4472///
4473/// We respond to a pp-import by importing macros from the named module.
4474void Preprocessor::HandleObjCImportDirective(Token &AtTok, Token &ImportTok) {
4475 assert(getLangOpts().ObjC && AtTok.is(tok::at) &&
4476 ImportTok.isObjCAtKeyword(tok::objc_import));
4477 ImportTok.setKind(tok::kw_import);
4478 SmallVector<Token, 32> DirToks{AtTok, ImportTok};
4479 SmallVector<IdentifierLoc, 3> Path;
4480 SourceLocation UseLoc = ImportTok.getLocation();
4481 ModuleImportLoc = ImportTok.getLocation();
4482 Token Tok;
4483 Lex(Result&: Tok);
4484 if (HandleModuleName(DirType: ImportTok.getIdentifierInfo()->getName(), UseLoc, Tok,
4485 Path, DirToks,
4486 /*AllowMacroExpansion=*/true,
4487 /*IsPartition=*/false))
4488 return;
4489
4490 // Consume the pp-import-suffix and expand any macros in it now, if we're not
4491 // at the semicolon already.
4492 if (!DirToks.back().isOneOf(Ks: tok::semi, Ks: tok::eod))
4493 CollectPPImportSuffix(Toks&: DirToks);
4494
4495 SourceLocation End =
4496 DirToks.back().isNot(K: tok::eod)
4497 ? CheckEndOfDirective(DirType: ImportTok.getIdentifierInfo()->getName(),
4498 /*EnableMacros=*/false, ExtraToks: &DirToks)
4499
4500 : DirToks.pop_back_val().getLocation();
4501
4502 Module *Imported = nullptr;
4503 if (getLangOpts().Modules) {
4504 Imported = TheModuleLoader.loadModule(ImportLoc: ModuleImportLoc, Path, Visibility: Module::Hidden,
4505 /*IsInclusionDirective=*/false);
4506 if (Imported)
4507 makeModuleVisible(M: Imported, Loc: End);
4508 }
4509
4510 if (Callbacks)
4511 Callbacks->moduleImport(ImportLoc: ModuleImportLoc, Path, Imported);
4512
4513 EnterModuleSuffixTokenStream(Toks: DirToks);
4514}
4515