1//===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Implements # directive processing for the Preprocessor.
11///
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/CharInfo.h"
15#include "clang/Basic/DirectoryEntry.h"
16#include "clang/Basic/FileManager.h"
17#include "clang/Basic/IdentifierTable.h"
18#include "clang/Basic/LangOptions.h"
19#include "clang/Basic/Module.h"
20#include "clang/Basic/SourceLocation.h"
21#include "clang/Basic/SourceManager.h"
22#include "clang/Basic/TargetInfo.h"
23#include "clang/Basic/TokenKinds.h"
24#include "clang/Lex/CodeCompletionHandler.h"
25#include "clang/Lex/HeaderSearch.h"
26#include "clang/Lex/HeaderSearchOptions.h"
27#include "clang/Lex/LexDiagnostic.h"
28#include "clang/Lex/LiteralSupport.h"
29#include "clang/Lex/MacroInfo.h"
30#include "clang/Lex/ModuleLoader.h"
31#include "clang/Lex/ModuleMap.h"
32#include "clang/Lex/PPCallbacks.h"
33#include "clang/Lex/Pragma.h"
34#include "clang/Lex/Preprocessor.h"
35#include "clang/Lex/PreprocessorOptions.h"
36#include "clang/Lex/Token.h"
37#include "clang/Lex/VariadicMacroSupport.h"
38#include "llvm/ADT/ArrayRef.h"
39#include "llvm/ADT/STLExtras.h"
40#include "llvm/ADT/ScopeExit.h"
41#include "llvm/ADT/SmallString.h"
42#include "llvm/ADT/SmallVector.h"
43#include "llvm/ADT/StringExtras.h"
44#include "llvm/ADT/StringRef.h"
45#include "llvm/ADT/StringSwitch.h"
46#include "llvm/Support/AlignOf.h"
47#include "llvm/Support/ErrorHandling.h"
48#include "llvm/Support/Path.h"
49#include "llvm/Support/SaveAndRestore.h"
50#include <algorithm>
51#include <cassert>
52#include <cstring>
53#include <new>
54#include <optional>
55#include <string>
56#include <utility>
57
58using namespace clang;
59
60//===----------------------------------------------------------------------===//
61// Utility Methods for Preprocessor Directive Handling.
62//===----------------------------------------------------------------------===//
63
64MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
65 static_assert(std::is_trivially_destructible_v<MacroInfo>, "");
66 return new (BP) MacroInfo(L);
67}
68
69DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
70 SourceLocation Loc) {
71 return new (BP) DefMacroDirective(MI, Loc);
72}
73
74UndefMacroDirective *
75Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
76 return new (BP) UndefMacroDirective(UndefLoc);
77}
78
79VisibilityMacroDirective *
80Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
81 bool isPublic) {
82 return new (BP) VisibilityMacroDirective(Loc, isPublic);
83}
84
85/// Read and discard all tokens remaining on the current line until
86/// the tok::eod token is found.
87SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) {
88 SourceRange Res;
89
90 LexUnexpandedToken(Result&: Tmp);
91 Res.setBegin(Tmp.getLocation());
92 while (Tmp.isNot(K: tok::eod)) {
93 assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens");
94 LexUnexpandedToken(Result&: Tmp);
95 }
96 Res.setEnd(Tmp.getLocation());
97 return Res;
98}
99
/// Classifies a \#define/\#undef of an identifier for diagnostic purposes.
enum MacroDiag {
  /// Not a reserved identifier.
  MD_NoWarn,
  /// Macro hides a keyword; enabled by default.
  MD_KeywordDef,
  /// \#define or \#undef of a reserved identifier; disabled by default.
  MD_ReservedMacro
};
106
/// The %select index passed to the pp_err_elif_after_else and
/// pp_err_elif_without_if diagnostics.
enum PPElifDiag {
  /// Streamed for an \#elif directive.
  PED_Elif,
  /// Streamed for an \#elifdef directive.
  PED_Elifdef,
  /// Streamed for an \#elifndef directive.
  PED_Elifndef
};
114
115static bool isFeatureTestMacro(StringRef MacroName) {
116 // list from:
117 // * https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html
118 // * https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160
119 // * man 7 feature_test_macros
120 // The list must be sorted for correct binary search.
121 static constexpr StringRef ReservedMacro[] = {
122 "_ATFILE_SOURCE",
123 "_BSD_SOURCE",
124 "_CRT_NONSTDC_NO_WARNINGS",
125 "_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES",
126 "_CRT_SECURE_NO_WARNINGS",
127 "_FILE_OFFSET_BITS",
128 "_FORTIFY_SOURCE",
129 "_GLIBCXX_ASSERTIONS",
130 "_GLIBCXX_CONCEPT_CHECKS",
131 "_GLIBCXX_DEBUG",
132 "_GLIBCXX_DEBUG_PEDANTIC",
133 "_GLIBCXX_PARALLEL",
134 "_GLIBCXX_PARALLEL_ASSERTIONS",
135 "_GLIBCXX_SANITIZE_VECTOR",
136 "_GLIBCXX_USE_CXX11_ABI",
137 "_GLIBCXX_USE_DEPRECATED",
138 "_GNU_SOURCE",
139 "_ISOC11_SOURCE",
140 "_ISOC95_SOURCE",
141 "_ISOC99_SOURCE",
142 "_LARGEFILE64_SOURCE",
143 "_POSIX_C_SOURCE",
144 "_REENTRANT",
145 "_SVID_SOURCE",
146 "_THREAD_SAFE",
147 "_XOPEN_SOURCE",
148 "_XOPEN_SOURCE_EXTENDED",
149 "__STDCPP_WANT_MATH_SPEC_FUNCS__",
150 "__STDC_FORMAT_MACROS",
151 };
152 return std::binary_search(first: std::begin(arr: ReservedMacro), last: std::end(arr: ReservedMacro),
153 val: MacroName);
154}
155
156static bool isLanguageDefinedBuiltin(const SourceManager &SourceMgr,
157 const MacroInfo *MI,
158 const StringRef MacroName) {
159 // If this is a macro with special handling (like __LINE__) then it's language
160 // defined.
161 if (MI->isBuiltinMacro())
162 return true;
163 // Builtin macros are defined in the builtin file
164 if (!SourceMgr.isWrittenInBuiltinFile(Loc: MI->getDefinitionLoc()))
165 return false;
166 // C defines macros starting with __STDC, and C++ defines macros starting with
167 // __STDCPP
168 if (MacroName.starts_with(Prefix: "__STDC"))
169 return true;
170 // C++ defines the __cplusplus macro
171 if (MacroName == "__cplusplus")
172 return true;
173 // C++ defines various feature-test macros starting with __cpp
174 if (MacroName.starts_with(Prefix: "__cpp"))
175 return true;
176 // Anything else isn't language-defined
177 return false;
178}
179
180static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
181 const LangOptions &Lang = PP.getLangOpts();
182 StringRef Text = II->getName();
183 if (isReservedInAllContexts(Status: II->isReserved(LangOpts: Lang)))
184 return isFeatureTestMacro(MacroName: Text) ? MD_NoWarn : MD_ReservedMacro;
185 if (II->isKeyword(LangOpts: Lang))
186 return MD_KeywordDef;
187 if (Lang.CPlusPlus11 && (Text == "override" || Text == "final"))
188 return MD_KeywordDef;
189 return MD_NoWarn;
190}
191
192static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
193 const LangOptions &Lang = PP.getLangOpts();
194 // Do not warn on keyword undef. It is generally harmless and widely used.
195 if (isReservedInAllContexts(Status: II->isReserved(LangOpts: Lang)))
196 return MD_ReservedMacro;
197 return MD_NoWarn;
198}
199
200// Return true if we want to issue a diagnostic by default if we
201// encounter this name in a #include with the wrong case. For now,
202// this includes the standard C and C++ headers, Posix headers,
203// and Boost headers. Improper case for these #includes is a
204// potential portability issue.
205static bool warnByDefaultOnWrongCase(StringRef Include) {
206 // If the first component of the path is "boost", treat this like a standard header
207 // for the purposes of diagnostics.
208 if (::llvm::sys::path::begin(path: Include)->equals_insensitive(RHS: "boost"))
209 return true;
210
211 // "condition_variable" is the longest standard header name at 18 characters.
212 // If the include file name is longer than that, it can't be a standard header.
213 static const size_t MaxStdHeaderNameLen = 18u;
214 if (Include.size() > MaxStdHeaderNameLen)
215 return false;
216
217 // Lowercase and normalize the search string.
218 SmallString<32> LowerInclude{Include};
219 for (char &Ch : LowerInclude) {
220 // In the ASCII range?
221 if (static_cast<unsigned char>(Ch) > 0x7f)
222 return false; // Can't be a standard header
223 // ASCII lowercase:
224 if (Ch >= 'A' && Ch <= 'Z')
225 Ch += 'a' - 'A';
226 // Normalize path separators for comparison purposes.
227 else if (::llvm::sys::path::is_separator(value: Ch))
228 Ch = '/';
229 }
230
231 // The standard C/C++ and Posix headers
232 return llvm::StringSwitch<bool>(LowerInclude)
233 // C library headers
234 .Cases(S0: "assert.h", S1: "complex.h", S2: "ctype.h", S3: "errno.h", S4: "fenv.h", Value: true)
235 .Cases(S0: "float.h", S1: "inttypes.h", S2: "iso646.h", S3: "limits.h", S4: "locale.h", Value: true)
236 .Cases(S0: "math.h", S1: "setjmp.h", S2: "signal.h", S3: "stdalign.h", S4: "stdarg.h", Value: true)
237 .Cases(S0: "stdatomic.h", S1: "stdbool.h", S2: "stdckdint.h", S3: "stddef.h", Value: true)
238 .Cases(S0: "stdint.h", S1: "stdio.h", S2: "stdlib.h", S3: "stdnoreturn.h", Value: true)
239 .Cases(S0: "string.h", S1: "tgmath.h", S2: "threads.h", S3: "time.h", S4: "uchar.h", Value: true)
240 .Cases(S0: "wchar.h", S1: "wctype.h", Value: true)
241
242 // C++ headers for C library facilities
243 .Cases(S0: "cassert", S1: "ccomplex", S2: "cctype", S3: "cerrno", S4: "cfenv", Value: true)
244 .Cases(S0: "cfloat", S1: "cinttypes", S2: "ciso646", S3: "climits", S4: "clocale", Value: true)
245 .Cases(S0: "cmath", S1: "csetjmp", S2: "csignal", S3: "cstdalign", S4: "cstdarg", Value: true)
246 .Cases(S0: "cstdbool", S1: "cstddef", S2: "cstdint", S3: "cstdio", S4: "cstdlib", Value: true)
247 .Cases(S0: "cstring", S1: "ctgmath", S2: "ctime", S3: "cuchar", S4: "cwchar", Value: true)
248 .Case(S: "cwctype", Value: true)
249
250 // C++ library headers
251 .Cases(S0: "algorithm", S1: "fstream", S2: "list", S3: "regex", S4: "thread", Value: true)
252 .Cases(S0: "array", S1: "functional", S2: "locale", S3: "scoped_allocator", S4: "tuple", Value: true)
253 .Cases(S0: "atomic", S1: "future", S2: "map", S3: "set", S4: "type_traits", Value: true)
254 .Cases(S0: "bitset", S1: "initializer_list", S2: "memory", S3: "shared_mutex", S4: "typeindex", Value: true)
255 .Cases(S0: "chrono", S1: "iomanip", S2: "mutex", S3: "sstream", S4: "typeinfo", Value: true)
256 .Cases(S0: "codecvt", S1: "ios", S2: "new", S3: "stack", S4: "unordered_map", Value: true)
257 .Cases(S0: "complex", S1: "iosfwd", S2: "numeric", S3: "stdexcept", S4: "unordered_set", Value: true)
258 .Cases(S0: "condition_variable", S1: "iostream", S2: "ostream", S3: "streambuf", S4: "utility", Value: true)
259 .Cases(S0: "deque", S1: "istream", S2: "queue", S3: "string", S4: "valarray", Value: true)
260 .Cases(S0: "exception", S1: "iterator", S2: "random", S3: "strstream", S4: "vector", Value: true)
261 .Cases(S0: "forward_list", S1: "limits", S2: "ratio", S3: "system_error", Value: true)
262
263 // POSIX headers (which aren't also C headers)
264 .Cases(S0: "aio.h", S1: "arpa/inet.h", S2: "cpio.h", S3: "dirent.h", S4: "dlfcn.h", Value: true)
265 .Cases(S0: "fcntl.h", S1: "fmtmsg.h", S2: "fnmatch.h", S3: "ftw.h", S4: "glob.h", Value: true)
266 .Cases(S0: "grp.h", S1: "iconv.h", S2: "langinfo.h", S3: "libgen.h", S4: "monetary.h", Value: true)
267 .Cases(S0: "mqueue.h", S1: "ndbm.h", S2: "net/if.h", S3: "netdb.h", S4: "netinet/in.h", Value: true)
268 .Cases(S0: "netinet/tcp.h", S1: "nl_types.h", S2: "poll.h", S3: "pthread.h", S4: "pwd.h", Value: true)
269 .Cases(S0: "regex.h", S1: "sched.h", S2: "search.h", S3: "semaphore.h", S4: "spawn.h", Value: true)
270 .Cases(S0: "strings.h", S1: "stropts.h", S2: "sys/ipc.h", S3: "sys/mman.h", S4: "sys/msg.h", Value: true)
271 .Cases(S0: "sys/resource.h", S1: "sys/select.h", S2: "sys/sem.h", S3: "sys/shm.h", S4: "sys/socket.h", Value: true)
272 .Cases(S0: "sys/stat.h", S1: "sys/statvfs.h", S2: "sys/time.h", S3: "sys/times.h", S4: "sys/types.h", Value: true)
273 .Cases(S0: "sys/uio.h", S1: "sys/un.h", S2: "sys/utsname.h", S3: "sys/wait.h", S4: "syslog.h", Value: true)
274 .Cases(S0: "tar.h", S1: "termios.h", S2: "trace.h", S3: "ulimit.h", Value: true)
275 .Cases(S0: "unistd.h", S1: "utime.h", S2: "utmpx.h", S3: "wordexp.h", Value: true)
276 .Default(Value: false);
277}
278
279/// Find a similar string in `Candidates`.
280///
281/// \param LHS a string for a similar string in `Candidates`
282///
283/// \param Candidates the candidates to find a similar string.
284///
285/// \returns a similar string if exists. If no similar string exists,
286/// returns std::nullopt.
287static std::optional<StringRef>
288findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) {
289 // We need to check if `Candidates` has the exact case-insensitive string
290 // because the Levenshtein distance match does not care about it.
291 for (StringRef C : Candidates) {
292 if (LHS.equals_insensitive(RHS: C)) {
293 return C;
294 }
295 }
296
297 // Keep going with the Levenshtein distance match.
298 // If the LHS size is less than 3, use the LHS size minus 1 and if not,
299 // use the LHS size divided by 3.
300 size_t Length = LHS.size();
301 size_t MaxDist = Length < 3 ? Length - 1 : Length / 3;
302
303 std::optional<std::pair<StringRef, size_t>> SimilarStr;
304 for (StringRef C : Candidates) {
305 size_t CurDist = LHS.edit_distance(Other: C, AllowReplacements: true);
306 if (CurDist <= MaxDist) {
307 if (!SimilarStr) {
308 // The first similar string found.
309 SimilarStr = {C, CurDist};
310 } else if (CurDist < SimilarStr->second) {
311 // More similar string found.
312 SimilarStr = {C, CurDist};
313 }
314 }
315 }
316
317 if (SimilarStr) {
318 return SimilarStr->first;
319 } else {
320 return std::nullopt;
321 }
322}
323
324bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
325 bool *ShadowFlag) {
326 // Missing macro name?
327 if (MacroNameTok.is(K: tok::eod))
328 return Diag(Tok: MacroNameTok, DiagID: diag::err_pp_missing_macro_name);
329
330 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
331 if (!II)
332 return Diag(Tok: MacroNameTok, DiagID: diag::err_pp_macro_not_identifier);
333
334 if (II->isCPlusPlusOperatorKeyword()) {
335 // C++ 2.5p2: Alternative tokens behave the same as its primary token
336 // except for their spellings.
337 Diag(Tok: MacroNameTok, DiagID: getLangOpts().MicrosoftExt
338 ? diag::ext_pp_operator_used_as_macro_name
339 : diag::err_pp_operator_used_as_macro_name)
340 << II << MacroNameTok.getKind();
341 // Allow #defining |and| and friends for Microsoft compatibility or
342 // recovery when legacy C headers are included in C++.
343 }
344
345 if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {
346 // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
347 return Diag(Tok: MacroNameTok, DiagID: diag::err_defined_macro_name);
348 }
349
350 // If defining/undefining reserved identifier or a keyword, we need to issue
351 // a warning.
352 SourceLocation MacroNameLoc = MacroNameTok.getLocation();
353 if (ShadowFlag)
354 *ShadowFlag = false;
355 if (!SourceMgr.isInSystemHeader(Loc: MacroNameLoc) &&
356 (SourceMgr.getBufferName(Loc: MacroNameLoc) != "<built-in>")) {
357 MacroDiag D = MD_NoWarn;
358 if (isDefineUndef == MU_Define) {
359 D = shouldWarnOnMacroDef(PP&: *this, II);
360 }
361 else if (isDefineUndef == MU_Undef)
362 D = shouldWarnOnMacroUndef(PP&: *this, II);
363 if (D == MD_KeywordDef) {
364 // We do not want to warn on some patterns widely used in configuration
365 // scripts. This requires analyzing next tokens, so do not issue warnings
366 // now, only inform caller.
367 if (ShadowFlag)
368 *ShadowFlag = true;
369 }
370 if (D == MD_ReservedMacro)
371 Diag(Tok: MacroNameTok, DiagID: diag::warn_pp_macro_is_reserved_id);
372 }
373
374 // Okay, we got a good identifier.
375 return false;
376}
377
378/// Lex and validate a macro name, which occurs after a
379/// \#define or \#undef.
380///
381/// This sets the token kind to eod and discards the rest of the macro line if
382/// the macro name is invalid.
383///
384/// \param MacroNameTok Token that is expected to be a macro name.
385/// \param isDefineUndef Context in which macro is used.
386/// \param ShadowFlag Points to a flag that is set if macro shadows a keyword.
387void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
388 bool *ShadowFlag) {
389 // Read the token, don't allow macro expansion on it.
390 LexUnexpandedToken(Result&: MacroNameTok);
391
392 if (MacroNameTok.is(K: tok::code_completion)) {
393 if (CodeComplete)
394 CodeComplete->CodeCompleteMacroName(IsDefinition: isDefineUndef == MU_Define);
395 setCodeCompletionReached();
396 LexUnexpandedToken(Result&: MacroNameTok);
397 }
398
399 if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag))
400 return;
401
402 // Invalid macro name, read and discard the rest of the line and set the
403 // token kind to tok::eod if necessary.
404 if (MacroNameTok.isNot(K: tok::eod)) {
405 MacroNameTok.setKind(tok::eod);
406 DiscardUntilEndOfDirective();
407 }
408}
409
410/// Ensure that the next token is a tok::eod token.
411///
412/// If not, emit a diagnostic and consume up until the eod. If EnableMacros is
413/// true, then we consider macros that expand to zero tokens as being ok.
414///
415/// Returns the location of the end of the directive.
416SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
417 bool EnableMacros) {
418 Token Tmp;
419 // Lex unexpanded tokens for most directives: macros might expand to zero
420 // tokens, causing us to miss diagnosing invalid lines. Some directives (like
421 // #line) allow empty macros.
422 if (EnableMacros)
423 Lex(Result&: Tmp);
424 else
425 LexUnexpandedToken(Result&: Tmp);
426
427 // There should be no tokens after the directive, but we allow them as an
428 // extension.
429 while (Tmp.is(K: tok::comment)) // Skip comments in -C mode.
430 LexUnexpandedToken(Result&: Tmp);
431
432 if (Tmp.is(K: tok::eod))
433 return Tmp.getLocation();
434
435 // Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89,
436 // or if this is a macro-style preprocessing directive, because it is more
437 // trouble than it is worth to insert /**/ and check that there is no /**/
438 // in the range also.
439 FixItHint Hint;
440 if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
441 !CurTokenLexer)
442 Hint = FixItHint::CreateInsertion(InsertionLoc: Tmp.getLocation(),Code: "//");
443 Diag(Tok: Tmp, DiagID: diag::ext_pp_extra_tokens_at_eol) << DirType << Hint;
444 return DiscardUntilEndOfDirective().getEnd();
445}
446
447void Preprocessor::SuggestTypoedDirective(const Token &Tok,
448 StringRef Directive) const {
449 // If this is a `.S` file, treat unknown # directives as non-preprocessor
450 // directives.
451 if (getLangOpts().AsmPreprocessor) return;
452
453 std::vector<StringRef> Candidates = {
454 "if", "ifdef", "ifndef", "elif", "else", "endif"
455 };
456 if (LangOpts.C23 || LangOpts.CPlusPlus23)
457 Candidates.insert(position: Candidates.end(), l: {"elifdef", "elifndef"});
458
459 if (std::optional<StringRef> Sugg = findSimilarStr(LHS: Directive, Candidates)) {
460 // Directive cannot be coming from macro.
461 assert(Tok.getLocation().isFileID());
462 CharSourceRange DirectiveRange = CharSourceRange::getCharRange(
463 B: Tok.getLocation(),
464 E: Tok.getLocation().getLocWithOffset(Offset: Directive.size()));
465 StringRef SuggValue = *Sugg;
466
467 auto Hint = FixItHint::CreateReplacement(RemoveRange: DirectiveRange, Code: SuggValue);
468 Diag(Tok, DiagID: diag::warn_pp_invalid_directive) << 1 << SuggValue << Hint;
469 }
470}
471
/// SkipExcludedConditionalBlock - We just read a \#if or related directive and
/// decided that the subsequent tokens are in the \#if'd out portion of the
/// file. Lex the rest of the file, until we see an \#endif. If
/// FoundNonSkipPortion is true, then we have already emitted code for part of
/// this \#if directive, so \#else/\#elif blocks should never be entered.
/// If FoundElse is true, then we have already seen a \#else for this
/// conditional, so another \#else directive is a duplicate. When this returns,
/// the caller can lex the first valid token.
480void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
481 SourceLocation IfTokenLoc,
482 bool FoundNonSkipPortion,
483 bool FoundElse,
484 SourceLocation ElseLoc) {
485 // In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()
486 // not getting called recursively by storing the RecordedSkippedRanges
487 // DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects
488 // that RecordedSkippedRanges won't get modified and SkipRangePtr won't be
489 // invalidated. If this changes and there is a need to call
490 // SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should
491 // change to do a second lookup in endLexPass function instead of reusing the
492 // lookup pointer.
493 assert(!SkippingExcludedConditionalBlock &&
494 "calling SkipExcludedConditionalBlock recursively");
495 llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);
496
497 ++NumSkipped;
498 assert(!CurTokenLexer && "Conditional PP block cannot appear in a macro!");
499 assert(CurPPLexer && "Conditional PP block must be in a file!");
500 assert(CurLexer && "Conditional PP block but no current lexer set!");
501
502 if (PreambleConditionalStack.reachedEOFWhileSkipping())
503 PreambleConditionalStack.clearSkipInfo();
504 else
505 CurPPLexer->pushConditionalLevel(DirectiveStart: IfTokenLoc, /*isSkipping*/ WasSkipping: false,
506 FoundNonSkip: FoundNonSkipPortion, FoundElse);
507
508 // Enter raw mode to disable identifier lookup (and thus macro expansion),
509 // disabling warnings, etc.
510 CurPPLexer->LexingRawMode = true;
511 Token Tok;
512 SourceLocation endLoc;
513
514 /// Keeps track and caches skipped ranges and also retrieves a prior skipped
515 /// range if the same block is re-visited.
516 struct SkippingRangeStateTy {
517 Preprocessor &PP;
518
519 const char *BeginPtr = nullptr;
520 unsigned *SkipRangePtr = nullptr;
521
522 SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}
523
524 void beginLexPass() {
525 if (BeginPtr)
526 return; // continue skipping a block.
527
528 // Initiate a skipping block and adjust the lexer if we already skipped it
529 // before.
530 BeginPtr = PP.CurLexer->getBufferLocation();
531 SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];
532 if (*SkipRangePtr) {
533 PP.CurLexer->seek(Offset: PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,
534 /*IsAtStartOfLine*/ true);
535 }
536 }
537
538 void endLexPass(const char *Hashptr) {
539 if (!BeginPtr) {
540 // Not doing normal lexing.
541 assert(PP.CurLexer->isDependencyDirectivesLexer());
542 return;
543 }
544
545 // Finished skipping a block, record the range if it's first time visited.
546 if (!*SkipRangePtr) {
547 *SkipRangePtr = Hashptr - BeginPtr;
548 }
549 assert(*SkipRangePtr == unsigned(Hashptr - BeginPtr));
550 BeginPtr = nullptr;
551 SkipRangePtr = nullptr;
552 }
553 } SkippingRangeState(*this);
554
555 while (true) {
556 if (CurLexer->isDependencyDirectivesLexer()) {
557 CurLexer->LexDependencyDirectiveTokenWhileSkipping(Result&: Tok);
558 } else {
559 SkippingRangeState.beginLexPass();
560 while (true) {
561 CurLexer->Lex(Result&: Tok);
562
563 if (Tok.is(K: tok::code_completion)) {
564 setCodeCompletionReached();
565 if (CodeComplete)
566 CodeComplete->CodeCompleteInConditionalExclusion();
567 continue;
568 }
569
570 // If this is the end of the buffer, we have an error.
571 if (Tok.is(K: tok::eof)) {
572 // We don't emit errors for unterminated conditionals here,
573 // Lexer::LexEndOfFile can do that properly.
574 // Just return and let the caller lex after this #include.
575 if (PreambleConditionalStack.isRecording())
576 PreambleConditionalStack.SkipInfo.emplace(args&: HashTokenLoc, args&: IfTokenLoc,
577 args&: FoundNonSkipPortion,
578 args&: FoundElse, args&: ElseLoc);
579 break;
580 }
581
582 // If this token is not a preprocessor directive, just skip it.
583 if (Tok.isNot(K: tok::hash) || !Tok.isAtStartOfLine())
584 continue;
585
586 break;
587 }
588 }
589 if (Tok.is(K: tok::eof))
590 break;
591
592 // We just parsed a # character at the start of a line, so we're in
593 // directive mode. Tell the lexer this so any newlines we see will be
594 // converted into an EOD token (this terminates the macro).
595 CurPPLexer->ParsingPreprocessorDirective = true;
596 if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
597
598 assert(Tok.is(tok::hash));
599 const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();
600 assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());
601
602 // Read the next token, the directive flavor.
603 LexUnexpandedToken(Result&: Tok);
604
605 // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
606 // something bogus), skip it.
607 if (Tok.isNot(K: tok::raw_identifier)) {
608 CurPPLexer->ParsingPreprocessorDirective = false;
609 // Restore comment saving mode.
610 if (CurLexer) CurLexer->resetExtendedTokenMode();
611 continue;
612 }
613
    // If the first letter isn't i or e, it isn't interesting to us. We know
    // that this is safe in the face of spelling differences, because there is
    // no way to spell an i/e in a strange way that is another letter. Skipping
    // this allows us to avoid looking up the identifier info for
    // #define/#undef and other common directives.
619 StringRef RI = Tok.getRawIdentifier();
620
621 char FirstChar = RI[0];
622 if (FirstChar >= 'a' && FirstChar <= 'z' &&
623 FirstChar != 'i' && FirstChar != 'e') {
624 CurPPLexer->ParsingPreprocessorDirective = false;
625 // Restore comment saving mode.
626 if (CurLexer) CurLexer->resetExtendedTokenMode();
627 continue;
628 }
629
630 // Get the identifier name without trigraphs or embedded newlines. Note
631 // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
632 // when skipping.
633 char DirectiveBuf[20];
634 StringRef Directive;
635 if (!Tok.needsCleaning() && RI.size() < 20) {
636 Directive = RI;
637 } else {
638 std::string DirectiveStr = getSpelling(Tok);
639 size_t IdLen = DirectiveStr.size();
640 if (IdLen >= 20) {
641 CurPPLexer->ParsingPreprocessorDirective = false;
642 // Restore comment saving mode.
643 if (CurLexer) CurLexer->resetExtendedTokenMode();
644 continue;
645 }
646 memcpy(dest: DirectiveBuf, src: &DirectiveStr[0], n: IdLen);
647 Directive = StringRef(DirectiveBuf, IdLen);
648 }
649
650 if (Directive.starts_with(Prefix: "if")) {
651 StringRef Sub = Directive.substr(Start: 2);
652 if (Sub.empty() || // "if"
653 Sub == "def" || // "ifdef"
654 Sub == "ndef") { // "ifndef"
655 // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
656 // bother parsing the condition.
657 DiscardUntilEndOfDirective();
658 CurPPLexer->pushConditionalLevel(DirectiveStart: Tok.getLocation(), /*wasskipping*/WasSkipping: true,
659 /*foundnonskip*/FoundNonSkip: false,
660 /*foundelse*/FoundElse: false);
661 } else {
662 SuggestTypoedDirective(Tok, Directive);
663 }
664 } else if (Directive[0] == 'e') {
665 StringRef Sub = Directive.substr(Start: 1);
666 if (Sub == "ndif") { // "endif"
667 PPConditionalInfo CondInfo;
668 CondInfo.WasSkipping = true; // Silence bogus warning.
669 bool InCond = CurPPLexer->popConditionalLevel(CI&: CondInfo);
670 (void)InCond; // Silence warning in no-asserts mode.
671 assert(!InCond && "Can't be skipping if not in a conditional!");
672
673 // If we popped the outermost skipping block, we're done skipping!
674 if (!CondInfo.WasSkipping) {
675 SkippingRangeState.endLexPass(Hashptr);
676 // Restore the value of LexingRawMode so that trailing comments
677 // are handled correctly, if we've reached the outermost block.
678 CurPPLexer->LexingRawMode = false;
679 endLoc = CheckEndOfDirective(DirType: "endif");
680 CurPPLexer->LexingRawMode = true;
681 if (Callbacks)
682 Callbacks->Endif(Loc: Tok.getLocation(), IfLoc: CondInfo.IfLoc);
683 break;
684 } else {
685 DiscardUntilEndOfDirective();
686 }
687 } else if (Sub == "lse") { // "else".
688 // #else directive in a skipping conditional. If not in some other
689 // skipping conditional, and if #else hasn't already been seen, enter it
690 // as a non-skipping conditional.
691 PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
692
693 if (!CondInfo.WasSkipping)
694 SkippingRangeState.endLexPass(Hashptr);
695
696 // If this is a #else with a #else before it, report the error.
697 if (CondInfo.FoundElse)
698 Diag(Tok, DiagID: diag::pp_err_else_after_else);
699
700 // Note that we've seen a #else in this conditional.
701 CondInfo.FoundElse = true;
702
703 // If the conditional is at the top level, and the #if block wasn't
704 // entered, enter the #else block now.
705 if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
706 CondInfo.FoundNonSkip = true;
707 // Restore the value of LexingRawMode so that trailing comments
708 // are handled correctly.
709 CurPPLexer->LexingRawMode = false;
710 endLoc = CheckEndOfDirective(DirType: "else");
711 CurPPLexer->LexingRawMode = true;
712 if (Callbacks)
713 Callbacks->Else(Loc: Tok.getLocation(), IfLoc: CondInfo.IfLoc);
714 break;
715 } else {
716 DiscardUntilEndOfDirective(); // C99 6.10p4.
717 }
718 } else if (Sub == "lif") { // "elif".
719 PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
720
721 if (!CondInfo.WasSkipping)
722 SkippingRangeState.endLexPass(Hashptr);
723
724 // If this is a #elif with a #else before it, report the error.
725 if (CondInfo.FoundElse)
726 Diag(Tok, DiagID: diag::pp_err_elif_after_else) << PED_Elif;
727
728 // If this is in a skipping block or if we're already handled this #if
729 // block, don't bother parsing the condition.
730 if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
731 // FIXME: We should probably do at least some minimal parsing of the
732 // condition to verify that it is well-formed. The current state
733 // allows #elif* directives with completely malformed (or missing)
734 // conditions.
735 DiscardUntilEndOfDirective();
736 } else {
737 // Restore the value of LexingRawMode so that identifiers are
738 // looked up, etc, inside the #elif expression.
739 assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
740 CurPPLexer->LexingRawMode = false;
741 IdentifierInfo *IfNDefMacro = nullptr;
742 DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
743 // Stop if Lexer became invalid after hitting code completion token.
744 if (!CurPPLexer)
745 return;
746 const bool CondValue = DER.Conditional;
747 CurPPLexer->LexingRawMode = true;
748 if (Callbacks) {
749 Callbacks->Elif(
750 Loc: Tok.getLocation(), ConditionRange: DER.ExprRange,
751 ConditionValue: (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
752 IfLoc: CondInfo.IfLoc);
753 }
754 // If this condition is true, enter it!
755 if (CondValue) {
756 CondInfo.FoundNonSkip = true;
757 break;
758 }
759 }
760 } else if (Sub == "lifdef" || // "elifdef"
761 Sub == "lifndef") { // "elifndef"
762 bool IsElifDef = Sub == "lifdef";
763 PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
764 Token DirectiveToken = Tok;
765
766 if (!CondInfo.WasSkipping)
767 SkippingRangeState.endLexPass(Hashptr);
768
769 // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode even
770 // if this branch is in a skipping block.
771 unsigned DiagID;
772 if (LangOpts.CPlusPlus)
773 DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
774 : diag::ext_cxx23_pp_directive;
775 else
776 DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
777 : diag::ext_c23_pp_directive;
778 Diag(Tok, DiagID) << (IsElifDef ? PED_Elifdef : PED_Elifndef);
779
780 // If this is a #elif with a #else before it, report the error.
781 if (CondInfo.FoundElse)
782 Diag(Tok, DiagID: diag::pp_err_elif_after_else)
783 << (IsElifDef ? PED_Elifdef : PED_Elifndef);
784
785 // If this is in a skipping block or if we're already handled this #if
786 // block, don't bother parsing the condition.
787 if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
788 // FIXME: We should probably do at least some minimal parsing of the
789 // condition to verify that it is well-formed. The current state
790 // allows #elif* directives with completely malformed (or missing)
791 // conditions.
792 DiscardUntilEndOfDirective();
793 } else {
794 // Restore the value of LexingRawMode so that identifiers are
795 // looked up, etc, inside the #elif[n]def expression.
796 assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
797 CurPPLexer->LexingRawMode = false;
798 Token MacroNameTok;
799 ReadMacroName(MacroNameTok);
800 CurPPLexer->LexingRawMode = true;
801
802 // If the macro name token is tok::eod, there was an error that was
803 // already reported.
804 if (MacroNameTok.is(K: tok::eod)) {
805 // Skip code until we get to #endif. This helps with recovery by
806 // not emitting an error when the #endif is reached.
807 continue;
808 }
809
810 emitMacroExpansionWarnings(Identifier: MacroNameTok);
811
812 CheckEndOfDirective(DirType: IsElifDef ? "elifdef" : "elifndef");
813
814 IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
815 auto MD = getMacroDefinition(II: MII);
816 MacroInfo *MI = MD.getMacroInfo();
817
818 if (Callbacks) {
819 if (IsElifDef) {
820 Callbacks->Elifdef(Loc: DirectiveToken.getLocation(), MacroNameTok,
821 MD);
822 } else {
823 Callbacks->Elifndef(Loc: DirectiveToken.getLocation(), MacroNameTok,
824 MD);
825 }
826 }
827 // If this condition is true, enter it!
828 if (static_cast<bool>(MI) == IsElifDef) {
829 CondInfo.FoundNonSkip = true;
830 break;
831 }
832 }
833 } else {
834 SuggestTypoedDirective(Tok, Directive);
835 }
836 } else {
837 SuggestTypoedDirective(Tok, Directive);
838 }
839
840 CurPPLexer->ParsingPreprocessorDirective = false;
841 // Restore comment saving mode.
842 if (CurLexer) CurLexer->resetExtendedTokenMode();
843 }
844
845 // Finally, if we are out of the conditional (saw an #endif or ran off the end
846 // of the file, just stop skipping and return to lexing whatever came after
847 // the #if block.
848 CurPPLexer->LexingRawMode = false;
849
850 // The last skipped range isn't actually skipped yet if it's truncated
851 // by the end of the preamble; we'll resume parsing after the preamble.
852 if (Callbacks && (Tok.isNot(K: tok::eof) || !isRecordingPreamble()))
853 Callbacks->SourceRangeSkipped(
854 Range: SourceRange(HashTokenLoc, endLoc.isValid()
855 ? endLoc
856 : CurPPLexer->getSourceLocation()),
857 EndifLoc: Tok.getLocation());
858}
859
860Module *Preprocessor::getModuleForLocation(SourceLocation Loc,
861 bool AllowTextual) {
862 if (!SourceMgr.isInMainFile(Loc)) {
863 // Try to determine the module of the include directive.
864 // FIXME: Look into directly passing the FileEntry from LookupFile instead.
865 FileID IDOfIncl = SourceMgr.getFileID(SpellingLoc: SourceMgr.getExpansionLoc(Loc));
866 if (auto EntryOfIncl = SourceMgr.getFileEntryRefForID(FID: IDOfIncl)) {
867 // The include comes from an included file.
868 return HeaderInfo.getModuleMap()
869 .findModuleForHeader(File: *EntryOfIncl, AllowTextual)
870 .getModule();
871 }
872 }
873
874 // This is either in the main file or not in a file at all. It belongs
875 // to the current module, if there is one.
876 return getLangOpts().CurrentModule.empty()
877 ? nullptr
878 : HeaderInfo.lookupModule(ModuleName: getLangOpts().CurrentModule, ImportLoc: Loc);
879}
880
881OptionalFileEntryRef
882Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
883 SourceLocation Loc) {
884 Module *IncM = getModuleForLocation(
885 Loc: IncLoc, AllowTextual: LangOpts.ModulesValidateTextualHeaderIncludes);
886
887 // Walk up through the include stack, looking through textual headers of M
888 // until we hit a non-textual header that we can #include. (We assume textual
889 // headers of a module with non-textual headers aren't meant to be used to
890 // import entities from the module.)
891 auto &SM = getSourceManager();
892 while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
893 auto ID = SM.getFileID(SpellingLoc: SM.getExpansionLoc(Loc));
894 auto FE = SM.getFileEntryRefForID(FID: ID);
895 if (!FE)
896 break;
897
898 // We want to find all possible modules that might contain this header, so
899 // search all enclosing directories for module maps and load them.
900 HeaderInfo.hasModuleMap(Filename: FE->getName(), /*Root*/ nullptr,
901 IsSystem: SourceMgr.isInSystemHeader(Loc));
902
903 bool InPrivateHeader = false;
904 for (auto Header : HeaderInfo.findAllModulesForHeader(File: *FE)) {
905 if (!Header.isAccessibleFrom(M: IncM)) {
906 // It's in a private header; we can't #include it.
907 // FIXME: If there's a public header in some module that re-exports it,
908 // then we could suggest including that, but it's not clear that's the
909 // expected way to make this entity visible.
910 InPrivateHeader = true;
911 continue;
912 }
913
914 // Don't suggest explicitly excluded headers.
915 if (Header.getRole() == ModuleMap::ExcludedHeader)
916 continue;
917
918 // We'll suggest including textual headers below if they're
919 // include-guarded.
920 if (Header.getRole() & ModuleMap::TextualHeader)
921 continue;
922
923 // If we have a module import syntax, we shouldn't include a header to
924 // make a particular module visible. Let the caller know they should
925 // suggest an import instead.
926 if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules)
927 return std::nullopt;
928
929 // If this is an accessible, non-textual header of M's top-level module
930 // that transitively includes the given location and makes the
931 // corresponding module visible, this is the thing to #include.
932 return *FE;
933 }
934
935 // FIXME: If we're bailing out due to a private header, we shouldn't suggest
936 // an import either.
937 if (InPrivateHeader)
938 return std::nullopt;
939
940 // If the header is includable and has an include guard, assume the
941 // intended way to expose its contents is by #include, not by importing a
942 // module that transitively includes it.
943 if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(File: *FE))
944 return *FE;
945
946 Loc = SM.getIncludeLoc(FID: ID);
947 }
948
949 return std::nullopt;
950}
951
952OptionalFileEntryRef Preprocessor::LookupFile(
953 SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
954 ConstSearchDirIterator FromDir, const FileEntry *FromFile,
955 ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath,
956 SmallVectorImpl<char> *RelativePath,
957 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
958 bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) {
959 ConstSearchDirIterator CurDirLocal = nullptr;
960 ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : CurDirLocal;
961
962 Module *RequestingModule = getModuleForLocation(
963 Loc: FilenameLoc, AllowTextual: LangOpts.ModulesValidateTextualHeaderIncludes);
964
965 // If the header lookup mechanism may be relative to the current inclusion
966 // stack, record the parent #includes.
967 SmallVector<std::pair<OptionalFileEntryRef, DirectoryEntryRef>, 16> Includers;
968 bool BuildSystemModule = false;
969 if (!FromDir && !FromFile) {
970 FileID FID = getCurrentFileLexer()->getFileID();
971 OptionalFileEntryRef FileEnt = SourceMgr.getFileEntryRefForID(FID);
972
973 // If there is no file entry associated with this file, it must be the
974 // predefines buffer or the module includes buffer. Any other file is not
975 // lexed with a normal lexer, so it won't be scanned for preprocessor
976 // directives.
977 //
978 // If we have the predefines buffer, resolve #include references (which come
979 // from the -include command line argument) from the current working
980 // directory instead of relative to the main file.
981 //
982 // If we have the module includes buffer, resolve #include references (which
983 // come from header declarations in the module map) relative to the module
984 // map file.
985 if (!FileEnt) {
986 if (FID == SourceMgr.getMainFileID() && MainFileDir) {
987 auto IncludeDir =
988 HeaderInfo.getModuleMap().shouldImportRelativeToBuiltinIncludeDir(
989 FileName: Filename, Module: getCurrentModule())
990 ? HeaderInfo.getModuleMap().getBuiltinDir()
991 : MainFileDir;
992 Includers.push_back(Elt: std::make_pair(x: std::nullopt, y&: *IncludeDir));
993 BuildSystemModule = getCurrentModule()->IsSystem;
994 } else if ((FileEnt = SourceMgr.getFileEntryRefForID(
995 FID: SourceMgr.getMainFileID()))) {
996 auto CWD = FileMgr.getOptionalDirectoryRef(DirName: ".");
997 Includers.push_back(Elt: std::make_pair(x&: *FileEnt, y&: *CWD));
998 }
999 } else {
1000 Includers.push_back(Elt: std::make_pair(x&: *FileEnt, y: FileEnt->getDir()));
1001 }
1002
1003 // MSVC searches the current include stack from top to bottom for
1004 // headers included by quoted include directives.
1005 // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx
1006 if (LangOpts.MSVCCompat && !isAngled) {
1007 for (IncludeStackInfo &ISEntry : llvm::reverse(C&: IncludeMacroStack)) {
1008 if (IsFileLexer(I: ISEntry))
1009 if ((FileEnt = ISEntry.ThePPLexer->getFileEntry()))
1010 Includers.push_back(Elt: std::make_pair(x&: *FileEnt, y: FileEnt->getDir()));
1011 }
1012 }
1013 }
1014
1015 CurDir = CurDirLookup;
1016
1017 if (FromFile) {
1018 // We're supposed to start looking from after a particular file. Search
1019 // the include path until we find that file or run out of files.
1020 ConstSearchDirIterator TmpCurDir = CurDir;
1021 ConstSearchDirIterator TmpFromDir = nullptr;
1022 while (OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1023 Filename, IncludeLoc: FilenameLoc, isAngled, FromDir: TmpFromDir, CurDir: &TmpCurDir,
1024 Includers, SearchPath, RelativePath, RequestingModule,
1025 SuggestedModule, /*IsMapped=*/nullptr,
1026 /*IsFrameworkFound=*/nullptr, SkipCache)) {
1027 // Keep looking as if this file did a #include_next.
1028 TmpFromDir = TmpCurDir;
1029 ++TmpFromDir;
1030 if (&FE->getFileEntry() == FromFile) {
1031 // Found it.
1032 FromDir = TmpFromDir;
1033 CurDir = TmpCurDir;
1034 break;
1035 }
1036 }
1037 }
1038
1039 // Do a standard file entry lookup.
1040 OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1041 Filename, IncludeLoc: FilenameLoc, isAngled, FromDir, CurDir: &CurDir, Includers, SearchPath,
1042 RelativePath, RequestingModule, SuggestedModule, IsMapped,
1043 IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures);
1044 if (FE)
1045 return FE;
1046
1047 OptionalFileEntryRef CurFileEnt;
1048 // Otherwise, see if this is a subframework header. If so, this is relative
1049 // to one of the headers on the #include stack. Walk the list of the current
1050 // headers on the #include stack and pass them to HeaderInfo.
1051 if (IsFileLexer()) {
1052 if ((CurFileEnt = CurPPLexer->getFileEntry())) {
1053 if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1054 Filename, ContextFileEnt: *CurFileEnt, SearchPath, RelativePath, RequestingModule,
1055 SuggestedModule)) {
1056 return FE;
1057 }
1058 }
1059 }
1060
1061 for (IncludeStackInfo &ISEntry : llvm::reverse(C&: IncludeMacroStack)) {
1062 if (IsFileLexer(I: ISEntry)) {
1063 if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {
1064 if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1065 Filename, ContextFileEnt: *CurFileEnt, SearchPath, RelativePath,
1066 RequestingModule, SuggestedModule)) {
1067 return FE;
1068 }
1069 }
1070 }
1071 }
1072
1073 // Otherwise, we really couldn't find the file.
1074 return std::nullopt;
1075}
1076
1077OptionalFileEntryRef
1078Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
1079 const FileEntry *LookupFromFile) {
1080 FileManager &FM = this->getFileManager();
1081 if (llvm::sys::path::is_absolute(path: Filename)) {
1082 // lookup path or immediately fail
1083 llvm::Expected<FileEntryRef> ShouldBeEntry =
1084 FM.getFileRef(Filename, OpenFile);
1085 return llvm::expectedToOptional(E: std::move(ShouldBeEntry));
1086 }
1087
1088 auto SeparateComponents = [](SmallVectorImpl<char> &LookupPath,
1089 StringRef StartingFrom, StringRef FileName,
1090 bool RemoveInitialFileComponentFromLookupPath) {
1091 llvm::sys::path::native(path: StartingFrom, result&: LookupPath);
1092 if (RemoveInitialFileComponentFromLookupPath)
1093 llvm::sys::path::remove_filename(path&: LookupPath);
1094 if (!LookupPath.empty() &&
1095 !llvm::sys::path::is_separator(value: LookupPath.back())) {
1096 LookupPath.push_back(Elt: llvm::sys::path::get_separator().front());
1097 }
1098 LookupPath.append(in_start: FileName.begin(), in_end: FileName.end());
1099 };
1100
1101 // Otherwise, it's search time!
1102 SmallString<512> LookupPath;
1103 // Non-angled lookup
1104 if (!isAngled) {
1105 if (LookupFromFile) {
1106 // Use file-based lookup.
1107 StringRef FullFileDir = LookupFromFile->tryGetRealPathName();
1108 if (!FullFileDir.empty()) {
1109 SeparateComponents(LookupPath, FullFileDir, Filename, true);
1110 llvm::Expected<FileEntryRef> ShouldBeEntry =
1111 FM.getFileRef(Filename: LookupPath, OpenFile);
1112 if (ShouldBeEntry)
1113 return llvm::expectedToOptional(E: std::move(ShouldBeEntry));
1114 llvm::consumeError(Err: ShouldBeEntry.takeError());
1115 }
1116 }
1117
1118 // Otherwise, do working directory lookup.
1119 LookupPath.clear();
1120 auto MaybeWorkingDirEntry = FM.getDirectoryRef(DirName: ".");
1121 if (MaybeWorkingDirEntry) {
1122 DirectoryEntryRef WorkingDirEntry = *MaybeWorkingDirEntry;
1123 StringRef WorkingDir = WorkingDirEntry.getName();
1124 if (!WorkingDir.empty()) {
1125 SeparateComponents(LookupPath, WorkingDir, Filename, false);
1126 llvm::Expected<FileEntryRef> ShouldBeEntry =
1127 FM.getFileRef(Filename: LookupPath, OpenFile);
1128 if (ShouldBeEntry)
1129 return llvm::expectedToOptional(E: std::move(ShouldBeEntry));
1130 llvm::consumeError(Err: ShouldBeEntry.takeError());
1131 }
1132 }
1133 }
1134
1135 for (const auto &Entry : PPOpts->EmbedEntries) {
1136 LookupPath.clear();
1137 SeparateComponents(LookupPath, Entry, Filename, false);
1138 llvm::Expected<FileEntryRef> ShouldBeEntry =
1139 FM.getFileRef(Filename: LookupPath, OpenFile);
1140 if (ShouldBeEntry)
1141 return llvm::expectedToOptional(E: std::move(ShouldBeEntry));
1142 llvm::consumeError(Err: ShouldBeEntry.takeError());
1143 }
1144 return std::nullopt;
1145}
1146
1147//===----------------------------------------------------------------------===//
1148// Preprocessor Directive Handling.
1149//===----------------------------------------------------------------------===//
1150
1151class Preprocessor::ResetMacroExpansionHelper {
1152public:
1153 ResetMacroExpansionHelper(Preprocessor *pp)
1154 : PP(pp), save(pp->DisableMacroExpansion) {
1155 if (pp->MacroExpansionInDirectivesOverride)
1156 pp->DisableMacroExpansion = false;
1157 }
1158
1159 ~ResetMacroExpansionHelper() {
1160 PP->DisableMacroExpansion = save;
1161 }
1162
1163private:
1164 Preprocessor *PP;
1165 bool save;
1166};
1167
1168/// Process a directive while looking for the through header or a #pragma
1169/// hdrstop. The following directives are handled:
1170/// #include (to check if it is the through header)
1171/// #define (to warn about macros that don't match the PCH)
1172/// #pragma (to check for pragma hdrstop).
1173/// All other directives are completely discarded.
1174void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1175 SourceLocation HashLoc) {
1176 if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
1177 if (II->getPPKeywordID() == tok::pp_define) {
1178 return HandleDefineDirective(Tok&: Result,
1179 /*ImmediatelyAfterHeaderGuard=*/false);
1180 }
1181 if (SkippingUntilPCHThroughHeader &&
1182 II->getPPKeywordID() == tok::pp_include) {
1183 return HandleIncludeDirective(HashLoc, Tok&: Result);
1184 }
1185 if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
1186 Lex(Result);
1187 auto *II = Result.getIdentifierInfo();
1188 if (II && II->getName() == "hdrstop")
1189 return HandlePragmaHdrstop(Tok&: Result);
1190 }
1191 }
1192 DiscardUntilEndOfDirective();
1193}
1194
1195/// HandleDirective - This callback is invoked when the lexer sees a # token
1196/// at the start of a line. This consumes the directive, modifies the
1197/// lexer/preprocessor state, and advances the lexer(s) so that the next token
1198/// read is the correct one.
1199void Preprocessor::HandleDirective(Token &Result) {
1200 // FIXME: Traditional: # with whitespace before it not recognized by K&R?
1201
1202 // We just parsed a # character at the start of a line, so we're in directive
1203 // mode. Tell the lexer this so any newlines we see will be converted into an
1204 // EOD token (which terminates the directive).
1205 CurPPLexer->ParsingPreprocessorDirective = true;
1206 if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
1207
1208 bool ImmediatelyAfterTopLevelIfndef =
1209 CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();
1210 CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();
1211
1212 ++NumDirectives;
1213
1214 // We are about to read a token. For the multiple-include optimization FA to
1215 // work, we have to remember if we had read any tokens *before* this
1216 // pp-directive.
1217 bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
1218
1219 // Save the '#' token in case we need to return it later.
1220 Token SavedHash = Result;
1221
1222 // Read the next token, the directive flavor. This isn't expanded due to
1223 // C99 6.10.3p8.
1224 LexUnexpandedToken(Result);
1225
1226 // C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.:
1227 // #define A(x) #x
1228 // A(abc
1229 // #warning blah
1230 // def)
1231 // If so, the user is relying on undefined behavior, emit a diagnostic. Do
1232 // not support this for #include-like directives, since that can result in
1233 // terrible diagnostics, and does not work in GCC.
1234 if (InMacroArgs) {
1235 if (IdentifierInfo *II = Result.getIdentifierInfo()) {
1236 switch (II->getPPKeywordID()) {
1237 case tok::pp_include:
1238 case tok::pp_import:
1239 case tok::pp_include_next:
1240 case tok::pp___include_macros:
1241 case tok::pp_pragma:
1242 case tok::pp_embed:
1243 Diag(Tok: Result, DiagID: diag::err_embedded_directive) << II->getName();
1244 Diag(Tok: *ArgMacro, DiagID: diag::note_macro_expansion_here)
1245 << ArgMacro->getIdentifierInfo();
1246 DiscardUntilEndOfDirective();
1247 return;
1248 default:
1249 break;
1250 }
1251 }
1252 Diag(Tok: Result, DiagID: diag::ext_embedded_directive);
1253 }
1254
1255 // Temporarily enable macro expansion if set so
1256 // and reset to previous state when returning from this function.
1257 ResetMacroExpansionHelper helper(this);
1258
1259 if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)
1260 return HandleSkippedDirectiveWhileUsingPCH(Result, HashLoc: SavedHash.getLocation());
1261
1262 switch (Result.getKind()) {
1263 case tok::eod:
1264 // Ignore the null directive with regards to the multiple-include
1265 // optimization, i.e. allow the null directive to appear outside of the
1266 // include guard and still enable the multiple-include optimization.
1267 CurPPLexer->MIOpt.SetReadToken(ReadAnyTokensBeforeDirective);
1268 return; // null directive.
1269 case tok::code_completion:
1270 setCodeCompletionReached();
1271 if (CodeComplete)
1272 CodeComplete->CodeCompleteDirective(
1273 InConditional: CurPPLexer->getConditionalStackDepth() > 0);
1274 return;
1275 case tok::numeric_constant: // # 7 GNU line marker directive.
1276 // In a .S file "# 4" may be a comment so don't treat it as a preprocessor
1277 // directive. However do permit it in the predefines file, as we use line
1278 // markers to mark the builtin macros as being in a system header.
1279 if (getLangOpts().AsmPreprocessor &&
1280 SourceMgr.getFileID(SpellingLoc: SavedHash.getLocation()) != getPredefinesFileID())
1281 break;
1282 return HandleDigitDirective(Tok&: Result);
1283 default:
1284 IdentifierInfo *II = Result.getIdentifierInfo();
1285 if (!II) break; // Not an identifier.
1286
1287 // Ask what the preprocessor keyword ID is.
1288 switch (II->getPPKeywordID()) {
1289 default: break;
1290 // C99 6.10.1 - Conditional Inclusion.
1291 case tok::pp_if:
1292 return HandleIfDirective(IfToken&: Result, HashToken: SavedHash, ReadAnyTokensBeforeDirective);
1293 case tok::pp_ifdef:
1294 return HandleIfdefDirective(Result, HashToken: SavedHash, isIfndef: false,
1295 ReadAnyTokensBeforeDirective: true /*not valid for miopt*/);
1296 case tok::pp_ifndef:
1297 return HandleIfdefDirective(Result, HashToken: SavedHash, isIfndef: true,
1298 ReadAnyTokensBeforeDirective);
1299 case tok::pp_elif:
1300 case tok::pp_elifdef:
1301 case tok::pp_elifndef:
1302 return HandleElifFamilyDirective(ElifToken&: Result, HashToken: SavedHash, Kind: II->getPPKeywordID());
1303
1304 case tok::pp_else:
1305 return HandleElseDirective(Result, HashToken: SavedHash);
1306 case tok::pp_endif:
1307 return HandleEndifDirective(EndifToken&: Result);
1308
1309 // C99 6.10.2 - Source File Inclusion.
1310 case tok::pp_include:
1311 // Handle #include.
1312 return HandleIncludeDirective(HashLoc: SavedHash.getLocation(), Tok&: Result);
1313 case tok::pp___include_macros:
1314 // Handle -imacros.
1315 return HandleIncludeMacrosDirective(HashLoc: SavedHash.getLocation(), Tok&: Result);
1316
1317 // C99 6.10.3 - Macro Replacement.
1318 case tok::pp_define:
1319 return HandleDefineDirective(Tok&: Result, ImmediatelyAfterHeaderGuard: ImmediatelyAfterTopLevelIfndef);
1320 case tok::pp_undef:
1321 return HandleUndefDirective();
1322
1323 // C99 6.10.4 - Line Control.
1324 case tok::pp_line:
1325 return HandleLineDirective();
1326
1327 // C99 6.10.5 - Error Directive.
1328 case tok::pp_error:
1329 return HandleUserDiagnosticDirective(Tok&: Result, isWarning: false);
1330
1331 // C99 6.10.6 - Pragma Directive.
1332 case tok::pp_pragma:
1333 return HandlePragmaDirective(Introducer: {.Kind: PIK_HashPragma, .Loc: SavedHash.getLocation()});
1334
1335 // GNU Extensions.
1336 case tok::pp_import:
1337 return HandleImportDirective(HashLoc: SavedHash.getLocation(), Tok&: Result);
1338 case tok::pp_include_next:
1339 return HandleIncludeNextDirective(HashLoc: SavedHash.getLocation(), Tok&: Result);
1340
1341 case tok::pp_warning:
1342 if (LangOpts.CPlusPlus)
1343 Diag(Tok: Result, DiagID: LangOpts.CPlusPlus23
1344 ? diag::warn_cxx23_compat_warning_directive
1345 : diag::ext_pp_warning_directive)
1346 << /*C++23*/ 1;
1347 else
1348 Diag(Tok: Result, DiagID: LangOpts.C23 ? diag::warn_c23_compat_warning_directive
1349 : diag::ext_pp_warning_directive)
1350 << /*C23*/ 0;
1351
1352 return HandleUserDiagnosticDirective(Tok&: Result, isWarning: true);
1353 case tok::pp_ident:
1354 return HandleIdentSCCSDirective(Tok&: Result);
1355 case tok::pp_sccs:
1356 return HandleIdentSCCSDirective(Tok&: Result);
1357 case tok::pp_embed:
1358 return HandleEmbedDirective(HashLoc: SavedHash.getLocation(), Tok&: Result,
1359 LookupFromFile: getCurrentFileLexer()
1360 ? *getCurrentFileLexer()->getFileEntry()
1361 : static_cast<FileEntry *>(nullptr));
1362 case tok::pp_assert:
1363 //isExtension = true; // FIXME: implement #assert
1364 break;
1365 case tok::pp_unassert:
1366 //isExtension = true; // FIXME: implement #unassert
1367 break;
1368
1369 case tok::pp___public_macro:
1370 if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1371 return HandleMacroPublicDirective(Tok&: Result);
1372 break;
1373
1374 case tok::pp___private_macro:
1375 if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1376 return HandleMacroPrivateDirective();
1377 break;
1378 }
1379 break;
1380 }
1381
1382 // If this is a .S file, treat unknown # directives as non-preprocessor
1383 // directives. This is important because # may be a comment or introduce
1384 // various pseudo-ops. Just return the # token and push back the following
1385 // token to be lexed next time.
1386 if (getLangOpts().AsmPreprocessor) {
1387 auto Toks = std::make_unique<Token[]>(num: 2);
1388 // Return the # and the token after it.
1389 Toks[0] = SavedHash;
1390 Toks[1] = Result;
1391
1392 // If the second token is a hashhash token, then we need to translate it to
1393 // unknown so the token lexer doesn't try to perform token pasting.
1394 if (Result.is(K: tok::hashhash))
1395 Toks[1].setKind(tok::unknown);
1396
1397 // Enter this token stream so that we re-lex the tokens. Make sure to
1398 // enable macro expansion, in case the token after the # is an identifier
1399 // that is expanded.
1400 EnterTokenStream(Toks: std::move(Toks), NumToks: 2, DisableMacroExpansion: false, /*IsReinject*/false);
1401 return;
1402 }
1403
1404 // If we reached here, the preprocessing token is not valid!
1405 // Start suggesting if a similar directive found.
1406 Diag(Tok: Result, DiagID: diag::err_pp_invalid_directive) << 0;
1407
1408 // Read the rest of the PP line.
1409 DiscardUntilEndOfDirective();
1410
1411 // Okay, we're done parsing the directive.
1412}
1413
1414/// GetLineValue - Convert a numeric token into an unsigned value, emitting
1415/// Diagnostic DiagID if it is invalid, and returning the value in Val.
1416static bool GetLineValue(Token &DigitTok, unsigned &Val,
1417 unsigned DiagID, Preprocessor &PP,
1418 bool IsGNULineDirective=false) {
1419 if (DigitTok.isNot(K: tok::numeric_constant)) {
1420 PP.Diag(Tok: DigitTok, DiagID);
1421
1422 if (DigitTok.isNot(K: tok::eod))
1423 PP.DiscardUntilEndOfDirective();
1424 return true;
1425 }
1426
1427 SmallString<64> IntegerBuffer;
1428 IntegerBuffer.resize(N: DigitTok.getLength());
1429 const char *DigitTokBegin = &IntegerBuffer[0];
1430 bool Invalid = false;
1431 unsigned ActualLength = PP.getSpelling(Tok: DigitTok, Buffer&: DigitTokBegin, Invalid: &Invalid);
1432 if (Invalid)
1433 return true;
1434
1435 // Verify that we have a simple digit-sequence, and compute the value. This
1436 // is always a simple digit string computed in decimal, so we do this manually
1437 // here.
1438 Val = 0;
1439 for (unsigned i = 0; i != ActualLength; ++i) {
1440 // C++1y [lex.fcon]p1:
1441 // Optional separating single quotes in a digit-sequence are ignored
1442 if (DigitTokBegin[i] == '\'')
1443 continue;
1444
1445 if (!isDigit(c: DigitTokBegin[i])) {
1446 PP.Diag(Loc: PP.AdvanceToTokenCharacter(TokStart: DigitTok.getLocation(), Char: i),
1447 DiagID: diag::err_pp_line_digit_sequence) << IsGNULineDirective;
1448 PP.DiscardUntilEndOfDirective();
1449 return true;
1450 }
1451
1452 unsigned NextVal = Val*10+(DigitTokBegin[i]-'0');
1453 if (NextVal < Val) { // overflow.
1454 PP.Diag(Tok: DigitTok, DiagID);
1455 PP.DiscardUntilEndOfDirective();
1456 return true;
1457 }
1458 Val = NextVal;
1459 }
1460
1461 if (DigitTokBegin[0] == '0' && Val)
1462 PP.Diag(Loc: DigitTok.getLocation(), DiagID: diag::warn_pp_line_decimal)
1463 << IsGNULineDirective;
1464
1465 return false;
1466}
1467
1468/// Handle a \#line directive: C99 6.10.4.
1469///
1470/// The two acceptable forms are:
1471/// \verbatim
1472/// # line digit-sequence
1473/// # line digit-sequence "s-char-sequence"
1474/// \endverbatim
1475void Preprocessor::HandleLineDirective() {
1476 // Read the line # and string argument. Per C99 6.10.4p5, these tokens are
1477 // expanded.
1478 Token DigitTok;
1479 Lex(Result&: DigitTok);
1480
1481 // Validate the number and convert it to an unsigned.
1482 unsigned LineNo;
1483 if (GetLineValue(DigitTok, Val&: LineNo, DiagID: diag::err_pp_line_requires_integer,PP&: *this))
1484 return;
1485
1486 if (LineNo == 0)
1487 Diag(Tok: DigitTok, DiagID: diag::ext_pp_line_zero);
1488
1489 // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
1490 // number greater than 2147483647". C90 requires that the line # be <= 32767.
1491 unsigned LineLimit = 32768U;
1492 if (LangOpts.C99 || LangOpts.CPlusPlus11)
1493 LineLimit = 2147483648U;
1494 if (LineNo >= LineLimit)
1495 Diag(Tok: DigitTok, DiagID: diag::ext_pp_line_too_big) << LineLimit;
1496 else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
1497 Diag(Tok: DigitTok, DiagID: diag::warn_cxx98_compat_pp_line_too_big);
1498
1499 int FilenameID = -1;
1500 Token StrTok;
1501 Lex(Result&: StrTok);
1502
1503 // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a
1504 // string followed by eod.
1505 if (StrTok.is(K: tok::eod))
1506 ; // ok
1507 else if (StrTok.isNot(K: tok::string_literal)) {
1508 Diag(Tok: StrTok, DiagID: diag::err_pp_line_invalid_filename);
1509 DiscardUntilEndOfDirective();
1510 return;
1511 } else if (StrTok.hasUDSuffix()) {
1512 Diag(Tok: StrTok, DiagID: diag::err_invalid_string_udl);
1513 DiscardUntilEndOfDirective();
1514 return;
1515 } else {
1516 // Parse and validate the string, converting it into a unique ID.
1517 StringLiteralParser Literal(StrTok, *this);
1518 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1519 if (Literal.hadError) {
1520 DiscardUntilEndOfDirective();
1521 return;
1522 }
1523 if (Literal.Pascal) {
1524 Diag(Tok: StrTok, DiagID: diag::err_pp_linemarker_invalid_filename);
1525 DiscardUntilEndOfDirective();
1526 return;
1527 }
1528 FilenameID = SourceMgr.getLineTableFilenameID(Str: Literal.GetString());
1529
1530 // Verify that there is nothing after the string, other than EOD. Because
1531 // of C99 6.10.4p5, macros that expand to empty tokens are ok.
1532 CheckEndOfDirective(DirType: "line", EnableMacros: true);
1533 }
1534
1535 // Take the file kind of the file containing the #line directive. #line
1536 // directives are often used for generated sources from the same codebase, so
1537 // the new file should generally be classified the same way as the current
1538 // file. This is visible in GCC's pre-processed output, which rewrites #line
1539 // to GNU line markers.
1540 SrcMgr::CharacteristicKind FileKind =
1541 SourceMgr.getFileCharacteristic(Loc: DigitTok.getLocation());
1542
1543 SourceMgr.AddLineNote(Loc: DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry: false,
1544 IsFileExit: false, FileKind);
1545
1546 if (Callbacks)
1547 Callbacks->FileChanged(Loc: CurPPLexer->getSourceLocation(),
1548 Reason: PPCallbacks::RenameFile, FileType: FileKind);
1549}
1550
1551/// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
1552/// marker directive.
1553static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
1554 SrcMgr::CharacteristicKind &FileKind,
1555 Preprocessor &PP) {
1556 unsigned FlagVal;
1557 Token FlagTok;
1558 PP.Lex(Result&: FlagTok);
1559 if (FlagTok.is(K: tok::eod)) return false;
1560 if (GetLineValue(DigitTok&: FlagTok, Val&: FlagVal, DiagID: diag::err_pp_linemarker_invalid_flag, PP))
1561 return true;
1562
1563 if (FlagVal == 1) {
1564 IsFileEntry = true;
1565
1566 PP.Lex(Result&: FlagTok);
1567 if (FlagTok.is(K: tok::eod)) return false;
1568 if (GetLineValue(DigitTok&: FlagTok, Val&: FlagVal, DiagID: diag::err_pp_linemarker_invalid_flag,PP))
1569 return true;
1570 } else if (FlagVal == 2) {
1571 IsFileExit = true;
1572
1573 SourceManager &SM = PP.getSourceManager();
1574 // If we are leaving the current presumed file, check to make sure the
1575 // presumed include stack isn't empty!
1576 FileID CurFileID =
1577 SM.getDecomposedExpansionLoc(Loc: FlagTok.getLocation()).first;
1578 PresumedLoc PLoc = SM.getPresumedLoc(Loc: FlagTok.getLocation());
1579 if (PLoc.isInvalid())
1580 return true;
1581
1582 // If there is no include loc (main file) or if the include loc is in a
1583 // different physical file, then we aren't in a "1" line marker flag region.
1584 SourceLocation IncLoc = PLoc.getIncludeLoc();
1585 if (IncLoc.isInvalid() ||
1586 SM.getDecomposedExpansionLoc(Loc: IncLoc).first != CurFileID) {
1587 PP.Diag(Tok: FlagTok, DiagID: diag::err_pp_linemarker_invalid_pop);
1588 PP.DiscardUntilEndOfDirective();
1589 return true;
1590 }
1591
1592 PP.Lex(Result&: FlagTok);
1593 if (FlagTok.is(K: tok::eod)) return false;
1594 if (GetLineValue(DigitTok&: FlagTok, Val&: FlagVal, DiagID: diag::err_pp_linemarker_invalid_flag,PP))
1595 return true;
1596 }
1597
1598 // We must have 3 if there are still flags.
1599 if (FlagVal != 3) {
1600 PP.Diag(Tok: FlagTok, DiagID: diag::err_pp_linemarker_invalid_flag);
1601 PP.DiscardUntilEndOfDirective();
1602 return true;
1603 }
1604
1605 FileKind = SrcMgr::C_System;
1606
1607 PP.Lex(Result&: FlagTok);
1608 if (FlagTok.is(K: tok::eod)) return false;
1609 if (GetLineValue(DigitTok&: FlagTok, Val&: FlagVal, DiagID: diag::err_pp_linemarker_invalid_flag, PP))
1610 return true;
1611
1612 // We must have 4 if there is yet another flag.
1613 if (FlagVal != 4) {
1614 PP.Diag(Tok: FlagTok, DiagID: diag::err_pp_linemarker_invalid_flag);
1615 PP.DiscardUntilEndOfDirective();
1616 return true;
1617 }
1618
1619 FileKind = SrcMgr::C_ExternCSystem;
1620
1621 PP.Lex(Result&: FlagTok);
1622 if (FlagTok.is(K: tok::eod)) return false;
1623
1624 // There are no more valid flags here.
1625 PP.Diag(Tok: FlagTok, DiagID: diag::err_pp_linemarker_invalid_flag);
1626 PP.DiscardUntilEndOfDirective();
1627 return true;
1628}
1629
1630/// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
1631/// one of the following forms:
1632///
1633/// # 42
1634/// # 42 "file" ('1' | '2')?
1635/// # 42 "file" ('1' | '2')? '3' '4'?
1636///
1637void Preprocessor::HandleDigitDirective(Token &DigitTok) {
1638 // Validate the number and convert it to an unsigned. GNU does not have a
1639 // line # limit other than it fit in 32-bits.
1640 unsigned LineNo;
1641 if (GetLineValue(DigitTok, Val&: LineNo, DiagID: diag::err_pp_linemarker_requires_integer,
1642 PP&: *this, IsGNULineDirective: true))
1643 return;
1644
1645 Token StrTok;
1646 Lex(Result&: StrTok);
1647
1648 bool IsFileEntry = false, IsFileExit = false;
1649 int FilenameID = -1;
1650 SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
1651
1652 // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a
1653 // string followed by eod.
1654 if (StrTok.is(K: tok::eod)) {
1655 Diag(Tok: StrTok, DiagID: diag::ext_pp_gnu_line_directive);
1656 // Treat this like "#line NN", which doesn't change file characteristics.
1657 FileKind = SourceMgr.getFileCharacteristic(Loc: DigitTok.getLocation());
1658 } else if (StrTok.isNot(K: tok::string_literal)) {
1659 Diag(Tok: StrTok, DiagID: diag::err_pp_linemarker_invalid_filename);
1660 DiscardUntilEndOfDirective();
1661 return;
1662 } else if (StrTok.hasUDSuffix()) {
1663 Diag(Tok: StrTok, DiagID: diag::err_invalid_string_udl);
1664 DiscardUntilEndOfDirective();
1665 return;
1666 } else {
1667 // Parse and validate the string, converting it into a unique ID.
1668 StringLiteralParser Literal(StrTok, *this);
1669 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1670 if (Literal.hadError) {
1671 DiscardUntilEndOfDirective();
1672 return;
1673 }
1674 if (Literal.Pascal) {
1675 Diag(Tok: StrTok, DiagID: diag::err_pp_linemarker_invalid_filename);
1676 DiscardUntilEndOfDirective();
1677 return;
1678 }
1679
1680 // If a filename was present, read any flags that are present.
1681 if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, PP&: *this))
1682 return;
1683 if (!SourceMgr.isWrittenInBuiltinFile(Loc: DigitTok.getLocation()) &&
1684 !SourceMgr.isWrittenInCommandLineFile(Loc: DigitTok.getLocation()))
1685 Diag(Tok: StrTok, DiagID: diag::ext_pp_gnu_line_directive);
1686
1687 // Exiting to an empty string means pop to the including file, so leave
1688 // FilenameID as -1 in that case.
1689 if (!(IsFileExit && Literal.GetString().empty()))
1690 FilenameID = SourceMgr.getLineTableFilenameID(Str: Literal.GetString());
1691 }
1692
1693 // Create a line note with this information.
1694 SourceMgr.AddLineNote(Loc: DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
1695 IsFileExit, FileKind);
1696
1697 // If the preprocessor has callbacks installed, notify them of the #line
1698 // change. This is used so that the line marker comes out in -E mode for
1699 // example.
1700 if (Callbacks) {
1701 PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
1702 if (IsFileEntry)
1703 Reason = PPCallbacks::EnterFile;
1704 else if (IsFileExit)
1705 Reason = PPCallbacks::ExitFile;
1706
1707 Callbacks->FileChanged(Loc: CurPPLexer->getSourceLocation(), Reason, FileType: FileKind);
1708 }
1709}
1710
1711/// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
1712///
1713void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
1714 bool isWarning) {
1715 // Read the rest of the line raw. We do this because we don't want macros
1716 // to be expanded and we don't require that the tokens be valid preprocessing
1717 // tokens. For example, this is allowed: "#warning ` 'foo". GCC does
1718 // collapse multiple consecutive white space between tokens, but this isn't
1719 // specified by the standard.
1720 SmallString<128> Message;
1721 CurLexer->ReadToEndOfLine(Result: &Message);
1722
1723 // Find the first non-whitespace character, so that we can make the
1724 // diagnostic more succinct.
1725 StringRef Msg = Message.str().ltrim(Char: ' ');
1726
1727 if (isWarning)
1728 Diag(Tok, DiagID: diag::pp_hash_warning) << Msg;
1729 else
1730 Diag(Tok, DiagID: diag::err_pp_hash_error) << Msg;
1731}
1732
1733/// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
1734///
1735void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
1736 // Yes, this directive is an extension.
1737 Diag(Tok, DiagID: diag::ext_pp_ident_directive);
1738
1739 // Read the string argument.
1740 Token StrTok;
1741 Lex(Result&: StrTok);
1742
1743 // If the token kind isn't a string, it's a malformed directive.
1744 if (StrTok.isNot(K: tok::string_literal) &&
1745 StrTok.isNot(K: tok::wide_string_literal)) {
1746 Diag(Tok: StrTok, DiagID: diag::err_pp_malformed_ident);
1747 if (StrTok.isNot(K: tok::eod))
1748 DiscardUntilEndOfDirective();
1749 return;
1750 }
1751
1752 if (StrTok.hasUDSuffix()) {
1753 Diag(Tok: StrTok, DiagID: diag::err_invalid_string_udl);
1754 DiscardUntilEndOfDirective();
1755 return;
1756 }
1757
1758 // Verify that there is nothing after the string, other than EOD.
1759 CheckEndOfDirective(DirType: "ident");
1760
1761 if (Callbacks) {
1762 bool Invalid = false;
1763 std::string Str = getSpelling(Tok: StrTok, Invalid: &Invalid);
1764 if (!Invalid)
1765 Callbacks->Ident(Loc: Tok.getLocation(), str: Str);
1766 }
1767}
1768
1769/// Handle a #public directive.
1770void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
1771 Token MacroNameTok;
1772 ReadMacroName(MacroNameTok, isDefineUndef: MU_Undef);
1773
1774 // Error reading macro name? If so, diagnostic already issued.
1775 if (MacroNameTok.is(K: tok::eod))
1776 return;
1777
1778 // Check to see if this is the last token on the #__public_macro line.
1779 CheckEndOfDirective(DirType: "__public_macro");
1780
1781 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1782 // Okay, we finally have a valid identifier to undef.
1783 MacroDirective *MD = getLocalMacroDirective(II);
1784
1785 // If the macro is not defined, this is an error.
1786 if (!MD) {
1787 Diag(Tok: MacroNameTok, DiagID: diag::err_pp_visibility_non_macro) << II;
1788 return;
1789 }
1790
1791 // Note that this macro has now been exported.
1792 appendMacroDirective(II, MD: AllocateVisibilityMacroDirective(
1793 Loc: MacroNameTok.getLocation(), /*isPublic=*/true));
1794}
1795
1796/// Handle a #private directive.
1797void Preprocessor::HandleMacroPrivateDirective() {
1798 Token MacroNameTok;
1799 ReadMacroName(MacroNameTok, isDefineUndef: MU_Undef);
1800
1801 // Error reading macro name? If so, diagnostic already issued.
1802 if (MacroNameTok.is(K: tok::eod))
1803 return;
1804
1805 // Check to see if this is the last token on the #__private_macro line.
1806 CheckEndOfDirective(DirType: "__private_macro");
1807
1808 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1809 // Okay, we finally have a valid identifier to undef.
1810 MacroDirective *MD = getLocalMacroDirective(II);
1811
1812 // If the macro is not defined, this is an error.
1813 if (!MD) {
1814 Diag(Tok: MacroNameTok, DiagID: diag::err_pp_visibility_non_macro) << II;
1815 return;
1816 }
1817
1818 // Note that this macro has now been marked private.
1819 appendMacroDirective(II, MD: AllocateVisibilityMacroDirective(
1820 Loc: MacroNameTok.getLocation(), /*isPublic=*/false));
1821}
1822
1823//===----------------------------------------------------------------------===//
1824// Preprocessor Include Directive Handling.
1825//===----------------------------------------------------------------------===//
1826
1827/// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
1828/// checked and spelled filename, e.g. as an operand of \#include. This returns
1829/// true if the input filename was in <>'s or false if it were in ""'s. The
1830/// caller is expected to provide a buffer that is large enough to hold the
1831/// spelling of the filename, but is also expected to handle the case when
1832/// this method decides to use a different buffer.
1833bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
1834 StringRef &Buffer) {
1835 // Get the text form of the filename.
1836 assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
1837
1838 // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and
1839 // C++20 [lex.header]/2:
1840 //
1841 // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then
1842 // in C: behavior is undefined
1843 // in C++: program is conditionally-supported with implementation-defined
1844 // semantics
1845
1846 // Make sure the filename is <x> or "x".
1847 bool isAngled;
1848 if (Buffer[0] == '<') {
1849 if (Buffer.back() != '>') {
1850 Diag(Loc, DiagID: diag::err_pp_expects_filename);
1851 Buffer = StringRef();
1852 return true;
1853 }
1854 isAngled = true;
1855 } else if (Buffer[0] == '"') {
1856 if (Buffer.back() != '"') {
1857 Diag(Loc, DiagID: diag::err_pp_expects_filename);
1858 Buffer = StringRef();
1859 return true;
1860 }
1861 isAngled = false;
1862 } else {
1863 Diag(Loc, DiagID: diag::err_pp_expects_filename);
1864 Buffer = StringRef();
1865 return true;
1866 }
1867
1868 // Diagnose #include "" as invalid.
1869 if (Buffer.size() <= 2) {
1870 Diag(Loc, DiagID: diag::err_pp_empty_filename);
1871 Buffer = StringRef();
1872 return true;
1873 }
1874
1875 // Skip the brackets.
1876 Buffer = Buffer.substr(Start: 1, N: Buffer.size()-2);
1877 return isAngled;
1878}
1879
1880/// Push a token onto the token stream containing an annotation.
1881void Preprocessor::EnterAnnotationToken(SourceRange Range,
1882 tok::TokenKind Kind,
1883 void *AnnotationVal) {
1884 // FIXME: Produce this as the current token directly, rather than
1885 // allocating a new token for it.
1886 auto Tok = std::make_unique<Token[]>(num: 1);
1887 Tok[0].startToken();
1888 Tok[0].setKind(Kind);
1889 Tok[0].setLocation(Range.getBegin());
1890 Tok[0].setAnnotationEndLoc(Range.getEnd());
1891 Tok[0].setAnnotationValue(AnnotationVal);
1892 EnterTokenStream(Toks: std::move(Tok), NumToks: 1, DisableMacroExpansion: true, /*IsReinject*/ false);
1893}
1894
1895/// Produce a diagnostic informing the user that a #include or similar
1896/// was implicitly treated as a module import.
1897static void diagnoseAutoModuleImport(
1898 Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok,
1899 ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path,
1900 SourceLocation PathEnd) {
1901 SmallString<128> PathString;
1902 for (size_t I = 0, N = Path.size(); I != N; ++I) {
1903 if (I)
1904 PathString += '.';
1905 PathString += Path[I].first->getName();
1906 }
1907
1908 int IncludeKind = 0;
1909 switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
1910 case tok::pp_include:
1911 IncludeKind = 0;
1912 break;
1913
1914 case tok::pp_import:
1915 IncludeKind = 1;
1916 break;
1917
1918 case tok::pp_include_next:
1919 IncludeKind = 2;
1920 break;
1921
1922 case tok::pp___include_macros:
1923 IncludeKind = 3;
1924 break;
1925
1926 default:
1927 llvm_unreachable("unknown include directive kind");
1928 }
1929
1930 PP.Diag(Loc: HashLoc, DiagID: diag::remark_pp_include_directive_modular_translation)
1931 << IncludeKind << PathString;
1932}
1933
1934// Given a vector of path components and a string containing the real
1935// path to the file, build a properly-cased replacement in the vector,
1936// and return true if the replacement should be suggested.
1937static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
1938 StringRef RealPathName,
1939 llvm::sys::path::Style Separator) {
1940 auto RealPathComponentIter = llvm::sys::path::rbegin(path: RealPathName);
1941 auto RealPathComponentEnd = llvm::sys::path::rend(path: RealPathName);
1942 int Cnt = 0;
1943 bool SuggestReplacement = false;
1944
1945 auto IsSep = [Separator](StringRef Component) {
1946 return Component.size() == 1 &&
1947 llvm::sys::path::is_separator(value: Component[0], style: Separator);
1948 };
1949
1950 // Below is a best-effort to handle ".." in paths. It is admittedly
1951 // not 100% correct in the presence of symlinks.
1952 for (auto &Component : llvm::reverse(C&: Components)) {
1953 if ("." == Component) {
1954 } else if (".." == Component) {
1955 ++Cnt;
1956 } else if (Cnt) {
1957 --Cnt;
1958 } else if (RealPathComponentIter != RealPathComponentEnd) {
1959 if (!IsSep(Component) && !IsSep(*RealPathComponentIter) &&
1960 Component != *RealPathComponentIter) {
1961 // If these non-separator path components differ by more than just case,
1962 // then we may be looking at symlinked paths. Bail on this diagnostic to
1963 // avoid noisy false positives.
1964 SuggestReplacement =
1965 RealPathComponentIter->equals_insensitive(RHS: Component);
1966 if (!SuggestReplacement)
1967 break;
1968 Component = *RealPathComponentIter;
1969 }
1970 ++RealPathComponentIter;
1971 }
1972 }
1973 return SuggestReplacement;
1974}
1975
1976bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
1977 const TargetInfo &TargetInfo,
1978 const Module &M,
1979 DiagnosticsEngine &Diags) {
1980 Module::Requirement Requirement;
1981 Module::UnresolvedHeaderDirective MissingHeader;
1982 Module *ShadowingModule = nullptr;
1983 if (M.isAvailable(LangOpts, Target: TargetInfo, Req&: Requirement, MissingHeader,
1984 ShadowingModule))
1985 return false;
1986
1987 if (MissingHeader.FileNameLoc.isValid()) {
1988 Diags.Report(Loc: MissingHeader.FileNameLoc, DiagID: diag::err_module_header_missing)
1989 << MissingHeader.IsUmbrella << MissingHeader.FileName;
1990 } else if (ShadowingModule) {
1991 Diags.Report(Loc: M.DefinitionLoc, DiagID: diag::err_module_shadowed) << M.Name;
1992 Diags.Report(Loc: ShadowingModule->DefinitionLoc,
1993 DiagID: diag::note_previous_definition);
1994 } else {
1995 // FIXME: Track the location at which the requirement was specified, and
1996 // use it here.
1997 Diags.Report(Loc: M.DefinitionLoc, DiagID: diag::err_module_unavailable)
1998 << M.getFullModuleName() << Requirement.RequiredState
1999 << Requirement.FeatureName;
2000 }
2001 return true;
2002}
2003
2004std::pair<ConstSearchDirIterator, const FileEntry *>
2005Preprocessor::getIncludeNextStart(const Token &IncludeNextTok) const {
2006 // #include_next is like #include, except that we start searching after
2007 // the current found directory. If we can't do this, issue a
2008 // diagnostic.
2009 ConstSearchDirIterator Lookup = CurDirLookup;
2010 const FileEntry *LookupFromFile = nullptr;
2011
2012 if (isInPrimaryFile() && LangOpts.IsHeaderFile) {
2013 // If the main file is a header, then it's either for PCH/AST generation,
2014 // or libclang opened it. Either way, handle it as a normal include below
2015 // and do not complain about include_next.
2016 } else if (isInPrimaryFile()) {
2017 Lookup = nullptr;
2018 Diag(Tok: IncludeNextTok, DiagID: diag::pp_include_next_in_primary);
2019 } else if (CurLexerSubmodule) {
2020 // Start looking up in the directory *after* the one in which the current
2021 // file would be found, if any.
2022 assert(CurPPLexer && "#include_next directive in macro?");
2023 if (auto FE = CurPPLexer->getFileEntry())
2024 LookupFromFile = *FE;
2025 Lookup = nullptr;
2026 } else if (!Lookup) {
2027 // The current file was not found by walking the include path. Either it
2028 // is the primary file (handled above), or it was found by absolute path,
2029 // or it was found relative to such a file.
2030 // FIXME: Track enough information so we know which case we're in.
2031 Diag(Tok: IncludeNextTok, DiagID: diag::pp_include_next_absolute_path);
2032 } else {
2033 // Start looking up in the next directory.
2034 ++Lookup;
2035 }
2036
2037 return {Lookup, LookupFromFile};
2038}
2039
2040/// HandleIncludeDirective - The "\#include" tokens have just been read, read
2041/// the file to be included from the lexer, then include it! This is a common
2042/// routine with functionality shared between \#include, \#include_next and
2043/// \#import. LookupFrom is set when this is a \#include_next directive, it
2044/// specifies the file to start searching from.
2045void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
2046 Token &IncludeTok,
2047 ConstSearchDirIterator LookupFrom,
2048 const FileEntry *LookupFromFile) {
2049 Token FilenameTok;
2050 if (LexHeaderName(Result&: FilenameTok))
2051 return;
2052
2053 if (FilenameTok.isNot(K: tok::header_name)) {
2054 Diag(Loc: FilenameTok.getLocation(), DiagID: diag::err_pp_expects_filename);
2055 if (FilenameTok.isNot(K: tok::eod))
2056 DiscardUntilEndOfDirective();
2057 return;
2058 }
2059
2060 // Verify that there is nothing after the filename, other than EOD. Note
2061 // that we allow macros that expand to nothing after the filename, because
2062 // this falls into the category of "#include pp-tokens new-line" specified
2063 // in C99 6.10.2p4.
2064 SourceLocation EndLoc =
2065 CheckEndOfDirective(DirType: IncludeTok.getIdentifierInfo()->getNameStart(), EnableMacros: true);
2066
2067 auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
2068 EndLoc, LookupFrom, LookupFromFile);
2069 switch (Action.Kind) {
2070 case ImportAction::None:
2071 case ImportAction::SkippedModuleImport:
2072 break;
2073 case ImportAction::ModuleBegin:
2074 EnterAnnotationToken(Range: SourceRange(HashLoc, EndLoc),
2075 Kind: tok::annot_module_begin, AnnotationVal: Action.ModuleForHeader);
2076 break;
2077 case ImportAction::HeaderUnitImport:
2078 EnterAnnotationToken(Range: SourceRange(HashLoc, EndLoc), Kind: tok::annot_header_unit,
2079 AnnotationVal: Action.ModuleForHeader);
2080 break;
2081 case ImportAction::ModuleImport:
2082 EnterAnnotationToken(Range: SourceRange(HashLoc, EndLoc),
2083 Kind: tok::annot_module_include, AnnotationVal: Action.ModuleForHeader);
2084 break;
2085 case ImportAction::Failure:
2086 assert(TheModuleLoader.HadFatalFailure &&
2087 "This should be an early exit only to a fatal error");
2088 TheModuleLoader.HadFatalFailure = true;
2089 IncludeTok.setKind(tok::eof);
2090 CurLexer->cutOffLexing();
2091 return;
2092 }
2093}
2094
2095OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(
2096 ConstSearchDirIterator *CurDir, StringRef &Filename,
2097 SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2098 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2099 bool &IsMapped, ConstSearchDirIterator LookupFrom,
2100 const FileEntry *LookupFromFile, StringRef &LookupFilename,
2101 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2102 ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
2103 auto DiagnoseHeaderInclusion = [&](FileEntryRef FE) {
2104 if (LangOpts.AsmPreprocessor)
2105 return;
2106
2107 Module *RequestingModule = getModuleForLocation(
2108 Loc: FilenameLoc, AllowTextual: LangOpts.ModulesValidateTextualHeaderIncludes);
2109 bool RequestingModuleIsModuleInterface =
2110 !SourceMgr.isInMainFile(Loc: FilenameLoc);
2111
2112 HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
2113 RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
2114 Filename, File: FE);
2115 };
2116
2117 OptionalFileEntryRef File = LookupFile(
2118 FilenameLoc, Filename: LookupFilename, isAngled, FromDir: LookupFrom, FromFile: LookupFromFile, CurDirArg: CurDir,
2119 SearchPath: Callbacks ? &SearchPath : nullptr, RelativePath: Callbacks ? &RelativePath : nullptr,
2120 SuggestedModule: &SuggestedModule, IsMapped: &IsMapped, IsFrameworkFound: &IsFrameworkFound);
2121 if (File) {
2122 DiagnoseHeaderInclusion(*File);
2123 return File;
2124 }
2125
2126 // Give the clients a chance to silently skip this include.
2127 if (Callbacks && Callbacks->FileNotFound(FileName: Filename))
2128 return std::nullopt;
2129
2130 if (SuppressIncludeNotFoundError)
2131 return std::nullopt;
2132
2133 // If the file could not be located and it was included via angle
2134 // brackets, we can attempt a lookup as though it were a quoted path to
2135 // provide the user with a possible fixit.
2136 if (isAngled) {
2137 OptionalFileEntryRef File = LookupFile(
2138 FilenameLoc, Filename: LookupFilename, isAngled: false, FromDir: LookupFrom, FromFile: LookupFromFile, CurDirArg: CurDir,
2139 SearchPath: Callbacks ? &SearchPath : nullptr, RelativePath: Callbacks ? &RelativePath : nullptr,
2140 SuggestedModule: &SuggestedModule, IsMapped: &IsMapped,
2141 /*IsFrameworkFound=*/nullptr);
2142 if (File) {
2143 DiagnoseHeaderInclusion(*File);
2144 Diag(Tok: FilenameTok, DiagID: diag::err_pp_file_not_found_angled_include_not_fatal)
2145 << Filename << IsImportDecl
2146 << FixItHint::CreateReplacement(RemoveRange: FilenameRange,
2147 Code: "\"" + Filename.str() + "\"");
2148 return File;
2149 }
2150 }
2151
2152 // Check for likely typos due to leading or trailing non-isAlphanumeric
2153 // characters
2154 StringRef OriginalFilename = Filename;
2155 if (LangOpts.SpellChecking) {
2156 // A heuristic to correct a typo file name by removing leading and
2157 // trailing non-isAlphanumeric characters.
2158 auto CorrectTypoFilename = [](llvm::StringRef Filename) {
2159 Filename = Filename.drop_until(F: isAlphanumeric);
2160 while (!Filename.empty() && !isAlphanumeric(c: Filename.back())) {
2161 Filename = Filename.drop_back();
2162 }
2163 return Filename;
2164 };
2165 StringRef TypoCorrectionName = CorrectTypoFilename(Filename);
2166 StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);
2167
2168 OptionalFileEntryRef File = LookupFile(
2169 FilenameLoc, Filename: TypoCorrectionLookupName, isAngled, FromDir: LookupFrom,
2170 FromFile: LookupFromFile, CurDirArg: CurDir, SearchPath: Callbacks ? &SearchPath : nullptr,
2171 RelativePath: Callbacks ? &RelativePath : nullptr, SuggestedModule: &SuggestedModule, IsMapped: &IsMapped,
2172 /*IsFrameworkFound=*/nullptr);
2173 if (File) {
2174 DiagnoseHeaderInclusion(*File);
2175 auto Hint =
2176 isAngled ? FixItHint::CreateReplacement(
2177 RemoveRange: FilenameRange, Code: "<" + TypoCorrectionName.str() + ">")
2178 : FixItHint::CreateReplacement(
2179 RemoveRange: FilenameRange, Code: "\"" + TypoCorrectionName.str() + "\"");
2180 Diag(Tok: FilenameTok, DiagID: diag::err_pp_file_not_found_typo_not_fatal)
2181 << OriginalFilename << TypoCorrectionName << Hint;
2182 // We found the file, so set the Filename to the name after typo
2183 // correction.
2184 Filename = TypoCorrectionName;
2185 LookupFilename = TypoCorrectionLookupName;
2186 return File;
2187 }
2188 }
2189
2190 // If the file is still not found, just go with the vanilla diagnostic
2191 assert(!File && "expected missing file");
2192 Diag(Tok: FilenameTok, DiagID: diag::err_pp_file_not_found)
2193 << OriginalFilename << FilenameRange;
2194 if (IsFrameworkFound) {
2195 size_t SlashPos = OriginalFilename.find(C: '/');
2196 assert(SlashPos != StringRef::npos &&
2197 "Include with framework name should have '/' in the filename");
2198 StringRef FrameworkName = OriginalFilename.substr(Start: 0, N: SlashPos);
2199 FrameworkCacheEntry &CacheEntry =
2200 HeaderInfo.LookupFrameworkCache(FWName: FrameworkName);
2201 assert(CacheEntry.Directory && "Found framework should be in cache");
2202 Diag(Tok: FilenameTok, DiagID: diag::note_pp_framework_without_header)
2203 << OriginalFilename.substr(Start: SlashPos + 1) << FrameworkName
2204 << CacheEntry.Directory->getName();
2205 }
2206
2207 return std::nullopt;
2208}
2209
2210/// Handle either a #include-like directive or an import declaration that names
2211/// a header file.
2212///
2213/// \param HashLoc The location of the '#' token for an include, or
2214/// SourceLocation() for an import declaration.
2215/// \param IncludeTok The include / include_next / import token.
2216/// \param FilenameTok The header-name token.
2217/// \param EndLoc The location at which any imported macros become visible.
2218/// \param LookupFrom For #include_next, the starting directory for the
2219/// directory lookup.
2220/// \param LookupFromFile For #include_next, the starting file for the directory
2221/// lookup.
2222Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
2223 SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
2224 SourceLocation EndLoc, ConstSearchDirIterator LookupFrom,
2225 const FileEntry *LookupFromFile) {
2226 SmallString<128> FilenameBuffer;
2227 StringRef Filename = getSpelling(Tok: FilenameTok, Buffer&: FilenameBuffer);
2228 SourceLocation CharEnd = FilenameTok.getEndLoc();
2229
2230 CharSourceRange FilenameRange
2231 = CharSourceRange::getCharRange(B: FilenameTok.getLocation(), E: CharEnd);
2232 StringRef OriginalFilename = Filename;
2233 bool isAngled =
2234 GetIncludeFilenameSpelling(Loc: FilenameTok.getLocation(), Buffer&: Filename);
2235
2236 // If GetIncludeFilenameSpelling set the start ptr to null, there was an
2237 // error.
2238 if (Filename.empty())
2239 return {ImportAction::None};
2240
2241 bool IsImportDecl = HashLoc.isInvalid();
2242 SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;
2243
2244 // Complain about attempts to #include files in an audit pragma.
2245 if (PragmaARCCFCodeAuditedInfo.second.isValid()) {
2246 Diag(Loc: StartLoc, DiagID: diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
2247 Diag(Loc: PragmaARCCFCodeAuditedInfo.second, DiagID: diag::note_pragma_entered_here);
2248
2249 // Immediately leave the pragma.
2250 PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()};
2251 }
2252
2253 // Complain about attempts to #include files in an assume-nonnull pragma.
2254 if (PragmaAssumeNonNullLoc.isValid()) {
2255 Diag(Loc: StartLoc, DiagID: diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
2256 Diag(Loc: PragmaAssumeNonNullLoc, DiagID: diag::note_pragma_entered_here);
2257
2258 // Immediately leave the pragma.
2259 PragmaAssumeNonNullLoc = SourceLocation();
2260 }
2261
2262 if (HeaderInfo.HasIncludeAliasMap()) {
2263 // Map the filename with the brackets still attached. If the name doesn't
2264 // map to anything, fall back on the filename we've already gotten the
2265 // spelling for.
2266 StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(Source: OriginalFilename);
2267 if (!NewName.empty())
2268 Filename = NewName;
2269 }
2270
2271 // Search include directories.
2272 bool IsMapped = false;
2273 bool IsFrameworkFound = false;
2274 ConstSearchDirIterator CurDir = nullptr;
2275 SmallString<1024> SearchPath;
2276 SmallString<1024> RelativePath;
2277 // We get the raw path only if we have 'Callbacks' to which we later pass
2278 // the path.
2279 ModuleMap::KnownHeader SuggestedModule;
2280 SourceLocation FilenameLoc = FilenameTok.getLocation();
2281 StringRef LookupFilename = Filename;
2282
2283 // Normalize slashes when compiling with -fms-extensions on non-Windows. This
2284 // is unnecessary on Windows since the filesystem there handles backslashes.
2285 SmallString<128> NormalizedPath;
2286 llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
2287 if (is_style_posix(S: BackslashStyle) && LangOpts.MicrosoftExt) {
2288 NormalizedPath = Filename.str();
2289 llvm::sys::path::native(path&: NormalizedPath);
2290 LookupFilename = NormalizedPath;
2291 BackslashStyle = llvm::sys::path::Style::windows;
2292 }
2293
2294 OptionalFileEntryRef File = LookupHeaderIncludeOrImport(
2295 CurDir: &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
2296 IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
2297 LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
2298
2299 if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
2300 if (File && isPCHThroughHeader(FE: &File->getFileEntry()))
2301 SkippingUntilPCHThroughHeader = false;
2302 return {ImportAction::None};
2303 }
2304
2305 // Should we enter the source file? Set to Skip if either the source file is
2306 // known to have no effect beyond its effect on module visibility -- that is,
2307 // if it's got an include guard that is already defined, set to Import if it
2308 // is a modular header we've already built and should import.
2309
2310 // For C++20 Modules
2311 // [cpp.include]/7 If the header identified by the header-name denotes an
2312 // importable header, it is implementation-defined whether the #include
2313 // preprocessing directive is instead replaced by an import directive.
2314 // For this implementation, the translation is permitted when we are parsing
2315 // the Global Module Fragment, and not otherwise (the cases where it would be
2316 // valid to replace an include with an import are highly constrained once in
2317 // named module purview; this choice avoids considerable complexity in
2318 // determining valid cases).
2319
2320 enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
2321
2322 if (PPOpts->SingleFileParseMode)
2323 Action = IncludeLimitReached;
2324
2325 // If we've reached the max allowed include depth, it is usually due to an
2326 // include cycle. Don't enter already processed files again as it can lead to
2327 // reaching the max allowed include depth again.
2328 if (Action == Enter && HasReachedMaxIncludeDepth && File &&
2329 alreadyIncluded(File: *File))
2330 Action = IncludeLimitReached;
2331
2332 // FIXME: We do not have a good way to disambiguate C++ clang modules from
2333 // C++ standard modules (other than use/non-use of Header Units).
2334
2335 Module *ModuleToImport = SuggestedModule.getModule();
2336
2337 bool MaybeTranslateInclude = Action == Enter && File && ModuleToImport &&
2338 !ModuleToImport->isForBuilding(LangOpts: getLangOpts());
2339
2340 // Maybe a usable Header Unit
2341 bool UsableHeaderUnit = false;
2342 if (getLangOpts().CPlusPlusModules && ModuleToImport &&
2343 ModuleToImport->isHeaderUnit()) {
2344 if (TrackGMFState.inGMF() || IsImportDecl)
2345 UsableHeaderUnit = true;
2346 else if (!IsImportDecl) {
2347 // This is a Header Unit that we do not include-translate
2348 ModuleToImport = nullptr;
2349 }
2350 }
2351 // Maybe a usable clang header module.
2352 bool UsableClangHeaderModule =
2353 (getLangOpts().CPlusPlusModules || getLangOpts().Modules) &&
2354 ModuleToImport && !ModuleToImport->isHeaderUnit();
2355
2356 // Determine whether we should try to import the module for this #include, if
2357 // there is one. Don't do so if precompiled module support is disabled or we
2358 // are processing this module textually (because we're building the module).
2359 if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) {
2360 // If this include corresponds to a module but that module is
2361 // unavailable, diagnose the situation and bail out.
2362 // FIXME: Remove this; loadModule does the same check (but produces
2363 // slightly worse diagnostics).
2364 if (checkModuleIsAvailable(LangOpts: getLangOpts(), TargetInfo: getTargetInfo(), M: *ModuleToImport,
2365 Diags&: getDiagnostics())) {
2366 Diag(Loc: FilenameTok.getLocation(),
2367 DiagID: diag::note_implicit_top_level_module_import_here)
2368 << ModuleToImport->getTopLevelModuleName();
2369 return {ImportAction::None};
2370 }
2371
2372 // Compute the module access path corresponding to this module.
2373 // FIXME: Should we have a second loadModule() overload to avoid this
2374 // extra lookup step?
2375 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
2376 for (Module *Mod = ModuleToImport; Mod; Mod = Mod->Parent)
2377 Path.push_back(Elt: std::make_pair(x: getIdentifierInfo(Name: Mod->Name),
2378 y: FilenameTok.getLocation()));
2379 std::reverse(first: Path.begin(), last: Path.end());
2380
2381 // Warn that we're replacing the include/import with a module import.
2382 if (!IsImportDecl)
2383 diagnoseAutoModuleImport(PP&: *this, HashLoc: StartLoc, IncludeTok, Path, PathEnd: CharEnd);
2384
2385 // Load the module to import its macros. We'll make the declarations
2386 // visible when the parser gets here.
2387 // FIXME: Pass ModuleToImport in here rather than converting it to a path
2388 // and making the module loader convert it back again.
2389 ModuleLoadResult Imported = TheModuleLoader.loadModule(
2390 ImportLoc: IncludeTok.getLocation(), Path, Visibility: Module::Hidden,
2391 /*IsInclusionDirective=*/true);
2392 assert((Imported == nullptr || Imported == ModuleToImport) &&
2393 "the imported module is different than the suggested one");
2394
2395 if (Imported) {
2396 Action = Import;
2397 } else if (Imported.isMissingExpected()) {
2398 markClangModuleAsAffecting(
2399 M: static_cast<Module *>(Imported)->getTopLevelModule());
2400 // We failed to find a submodule that we assumed would exist (because it
2401 // was in the directory of an umbrella header, for instance), but no
2402 // actual module containing it exists (because the umbrella header is
2403 // incomplete). Treat this as a textual inclusion.
2404 ModuleToImport = nullptr;
2405 } else if (Imported.isConfigMismatch()) {
2406 // On a configuration mismatch, enter the header textually. We still know
2407 // that it's part of the corresponding module.
2408 } else {
2409 // We hit an error processing the import. Bail out.
2410 if (hadModuleLoaderFatalFailure()) {
2411 // With a fatal failure in the module loader, we abort parsing.
2412 Token &Result = IncludeTok;
2413 assert(CurLexer && "#include but no current lexer set!");
2414 Result.startToken();
2415 CurLexer->FormTokenWithChars(Result, TokEnd: CurLexer->BufferEnd, Kind: tok::eof);
2416 CurLexer->cutOffLexing();
2417 }
2418 return {ImportAction::None};
2419 }
2420 }
2421
2422 // The #included file will be considered to be a system header if either it is
2423 // in a system include directory, or if the #includer is a system include
2424 // header.
2425 SrcMgr::CharacteristicKind FileCharacter =
2426 SourceMgr.getFileCharacteristic(Loc: FilenameTok.getLocation());
2427 if (File)
2428 FileCharacter = std::max(a: HeaderInfo.getFileDirFlavor(File: *File), b: FileCharacter);
2429
2430 // If this is a '#import' or an import-declaration, don't re-enter the file.
2431 //
2432 // FIXME: If we have a suggested module for a '#include', and we've already
2433 // visited this file, don't bother entering it again. We know it has no
2434 // further effect.
2435 bool EnterOnce =
2436 IsImportDecl ||
2437 IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;
2438
2439 bool IsFirstIncludeOfFile = false;
2440
2441 // Ask HeaderInfo if we should enter this #include file. If not, #including
2442 // this file will have no effect.
2443 if (Action == Enter && File &&
2444 !HeaderInfo.ShouldEnterIncludeFile(PP&: *this, File: *File, isImport: EnterOnce,
2445 ModulesEnabled: getLangOpts().Modules, M: ModuleToImport,
2446 IsFirstIncludeOfFile)) {
2447 // C++ standard modules:
2448 // If we are not in the GMF, then we textually include only
2449 // clang modules:
2450 // Even if we've already preprocessed this header once and know that we
2451 // don't need to see its contents again, we still need to import it if it's
2452 // modular because we might not have imported it from this submodule before.
2453 //
2454 // FIXME: We don't do this when compiling a PCH because the AST
2455 // serialization layer can't cope with it. This means we get local
2456 // submodule visibility semantics wrong in that case.
2457 if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
2458 Action = TrackGMFState.inGMF() ? Import : Skip;
2459 else
2460 Action = (ModuleToImport && !getLangOpts().CompilingPCH) ? Import : Skip;
2461 }
2462
2463 // Check for circular inclusion of the main file.
2464 // We can't generate a consistent preamble with regard to the conditional
2465 // stack if the main file is included again as due to the preamble bounds
2466 // some directives (e.g. #endif of a header guard) will never be seen.
2467 // Since this will lead to confusing errors, avoid the inclusion.
2468 if (Action == Enter && File && PreambleConditionalStack.isRecording() &&
2469 SourceMgr.isMainFile(SourceFile: File->getFileEntry())) {
2470 Diag(Loc: FilenameTok.getLocation(),
2471 DiagID: diag::err_pp_including_mainfile_in_preamble);
2472 return {ImportAction::None};
2473 }
2474
2475 if (Callbacks && !IsImportDecl) {
2476 // Notify the callback object that we've seen an inclusion directive.
2477 // FIXME: Use a different callback for a pp-import?
2478 Callbacks->InclusionDirective(HashLoc, IncludeTok, FileName: LookupFilename, IsAngled: isAngled,
2479 FilenameRange, File, SearchPath, RelativePath,
2480 SuggestedModule: SuggestedModule.getModule(), ModuleImported: Action == Import,
2481 FileType: FileCharacter);
2482 if (Action == Skip && File)
2483 Callbacks->FileSkipped(SkippedFile: *File, FilenameTok, FileType: FileCharacter);
2484 }
2485
2486 if (!File)
2487 return {ImportAction::None};
2488
2489 // If this is a C++20 pp-import declaration, diagnose if we didn't find any
2490 // module corresponding to the named header.
2491 if (IsImportDecl && !ModuleToImport) {
2492 Diag(Tok: FilenameTok, DiagID: diag::err_header_import_not_header_unit)
2493 << OriginalFilename << File->getName();
2494 return {ImportAction::None};
2495 }
2496
2497 // Issue a diagnostic if the name of the file on disk has a different case
2498 // than the one we're about to open.
2499 const bool CheckIncludePathPortability =
2500 !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();
2501
2502 if (CheckIncludePathPortability) {
2503 StringRef Name = LookupFilename;
2504 StringRef NameWithoriginalSlashes = Filename;
2505#if defined(_WIN32)
2506 // Skip UNC prefix if present. (tryGetRealPathName() always
2507 // returns a path with the prefix skipped.)
2508 bool NameWasUNC = Name.consume_front("\\\\?\\");
2509 NameWithoriginalSlashes.consume_front("\\\\?\\");
2510#endif
2511 StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
2512 SmallVector<StringRef, 16> Components(llvm::sys::path::begin(path: Name),
2513 llvm::sys::path::end(path: Name));
2514#if defined(_WIN32)
2515 // -Wnonportable-include-path is designed to diagnose includes using
2516 // case even on systems with a case-insensitive file system.
2517 // On Windows, RealPathName always starts with an upper-case drive
2518 // letter for absolute paths, but Name might start with either
2519 // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
2520 // ("foo" will always have on-disk case, no matter which case was
2521 // used in the cd command). To not emit this warning solely for
2522 // the drive letter, whose case is dependent on if `cd` is used
2523 // with upper- or lower-case drive letters, always consider the
2524 // given drive letter case as correct for the purpose of this warning.
2525 SmallString<128> FixedDriveRealPath;
2526 if (llvm::sys::path::is_absolute(Name) &&
2527 llvm::sys::path::is_absolute(RealPathName) &&
2528 toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
2529 isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
2530 assert(Components.size() >= 3 && "should have drive, backslash, name");
2531 assert(Components[0].size() == 2 && "should start with drive");
2532 assert(Components[0][1] == ':' && "should have colon");
2533 FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
2534 RealPathName = FixedDriveRealPath;
2535 }
2536#endif
2537
2538 if (trySimplifyPath(Components, RealPathName, Separator: BackslashStyle)) {
2539 SmallString<128> Path;
2540 Path.reserve(N: Name.size()+2);
2541 Path.push_back(Elt: isAngled ? '<' : '"');
2542
2543 const auto IsSep = [BackslashStyle](char c) {
2544 return llvm::sys::path::is_separator(value: c, style: BackslashStyle);
2545 };
2546
2547 for (auto Component : Components) {
2548 // On POSIX, Components will contain a single '/' as first element
2549 // exactly if Name is an absolute path.
2550 // On Windows, it will contain "C:" followed by '\' for absolute paths.
2551 // The drive letter is optional for absolute paths on Windows, but
2552 // clang currently cannot process absolute paths in #include lines that
2553 // don't have a drive.
2554 // If the first entry in Components is a directory separator,
2555 // then the code at the bottom of this loop that keeps the original
2556 // directory separator style copies it. If the second entry is
2557 // a directory separator (the C:\ case), then that separator already
2558 // got copied when the C: was processed and we want to skip that entry.
2559 if (!(Component.size() == 1 && IsSep(Component[0])))
2560 Path.append(RHS: Component);
2561 else if (Path.size() != 1)
2562 continue;
2563
2564 // Append the separator(s) the user used, or the close quote
2565 if (Path.size() > NameWithoriginalSlashes.size()) {
2566 Path.push_back(Elt: isAngled ? '>' : '"');
2567 continue;
2568 }
2569 assert(IsSep(NameWithoriginalSlashes[Path.size()-1]));
2570 do
2571 Path.push_back(Elt: NameWithoriginalSlashes[Path.size()-1]);
2572 while (Path.size() <= NameWithoriginalSlashes.size() &&
2573 IsSep(NameWithoriginalSlashes[Path.size()-1]));
2574 }
2575
2576#if defined(_WIN32)
2577 // Restore UNC prefix if it was there.
2578 if (NameWasUNC)
2579 Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
2580#endif
2581
2582 // For user files and known standard headers, issue a diagnostic.
2583 // For other system headers, don't. They can be controlled separately.
2584 auto DiagId =
2585 (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Include: Name))
2586 ? diag::pp_nonportable_path
2587 : diag::pp_nonportable_system_path;
2588 Diag(Tok: FilenameTok, DiagID: DiagId) << Path <<
2589 FixItHint::CreateReplacement(RemoveRange: FilenameRange, Code: Path);
2590 }
2591 }
2592
2593 switch (Action) {
2594 case Skip:
2595 // If we don't need to enter the file, stop now.
2596 if (ModuleToImport)
2597 return {ImportAction::SkippedModuleImport, ModuleToImport};
2598 return {ImportAction::None};
2599
2600 case IncludeLimitReached:
2601 // If we reached our include limit and don't want to enter any more files,
2602 // don't go any further.
2603 return {ImportAction::None};
2604
2605 case Import: {
2606 // If this is a module import, make it visible if needed.
2607 assert(ModuleToImport && "no module to import");
2608
2609 makeModuleVisible(M: ModuleToImport, Loc: EndLoc);
2610
2611 if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
2612 tok::pp___include_macros)
2613 return {ImportAction::None};
2614
2615 return {ImportAction::ModuleImport, ModuleToImport};
2616 }
2617
2618 case Enter:
2619 break;
2620 }
2621
2622 // Check that we don't have infinite #include recursion.
2623 if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
2624 Diag(Tok: FilenameTok, DiagID: diag::err_pp_include_too_deep);
2625 HasReachedMaxIncludeDepth = true;
2626 return {ImportAction::None};
2627 }
2628
2629 if (isAngled && isInNamedModule())
2630 Diag(Tok: FilenameTok, DiagID: diag::warn_pp_include_angled_in_module_purview)
2631 << getNamedModuleName();
2632
2633 // Look up the file, create a File ID for it.
2634 SourceLocation IncludePos = FilenameTok.getLocation();
2635 // If the filename string was the result of macro expansions, set the include
2636 // position on the file where it will be included and after the expansions.
2637 if (IncludePos.isMacroID())
2638 IncludePos = SourceMgr.getExpansionRange(Loc: IncludePos).getEnd();
2639 FileID FID = SourceMgr.createFileID(SourceFile: *File, IncludePos, FileCharacter);
2640 if (!FID.isValid()) {
2641 TheModuleLoader.HadFatalFailure = true;
2642 return ImportAction::Failure;
2643 }
2644
2645 // If all is good, enter the new file!
2646 if (EnterSourceFile(FID, Dir: CurDir, Loc: FilenameTok.getLocation(),
2647 IsFirstIncludeOfFile))
2648 return {ImportAction::None};
2649
2650 // Determine if we're switching to building a new submodule, and which one.
2651 // This does not apply for C++20 modules header units.
2652 if (ModuleToImport && !ModuleToImport->isHeaderUnit()) {
2653 if (ModuleToImport->getTopLevelModule()->ShadowingModule) {
2654 // We are building a submodule that belongs to a shadowed module. This
2655 // means we find header files in the shadowed module.
2656 Diag(Loc: ModuleToImport->DefinitionLoc,
2657 DiagID: diag::err_module_build_shadowed_submodule)
2658 << ModuleToImport->getFullModuleName();
2659 Diag(Loc: ModuleToImport->getTopLevelModule()->ShadowingModule->DefinitionLoc,
2660 DiagID: diag::note_previous_definition);
2661 return {ImportAction::None};
2662 }
2663 // When building a pch, -fmodule-name tells the compiler to textually
2664 // include headers in the specified module. We are not building the
2665 // specified module.
2666 //
2667 // FIXME: This is the wrong way to handle this. We should produce a PCH
2668 // that behaves the same as the header would behave in a compilation using
2669 // that PCH, which means we should enter the submodule. We need to teach
2670 // the AST serialization layer to deal with the resulting AST.
2671 if (getLangOpts().CompilingPCH &&
2672 ModuleToImport->isForBuilding(LangOpts: getLangOpts()))
2673 return {ImportAction::None};
2674
2675 assert(!CurLexerSubmodule && "should not have marked this as a module yet");
2676 CurLexerSubmodule = ModuleToImport;
2677
2678 // Let the macro handling code know that any future macros are within
2679 // the new submodule.
2680 EnterSubmodule(M: ModuleToImport, ImportLoc: EndLoc, /*ForPragma*/ false);
2681
2682 // Let the parser know that any future declarations are within the new
2683 // submodule.
2684 // FIXME: There's no point doing this if we're handling a #__include_macros
2685 // directive.
2686 return {ImportAction::ModuleBegin, ModuleToImport};
2687 }
2688
2689 assert(!IsImportDecl && "failed to diagnose missing module for import decl");
2690 return {ImportAction::None};
2691}
2692
2693/// HandleIncludeNextDirective - Implements \#include_next.
2694///
2695void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
2696 Token &IncludeNextTok) {
2697 Diag(Tok: IncludeNextTok, DiagID: diag::ext_pp_include_next_directive);
2698
2699 ConstSearchDirIterator Lookup = nullptr;
2700 const FileEntry *LookupFromFile;
2701 std::tie(args&: Lookup, args&: LookupFromFile) = getIncludeNextStart(IncludeNextTok);
2702
2703 return HandleIncludeDirective(HashLoc, IncludeTok&: IncludeNextTok, LookupFrom: Lookup,
2704 LookupFromFile);
2705}
2706
2707/// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
2708void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
2709 // The Microsoft #import directive takes a type library and generates header
2710 // files from it, and includes those. This is beyond the scope of what clang
2711 // does, so we ignore it and error out. However, #import can optionally have
2712 // trailing attributes that span multiple lines. We're going to eat those
2713 // so we can continue processing from there.
2714 Diag(Tok, DiagID: diag::err_pp_import_directive_ms );
2715
2716 // Read tokens until we get to the end of the directive. Note that the
2717 // directive can be split over multiple lines using the backslash character.
2718 DiscardUntilEndOfDirective();
2719}
2720
2721/// HandleImportDirective - Implements \#import.
2722///
2723void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
2724 Token &ImportTok) {
2725 if (!LangOpts.ObjC) { // #import is standard for ObjC.
2726 if (LangOpts.MSVCCompat)
2727 return HandleMicrosoftImportDirective(Tok&: ImportTok);
2728 Diag(Tok: ImportTok, DiagID: diag::ext_pp_import_directive);
2729 }
2730 return HandleIncludeDirective(HashLoc, IncludeTok&: ImportTok);
2731}
2732
2733/// HandleIncludeMacrosDirective - The -imacros command line option turns into a
2734/// pseudo directive in the predefines buffer. This handles it by sucking all
2735/// tokens through the preprocessor and discarding them (only keeping the side
2736/// effects on the preprocessor).
2737void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
2738 Token &IncludeMacrosTok) {
2739 // This directive should only occur in the predefines buffer. If not, emit an
2740 // error and reject it.
2741 SourceLocation Loc = IncludeMacrosTok.getLocation();
2742 if (SourceMgr.getBufferName(Loc) != "<built-in>") {
2743 Diag(Loc: IncludeMacrosTok.getLocation(),
2744 DiagID: diag::pp_include_macros_out_of_predefines);
2745 DiscardUntilEndOfDirective();
2746 return;
2747 }
2748
2749 // Treat this as a normal #include for checking purposes. If this is
2750 // successful, it will push a new lexer onto the include stack.
2751 HandleIncludeDirective(HashLoc, IncludeTok&: IncludeMacrosTok);
2752
2753 Token TmpTok;
2754 do {
2755 Lex(Result&: TmpTok);
2756 assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
2757 } while (TmpTok.isNot(K: tok::hashhash));
2758}
2759
2760//===----------------------------------------------------------------------===//
2761// Preprocessor Macro Directive Handling.
2762//===----------------------------------------------------------------------===//
2763
2764/// ReadMacroParameterList - The ( starting a parameter list of a macro
2765/// definition has just been read. Lex the rest of the parameters and the
2766/// closing ), updating MI with what we learn. Return true if an error occurs
2767/// parsing the param list.
2768bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
2769 SmallVector<IdentifierInfo*, 32> Parameters;
2770
2771 while (true) {
2772 LexUnexpandedNonComment(Result&: Tok);
2773 switch (Tok.getKind()) {
2774 case tok::r_paren:
2775 // Found the end of the parameter list.
2776 if (Parameters.empty()) // #define FOO()
2777 return false;
2778 // Otherwise we have #define FOO(A,)
2779 Diag(Tok, DiagID: diag::err_pp_expected_ident_in_arg_list);
2780 return true;
2781 case tok::ellipsis: // #define X(... -> C99 varargs
2782 if (!LangOpts.C99)
2783 Diag(Tok, DiagID: LangOpts.CPlusPlus11 ?
2784 diag::warn_cxx98_compat_variadic_macro :
2785 diag::ext_variadic_macro);
2786
2787 // OpenCL v1.2 s6.9.e: variadic macros are not supported.
2788 if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus) {
2789 Diag(Tok, DiagID: diag::ext_pp_opencl_variadic_macros);
2790 }
2791
2792 // Lex the token after the identifier.
2793 LexUnexpandedNonComment(Result&: Tok);
2794 if (Tok.isNot(K: tok::r_paren)) {
2795 Diag(Tok, DiagID: diag::err_pp_missing_rparen_in_macro_def);
2796 return true;
2797 }
2798 // Add the __VA_ARGS__ identifier as a parameter.
2799 Parameters.push_back(Elt: Ident__VA_ARGS__);
2800 MI->setIsC99Varargs();
2801 MI->setParameterList(List: Parameters, PPAllocator&: BP);
2802 return false;
2803 case tok::eod: // #define X(
2804 Diag(Tok, DiagID: diag::err_pp_missing_rparen_in_macro_def);
2805 return true;
2806 default:
2807 // Handle keywords and identifiers here to accept things like
2808 // #define Foo(for) for.
2809 IdentifierInfo *II = Tok.getIdentifierInfo();
2810 if (!II) {
2811 // #define X(1
2812 Diag(Tok, DiagID: diag::err_pp_invalid_tok_in_arg_list);
2813 return true;
2814 }
2815
2816 // If this is already used as a parameter, it is used multiple times (e.g.
2817 // #define X(A,A.
2818 if (llvm::is_contained(Range&: Parameters, Element: II)) { // C99 6.10.3p6
2819 Diag(Tok, DiagID: diag::err_pp_duplicate_name_in_arg_list) << II;
2820 return true;
2821 }
2822
2823 // Add the parameter to the macro info.
2824 Parameters.push_back(Elt: II);
2825
2826 // Lex the token after the identifier.
2827 LexUnexpandedNonComment(Result&: Tok);
2828
2829 switch (Tok.getKind()) {
2830 default: // #define X(A B
2831 Diag(Tok, DiagID: diag::err_pp_expected_comma_in_arg_list);
2832 return true;
2833 case tok::r_paren: // #define X(A)
2834 MI->setParameterList(List: Parameters, PPAllocator&: BP);
2835 return false;
2836 case tok::comma: // #define X(A,
2837 break;
2838 case tok::ellipsis: // #define X(A... -> GCC extension
2839 // Diagnose extension.
2840 Diag(Tok, DiagID: diag::ext_named_variadic_macro);
2841
2842 // Lex the token after the identifier.
2843 LexUnexpandedNonComment(Result&: Tok);
2844 if (Tok.isNot(K: tok::r_paren)) {
2845 Diag(Tok, DiagID: diag::err_pp_missing_rparen_in_macro_def);
2846 return true;
2847 }
2848
2849 MI->setIsGNUVarargs();
2850 MI->setParameterList(List: Parameters, PPAllocator&: BP);
2851 return false;
2852 }
2853 }
2854 }
2855}
2856
2857static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
2858 const LangOptions &LOptions) {
2859 if (MI->getNumTokens() == 1) {
2860 const Token &Value = MI->getReplacementToken(Tok: 0);
2861
2862 // Macro that is identity, like '#define inline inline' is a valid pattern.
2863 if (MacroName.getKind() == Value.getKind())
2864 return true;
2865
2866 // Macro that maps a keyword to the same keyword decorated with leading/
2867 // trailing underscores is a valid pattern:
2868 // #define inline __inline
2869 // #define inline __inline__
2870 // #define inline _inline (in MS compatibility mode)
2871 StringRef MacroText = MacroName.getIdentifierInfo()->getName();
2872 if (IdentifierInfo *II = Value.getIdentifierInfo()) {
2873 if (!II->isKeyword(LangOpts: LOptions))
2874 return false;
2875 StringRef ValueText = II->getName();
2876 StringRef TrimmedValue = ValueText;
2877 if (!ValueText.starts_with(Prefix: "__")) {
2878 if (ValueText.starts_with(Prefix: "_"))
2879 TrimmedValue = TrimmedValue.drop_front(N: 1);
2880 else
2881 return false;
2882 } else {
2883 TrimmedValue = TrimmedValue.drop_front(N: 2);
2884 if (TrimmedValue.ends_with(Suffix: "__"))
2885 TrimmedValue = TrimmedValue.drop_back(N: 2);
2886 }
2887 return TrimmedValue == MacroText;
2888 } else {
2889 return false;
2890 }
2891 }
2892
2893 // #define inline
2894 return MacroName.isOneOf(K1: tok::kw_extern, Ks: tok::kw_inline, Ks: tok::kw_static,
2895 Ks: tok::kw_const) &&
2896 MI->getNumTokens() == 0;
2897}
2898
2899// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2900// entire line) of the macro's tokens and adds them to MacroInfo, and while
2901// doing so performs certain validity checks including (but not limited to):
2902// - # (stringization) is followed by a macro parameter
2903//
2904// Returns a nullptr if an invalid sequence of tokens is encountered or returns
2905// a pointer to a MacroInfo object.
2906
2907MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
2908 const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {
2909
2910 Token LastTok = MacroNameTok;
2911 // Create the new macro.
2912 MacroInfo *const MI = AllocateMacroInfo(L: MacroNameTok.getLocation());
2913
2914 Token Tok;
2915 LexUnexpandedToken(Result&: Tok);
2916
2917 // Ensure we consume the rest of the macro body if errors occur.
2918 auto _ = llvm::make_scope_exit(F: [&]() {
2919 // The flag indicates if we are still waiting for 'eod'.
2920 if (CurLexer->ParsingPreprocessorDirective)
2921 DiscardUntilEndOfDirective();
2922 });
2923
2924 // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
2925 // within their appropriate context.
2926 VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);
2927
2928 // If this is a function-like macro definition, parse the argument list,
2929 // marking each of the identifiers as being used as macro arguments. Also,
2930 // check other constraints on the first token of the macro body.
2931 if (Tok.is(K: tok::eod)) {
2932 if (ImmediatelyAfterHeaderGuard) {
2933 // Save this macro information since it may part of a header guard.
2934 CurPPLexer->MIOpt.SetDefinedMacro(M: MacroNameTok.getIdentifierInfo(),
2935 Loc: MacroNameTok.getLocation());
2936 }
2937 // If there is no body to this macro, we have no special handling here.
2938 } else if (Tok.hasLeadingSpace()) {
2939 // This is a normal token with leading space. Clear the leading space
2940 // marker on the first token to get proper expansion.
2941 Tok.clearFlag(Flag: Token::LeadingSpace);
2942 } else if (Tok.is(K: tok::l_paren)) {
2943 // This is a function-like macro definition. Read the argument list.
2944 MI->setIsFunctionLike();
2945 if (ReadMacroParameterList(MI, Tok&: LastTok))
2946 return nullptr;
2947
2948 // If this is a definition of an ISO C/C++ variadic function-like macro (not
2949 // using the GNU named varargs extension) inform our variadic scope guard
2950 // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
2951 // allowed only within the definition of a variadic macro.
2952
2953 if (MI->isC99Varargs()) {
2954 VariadicMacroScopeGuard.enterScope();
2955 }
2956
2957 // Read the first token after the arg list for down below.
2958 LexUnexpandedToken(Result&: Tok);
2959 } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
2960 // C99 requires whitespace between the macro definition and the body. Emit
2961 // a diagnostic for something like "#define X+".
2962 Diag(Tok, DiagID: diag::ext_c99_whitespace_required_after_macro_name);
2963 } else {
2964 // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
2965 // first character of a replacement list is not a character required by
2966 // subclause 5.2.1, then there shall be white-space separation between the
2967 // identifier and the replacement list.". 5.2.1 lists this set:
2968 // "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
2969 // is irrelevant here.
2970 bool isInvalid = false;
2971 if (Tok.is(K: tok::at)) // @ is not in the list above.
2972 isInvalid = true;
2973 else if (Tok.is(K: tok::unknown)) {
2974 // If we have an unknown token, it is something strange like "`". Since
2975 // all of valid characters would have lexed into a single character
2976 // token of some sort, we know this is not a valid case.
2977 isInvalid = true;
2978 }
2979 if (isInvalid)
2980 Diag(Tok, DiagID: diag::ext_missing_whitespace_after_macro_name);
2981 else
2982 Diag(Tok, DiagID: diag::warn_missing_whitespace_after_macro_name);
2983 }
2984
2985 if (!Tok.is(K: tok::eod))
2986 LastTok = Tok;
2987
2988 SmallVector<Token, 16> Tokens;
2989
2990 // Read the rest of the macro body.
2991 if (MI->isObjectLike()) {
2992 // Object-like macros are very simple, just read their body.
2993 while (Tok.isNot(K: tok::eod)) {
2994 LastTok = Tok;
2995 Tokens.push_back(Elt: Tok);
2996 // Get the next token of the macro.
2997 LexUnexpandedToken(Result&: Tok);
2998 }
2999 } else {
3000 // Otherwise, read the body of a function-like macro. While we are at it,
3001 // check C99 6.10.3.2p1: ensure that # operators are followed by macro
3002 // parameters in function-like macro expansions.
3003
3004 VAOptDefinitionContext VAOCtx(*this);
3005
3006 while (Tok.isNot(K: tok::eod)) {
3007 LastTok = Tok;
3008
3009 if (!Tok.isOneOf(K1: tok::hash, Ks: tok::hashat, Ks: tok::hashhash)) {
3010 Tokens.push_back(Elt: Tok);
3011
3012 if (VAOCtx.isVAOptToken(T: Tok)) {
3013 // If we're already within a VAOPT, emit an error.
3014 if (VAOCtx.isInVAOpt()) {
3015 Diag(Tok, DiagID: diag::err_pp_vaopt_nested_use);
3016 return nullptr;
3017 }
3018 // Ensure VAOPT is followed by a '(' .
3019 LexUnexpandedToken(Result&: Tok);
3020 if (Tok.isNot(K: tok::l_paren)) {
3021 Diag(Tok, DiagID: diag::err_pp_missing_lparen_in_vaopt_use);
3022 return nullptr;
3023 }
3024 Tokens.push_back(Elt: Tok);
3025 VAOCtx.sawVAOptFollowedByOpeningParens(LParenLoc: Tok.getLocation());
3026 LexUnexpandedToken(Result&: Tok);
3027 if (Tok.is(K: tok::hashhash)) {
3028 Diag(Tok, DiagID: diag::err_vaopt_paste_at_start);
3029 return nullptr;
3030 }
3031 continue;
3032 } else if (VAOCtx.isInVAOpt()) {
3033 if (Tok.is(K: tok::r_paren)) {
3034 if (VAOCtx.sawClosingParen()) {
3035 assert(Tokens.size() >= 3 &&
3036 "Must have seen at least __VA_OPT__( "
3037 "and a subsequent tok::r_paren");
3038 if (Tokens[Tokens.size() - 2].is(K: tok::hashhash)) {
3039 Diag(Tok, DiagID: diag::err_vaopt_paste_at_end);
3040 return nullptr;
3041 }
3042 }
3043 } else if (Tok.is(K: tok::l_paren)) {
3044 VAOCtx.sawOpeningParen(LParenLoc: Tok.getLocation());
3045 }
3046 }
3047 // Get the next token of the macro.
3048 LexUnexpandedToken(Result&: Tok);
3049 continue;
3050 }
3051
3052 // If we're in -traditional mode, then we should ignore stringification
3053 // and token pasting. Mark the tokens as unknown so as not to confuse
3054 // things.
3055 if (getLangOpts().TraditionalCPP) {
3056 Tok.setKind(tok::unknown);
3057 Tokens.push_back(Elt: Tok);
3058
3059 // Get the next token of the macro.
3060 LexUnexpandedToken(Result&: Tok);
3061 continue;
3062 }
3063
3064 if (Tok.is(K: tok::hashhash)) {
3065 // If we see token pasting, check if it looks like the gcc comma
3066 // pasting extension. We'll use this information to suppress
3067 // diagnostics later on.
3068
3069 // Get the next token of the macro.
3070 LexUnexpandedToken(Result&: Tok);
3071
3072 if (Tok.is(K: tok::eod)) {
3073 Tokens.push_back(Elt: LastTok);
3074 break;
3075 }
3076
3077 if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
3078 Tokens[Tokens.size() - 1].is(K: tok::comma))
3079 MI->setHasCommaPasting();
3080
3081 // Things look ok, add the '##' token to the macro.
3082 Tokens.push_back(Elt: LastTok);
3083 continue;
3084 }
3085
3086 // Our Token is a stringization operator.
3087 // Get the next token of the macro.
3088 LexUnexpandedToken(Result&: Tok);
3089
3090 // Check for a valid macro arg identifier or __VA_OPT__.
3091 if (!VAOCtx.isVAOptToken(T: Tok) &&
3092 (Tok.getIdentifierInfo() == nullptr ||
3093 MI->getParameterNum(Arg: Tok.getIdentifierInfo()) == -1)) {
3094
3095 // If this is assembler-with-cpp mode, we accept random gibberish after
3096 // the '#' because '#' is often a comment character. However, change
3097 // the kind of the token to tok::unknown so that the preprocessor isn't
3098 // confused.
3099 if (getLangOpts().AsmPreprocessor && Tok.isNot(K: tok::eod)) {
3100 LastTok.setKind(tok::unknown);
3101 Tokens.push_back(Elt: LastTok);
3102 continue;
3103 } else {
3104 Diag(Tok, DiagID: diag::err_pp_stringize_not_parameter)
3105 << LastTok.is(K: tok::hashat);
3106 return nullptr;
3107 }
3108 }
3109
3110 // Things look ok, add the '#' and param name tokens to the macro.
3111 Tokens.push_back(Elt: LastTok);
3112
3113 // If the token following '#' is VAOPT, let the next iteration handle it
3114 // and check it for correctness, otherwise add the token and prime the
3115 // loop with the next one.
3116 if (!VAOCtx.isVAOptToken(T: Tok)) {
3117 Tokens.push_back(Elt: Tok);
3118 LastTok = Tok;
3119
3120 // Get the next token of the macro.
3121 LexUnexpandedToken(Result&: Tok);
3122 }
3123 }
3124 if (VAOCtx.isInVAOpt()) {
3125 assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
3126 Diag(Tok, DiagID: diag::err_pp_expected_after)
3127 << LastTok.getKind() << tok::r_paren;
3128 Diag(Loc: VAOCtx.getUnmatchedOpeningParenLoc(), DiagID: diag::note_matching) << tok::l_paren;
3129 return nullptr;
3130 }
3131 }
3132 MI->setDefinitionEndLoc(LastTok.getLocation());
3133
3134 MI->setTokens(Tokens, PPAllocator&: BP);
3135 return MI;
3136}
3137
3138static bool isObjCProtectedMacro(const IdentifierInfo *II) {
3139 return II->isStr(Str: "__strong") || II->isStr(Str: "__weak") ||
3140 II->isStr(Str: "__unsafe_unretained") || II->isStr(Str: "__autoreleasing");
3141}
3142
3143/// HandleDefineDirective - Implements \#define. This consumes the entire macro
3144/// line then lets the caller lex the next real token.
3145void Preprocessor::HandleDefineDirective(
3146 Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
3147 ++NumDefined;
3148
3149 Token MacroNameTok;
3150 bool MacroShadowsKeyword;
3151 ReadMacroName(MacroNameTok, isDefineUndef: MU_Define, ShadowFlag: &MacroShadowsKeyword);
3152
3153 // Error reading macro name? If so, diagnostic already issued.
3154 if (MacroNameTok.is(K: tok::eod))
3155 return;
3156
3157 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
3158 // Issue a final pragma warning if we're defining a macro that was has been
3159 // undefined and is being redefined.
3160 if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
3161 emitFinalMacroWarning(Identifier: MacroNameTok, /*IsUndef=*/false);
3162
3163 // If we are supposed to keep comments in #defines, reenable comment saving
3164 // mode.
3165 if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
3166
3167 MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
3168 MacroNameTok, ImmediatelyAfterHeaderGuard);
3169
3170 if (!MI) return;
3171
3172 if (MacroShadowsKeyword &&
3173 !isConfigurationPattern(MacroName&: MacroNameTok, MI, LOptions: getLangOpts())) {
3174 Diag(Tok: MacroNameTok, DiagID: diag::warn_pp_macro_hides_keyword);
3175 }
3176 // Check that there is no paste (##) operator at the beginning or end of the
3177 // replacement list.
3178 unsigned NumTokens = MI->getNumTokens();
3179 if (NumTokens != 0) {
3180 if (MI->getReplacementToken(Tok: 0).is(K: tok::hashhash)) {
3181 Diag(Tok: MI->getReplacementToken(Tok: 0), DiagID: diag::err_paste_at_start);
3182 return;
3183 }
3184 if (MI->getReplacementToken(Tok: NumTokens-1).is(K: tok::hashhash)) {
3185 Diag(Tok: MI->getReplacementToken(Tok: NumTokens-1), DiagID: diag::err_paste_at_end);
3186 return;
3187 }
3188 }
3189
3190 // When skipping just warn about macros that do not match.
3191 if (SkippingUntilPCHThroughHeader) {
3192 const MacroInfo *OtherMI = getMacroInfo(II: MacroNameTok.getIdentifierInfo());
3193 if (!OtherMI || !MI->isIdenticalTo(Other: *OtherMI, PP&: *this,
3194 /*Syntactic=*/Syntactically: LangOpts.MicrosoftExt))
3195 Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::warn_pp_macro_def_mismatch_with_pch)
3196 << MacroNameTok.getIdentifierInfo();
3197 // Issue the diagnostic but allow the change if msvc extensions are enabled
3198 if (!LangOpts.MicrosoftExt)
3199 return;
3200 }
3201
3202 // Finally, if this identifier already had a macro defined for it, verify that
3203 // the macro bodies are identical, and issue diagnostics if they are not.
3204 if (const MacroInfo *OtherMI=getMacroInfo(II: MacroNameTok.getIdentifierInfo())) {
3205 // Final macros are hard-mode: they always warn. Even if the bodies are
3206 // identical. Even if they are in system headers. Even if they are things we
3207 // would silently allow in the past.
3208 if (MacroNameTok.getIdentifierInfo()->isFinal())
3209 emitFinalMacroWarning(Identifier: MacroNameTok, /*IsUndef=*/false);
3210
3211 // In Objective-C, ignore attempts to directly redefine the builtin
3212 // definitions of the ownership qualifiers. It's still possible to
3213 // #undef them.
3214 if (getLangOpts().ObjC &&
3215 SourceMgr.getFileID(SpellingLoc: OtherMI->getDefinitionLoc()) ==
3216 getPredefinesFileID() &&
3217 isObjCProtectedMacro(II: MacroNameTok.getIdentifierInfo())) {
3218 // Warn if it changes the tokens.
3219 if ((!getDiagnostics().getSuppressSystemWarnings() ||
3220 !SourceMgr.isInSystemHeader(Loc: DefineTok.getLocation())) &&
3221 !MI->isIdenticalTo(Other: *OtherMI, PP&: *this,
3222 /*Syntactic=*/Syntactically: LangOpts.MicrosoftExt)) {
3223 Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::warn_pp_objc_macro_redef_ignored);
3224 }
3225 assert(!OtherMI->isWarnIfUnused());
3226 return;
3227 }
3228
3229 // It is very common for system headers to have tons of macro redefinitions
3230 // and for warnings to be disabled in system headers. If this is the case,
3231 // then don't bother calling MacroInfo::isIdenticalTo.
3232 if (!getDiagnostics().getSuppressSystemWarnings() ||
3233 !SourceMgr.isInSystemHeader(Loc: DefineTok.getLocation())) {
3234
3235 if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
3236 Diag(Loc: OtherMI->getDefinitionLoc(), DiagID: diag::pp_macro_not_used);
3237
3238 // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
3239 // C++ [cpp.predefined]p4, but allow it as an extension.
3240 if (isLanguageDefinedBuiltin(SourceMgr, MI: OtherMI, MacroName: II->getName()))
3241 Diag(Tok: MacroNameTok, DiagID: diag::ext_pp_redef_builtin_macro);
3242 // Macros must be identical. This means all tokens and whitespace
3243 // separation must be the same. C99 6.10.3p2.
3244 else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
3245 !MI->isIdenticalTo(Other: *OtherMI, PP&: *this, /*Syntactic=*/Syntactically: LangOpts.MicrosoftExt)) {
3246 Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::ext_pp_macro_redef)
3247 << MacroNameTok.getIdentifierInfo();
3248 Diag(Loc: OtherMI->getDefinitionLoc(), DiagID: diag::note_previous_definition);
3249 }
3250 }
3251 if (OtherMI->isWarnIfUnused())
3252 WarnUnusedMacroLocs.erase(V: OtherMI->getDefinitionLoc());
3253 }
3254
3255 DefMacroDirective *MD =
3256 appendDefMacroDirective(II: MacroNameTok.getIdentifierInfo(), MI);
3257
3258 assert(!MI->isUsed());
3259 // If we need warning for not using the macro, add its location in the
3260 // warn-because-unused-macro set. If it gets used it will be removed from set.
3261 if (getSourceManager().isInMainFile(Loc: MI->getDefinitionLoc()) &&
3262 !Diags->isIgnored(DiagID: diag::pp_macro_not_used, Loc: MI->getDefinitionLoc()) &&
3263 !MacroExpansionInDirectivesOverride &&
3264 getSourceManager().getFileID(SpellingLoc: MI->getDefinitionLoc()) !=
3265 getPredefinesFileID()) {
3266 MI->setIsWarnIfUnused(true);
3267 WarnUnusedMacroLocs.insert(V: MI->getDefinitionLoc());
3268 }
3269
3270 // If the callbacks want to know, tell them about the macro definition.
3271 if (Callbacks)
3272 Callbacks->MacroDefined(MacroNameTok, MD);
3273
3274 // If we're in MS compatibility mode and the macro being defined is the
3275 // assert macro, implicitly add a macro definition for static_assert to work
3276 // around their broken assert.h header file in C. Only do so if there isn't
3277 // already a static_assert macro defined.
3278 if (!getLangOpts().CPlusPlus && getLangOpts().MSVCCompat &&
3279 MacroNameTok.getIdentifierInfo()->isStr(Str: "assert") &&
3280 !isMacroDefined(Id: "static_assert")) {
3281 MacroInfo *MI = AllocateMacroInfo(L: SourceLocation());
3282
3283 Token Tok;
3284 Tok.startToken();
3285 Tok.setKind(tok::kw__Static_assert);
3286 Tok.setIdentifierInfo(getIdentifierInfo(Name: "_Static_assert"));
3287 MI->setTokens(Tokens: {Tok}, PPAllocator&: BP);
3288 (void)appendDefMacroDirective(II: getIdentifierInfo(Name: "static_assert"), MI);
3289 }
3290}
3291
3292/// HandleUndefDirective - Implements \#undef.
3293///
3294void Preprocessor::HandleUndefDirective() {
3295 ++NumUndefined;
3296
3297 Token MacroNameTok;
3298 ReadMacroName(MacroNameTok, isDefineUndef: MU_Undef);
3299
3300 // Error reading macro name? If so, diagnostic already issued.
3301 if (MacroNameTok.is(K: tok::eod))
3302 return;
3303
3304 // Check to see if this is the last token on the #undef line.
3305 CheckEndOfDirective(DirType: "undef");
3306
3307 // Okay, we have a valid identifier to undef.
3308 auto *II = MacroNameTok.getIdentifierInfo();
3309 auto MD = getMacroDefinition(II);
3310 UndefMacroDirective *Undef = nullptr;
3311
3312 if (II->isFinal())
3313 emitFinalMacroWarning(Identifier: MacroNameTok, /*IsUndef=*/true);
3314
3315 // If the macro is not defined, this is a noop undef.
3316 if (const MacroInfo *MI = MD.getMacroInfo()) {
3317 if (!MI->isUsed() && MI->isWarnIfUnused())
3318 Diag(Loc: MI->getDefinitionLoc(), DiagID: diag::pp_macro_not_used);
3319
3320 // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4 and
3321 // C++ [cpp.predefined]p4, but allow it as an extension.
3322 if (isLanguageDefinedBuiltin(SourceMgr, MI, MacroName: II->getName()))
3323 Diag(Tok: MacroNameTok, DiagID: diag::ext_pp_undef_builtin_macro);
3324
3325 if (MI->isWarnIfUnused())
3326 WarnUnusedMacroLocs.erase(V: MI->getDefinitionLoc());
3327
3328 Undef = AllocateUndefMacroDirective(UndefLoc: MacroNameTok.getLocation());
3329 }
3330
3331 // If the callbacks want to know, tell them about the macro #undef.
3332 // Note: no matter if the macro was defined or not.
3333 if (Callbacks)
3334 Callbacks->MacroUndefined(MacroNameTok, MD, Undef);
3335
3336 if (Undef)
3337 appendMacroDirective(II, MD: Undef);
3338}
3339
3340//===----------------------------------------------------------------------===//
3341// Preprocessor Conditional Directive Handling.
3342//===----------------------------------------------------------------------===//
3343
3344/// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive. isIfndef
3345/// is true when this is a \#ifndef directive. ReadAnyTokensBeforeDirective is
3346/// true if any tokens have been returned or pp-directives activated before this
3347/// \#ifndef has been lexed.
3348///
3349void Preprocessor::HandleIfdefDirective(Token &Result,
3350 const Token &HashToken,
3351 bool isIfndef,
3352 bool ReadAnyTokensBeforeDirective) {
3353 ++NumIf;
3354 Token DirectiveTok = Result;
3355
3356 Token MacroNameTok;
3357 ReadMacroName(MacroNameTok);
3358
3359 // Error reading macro name? If so, diagnostic already issued.
3360 if (MacroNameTok.is(K: tok::eod)) {
3361 // Skip code until we get to #endif. This helps with recovery by not
3362 // emitting an error when the #endif is reached.
3363 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(),
3364 IfTokenLoc: DirectiveTok.getLocation(),
3365 /*Foundnonskip*/ FoundNonSkipPortion: false, /*FoundElse*/ false);
3366 return;
3367 }
3368
3369 emitMacroExpansionWarnings(Identifier: MacroNameTok, /*IsIfnDef=*/true);
3370
3371 // Check to see if this is the last token on the #if[n]def line.
3372 CheckEndOfDirective(DirType: isIfndef ? "ifndef" : "ifdef");
3373
3374 IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
3375 auto MD = getMacroDefinition(II: MII);
3376 MacroInfo *MI = MD.getMacroInfo();
3377
3378 if (CurPPLexer->getConditionalStackDepth() == 0) {
3379 // If the start of a top-level #ifdef and if the macro is not defined,
3380 // inform MIOpt that this might be the start of a proper include guard.
3381 // Otherwise it is some other form of unknown conditional which we can't
3382 // handle.
3383 if (!ReadAnyTokensBeforeDirective && !MI) {
3384 assert(isIfndef && "#ifdef shouldn't reach here");
3385 CurPPLexer->MIOpt.EnterTopLevelIfndef(M: MII, Loc: MacroNameTok.getLocation());
3386 } else
3387 CurPPLexer->MIOpt.EnterTopLevelConditional();
3388 }
3389
3390 // If there is a macro, process it.
3391 if (MI) // Mark it used.
3392 markMacroAsUsed(MI);
3393
3394 if (Callbacks) {
3395 if (isIfndef)
3396 Callbacks->Ifndef(Loc: DirectiveTok.getLocation(), MacroNameTok, MD);
3397 else
3398 Callbacks->Ifdef(Loc: DirectiveTok.getLocation(), MacroNameTok, MD);
3399 }
3400
3401 bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3402 getSourceManager().isInMainFile(Loc: DirectiveTok.getLocation());
3403
3404 // Should we include the stuff contained by this directive?
3405 if (PPOpts->SingleFileParseMode && !MI) {
3406 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3407 // the directive blocks.
3408 CurPPLexer->pushConditionalLevel(DirectiveStart: DirectiveTok.getLocation(),
3409 /*wasskip*/WasSkipping: false, /*foundnonskip*/FoundNonSkip: false,
3410 /*foundelse*/FoundElse: false);
3411 } else if (!MI == isIfndef || RetainExcludedCB) {
3412 // Yes, remember that we are inside a conditional, then lex the next token.
3413 CurPPLexer->pushConditionalLevel(DirectiveStart: DirectiveTok.getLocation(),
3414 /*wasskip*/WasSkipping: false, /*foundnonskip*/FoundNonSkip: true,
3415 /*foundelse*/FoundElse: false);
3416 } else {
3417 // No, skip the contents of this block.
3418 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(),
3419 IfTokenLoc: DirectiveTok.getLocation(),
3420 /*Foundnonskip*/ FoundNonSkipPortion: false,
3421 /*FoundElse*/ false);
3422 }
3423}
3424
3425/// HandleIfDirective - Implements the \#if directive.
3426///
3427void Preprocessor::HandleIfDirective(Token &IfToken,
3428 const Token &HashToken,
3429 bool ReadAnyTokensBeforeDirective) {
3430 ++NumIf;
3431
3432 // Parse and evaluate the conditional expression.
3433 IdentifierInfo *IfNDefMacro = nullptr;
3434 const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
3435 const bool ConditionalTrue = DER.Conditional;
3436 // Lexer might become invalid if we hit code completion point while evaluating
3437 // expression.
3438 if (!CurPPLexer)
3439 return;
3440
3441 // If this condition is equivalent to #ifndef X, and if this is the first
3442 // directive seen, handle it for the multiple-include optimization.
3443 if (CurPPLexer->getConditionalStackDepth() == 0) {
3444 if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)
3445 // FIXME: Pass in the location of the macro name, not the 'if' token.
3446 CurPPLexer->MIOpt.EnterTopLevelIfndef(M: IfNDefMacro, Loc: IfToken.getLocation());
3447 else
3448 CurPPLexer->MIOpt.EnterTopLevelConditional();
3449 }
3450
3451 if (Callbacks)
3452 Callbacks->If(
3453 Loc: IfToken.getLocation(), ConditionRange: DER.ExprRange,
3454 ConditionValue: (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));
3455
3456 bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3457 getSourceManager().isInMainFile(Loc: IfToken.getLocation());
3458
3459 // Should we include the stuff contained by this directive?
3460 if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) {
3461 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3462 // the directive blocks.
3463 CurPPLexer->pushConditionalLevel(DirectiveStart: IfToken.getLocation(), /*wasskip*/WasSkipping: false,
3464 /*foundnonskip*/FoundNonSkip: false, /*foundelse*/FoundElse: false);
3465 } else if (ConditionalTrue || RetainExcludedCB) {
3466 // Yes, remember that we are inside a conditional, then lex the next token.
3467 CurPPLexer->pushConditionalLevel(DirectiveStart: IfToken.getLocation(), /*wasskip*/WasSkipping: false,
3468 /*foundnonskip*/FoundNonSkip: true, /*foundelse*/FoundElse: false);
3469 } else {
3470 // No, skip the contents of this block.
3471 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(), IfTokenLoc: IfToken.getLocation(),
3472 /*Foundnonskip*/ FoundNonSkipPortion: false,
3473 /*FoundElse*/ false);
3474 }
3475}
3476
3477/// HandleEndifDirective - Implements the \#endif directive.
3478///
3479void Preprocessor::HandleEndifDirective(Token &EndifToken) {
3480 ++NumEndif;
3481
3482 // Check that this is the whole directive.
3483 CheckEndOfDirective(DirType: "endif");
3484
3485 PPConditionalInfo CondInfo;
3486 if (CurPPLexer->popConditionalLevel(CI&: CondInfo)) {
3487 // No conditionals on the stack: this is an #endif without an #if.
3488 Diag(Tok: EndifToken, DiagID: diag::err_pp_endif_without_if);
3489 return;
3490 }
3491
3492 // If this the end of a top-level #endif, inform MIOpt.
3493 if (CurPPLexer->getConditionalStackDepth() == 0)
3494 CurPPLexer->MIOpt.ExitTopLevelConditional();
3495
3496 assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
3497 "This code should only be reachable in the non-skipping case!");
3498
3499 if (Callbacks)
3500 Callbacks->Endif(Loc: EndifToken.getLocation(), IfLoc: CondInfo.IfLoc);
3501}
3502
3503/// HandleElseDirective - Implements the \#else directive.
3504///
3505void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) {
3506 ++NumElse;
3507
3508 // #else directive in a non-skipping conditional... start skipping.
3509 CheckEndOfDirective(DirType: "else");
3510
3511 PPConditionalInfo CI;
3512 if (CurPPLexer->popConditionalLevel(CI)) {
3513 Diag(Tok: Result, DiagID: diag::pp_err_else_without_if);
3514 return;
3515 }
3516
3517 // If this is a top-level #else, inform the MIOpt.
3518 if (CurPPLexer->getConditionalStackDepth() == 0)
3519 CurPPLexer->MIOpt.EnterTopLevelConditional();
3520
3521 // If this is a #else with a #else before it, report the error.
3522 if (CI.FoundElse) Diag(Tok: Result, DiagID: diag::pp_err_else_after_else);
3523
3524 if (Callbacks)
3525 Callbacks->Else(Loc: Result.getLocation(), IfLoc: CI.IfLoc);
3526
3527 bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3528 getSourceManager().isInMainFile(Loc: Result.getLocation());
3529
3530 if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3531 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3532 // the directive blocks.
3533 CurPPLexer->pushConditionalLevel(DirectiveStart: CI.IfLoc, /*wasskip*/WasSkipping: false,
3534 /*foundnonskip*/FoundNonSkip: false, /*foundelse*/FoundElse: true);
3535 return;
3536 }
3537
3538 // Finally, skip the rest of the contents of this block.
3539 SkipExcludedConditionalBlock(HashTokenLoc: HashToken.getLocation(), IfTokenLoc: CI.IfLoc,
3540 /*Foundnonskip*/ FoundNonSkipPortion: true,
3541 /*FoundElse*/ true, ElseLoc: Result.getLocation());
3542}
3543
3544/// Implements the \#elif, \#elifdef, and \#elifndef directives.
3545void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
3546 const Token &HashToken,
3547 tok::PPKeywordKind Kind) {
3548 PPElifDiag DirKind = Kind == tok::pp_elif ? PED_Elif
3549 : Kind == tok::pp_elifdef ? PED_Elifdef
3550 : PED_Elifndef;
3551 ++NumElse;
3552
3553 // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode.
3554 switch (DirKind) {
3555 case PED_Elifdef:
3556 case PED_Elifndef:
3557 unsigned DiagID;
3558 if (LangOpts.CPlusPlus)
3559 DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
3560 : diag::ext_cxx23_pp_directive;
3561 else
3562 DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
3563 : diag::ext_c23_pp_directive;
3564 Diag(Tok: ElifToken, DiagID) << DirKind;
3565 break;
3566 default:
3567 break;
3568 }
3569
3570 // #elif directive in a non-skipping conditional... start skipping.
3571 // We don't care what the condition is, because we will always skip it (since
3572 // the block immediately before it was included).
3573 SourceRange ConditionRange = DiscardUntilEndOfDirective();
3574
3575 PPConditionalInfo CI;
3576 if (CurPPLexer->popConditionalLevel(CI)) {
3577 Diag(Tok: ElifToken, DiagID: diag::pp_err_elif_without_if) << DirKind;
3578 return;
3579 }
3580
3581 // If this is a top-level #elif, inform the MIOpt.
3582 if (CurPPLexer->getConditionalStackDepth() == 0)
3583 CurPPLexer->MIOpt.EnterTopLevelConditional();
3584
3585 // If this is a #elif with a #else before it, report the error.
3586 if (CI.FoundElse)
3587 Diag(Tok: ElifToken, DiagID: diag::pp_err_elif_after_else) << DirKind;
3588
3589 if (Callbacks) {
3590 switch (Kind) {
3591 case tok::pp_elif:
3592 Callbacks->Elif(Loc: ElifToken.getLocation(), ConditionRange,
3593 ConditionValue: PPCallbacks::CVK_NotEvaluated, IfLoc: CI.IfLoc);
3594 break;
3595 case tok::pp_elifdef:
3596 Callbacks->Elifdef(Loc: ElifToken.getLocation(), ConditionRange, IfLoc: CI.IfLoc);
3597 break;
3598 case tok::pp_elifndef:
3599 Callbacks->Elifndef(Loc: ElifToken.getLocation(), ConditionRange, IfLoc: CI.IfLoc);
3600 break;
3601 default:
3602 assert(false && "unexpected directive kind");
3603 break;
3604 }
3605 }
3606
3607 bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3608 getSourceManager().isInMainFile(Loc: ElifToken.getLocation());
3609
3610 if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3611 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3612 // the directive blocks.
3613 CurPPLexer->pushConditionalLevel(DirectiveStart: ElifToken.getLocation(), /*wasskip*/WasSkipping: false,
3614 /*foundnonskip*/FoundNonSkip: false, /*foundelse*/FoundElse: false);
3615 return;
3616 }
3617
3618 // Finally, skip the rest of the contents of this block.
3619 SkipExcludedConditionalBlock(
3620 HashTokenLoc: HashToken.getLocation(), IfTokenLoc: CI.IfLoc, /*Foundnonskip*/ FoundNonSkipPortion: true,
3621 /*FoundElse*/ CI.FoundElse, ElseLoc: ElifToken.getLocation());
3622}
3623
3624std::optional<LexEmbedParametersResult>
3625Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
3626 LexEmbedParametersResult Result{};
3627 SmallVector<Token, 2> ParameterTokens;
3628 tok::TokenKind EndTokenKind = ForHasEmbed ? tok::r_paren : tok::eod;
3629
3630 auto DiagMismatchedBracesAndSkipToEOD =
3631 [&](tok::TokenKind Expected,
3632 std::pair<tok::TokenKind, SourceLocation> Matches) {
3633 Diag(Tok: CurTok, DiagID: diag::err_expected) << Expected;
3634 Diag(Loc: Matches.second, DiagID: diag::note_matching) << Matches.first;
3635 if (CurTok.isNot(K: tok::eod))
3636 DiscardUntilEndOfDirective(Tmp&: CurTok);
3637 };
3638
3639 auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) {
3640 if (CurTok.isNot(K: Kind)) {
3641 Diag(Tok: CurTok, DiagID: diag::err_expected) << Kind;
3642 if (CurTok.isNot(K: tok::eod))
3643 DiscardUntilEndOfDirective(Tmp&: CurTok);
3644 return false;
3645 }
3646 return true;
3647 };
3648
3649 // C23 6.10:
3650 // pp-parameter-name:
3651 // pp-standard-parameter
3652 // pp-prefixed-parameter
3653 //
3654 // pp-standard-parameter:
3655 // identifier
3656 //
3657 // pp-prefixed-parameter:
3658 // identifier :: identifier
3659 auto LexPPParameterName = [&]() -> std::optional<std::string> {
3660 // We expect the current token to be an identifier; if it's not, things
3661 // have gone wrong.
3662 if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3663 return std::nullopt;
3664
3665 const IdentifierInfo *Prefix = CurTok.getIdentifierInfo();
3666
3667 // Lex another token; it is either a :: or we're done with the parameter
3668 // name.
3669 LexNonComment(Result&: CurTok);
3670 if (CurTok.is(K: tok::coloncolon)) {
3671 // We found a ::, so lex another identifier token.
3672 LexNonComment(Result&: CurTok);
3673 if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3674 return std::nullopt;
3675
3676 const IdentifierInfo *Suffix = CurTok.getIdentifierInfo();
3677
3678 // Lex another token so we're past the name.
3679 LexNonComment(Result&: CurTok);
3680 return (llvm::Twine(Prefix->getName()) + "::" + Suffix->getName()).str();
3681 }
3682 return Prefix->getName().str();
3683 };
3684
3685 // C23 6.10p5: In all aspects, a preprocessor standard parameter specified by
3686 // this document as an identifier pp_param and an identifier of the form
3687 // __pp_param__ shall behave the same when used as a preprocessor parameter,
3688 // except for the spelling.
3689 auto NormalizeParameterName = [](StringRef Name) {
3690 if (Name.size() > 4 && Name.starts_with(Prefix: "__") && Name.ends_with(Suffix: "__"))
3691 return Name.substr(Start: 2, N: Name.size() - 4);
3692 return Name;
3693 };
3694
3695 auto LexParenthesizedIntegerExpr = [&]() -> std::optional<size_t> {
3696 // we have a limit parameter and its internals are processed using
3697 // evaluation rules from #if.
3698 if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3699 return std::nullopt;
3700
3701 // We do not consume the ( because EvaluateDirectiveExpression will lex
3702 // the next token for us.
3703 IdentifierInfo *ParameterIfNDef = nullptr;
3704 bool EvaluatedDefined;
3705 DirectiveEvalResult LimitEvalResult = EvaluateDirectiveExpression(
3706 IfNDefMacro&: ParameterIfNDef, Tok&: CurTok, EvaluatedDefined, /*CheckForEOD=*/CheckForEoD: false);
3707
3708 if (!LimitEvalResult.Value) {
3709 // If there was an error evaluating the directive expression, we expect
3710 // to be at the end of directive token.
3711 assert(CurTok.is(tok::eod) && "expect to be at the end of directive");
3712 return std::nullopt;
3713 }
3714
3715 if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3716 return std::nullopt;
3717
3718 // Eat the ).
3719 LexNonComment(Result&: CurTok);
3720
3721 // C23 6.10.3.2p2: The token defined shall not appear within the constant
3722 // expression.
3723 if (EvaluatedDefined) {
3724 Diag(Tok: CurTok, DiagID: diag::err_defined_in_pp_embed);
3725 return std::nullopt;
3726 }
3727
3728 if (LimitEvalResult.Value) {
3729 const llvm::APSInt &Result = *LimitEvalResult.Value;
3730 if (Result.isNegative()) {
3731 Diag(Tok: CurTok, DiagID: diag::err_requires_positive_value)
3732 << toString(I: Result, Radix: 10) << /*positive*/ 0;
3733 return std::nullopt;
3734 }
3735 return Result.getLimitedValue();
3736 }
3737 return std::nullopt;
3738 };
3739
3740 auto GetMatchingCloseBracket = [](tok::TokenKind Kind) {
3741 switch (Kind) {
3742 case tok::l_paren:
3743 return tok::r_paren;
3744 case tok::l_brace:
3745 return tok::r_brace;
3746 case tok::l_square:
3747 return tok::r_square;
3748 default:
3749 llvm_unreachable("should not get here");
3750 }
3751 };
3752
3753 auto LexParenthesizedBalancedTokenSoup =
3754 [&](llvm::SmallVectorImpl<Token> &Tokens) {
3755 std::vector<std::pair<tok::TokenKind, SourceLocation>> BracketStack;
3756
3757 // We expect the current token to be a left paren.
3758 if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3759 return false;
3760 LexNonComment(Result&: CurTok); // Eat the (
3761
3762 bool WaitingForInnerCloseParen = false;
3763 while (CurTok.isNot(K: tok::eod) &&
3764 (WaitingForInnerCloseParen || CurTok.isNot(K: tok::r_paren))) {
3765 switch (CurTok.getKind()) {
3766 default: // Shutting up diagnostics about not fully-covered switch.
3767 break;
3768 case tok::l_paren:
3769 WaitingForInnerCloseParen = true;
3770 [[fallthrough]];
3771 case tok::l_brace:
3772 case tok::l_square:
3773 BracketStack.push_back(x: {CurTok.getKind(), CurTok.getLocation()});
3774 break;
3775 case tok::r_paren:
3776 WaitingForInnerCloseParen = false;
3777 [[fallthrough]];
3778 case tok::r_brace:
3779 case tok::r_square: {
3780 tok::TokenKind Matching =
3781 GetMatchingCloseBracket(BracketStack.back().first);
3782 if (BracketStack.empty() || CurTok.getKind() != Matching) {
3783 DiagMismatchedBracesAndSkipToEOD(Matching, BracketStack.back());
3784 return false;
3785 }
3786 BracketStack.pop_back();
3787 } break;
3788 }
3789 Tokens.push_back(Elt: CurTok);
3790 LexNonComment(Result&: CurTok);
3791 }
3792
3793 // When we're done, we want to eat the closing paren.
3794 if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3795 return false;
3796
3797 LexNonComment(Result&: CurTok); // Eat the )
3798 return true;
3799 };
3800
3801 LexNonComment(Result&: CurTok); // Prime the pump.
3802 while (!CurTok.isOneOf(K1: EndTokenKind, K2: tok::eod)) {
3803 SourceLocation ParamStartLoc = CurTok.getLocation();
3804 std::optional<std::string> ParamName = LexPPParameterName();
3805 if (!ParamName)
3806 return std::nullopt;
3807 StringRef Parameter = NormalizeParameterName(*ParamName);
3808
3809 // Lex the parameters (dependent on the parameter type we want!).
3810 //
3811 // C23 6.10.3.Xp1: The X standard embed parameter may appear zero times or
3812 // one time in the embed parameter sequence.
3813 if (Parameter == "limit") {
3814 if (Result.MaybeLimitParam)
3815 Diag(Tok: CurTok, DiagID: diag::err_pp_embed_dup_params) << Parameter;
3816
3817 std::optional<size_t> Limit = LexParenthesizedIntegerExpr();
3818 if (!Limit)
3819 return std::nullopt;
3820 Result.MaybeLimitParam =
3821 PPEmbedParameterLimit{*Limit, {ParamStartLoc, CurTok.getLocation()}};
3822 } else if (Parameter == "clang::offset") {
3823 if (Result.MaybeOffsetParam)
3824 Diag(Tok: CurTok, DiagID: diag::err_pp_embed_dup_params) << Parameter;
3825
3826 std::optional<size_t> Offset = LexParenthesizedIntegerExpr();
3827 if (!Offset)
3828 return std::nullopt;
3829 Result.MaybeOffsetParam = PPEmbedParameterOffset{
3830 *Offset, {ParamStartLoc, CurTok.getLocation()}};
3831 } else if (Parameter == "prefix") {
3832 if (Result.MaybePrefixParam)
3833 Diag(Tok: CurTok, DiagID: diag::err_pp_embed_dup_params) << Parameter;
3834
3835 SmallVector<Token, 4> Soup;
3836 if (!LexParenthesizedBalancedTokenSoup(Soup))
3837 return std::nullopt;
3838 Result.MaybePrefixParam = PPEmbedParameterPrefix{
3839 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3840 } else if (Parameter == "suffix") {
3841 if (Result.MaybeSuffixParam)
3842 Diag(Tok: CurTok, DiagID: diag::err_pp_embed_dup_params) << Parameter;
3843
3844 SmallVector<Token, 4> Soup;
3845 if (!LexParenthesizedBalancedTokenSoup(Soup))
3846 return std::nullopt;
3847 Result.MaybeSuffixParam = PPEmbedParameterSuffix{
3848 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3849 } else if (Parameter == "if_empty") {
3850 if (Result.MaybeIfEmptyParam)
3851 Diag(Tok: CurTok, DiagID: diag::err_pp_embed_dup_params) << Parameter;
3852
3853 SmallVector<Token, 4> Soup;
3854 if (!LexParenthesizedBalancedTokenSoup(Soup))
3855 return std::nullopt;
3856 Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{
3857 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3858 } else {
3859 ++Result.UnrecognizedParams;
3860
3861 // If there's a left paren, we need to parse a balanced token sequence
3862 // and just eat those tokens.
3863 if (CurTok.is(K: tok::l_paren)) {
3864 SmallVector<Token, 4> Soup;
3865 if (!LexParenthesizedBalancedTokenSoup(Soup))
3866 return std::nullopt;
3867 }
3868 if (!ForHasEmbed) {
3869 Diag(Tok: CurTok, DiagID: diag::err_pp_unknown_parameter) << 1 << Parameter;
3870 return std::nullopt;
3871 }
3872 }
3873 }
3874 return Result;
3875}
3876
3877void Preprocessor::HandleEmbedDirectiveImpl(
3878 SourceLocation HashLoc, const LexEmbedParametersResult &Params,
3879 StringRef BinaryContents) {
3880 if (BinaryContents.empty()) {
3881 // If we have no binary contents, the only thing we need to emit are the
3882 // if_empty tokens, if any.
3883 // FIXME: this loses AST fidelity; nothing in the compiler will see that
3884 // these tokens came from #embed. We have to hack around this when printing
3885 // preprocessed output. The same is true for prefix and suffix tokens.
3886 if (Params.MaybeIfEmptyParam) {
3887 ArrayRef<Token> Toks = Params.MaybeIfEmptyParam->Tokens;
3888 size_t TokCount = Toks.size();
3889 auto NewToks = std::make_unique<Token[]>(num: TokCount);
3890 llvm::copy(Range&: Toks, Out: NewToks.get());
3891 EnterTokenStream(Toks: std::move(NewToks), NumToks: TokCount, DisableMacroExpansion: true, IsReinject: true);
3892 }
3893 return;
3894 }
3895
3896 size_t NumPrefixToks = Params.PrefixTokenCount(),
3897 NumSuffixToks = Params.SuffixTokenCount();
3898 size_t TotalNumToks = 1 + NumPrefixToks + NumSuffixToks;
3899 size_t CurIdx = 0;
3900 auto Toks = std::make_unique<Token[]>(num: TotalNumToks);
3901
3902 // Add the prefix tokens, if any.
3903 if (Params.MaybePrefixParam) {
3904 llvm::copy(Range: Params.MaybePrefixParam->Tokens, Out: &Toks[CurIdx]);
3905 CurIdx += NumPrefixToks;
3906 }
3907
3908 EmbedAnnotationData *Data = new (BP) EmbedAnnotationData;
3909 Data->BinaryData = BinaryContents;
3910
3911 Toks[CurIdx].startToken();
3912 Toks[CurIdx].setKind(tok::annot_embed);
3913 Toks[CurIdx].setAnnotationRange(HashLoc);
3914 Toks[CurIdx++].setAnnotationValue(Data);
3915
3916 // Now add the suffix tokens, if any.
3917 if (Params.MaybeSuffixParam) {
3918 llvm::copy(Range: Params.MaybeSuffixParam->Tokens, Out: &Toks[CurIdx]);
3919 CurIdx += NumSuffixToks;
3920 }
3921
3922 assert(CurIdx == TotalNumToks && "Calculated the incorrect number of tokens");
3923 EnterTokenStream(Toks: std::move(Toks), NumToks: TotalNumToks, DisableMacroExpansion: true, IsReinject: true);
3924}
3925
3926void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
3927 const FileEntry *LookupFromFile) {
3928 // Give the usual extension/compatibility warnings.
3929 if (LangOpts.C23)
3930 Diag(Tok: EmbedTok, DiagID: diag::warn_compat_pp_embed_directive);
3931 else
3932 Diag(Tok: EmbedTok, DiagID: diag::ext_pp_embed_directive)
3933 << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0);
3934
3935 // Parse the filename header
3936 Token FilenameTok;
3937 if (LexHeaderName(Result&: FilenameTok))
3938 return;
3939
3940 if (FilenameTok.isNot(K: tok::header_name)) {
3941 Diag(Loc: FilenameTok.getLocation(), DiagID: diag::err_pp_expects_filename);
3942 if (FilenameTok.isNot(K: tok::eod))
3943 DiscardUntilEndOfDirective();
3944 return;
3945 }
3946
3947 // Parse the optional sequence of
3948 // directive-parameters:
3949 // identifier parameter-name-list[opt] directive-argument-list[opt]
3950 // directive-argument-list:
3951 // '(' balanced-token-sequence ')'
3952 // parameter-name-list:
3953 // '::' identifier parameter-name-list[opt]
3954 Token CurTok;
3955 std::optional<LexEmbedParametersResult> Params =
3956 LexEmbedParameters(CurTok, /*ForHasEmbed=*/false);
3957
3958 assert((Params || CurTok.is(tok::eod)) &&
3959 "expected success or to be at the end of the directive");
3960 if (!Params)
3961 return;
3962
3963 // Now, splat the data out!
3964 SmallString<128> FilenameBuffer;
3965 StringRef Filename = getSpelling(Tok: FilenameTok, Buffer&: FilenameBuffer);
3966 StringRef OriginalFilename = Filename;
3967 bool isAngled =
3968 GetIncludeFilenameSpelling(Loc: FilenameTok.getLocation(), Buffer&: Filename);
3969 // If GetIncludeFilenameSpelling set the start ptr to null, there was an
3970 // error.
3971 assert(!Filename.empty());
3972 OptionalFileEntryRef MaybeFileRef =
3973 this->LookupEmbedFile(Filename, isAngled, OpenFile: true, LookupFromFile);
3974 if (!MaybeFileRef) {
3975 // could not find file
3976 if (Callbacks && Callbacks->EmbedFileNotFound(FileName: OriginalFilename)) {
3977 return;
3978 }
3979 Diag(Tok: FilenameTok, DiagID: diag::err_pp_file_not_found) << Filename;
3980 return;
3981 }
3982 std::optional<llvm::MemoryBufferRef> MaybeFile =
3983 getSourceManager().getMemoryBufferForFileOrNone(File: *MaybeFileRef);
3984 if (!MaybeFile) {
3985 // could not find file
3986 Diag(Tok: FilenameTok, DiagID: diag::err_cannot_open_file)
3987 << Filename << "a buffer to the contents could not be created";
3988 return;
3989 }
3990 StringRef BinaryContents = MaybeFile->getBuffer();
3991
3992 // The order is important between 'offset' and 'limit'; we want to offset
3993 // first and then limit second; otherwise we may reduce the notional resource
3994 // size to something too small to offset into.
3995 if (Params->MaybeOffsetParam) {
3996 // FIXME: just like with the limit() and if_empty() parameters, this loses
3997 // source fidelity in the AST; it has no idea that there was an offset
3998 // involved.
3999 // offsets all the way to the end of the file make for an empty file.
4000 BinaryContents = BinaryContents.substr(Start: Params->MaybeOffsetParam->Offset);
4001 }
4002
4003 if (Params->MaybeLimitParam) {
4004 // FIXME: just like with the clang::offset() and if_empty() parameters,
4005 // this loses source fidelity in the AST; it has no idea there was a limit
4006 // involved.
4007 BinaryContents = BinaryContents.substr(Start: 0, N: Params->MaybeLimitParam->Limit);
4008 }
4009
4010 if (Callbacks)
4011 Callbacks->EmbedDirective(HashLoc, FileName: Filename, IsAngled: isAngled, File: MaybeFileRef,
4012 Params: *Params);
4013 HandleEmbedDirectiveImpl(HashLoc, Params: *Params, BinaryContents);
4014}
4015