TokenLexer.cpp source code [llvm_projects/clang/lib/Lex/TokenLexer.cpp]

1	//===- TokenLexer.cpp - Lex from a token stream ---------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the TokenLexer interface.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "clang/Lex/TokenLexer.h"
14	#include "clang/Basic/Diagnostic.h"
15	#include "clang/Basic/IdentifierTable.h"
16	#include "clang/Basic/LangOptions.h"
17	#include "clang/Basic/SourceLocation.h"
18	#include "clang/Basic/SourceManager.h"
19	#include "clang/Basic/TokenKinds.h"
20	#include "clang/Lex/LexDiagnostic.h"
21	#include "clang/Lex/Lexer.h"
22	#include "clang/Lex/MacroArgs.h"
23	#include "clang/Lex/MacroInfo.h"
24	#include "clang/Lex/Preprocessor.h"
25	#include "clang/Lex/Token.h"
26	#include "clang/Lex/VariadicMacroSupport.h"
27	#include "llvm/ADT/ArrayRef.h"
28	#include "llvm/ADT/STLExtras.h"
29	#include "llvm/ADT/SmallVector.h"
30	#include "llvm/ADT/iterator_range.h"
31	#include <cassert>
32	#include <cstring>
33	#include <optional>
34
35	using namespace clang;
36
37	/// Create a TokenLexer for the specified macro with the specified actual
38	/// arguments. Note that this ctor takes ownership of the ActualArgs pointer.
39	void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI,
40	MacroArgs *Actuals) {
41	// If the client is reusing a TokenLexer, make sure to free any memory
42	// associated with it.
43	destroy();
44
45	Macro = MI;
46	ActualArgs = Actuals;
47	CurTokenIdx = `0`;
48
49	ExpandLocStart = Tok.getLocation();
50	ExpandLocEnd = ELEnd;
51	AtStartOfLine = Tok.isAtStartOfLine();
52	HasLeadingSpace = Tok.hasLeadingSpace();
53	NextTokGetsSpace = false;
54	Tokens = &*Macro->tokens_begin();
55	OwnsTokens = false;
56	DisableMacroExpansion = false;
57	IsReinject = false;
58	NumTokens = Macro->tokens_end()-Macro->tokens_begin();
59	MacroExpansionStart = SourceLocation ();
60	LexingCXXModuleDirective = false;
61
62	SourceManager &SM = PP.getSourceManager();
63	MacroStartSLocOffset = SM.getNextLocalOffset();
64
65	if (NumTokens > `0`) {
66	assert(Tokens[`0`].getLocation().isValid());
67	assert((Tokens[`0`].getLocation().isFileID() \|\| Tokens[`0`].is(tok::comment)) &&
68	"Macro defined in macro?");
69	assert(ExpandLocStart.isValid());
70
71	// Reserve a source location entry chunk for the length of the macro
72	// definition. Tokens that get lexed directly from the definition will
73	// have their locations pointing inside this chunk. This is to avoid
74	// creating separate source location entries for each token.
75	MacroDefStart = SM.getExpansionLoc(Loc: Tokens[`0`].getLocation());
76	MacroDefLength = Macro->getDefinitionLength(SM);
77	MacroExpansionStart = SM.createExpansionLoc(SpellingLoc: MacroDefStart,
78	ExpansionLocStart: ExpandLocStart,
79	ExpansionLocEnd: ExpandLocEnd,
80	Length: MacroDefLength);
81	}
82
83	// If this is a function-like macro, expand the arguments and change
84	// Tokens to point to the expanded tokens.
85	if (Macro->isFunctionLike() && Macro->getNumParams())
86	ExpandFunctionArguments();
87
88	// Mark the macro as currently disabled, so that it is not recursively
89	// expanded. The macro must be disabled only after argument pre-expansion of
90	// function-like macro arguments occurs.
91	Macro->DisableMacro();
92	}
93
94	/// Create a TokenLexer for the specified token stream. This does not
95	/// take ownership of the specified token vector.
96	void TokenLexer::Init(const Token TokArray, unsigned* NumToks,
97	bool disableMacroExpansion, bool ownsTokens,
98	bool isReinject) {
99	assert(!isReinject \|\| disableMacroExpansion);
100	// If the client is reusing a TokenLexer, make sure to free any memory
101	// associated with it.
102	destroy();
103
104	Macro = nullptr;
105	ActualArgs = nullptr;
106	Tokens = TokArray;
107	OwnsTokens = ownsTokens;
108	DisableMacroExpansion = disableMacroExpansion;
109	IsReinject = isReinject;
110	NumTokens = NumToks;
111	CurTokenIdx = `0`;
112	ExpandLocStart = ExpandLocEnd = SourceLocation ();
113	AtStartOfLine = false;
114	HasLeadingSpace = false;
115	NextTokGetsSpace = false;
116	MacroExpansionStart = SourceLocation ();
117	LexingCXXModuleDirective = false;
118
119	// Set HasLeadingSpace/AtStartOfLine so that the first token will be
120	// returned unmodified.
121	if (NumToks != `0`) {
122	AtStartOfLine = TokArray[`0`].isAtStartOfLine();
123	HasLeadingSpace = TokArray[`0`].hasLeadingSpace();
124	}
125	}
126
127	void TokenLexer::destroy() {
128	// If this was a function-like macro that actually uses its arguments, delete
129	// the expanded tokens.
130	if (OwnsTokens) {
131	delete [] Tokens;
132	Tokens = nullptr;
133	OwnsTokens = false;
134	}
135
136	// TokenLexer owns its formal arguments.
137	if (ActualArgs) ActualArgs->destroy(PP);
138	}
139
140	bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
141	SmallVectorImpl<Token> &ResultToks, bool HasPasteOperator, MacroInfo *Macro,
142	unsigned MacroArgNo, Preprocessor &PP) {
143	// Is the macro argument __VA_ARGS__?
144	if (!Macro->isVariadic() \|\| MacroArgNo != Macro->getNumParams()-`1`)
145	return false;
146
147	// In Microsoft-compatibility mode, a comma is removed in the expansion
148	// of " ... , __VA_ARGS__ " if __VA_ARGS__ is empty. This extension is
149	// not supported by gcc.
150	if (!HasPasteOperator && !PP.getLangOpts().MSVCCompat)
151	return false;
152
153	// GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if
154	// __VA_ARGS__ is empty, but not in strict C99 mode where there are no
155	// named arguments, where it remains. In all other modes, including C99
156	// with GNU extensions, it is removed regardless of named arguments.
157	// Microsoft also appears to support this extension, unofficially.
158	if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode
159	&& Macro->getNumParams() < `2`)
160	return false;
161
162	// Is a comma available to be removed?
163	if (ResultToks.empty() \|\| !ResultToks.back().is(K: tok::comma))
164	return false;
165
166	// Issue an extension diagnostic for the paste operator.
167	if (HasPasteOperator)
168	PP.Diag(Loc: ResultToks.back().getLocation(), DiagID: diag::ext_paste_comma);
169
170	// Remove the comma.
171	ResultToks.pop_back();
172
173	if (!ResultToks.empty()) {
174	// If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"),
175	// then removal of the comma should produce a placemarker token (in C99
176	// terms) which we model by popping off the previous ##, giving us a plain
177	// "X" when __VA_ARGS__ is empty.
178	if (ResultToks.back().is(K: tok::hashhash))
179	ResultToks.pop_back();
180
181	// Remember that this comma was elided.
182	ResultToks.back().setFlag(Token::CommaAfterElided);
183	}
184
185	// Never add a space, even if the comma, ##, or arg had a space.
186	NextTokGetsSpace = false;
187	return true;
188	}
189
190	void TokenLexer::stringifyVAOPTContents(
191	SmallVectorImpl<Token> &ResultToks, const VAOptExpansionContext &VCtx,
192	const SourceLocation VAOPTClosingParenLoc) {
193	const int NumToksPriorToVAOpt = VCtx.getNumberOfTokensPriorToVAOpt();
194	const unsigned int NumVAOptTokens = ResultToks.size() - NumToksPriorToVAOpt;
195	Token *const VAOPTTokens =
196	NumVAOptTokens ? &ResultToks [NumToksPriorToVAOpt] : nullptr;
197
198	SmallVector<Token, `64`> ConcatenatedVAOPTResultToks;
199	// FIXME: Should we keep track within VCtx that we did or didnot
200	// encounter pasting - and only then perform this loop.
201
202	// Perform token pasting (concatenation) prior to stringization.
203	for (unsigned int CurTokenIdx = `0`; CurTokenIdx != NumVAOptTokens;
204	++CurTokenIdx) {
205	if (VAOPTTokens[CurTokenIdx].is(K: tok::hashhash)) {
206	assert(CurTokenIdx != `0` &&
207	"Can not have __VAOPT__ contents begin with a ##");
208	Token &LHS = VAOPTTokens[CurTokenIdx - `1`];
209	pasteTokens(LHSTok&: LHS, TokenStream: llvm::ArrayRef(VAOPTTokens, NumVAOptTokens),
210	CurIdx&: CurTokenIdx);
211	// Replace the token prior to the first ## in this iteration.
212	ConcatenatedVAOPTResultToks.back() = LHS;
213	if (CurTokenIdx == NumVAOptTokens)
214	break;
215	}
216	ConcatenatedVAOPTResultToks.push_back(Elt: VAOPTTokens[CurTokenIdx]);
217	}
218
219	ConcatenatedVAOPTResultToks.push_back(Elt: VCtx.getEOFTok());
220	// Get the SourceLocation that represents the start location within
221	// the macro definition that marks where this string is substituted
222	// into: i.e. the __VA_OPT__ and the ')' within the spelling of the
223	// macro definition, and use it to indicate that the stringified token
224	// was generated from that location.
225	const SourceLocation ExpansionLocStartWithinMacro =
226	getExpansionLocForMacroDefLoc(loc: VCtx.getVAOptLoc());
227	const SourceLocation ExpansionLocEndWithinMacro =
228	getExpansionLocForMacroDefLoc(loc: VAOPTClosingParenLoc);
229
230	Token StringifiedVAOPT = MacroArgs::StringifyArgument(
231	ArgToks: &ConcatenatedVAOPTResultToks [`0`], PP, Charify: VCtx.hasCharifyBefore() /Charify/,
232	ExpansionLocStart: ExpansionLocStartWithinMacro, ExpansionLocEnd: ExpansionLocEndWithinMacro);
233
234	if (VCtx.getLeadingSpaceForStringifiedToken())
235	StringifiedVAOPT.setFlag(Token::LeadingSpace);
236
237	StringifiedVAOPT.setFlag(Token::StringifiedInMacro);
238	// Resize (shrink) the token stream to just capture this stringified token.
239	ResultToks.resize(N: NumToksPriorToVAOpt + `1`);
240	ResultToks.back() = StringifiedVAOPT;
241	}
242
243	/// Expand the arguments of a function-like macro so that we can quickly
244	/// return preexpanded tokens from Tokens.
245	void TokenLexer::ExpandFunctionArguments() {
246	SmallVector<Token, `128`> ResultToks;
247
248	// Loop through 'Tokens', expanding them into ResultToks. Keep
249	// track of whether we change anything. If not, no need to keep them. If so,
250	// we install the newly expanded sequence as the new 'Tokens' list.
251	bool MadeChange = false;
252
253	std::optional<bool> CalledWithVariadicArguments;
254
255	VAOptExpansionContext VCtx(PP);
256
257	for (unsigned I = `0`, E = NumTokens; I != E; ++I) {
258	const Token &CurTok = Tokens[I];
259	// We don't want a space for the next token after a paste
260	// operator. In valid code, the token will get smooshed onto the
261	// preceding one anyway. In assembler-with-cpp mode, invalid
262	// pastes are allowed through: in this case, we do not want the
263	// extra whitespace to be added. For example, we want ". ## foo"
264	// -> ".foo" not ". foo".
265	if (I != `0` && !Tokens[I-`1`].is(K: tok::hashhash) && CurTok.hasLeadingSpace())
266	NextTokGetsSpace = true;
267
268	if (VCtx.isVAOptToken(T: CurTok)) {
269	MadeChange = true;
270	assert(Tokens[I + `1`].is(tok::l_paren) &&
271	"__VA_OPT__ must be followed by '('");
272
273	++I; // Skip the l_paren
274	VCtx.sawVAOptFollowedByOpeningParens(VAOptLoc: CurTok.getLocation(),
275	NumPriorTokens: ResultToks.size());
276
277	continue;
278	}
279
280	// We have entered into the __VA_OPT__ context, so handle tokens
281	// appropriately.
282	if (VCtx.isInVAOpt()) {
283	// If we are about to process a token that is either an argument to
284	// __VA_OPT__ or its closing rparen, then:
285	// 1) If the token is the closing rparen that exits us out of __VA_OPT__,
286	// perform any necessary stringification or placemarker processing,
287	// and/or skip to the next token.
288	// 2) else if macro was invoked without variadic arguments skip this
289	// token.
290	// 3) else (macro was invoked with variadic arguments) process the token
291	// normally.
292
293	if (Tokens[I].is(K: tok::l_paren))
294	VCtx.sawOpeningParen(LParenLoc: Tokens[I].getLocation());
295	// Continue skipping tokens within __VA_OPT__ if the macro was not
296	// called with variadic arguments, else let the rest of the loop handle
297	// this token. Note sawClosingParen() returns true only if the r_paren matches
298	// the closing r_paren of the __VA_OPT__.
299	if (!Tokens[I].is(K: tok::r_paren) \|\| !VCtx.sawClosingParen()) {
300	// Lazily expand __VA_ARGS__ when we see the first __VA_OPT__.
301	if (!CalledWithVariadicArguments) {
302	CalledWithVariadicArguments =
303	ActualArgs->invokedWithVariadicArgument(MI: Macro, PP);
304	}
305	if (!*CalledWithVariadicArguments) {
306	// Skip this token.
307	continue;
308	}
309	// ... else the macro was called with variadic arguments, and we do not
310	// have a closing rparen - so process this token normally.
311	} else {
312	// Current token is the closing r_paren which marks the end of the
313	// __VA_OPT__ invocation, so handle any place-marker pasting (if
314	// empty) by removing hashhash either before (if exists) or after. And
315	// also stringify the entire contents if VAOPT was preceded by a hash,
316	// but do so only after any token concatenation that needs to occur
317	// within the contents of VAOPT.
318
319	if (VCtx.hasStringifyOrCharifyBefore()) {
320	// Replace all the tokens just added from within VAOPT into a single
321	// stringified token. This requires token-pasting to eagerly occur
322	// within these tokens. If either the contents of VAOPT were empty
323	// or the macro wasn't called with any variadic arguments, the result
324	// is a token that represents an empty string.
325	stringifyVAOPTContents(ResultToks, VCtx,
326	/ClosingParenLoc/ VAOPTClosingParenLoc: Tokens[I].getLocation());
327
328	} else if (/No tokens within VAOPT/
329	ResultToks.size() == VCtx.getNumberOfTokensPriorToVAOpt()) {
330	// Treat VAOPT as a placemarker token. Eat either the '##' before the
331	// RHS/VAOPT (if one exists, suggesting that the LHS (if any) to that
332	// hashhash was not a placemarker) or the '##'
333	// after VAOPT, but not both.
334
335	if (ResultToks.size() && ResultToks.back().is(K: tok::hashhash)) {
336	ResultToks.pop_back();
337	} else if ((I + `1` != E) && Tokens[I + `1`].is(K: tok::hashhash)) {
338	++I; // Skip the following hashhash.
339	}
340	} else {
341	// If there's a ## before the __VA_OPT__, we might have discovered
342	// that the __VA_OPT__ begins with a placeholder. We delay action on
343	// that to now to avoid messing up our stashed count of tokens before
344	// __VA_OPT__.
345	if (VCtx.beginsWithPlaceholder()) {
346	assert(VCtx.getNumberOfTokensPriorToVAOpt() > `0` &&
347	ResultToks.size() >= VCtx.getNumberOfTokensPriorToVAOpt() &&
348	ResultToks[VCtx.getNumberOfTokensPriorToVAOpt() - `1`].is(
349	tok::hashhash) &&
350	"no token paste before __VA_OPT__");
351	ResultToks.erase(CI: ResultToks.begin() +
352	VCtx.getNumberOfTokensPriorToVAOpt() - `1`);
353	}
354	// If the expansion of __VA_OPT__ ends with a placeholder, eat any
355	// following '##' token.
356	if (VCtx.endsWithPlaceholder() && I + `1` != E &&
357	Tokens[I + `1`].is(K: tok::hashhash)) {
358	++I;
359	}
360	}
361	VCtx.reset();
362	// We processed __VA_OPT__'s closing paren (and the exit out of
363	// __VA_OPT__), so skip to the next token.
364	continue;
365	}
366	}
367
368	// If we found the stringify operator, get the argument stringified. The
369	// preprocessor already verified that the following token is a macro
370	// parameter or __VA_OPT__ when the #define was lexed.
371
372	if (CurTok.isOneOf(Ks: tok::hash, Ks: tok::hashat)) {
373	int ArgNo = Macro->getParameterNum(Arg: Tokens[I+`1`].getIdentifierInfo());
374	assert((ArgNo != -`1` \|\| VCtx.isVAOptToken(Tokens[I + `1`])) &&
375	"Token following # is not an argument or __VA_OPT__!");
376
377	if (ArgNo == -`1`) {
378	// Handle the __VA_OPT__ case.
379	VCtx.sawHashOrHashAtBefore(HasLeadingSpace: NextTokGetsSpace,
380	IsHashAt: CurTok.is(K: tok::hashat));
381	continue;
382	}
383	// Else handle the simple argument case.
384	SourceLocation ExpansionLocStart =
385	getExpansionLocForMacroDefLoc(loc: CurTok.getLocation());
386	SourceLocation ExpansionLocEnd =
387	getExpansionLocForMacroDefLoc(loc: Tokens[I+`1`].getLocation());
388
389	bool Charify = CurTok.is(K: tok::hashat);
390	const Token *UnexpArg = ActualArgs->getUnexpArgument(Arg: ArgNo);
391	Token Res = MacroArgs::StringifyArgument(
392	ArgToks: UnexpArg, PP, Charify, ExpansionLocStart, ExpansionLocEnd);
393	Res.setFlag(Token::StringifiedInMacro);
394
395	// The stringified/charified string leading space flag gets set to match
396	// the #/#@ operator.
397	if (NextTokGetsSpace)
398	Res.setFlag(Token::LeadingSpace);
399
400	ResultToks.push_back(Elt: Res);
401	MadeChange = true;
402	++I; // Skip arg name.
403	NextTokGetsSpace = false;
404	continue;
405	}
406
407	// Find out if there is a paste (##) operator before or after the token.
408	bool NonEmptyPasteBefore =
409	!ResultToks.empty() && ResultToks.back().is(K: tok::hashhash);
410	bool PasteBefore = I != `0` && Tokens[I-`1`].is(K: tok::hashhash);
411	bool PasteAfter = I+`1` != E && Tokens[I+`1`].is(K: tok::hashhash);
412	bool RParenAfter = I+`1` != E && Tokens[I+`1`].is(K: tok::r_paren);
413
414	assert((!NonEmptyPasteBefore \|\| PasteBefore \|\| VCtx.isInVAOpt()) &&
415	"unexpected ## in ResultToks");
416
417	// Otherwise, if this is not an argument token, just add the token to the
418	// output buffer.
419	IdentifierInfo *II = CurTok.getIdentifierInfo();
420	int ArgNo = II ? Macro->getParameterNum(Arg: II) : -`1`;
421	if (ArgNo == -`1`) {
422	// This isn't an argument, just add it.
423	ResultToks.push_back(Elt: CurTok);
424
425	if (NextTokGetsSpace) {
426	ResultToks.back().setFlag(Token::LeadingSpace);
427	NextTokGetsSpace = false;
428	} else if (PasteBefore && !NonEmptyPasteBefore)
429	ResultToks.back().clearFlag(Flag: Token::LeadingSpace);
430
431	continue;
432	}
433
434	// An argument is expanded somehow, the result is different than the
435	// input.
436	MadeChange = true;
437
438	// Otherwise, this is a use of the argument.
439
440	// In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there
441	// are no trailing commas if __VA_ARGS__ is empty.
442	if (!PasteBefore && ActualArgs->isVarargsElidedUse() &&
443	MaybeRemoveCommaBeforeVaArgs(ResultToks,
444	/HasPasteOperator=/false,
445	Macro, MacroArgNo: ArgNo, PP))
446	continue;
447
448	// If it is not the LHS/RHS of a ## operator, we must pre-expand the
449	// argument and substitute the expanded tokens into the result. This is
450	// C99 6.10.3.1p1.
451	if (!PasteBefore && !PasteAfter) {
452	const Token *ResultArgToks;
453
454	// Only preexpand the argument if it could possibly need it. This
455	// avoids some work in common cases.
456	const Token *ArgTok = ActualArgs->getUnexpArgument(Arg: ArgNo);
457	if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
458	ResultArgToks = &ActualArgs->getPreExpArgument(Arg: ArgNo, PP)[`0`];
459	else
460	ResultArgToks = ArgTok; // Use non-preexpanded tokens.
461
462	// If the arg token expanded into anything, append it.
463	if (ResultArgToks->isNot(K: tok::eof)) {
464	size_t FirstResult = ResultToks.size();
465	unsigned NumToks = MacroArgs::getArgLength(ArgPtr: ResultArgToks);
466	ResultToks.append(in_start: ResultArgToks, in_end: ResultArgToks+NumToks);
467
468	// In Microsoft-compatibility mode, we follow MSVC's preprocessing
469	// behavior by not considering single commas from nested macro
470	// expansions as argument separators. Set a flag on the token so we can
471	// test for this later when the macro expansion is processed.
472	if (PP.getLangOpts().MSVCCompat && NumToks == `1` &&
473	ResultToks.back().is(K: tok::comma))
474	ResultToks.back().setFlag(Token::IgnoredComma);
475
476	// If the '##' came from expanding an argument, turn it into 'unknown'
477	// to avoid pasting.
478	for (Token &Tok : llvm::drop_begin(RangeOrContainer&: ResultToks, N: FirstResult))
479	if (Tok.is(K: tok::hashhash))
480	Tok.setKind(tok::unknown);
481
482	if(ExpandLocStart.isValid()) {
483	updateLocForMacroArgTokens(ArgIdSpellLoc: CurTok.getLocation(),
484	begin_tokens: ResultToks.begin()+FirstResult,
485	end_tokens: ResultToks.end());
486	}
487
488	// If any tokens were substituted from the argument, the whitespace
489	// before the first token should match the whitespace of the arg
490	// identifier.
491	ResultToks [FirstResult].setFlagValue(Flag: Token::LeadingSpace,
492	Val: NextTokGetsSpace);
493	ResultToks [FirstResult].setFlagValue(Flag: Token::StartOfLine, Val: false);
494	NextTokGetsSpace = false;
495	} else {
496	// We're creating a placeholder token. Usually this doesn't matter,
497	// but it can affect paste behavior when at the start or end of a
498	// __VA_OPT__.
499	if (NonEmptyPasteBefore) {
500	// We're imagining a placeholder token is inserted here. If this is
501	// the first token in a __VA_OPT__ after a ##, delete the ##.
502	assert(VCtx.isInVAOpt() && "should only happen inside a __VA_OPT__");
503	VCtx.hasPlaceholderAfterHashhashAtStart();
504	} else if (RParenAfter)
505	VCtx.hasPlaceholderBeforeRParen();
506	}
507	continue;
508	}
509
510	// Okay, we have a token that is either the LHS or RHS of a paste (##)
511	// argument. It gets substituted as its non-pre-expanded tokens.
512	const Token *ArgToks = ActualArgs->getUnexpArgument(Arg: ArgNo);
513	unsigned NumToks = MacroArgs::getArgLength(ArgPtr: ArgToks);
514	if (NumToks) { // Not an empty argument?
515	bool VaArgsPseudoPaste = false;
516	// If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
517	// that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when
518	// the expander tries to paste ',' with the first token of the __VA_ARGS__
519	// expansion.
520	if (NonEmptyPasteBefore && ResultToks.size() >= `2` &&
521	ResultToks [ResultToks.size()-`2`].is(K: tok::comma) &&
522	(unsigned)ArgNo == Macro->getNumParams()-`1` &&
523	Macro->isVariadic()) {
524	VaArgsPseudoPaste = true;
525	// Remove the paste operator, report use of the extension.
526	PP.Diag(Loc: ResultToks.pop_back_val().getLocation(), DiagID: diag::ext_paste_comma);
527	}
528
529	ResultToks.append(in_start: ArgToks, in_end: ArgToks+NumToks);
530
531	// If the '##' came from expanding an argument, turn it into 'unknown'
532	// to avoid pasting.
533	for (Token &Tok : llvm::make_range(x: ResultToks.end() - NumToks,
534	y: ResultToks.end())) {
535	if (Tok.is(K: tok::hashhash))
536	Tok.setKind(tok::unknown);
537	}
538
539	if (ExpandLocStart.isValid()) {
540	updateLocForMacroArgTokens(ArgIdSpellLoc: CurTok.getLocation(),
541	begin_tokens: ResultToks.end()-NumToks, end_tokens: ResultToks.end());
542	}
543
544	// Transfer the leading whitespace information from the token
545	// (the macro argument) onto the first token of the
546	// expansion. Note that we don't do this for the GNU
547	// pseudo-paste extension ", ## __VA_ARGS__".
548	if (!VaArgsPseudoPaste) {
549	ResultToks [ResultToks.size() - NumToks].setFlagValue(Flag: Token::StartOfLine,
550	Val: false);
551	ResultToks [ResultToks.size() - NumToks].setFlagValue(
552	Flag: Token::LeadingSpace, Val: NextTokGetsSpace);
553	}
554
555	NextTokGetsSpace = false;
556	continue;
557	}
558
559	// If an empty argument is on the LHS or RHS of a paste, the standard (C99
560	// 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We
561	// implement this by eating ## operators when a LHS or RHS expands to
562	// empty.
563	if (PasteAfter) {
564	// Discard the argument token and skip (don't copy to the expansion
565	// buffer) the paste operator after it.
566	++I;
567	continue;
568	}
569
570	if (RParenAfter && !NonEmptyPasteBefore)
571	VCtx.hasPlaceholderBeforeRParen();
572
573	// If this is on the RHS of a paste operator, we've already copied the
574	// paste operator to the ResultToks list, unless the LHS was empty too.
575	// Remove it.
576	assert(PasteBefore);
577	if (NonEmptyPasteBefore) {
578	assert(ResultToks.back().is(tok::hashhash));
579	// Do not remove the paste operator if it is the one before __VA_OPT__
580	// (and we are still processing tokens within VA_OPT). We handle the case
581	// of removing the paste operator if __VA_OPT__ reduces to the notional
582	// placemarker above when we encounter the closing paren of VA_OPT.
583	if (!VCtx.isInVAOpt() \|\|
584	ResultToks.size() > VCtx.getNumberOfTokensPriorToVAOpt())
585	ResultToks.pop_back();
586	else
587	VCtx.hasPlaceholderAfterHashhashAtStart();
588	}
589
590	// If this is the __VA_ARGS__ token, and if the argument wasn't provided,
591	// and if the macro had at least one real argument, and if the token before
592	// the ## was a comma, remove the comma. This is a GCC extension which is
593	// disabled when using -std=c99.
594	if (ActualArgs->isVarargsElidedUse())
595	MaybeRemoveCommaBeforeVaArgs(ResultToks,
596	/HasPasteOperator=/true,
597	Macro, MacroArgNo: ArgNo, PP);
598	}
599
600	// If anything changed, install this as the new Tokens list.
601	if (MadeChange) {
602	assert(!OwnsTokens && "This would leak if we already own the token list");
603	// This is deleted in the dtor.
604	NumTokens = ResultToks.size();
605	// The tokens will be added to Preprocessor's cache and will be removed
606	// when this TokenLexer finishes lexing them.
607	Tokens = PP.cacheMacroExpandedTokens(tokLexer: this, tokens: ResultToks);
608
609	// The preprocessor cache of macro expanded tokens owns these tokens,not us.
610	OwnsTokens = false;
611	}
612	}
613
614	/// Checks if two tokens form wide string literal.
615	static bool isWideStringLiteralFromMacro(const Token &FirstTok,
616	const Token &SecondTok) {
617	return FirstTok.is(K: tok::identifier) &&
618	FirstTok.getIdentifierInfo()->isStr(Str: "L") && SecondTok.isLiteral() &&
619	SecondTok.stringifiedInMacro();
620	}
621
622	/// Lex - Lex and return a token from this macro stream.
623	bool TokenLexer::Lex(Token &Tok) {
624	// Lexing off the end of the macro, pop this macro off the expansion stack.
625	if (isAtEnd()) {
626	// If this is a macro (not a token stream), mark the macro enabled now
627	// that it is no longer being expanded.
628	if (Macro) Macro->EnableMacro();
629
630	// CWG2947: Allow the following code:
631	//
632	// export module m; int x;
633	// extern "C++" int y = &x;*
634	//
635	// The 'extern' token should has 'StartOfLine' flag when current TokenLexer
636	// exits and propagate line start/leading space info.
637	if (!Macro && isLexingCXXModuleDirective()) {
638	AtStartOfLine = true;
639	setLexingCXXModuleDirective(false);
640	}
641
642	Tok.startToken();
643	Tok.setFlagValue(Flag: Token::StartOfLine , Val: AtStartOfLine);
644	Tok.setFlagValue(Flag: Token::LeadingSpace, Val: HasLeadingSpace \|\| NextTokGetsSpace);
645	if (CurTokenIdx == `0`)
646	Tok.setFlag(Token::LeadingEmptyMacro);
647	return PP.HandleEndOfTokenLexer(Result&: Tok);
648	}
649
650	SourceManager &SM = PP.getSourceManager();
651
652	// If this is the first token of the expanded result, we inherit spacing
653	// properties later.
654	bool isFirstToken = CurTokenIdx == `0`;
655
656	// Get the next token to return.
657	Tok = Tokens[CurTokenIdx++];
658	if (IsReinject)
659	Tok.setFlag(Token::IsReinjected);
660
661	bool TokenIsFromPaste = false;
662
663	// If this token is followed by a token paste (##) operator, paste the tokens!
664	// Note that ## is a normal token when not expanding a macro.
665	if (!isAtEnd() && Macro &&
666	(Tokens[CurTokenIdx].is(K: tok::hashhash) \|\|
667	// Special processing of L#x macros in -fms-compatibility mode.
668	// Microsoft compiler is able to form a wide string literal from
669	// 'L#macro_arg' construct in a function-like macro.
670	(PP.getLangOpts().MSVCCompat &&
671	isWideStringLiteralFromMacro(FirstTok: Tok, SecondTok: Tokens[CurTokenIdx])))) {
672	// When handling the microsoft /##/ extension, the final token is
673	// returned by pasteTokens, not the pasted token.
674	if (pasteTokens(Tok))
675	return true;
676
677	TokenIsFromPaste = true;
678	}
679
680	// The token's current location indicate where the token was lexed from. We
681	// need this information to compute the spelling of the token, but any
682	// diagnostics for the expanded token should appear as if they came from
683	// ExpansionLoc. Pull this information together into a new SourceLocation
684	// that captures all of this.
685	if (ExpandLocStart.isValid() && // Don't do this for token streams.
686	// Check that the token's location was not already set properly.
687	SM.isBeforeInSLocAddrSpace(LHS: Tok.getLocation(), RHS: MacroStartSLocOffset)) {
688	SourceLocation instLoc;
689	if (Tok.is(K: tok::comment)) {
690	instLoc = SM.createExpansionLoc(SpellingLoc: Tok.getLocation(),
691	ExpansionLocStart: ExpandLocStart,
692	ExpansionLocEnd: ExpandLocEnd,
693	Length: Tok.getLength());
694	} else {
695	instLoc = getExpansionLocForMacroDefLoc(loc: Tok.getLocation());
696	}
697
698	Tok.setLocation(instLoc);
699	}
700
701	// If this is the first token, set the lexical properties of the token to
702	// match the lexical properties of the macro identifier.
703	if (isFirstToken) {
704	Tok.setFlagValue(Flag: Token::StartOfLine , Val: AtStartOfLine);
705	Tok.setFlagValue(Flag: Token::LeadingSpace, Val: HasLeadingSpace);
706	} else {
707	// If this is not the first token, we may still need to pass through
708	// leading whitespace if we've expanded a macro.
709	if (AtStartOfLine) Tok.setFlag(Token::StartOfLine);
710	if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace);
711	}
712	AtStartOfLine = false;
713	HasLeadingSpace = false;
714
715	// Handle recursive expansion!
716	if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr &&
717	(!PP.getLangOpts().CPlusPlusModules \|\|
718	!Tok.isModuleContextualKeyword())) {
719	// Change the kind of this identifier to the appropriate token kind, e.g.
720	// turning "for" into a keyword.
721	IdentifierInfo *II = Tok.getIdentifierInfo();
722	Tok.setKind(II->getTokenID());
723
724	// If this identifier was poisoned and from a paste, emit an error. This
725	// won't be handled by Preprocessor::HandleIdentifier because this is coming
726	// from a macro expansion.
727	if (II->isPoisoned() && TokenIsFromPaste) {
728	PP.HandlePoisonedIdentifier(Identifier&: Tok);
729	}
730
731	if (!DisableMacroExpansion && II->isHandleIdentifierCase())
732	return PP.HandleIdentifier(Identifier&: Tok);
733	}
734
735	// Otherwise, return a normal token.
736	return true;
737	}
738
739	bool TokenLexer::pasteTokens(Token &Tok) {
740	return pasteTokens(LHSTok&: Tok, TokenStream: llvm::ArrayRef(Tokens, NumTokens), CurIdx&: CurTokenIdx);
741	}
742
743	/// LHSTok is the LHS of a ## operator, and CurTokenIdx is the ##
744	/// operator. Read the ## and RHS, and paste the LHS/RHS together. If there
745	/// are more ## after it, chomp them iteratively. Return the result as LHSTok.
746	/// If this returns true, the caller should immediately return the token.
747	bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
748	unsigned int &CurIdx) {
749	assert(CurIdx > `0` && "## can not be the first token within tokens");
750	assert((TokenStream[CurIdx].is(tok::hashhash) \|\|
751	(PP.getLangOpts().MSVCCompat &&
752	isWideStringLiteralFromMacro(LHSTok, TokenStream[CurIdx]))) &&
753	"Token at this Index must be ## or part of the MSVC 'L "
754	"#macro-arg' pasting pair");
755
756	// MSVC: If previous token was pasted, this must be a recovery from an invalid
757	// paste operation. Ignore spaces before this token to mimic MSVC output.
758	// Required for generating valid UUID strings in some MS headers.
759	if (PP.getLangOpts().MicrosoftExt && (CurIdx >= `2`) &&
760	TokenStream [CurIdx - `2`].is(K: tok::hashhash))
761	LHSTok.clearFlag(Flag: Token::LeadingSpace);
762
763	SmallString<`128`> Buffer;
764	const char ResultTokStrPtr = nullptr*;
765	SourceLocation StartLoc = LHSTok.getLocation();
766	SourceLocation PasteOpLoc;
767	bool HasUCNs = false;
768
769	auto IsAtEnd = [&TokenStream, &CurIdx] {
770	return TokenStream.size() == CurIdx;
771	};
772
773	do {
774	// Consume the ## operator if any.
775	PasteOpLoc = TokenStream [CurIdx].getLocation();
776	if (TokenStream [CurIdx].is(K: tok::hashhash))
777	++CurIdx;
778	assert(!IsAtEnd() && "No token on the RHS of a paste operator!");
779
780	// Get the RHS token.
781	const Token &RHS = TokenStream [CurIdx];
782
783	// Allocate space for the result token. This is guaranteed to be enough for
784	// the two tokens.
785	Buffer.resize(N: LHSTok.getLength() + RHS.getLength());
786
787	// Get the spelling of the LHS token in Buffer.
788	const char *BufPtr = &Buffer [`0`];
789	bool Invalid = false;
790	unsigned LHSLen = PP.getSpelling(Tok: LHSTok, Buffer&: BufPtr, Invalid: &Invalid);
791	if (BufPtr != &Buffer [`0`]) // Really, we want the chars in Buffer!
792	memcpy(dest: &Buffer [`0`], src: BufPtr, n: LHSLen);
793	if (Invalid)
794	return true;
795
796	BufPtr = Buffer.data() + LHSLen;
797	unsigned RHSLen = PP.getSpelling(Tok: RHS, Buffer&: BufPtr, Invalid: &Invalid);
798	if (Invalid)
799	return true;
800	if (RHSLen && BufPtr != &Buffer [LHSLen])
801	// Really, we want the chars in Buffer!
802	memcpy(dest: &Buffer [LHSLen], src: BufPtr, n: RHSLen);
803
804	// Trim excess space.
805	Buffer.resize(N: LHSLen+RHSLen);
806
807	// Plop the pasted result (including the trailing newline and null) into a
808	// scratch buffer where we can lex it.
809	Token ResultTokTmp;
810	ResultTokTmp.startToken();
811
812	// Claim that the tmp token is a string_literal so that we can get the
813	// character pointer back from CreateString in getLiteralData().
814	ResultTokTmp.setKind(tok::string_literal);
815	PP.CreateString(Str: Buffer, Tok&: ResultTokTmp);
816	SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
817	ResultTokStrPtr = ResultTokTmp.getLiteralData();
818
819	// Lex the resultant pasted token into Result.
820	Token Result;
821
822	if (LHSTok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
823	// Common paste case: identifier+identifier = identifier. Avoid creating
824	// a lexer and other overhead.
825	PP.IncrementPasteCounter(isFast: true);
826	Result.startToken();
827	Result.setKind(tok::raw_identifier);
828	Result.setRawIdentifierData(ResultTokStrPtr);
829	Result.setLocation(ResultTokLoc);
830	Result.setLength(LHSLen+RHSLen);
831	} else {
832	PP.IncrementPasteCounter(isFast: false);
833
834	assert(ResultTokLoc.isFileID() &&
835	"Should be a raw location into scratch buffer");
836	SourceManager &SourceMgr = PP.getSourceManager();
837	FileID LocFileID = SourceMgr.getFileID(SpellingLoc: ResultTokLoc);
838
839	bool Invalid = false;
840	const char *ScratchBufStart
841	= SourceMgr.getBufferData(FID: LocFileID, Invalid: &Invalid).data();
842	if (Invalid)
843	return false;
844
845	// Make a lexer to lex this string from. Lex just this one token.
846	// Make a lexer object so that we lex and expand the paste result.
847	Lexer TL(SourceMgr.getLocForStartOfFile(FID: LocFileID),
848	PP.getLangOpts(), ScratchBufStart,
849	ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen);
850
851	// Lex a token in raw mode. This way it won't look up identifiers
852	// automatically, lexing off the end will return an eof token, and
853	// warnings are disabled. This returns true if the result token is the
854	// entire buffer.
855	bool isInvalid = !TL.LexFromRawLexer(Result);
856
857	// If we got an EOF token, we didn't form even ONE token. For example, we
858	// did "/ ## /" to get "//".
859	isInvalid \|= Result.is(K: tok::eof);
860
861	// If pasting the two tokens didn't form a full new token, this is an
862	// error. This occurs with "x ## +" and other stuff. Return with LHSTok
863	// unmodified and with RHS as the next token to lex.
864	if (isInvalid) {
865	// Explicitly convert the token location to have proper expansion
866	// information so that the user knows where it came from.
867	SourceManager &SM = PP.getSourceManager();
868	SourceLocation Loc =
869	SM.createExpansionLoc(SpellingLoc: PasteOpLoc, ExpansionLocStart: ExpandLocStart, ExpansionLocEnd: ExpandLocEnd, Length: `2`);
870
871	// Test for the Microsoft extension of /##/ turning into // here on the
872	// error path.
873	if (PP.getLangOpts().MicrosoftExt && LHSTok.is(K: tok::slash) &&
874	RHS.is(K: tok::slash)) {
875	HandleMicrosoftCommentPaste(Tok&: LHSTok, OpLoc: Loc);
876	return true;
877	}
878
879	// Do not emit the error when preprocessing assembler code.
880	if (!PP.getLangOpts().AsmPreprocessor) {
881	// If we're in microsoft extensions mode, downgrade this from a hard
882	// error to an extension that defaults to an error. This allows
883	// disabling it.
884	PP.Diag(Loc, DiagID: PP.getLangOpts().MicrosoftExt ? diag::ext_pp_bad_paste_ms
885	: diag::err_pp_bad_paste)
886	<< Buffer;
887	}
888
889	// An error has occurred so exit loop.
890	break;
891	}
892
893	// Turn ## into 'unknown' to avoid # ## # from looking like a paste
894	// operator.
895	if (Result.is(K: tok::hashhash))
896	Result.setKind(tok::unknown);
897	}
898
899	// Transfer properties of the LHS over the Result.
900	Result.setFlagValue(Flag: Token::StartOfLine , Val: LHSTok.isAtStartOfLine());
901	Result.setFlagValue(Flag: Token::LeadingSpace, Val: LHSTok.hasLeadingSpace());
902
903	// Finally, replace LHS with the result, consume the RHS, and iterate.
904	++CurIdx;
905
906	// Set Token::HasUCN flag if LHS or RHS contains any UCNs.
907	HasUCNs = LHSTok.hasUCN() \|\| RHS.hasUCN() \|\| HasUCNs;
908	LHSTok = Result;
909	} while (!IsAtEnd () && TokenStream [CurIdx].is(K: tok::hashhash));
910
911	SourceLocation EndLoc = TokenStream [CurIdx - `1`].getLocation();
912
913	// The token's current location indicate where the token was lexed from. We
914	// need this information to compute the spelling of the token, but any
915	// diagnostics for the expanded token should appear as if the token was
916	// expanded from the full ## expression. Pull this information together into
917	// a new SourceLocation that captures all of this.
918	SourceManager &SM = PP.getSourceManager();
919	if (StartLoc.isFileID())
920	StartLoc = getExpansionLocForMacroDefLoc(loc: StartLoc);
921	if (EndLoc.isFileID())
922	EndLoc = getExpansionLocForMacroDefLoc(loc: EndLoc);
923	FileID MacroFID = SM.getFileID(SpellingLoc: MacroExpansionStart);
924	while (SM.getFileID(SpellingLoc: StartLoc) != MacroFID)
925	StartLoc = SM.getImmediateExpansionRange(Loc: StartLoc).getBegin();
926	while (SM.getFileID(SpellingLoc: EndLoc) != MacroFID)
927	EndLoc = SM.getImmediateExpansionRange(Loc: EndLoc).getEnd();
928
929	LHSTok.setLocation(SM.createExpansionLoc(SpellingLoc: LHSTok.getLocation(), ExpansionLocStart: StartLoc, ExpansionLocEnd: EndLoc,
930	Length: LHSTok.getLength()));
931
932	// Now that we got the result token, it will be subject to expansion. Since
933	// token pasting re-lexes the result token in raw mode, identifier information
934	// isn't looked up. As such, if the result is an identifier, look up id info.
935	if (LHSTok.is(K: tok::raw_identifier)) {
936
937	// If there has any UNCs in concated token, we should mark this token
938	// with Token::HasUCN flag, then LookUpIdentifierInfo will expand UCNs in
939	// token.
940	if (HasUCNs)
941	LHSTok.setFlag(Token::HasUCN);
942
943	// Look up the identifier info for the token. We disabled identifier lookup
944	// by saying we're skipping contents, so we need to do this manually.
945	PP.LookUpIdentifierInfo(Identifier&: LHSTok);
946	}
947	return false;
948	}
949
950	/// isNextTokenLParen - If the next token lexed will pop this macro off the
951	/// expansion stack, return std::nullopt, otherwise return the next unexpanded
952	/// token.
953	std::optional<Token> TokenLexer::peekNextPPToken() const {
954	// Out of tokens?
955	if (isAtEnd())
956	return std::nullopt;
957	return Tokens[CurTokenIdx];
958	}
959
960	/// isParsingPreprocessorDirective - Return true if we are in the middle of a
961	/// preprocessor directive.
962	bool TokenLexer::isParsingPreprocessorDirective() const {
963	return Tokens[NumTokens-`1`].is(K: tok::eod) && !isAtEnd();
964	}
965
966	/// setLexingCXXModuleDirective - This is set to true if this TokenLexer is
967	/// created when handling C++ module directive.
968	void TokenLexer::setLexingCXXModuleDirective(bool Val) {
969	LexingCXXModuleDirective = Val;
970	}
971
972	/// isLexingCXXModuleDirective - Return true if we are lexing a C++ module or
973	/// import directive.
974	bool TokenLexer::isLexingCXXModuleDirective() const {
975	return LexingCXXModuleDirective;
976	}
977
978	/// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes
979	/// together to form a comment that comments out everything in the current
980	/// macro, other active macros, and anything left on the current physical
981	/// source line of the expanded buffer. Handle this by returning the
982	/// first token on the next line.
983	void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok, SourceLocation OpLoc) {
984	PP.Diag(Loc: OpLoc, DiagID: diag::ext_comment_paste_microsoft);
985
986	// We 'comment out' the rest of this macro by just ignoring the rest of the
987	// tokens that have not been lexed yet, if any.
988
989	// Since this must be a macro, mark the macro enabled now that it is no longer
990	// being expanded.
991	assert(Macro && "Token streams can't paste comments");
992	Macro->EnableMacro();
993
994	PP.HandleMicrosoftCommentPaste(Tok);
995	}
996
997	/// If \arg loc is a file ID and points inside the current macro
998	/// definition, returns the appropriate source location pointing at the
999	/// macro expansion source location entry, otherwise it returns an invalid
1000	/// SourceLocation.
1001	SourceLocation
1002	TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
1003	assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() &&
1004	"Not appropriate for token streams");
1005	assert(loc.isValid() && loc.isFileID());
1006
1007	SourceManager &SM = PP.getSourceManager();
1008	assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) &&
1009	"Expected loc to come from the macro definition");
1010
1011	SourceLocation::UIntTy relativeOffset = `0`;
1012	SM.isInSLocAddrSpace(Loc: loc, Start: MacroDefStart, Length: MacroDefLength, RelativeOffset: &relativeOffset);
1013	return MacroExpansionStart.getLocWithOffset(Offset: relativeOffset);
1014	}
1015
1016	/// Finds the tokens that are consecutive (from the same FileID)
1017	/// creates a single SLocEntry, and assigns SourceLocations to each token that
1018	/// point to that SLocEntry. e.g for
1019	/// assert(foo == bar);
1020	/// There will be a single SLocEntry for the "foo == bar" chunk and locations
1021	/// for the 'foo', '==', 'bar' tokens will point inside that chunk.
1022	///
1023	/// \arg begin_tokens will be updated to a position past all the found
1024	/// consecutive tokens.
1025	static void updateConsecutiveMacroArgTokens(SourceManager &SM,
1026	SourceLocation ExpandLoc,
1027	Token *&begin_tokens,
1028	Token * end_tokens) {
1029	assert(begin_tokens + `1` < end_tokens);
1030	SourceLocation BeginLoc = begin_tokens->getLocation();
1031	llvm::MutableArrayRef<Token> All(begin_tokens, end_tokens);
1032	llvm::MutableArrayRef<Token> Partition;
1033
1034	auto NearLast = [&, Last = BeginLoc](SourceLocation Loc) mutable {
1035	// The maximum distance between two consecutive tokens in a partition.
1036	// This is an important trick to avoid using too much SourceLocation address
1037	// space!
1038	static constexpr SourceLocation::IntTy MaxDistance = `50`;
1039	auto Distance = Loc.getRawEncoding() - Last.getRawEncoding();
1040	Last = Loc;
1041	return Distance <= MaxDistance;
1042	};
1043
1044	// Partition the tokens by their FileID.
1045	// This is a hot function, and calling getFileID can be expensive, the
1046	// implementation is optimized by reducing the number of getFileID.
1047	if (BeginLoc.isFileID()) {
1048	// Consecutive tokens not written in macros must be from the same file.
1049	// (Neither #include nor eof can occur inside a macro argument.)
1050	Partition = All.take_while(Pred: [&](const Token &T) {
1051	return T.getLocation().isFileID() && NearLast (T.getLocation());
1052	});
1053	} else {
1054	// Call getFileID once to calculate the bounds, and use the cheaper
1055	// sourcelocation-against-bounds comparison.
1056	FileID BeginFID = SM.getFileID(SpellingLoc: BeginLoc);
1057	SourceLocation Limit =
1058	SM.getComposedLoc(FID: BeginFID, Offset: SM.getFileIDSize(FID: BeginFID));
1059	Partition = All.take_while(Pred: [&](const Token &T) {
1060	// NOTE: the Limit is included! The lexer recovery only ever inserts a
1061	// single token past the end of the FileID, specifically the ) when a
1062	// macro-arg containing a comma should be guarded by parentheses.
1063	//
1064	// It is safe to include the Limit here because SourceManager allocates
1065	// FileSize + 1 for each SLocEntry.
1066	//
1067	// See https://github.com/llvm/llvm-project/issues/60722.
1068	return T.getLocation() >= BeginLoc && T.getLocation() <= Limit
1069	&& NearLast (T.getLocation());
1070	});
1071	}
1072	assert(!Partition.empty());
1073
1074	// For the consecutive tokens, find the length of the SLocEntry to contain
1075	// all of them.
1076	SourceLocation::UIntTy FullLength =
1077	Partition.back().getEndLoc().getRawEncoding() -
1078	Partition.front().getLocation().getRawEncoding();
1079	// Create a macro expansion SLocEntry that will "contain" all of the tokens.
1080	SourceLocation Expansion =
1081	SM.createMacroArgExpansionLoc(SpellingLoc: BeginLoc, ExpansionLoc: ExpandLoc, Length: FullLength);
1082
1083	#ifdef EXPENSIVE_CHECKS
1084	assert(llvm::all_of(Partition.drop_front(),
1085	[&SM, ID = SM.getFileID(Partition.front().getLocation())](
1086	const Token &T) {
1087	return ID == SM.getFileID(T.getLocation());
1088	}) &&
1089	"Must have the same FIleID!");
1090	#endif
1091	// Change the location of the tokens from the spelling location to the new
1092	// expanded location.
1093	for (Token& T : Partition) {
1094	SourceLocation::IntTy RelativeOffset =
1095	T.getLocation().getRawEncoding() - BeginLoc.getRawEncoding();
1096	T.setLocation(Expansion.getLocWithOffset(Offset: RelativeOffset));
1097	}
1098	begin_tokens = &Partition.back() + `1`;
1099	}
1100
1101	/// Creates SLocEntries and updates the locations of macro argument
1102	/// tokens to their new expanded locations.
1103	///
1104	/// \param ArgIdSpellLoc the location of the macro argument id inside the macro
1105	/// definition.
1106	void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
1107	Token *begin_tokens,
1108	Token *end_tokens) {
1109	SourceManager &SM = PP.getSourceManager();
1110
1111	SourceLocation ExpandLoc =
1112	getExpansionLocForMacroDefLoc(loc: ArgIdSpellLoc);
1113
1114	while (begin_tokens < end_tokens) {
1115	// If there's only one token just create a SLocEntry for it.
1116	if (end_tokens - begin_tokens == `1`) {
1117	Token &Tok = *begin_tokens;
1118	Tok.setLocation(SM.createMacroArgExpansionLoc(SpellingLoc: Tok.getLocation(),
1119	ExpansionLoc: ExpandLoc,
1120	Length: Tok.getLength()));
1121	return;
1122	}
1123
1124	updateConsecutiveMacroArgTokens(SM, ExpandLoc, begin_tokens, end_tokens);
1125	}
1126	}
1127
1128	void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
1129	AtStartOfLine = Result.isAtStartOfLine();
1130	HasLeadingSpace = Result.hasLeadingSpace();
1131	}
1132

Browse the source code of llvm_projects/clang/lib/Lex/TokenLexer.cpp