HeaderIncludes.cpp source code [llvm_projects/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp]

1	//===--- HeaderIncludes.cpp - Insert/Delete #includes --- C++ -----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "clang/Tooling/Inclusions/HeaderIncludes.h"
10	#include "clang/Basic/SourceManager.h"
11	#include "clang/Lex/Lexer.h"
12	#include "llvm/Support/FormatVariadic.h"
13	#include "llvm/Support/Path.h"
14	#include <optional>
15
16	namespace clang {
17	namespace tooling {
18	namespace {
19
20	LangOptions createLangOpts() {
21	LangOptions LangOpts;
22	LangOpts.CPlusPlus = `1`;
23	LangOpts.CPlusPlus11 = `1`;
24	LangOpts.CPlusPlus14 = `1`;
25	LangOpts.LineComment = `1`;
26	LangOpts.CXXOperatorNames = `1`;
27	LangOpts.Bool = `1`;
28	LangOpts.ObjC = `1`;
29	LangOpts.MicrosoftExt = `1`; // To get kw___try, kw___finally.
30	LangOpts.DeclSpecKeyword = `1`; // To get __declspec.
31	LangOpts.WChar = `1`; // To get wchar_t
32	return LangOpts;
33	}
34
35	// Returns the offset after skipping a sequence of tokens, matched by \p
36	// GetOffsetAfterSequence, from the start of the code.
37	// \p GetOffsetAfterSequence should be a function that matches a sequence of
38	// tokens and returns an offset after the sequence.
39	unsigned getOffsetAfterTokenSequence(
40	StringRef FileName, StringRef Code, const IncludeStyle &Style,
41	llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
42	GetOffsetAfterSequence) {
43	SourceManagerForFile VirtualSM(FileName, Code);
44	SourceManager &SM = VirtualSM.get();
45	LangOptions LangOpts = createLangOpts();
46	Lexer Lex(SM.getMainFileID(), SM.getBufferOrFake(FID: SM.getMainFileID()), SM,
47	LangOpts);
48	Token Tok;
49	// Get the first token.
50	Lex.LexFromRawLexer(Result&: Tok);
51	return GetOffsetAfterSequence (SM, Lex, Tok);
52	}
53
54	// Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
55	// \p Tok will be the token after this directive; otherwise, it can be any token
56	// after the given \p Tok (including \p Tok). If \p RawIDName is provided, the
57	// (second) raw_identifier name is checked.
58	bool checkAndConsumeDirectiveWithName(
59	Lexer &Lex, StringRef Name, Token &Tok,
60	std::optional<StringRef> RawIDName = std::nullopt) {
61	bool Matched = Tok.is(K: tok::hash) && !Lex.LexFromRawLexer(Result&: Tok) &&
62	Tok.is(K: tok::raw_identifier) &&
63	Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Result&: Tok) &&
64	Tok.is(K: tok::raw_identifier) &&
65	(!RawIDName \|\| Tok.getRawIdentifier() == *RawIDName);
66	if (Matched)
67	Lex.LexFromRawLexer(Result&: Tok);
68	return Matched;
69	}
70
71	void skipComments(Lexer &Lex, Token &Tok) {
72	while (Tok.is(K: tok::comment))
73	if (Lex.LexFromRawLexer(Result&: Tok))
74	return;
75	}
76
77	// Returns the offset after header guard directives and any comments
78	// before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no
79	// header guard is present in the code, this will return the offset after
80	// skipping all comments from the start of the code.
81	unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
82	StringRef Code,
83	const IncludeStyle &Style) {
84	// \p Consume returns location after header guard or 0 if no header guard is
85	// found.
86	auto ConsumeHeaderGuardAndComment =
87	[&](std::function<unsigned(const SourceManager &SM, Lexer &Lex,
88	Token Tok)>
89	Consume) {
90	return getOffsetAfterTokenSequence(
91	FileName, Code, Style,
92	GetOffsetAfterSequence: [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) {
93	skipComments(Lex, Tok);
94	unsigned InitialOffset = SM.getFileOffset(SpellingLoc: Tok.getLocation());
95	return std::max(a: InitialOffset, b: Consume (SM, Lex, Tok));
96	});
97	};
98	return std::max(
99	// #ifndef/#define
100	a: ConsumeHeaderGuardAndComment (
101	[](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
102	if (checkAndConsumeDirectiveWithName(Lex, Name: "ifndef", Tok)) {
103	skipComments(Lex, Tok);
104	if (checkAndConsumeDirectiveWithName(Lex, Name: "define", Tok) &&
105	Tok.isAtStartOfLine())
106	return SM.getFileOffset(SpellingLoc: Tok.getLocation());
107	}
108	return `0`;
109	}),
110	// #pragma once
111	b: ConsumeHeaderGuardAndComment (
112	[](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
113	if (checkAndConsumeDirectiveWithName(Lex, Name: "pragma", Tok,
114	RawIDName: StringRef("once")))
115	return SM.getFileOffset(SpellingLoc: Tok.getLocation());
116	return `0`;
117	}));
118	}
119
120	// Check if a sequence of tokens is like
121	// "#include ("header.h" \| <header.h>)".
122	// If it is, \p Tok will be the token after this directive; otherwise, it can be
123	// any token after the given \p Tok (including \p Tok).
124	bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
125	auto Matched = [&]() {
126	Lex.LexFromRawLexer(Result&: Tok);
127	return true;
128	};
129	if (Tok.is(K: tok::hash) && !Lex.LexFromRawLexer(Result&: Tok) &&
130	Tok.is(K: tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
131	if (Lex.LexFromRawLexer(Result&: Tok))
132	return false;
133	if (Tok.is(K: tok::string_literal))
134	return Matched ();
135	if (Tok.is(K: tok::less)) {
136	while (!Lex.LexFromRawLexer(Result&: Tok) && Tok.isNot(K: tok::greater)) {
137	}
138	if (Tok.is(K: tok::greater))
139	return Matched ();
140	}
141	}
142	return false;
143	}
144
145	// Returns the offset of the last #include directive after which a new
146	// #include can be inserted. This ignores #include's after the #include block(s)
147	// in the beginning of a file to avoid inserting headers into code sections
148	// where new #include's should not be added by default.
149	// These code sections include:
150	// - raw string literals (containing #include).
151	// - #if blocks.
152	// - Special #include's among declarations (e.g. functions).
153	//
154	// If no #include after which a new #include can be inserted, this returns the
155	// offset after skipping all comments from the start of the code.
156	// Inserting after an #include is not allowed if it comes after code that is not
157	// #include (e.g. pre-processing directive that is not #include, declarations).
158	unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
159	const IncludeStyle &Style) {
160	return getOffsetAfterTokenSequence(
161	FileName, Code, Style,
162	GetOffsetAfterSequence: [](const SourceManager &SM, Lexer &Lex, Token Tok) {
163	skipComments(Lex, Tok);
164	unsigned MaxOffset = SM.getFileOffset(SpellingLoc: Tok.getLocation());
165	while (checkAndConsumeInclusiveDirective(Lex, Tok))
166	MaxOffset = SM.getFileOffset(SpellingLoc: Tok.getLocation());
167	return MaxOffset;
168	});
169	}
170
171	inline StringRef trimInclude(StringRef IncludeName) {
172	return IncludeName.trim(Chars: "\"<>");
173	}
174
// Matches a source line that holds an #include or #import directive.
//   group 1: the directive keyword ("import" or "include")
//   group 2: the header name including its surrounding quotes or brackets
// Any amount of blanks/tabs is allowed before and after the '#'.
const char IncludeRegexPattern[] =
    R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
177
178	// The filename of Path excluding extension.
179	// Used to match implementation with headers, this differs from sys::path::stem:
180	// - in names with multiple dots (foo.cu.cc) it terminates at the first
181	// - an empty stem is never returned: /foo/.bar.x => .bar
182	// - we don't bother to handle . and .. specially
183	StringRef matchingStem(llvm::StringRef Path) {
184	StringRef Name = llvm::sys::path::filename(path: Path);
185	return Name.substr(Start: `0`, N: Name.find(C: `'.'`, From: `1`));
186	}
187
188	} // anonymous namespace
189
190	IncludeCategoryManager::IncludeCategoryManager(const IncludeStyle &Style,
191	StringRef FileName)
192	: Style (Style), FileName (FileName) {
193	for (const auto &Category : Style.IncludeCategories) {
194	CategoryRegexs.emplace_back(Args: Category.Regex, Args: Category.RegexIsCaseSensitive
195	? llvm::Regex::NoFlags
196	: llvm::Regex::IgnoreCase);
197	}
198	IsMainFile = FileName.ends_with(Suffix: ".c") \|\| FileName.ends_with(Suffix: ".cc") \|\|
199	FileName.ends_with(Suffix: ".cpp") \|\| FileName.ends_with(Suffix: ".c++") \|\|
200	FileName.ends_with(Suffix: ".cxx") \|\| FileName.ends_with(Suffix: ".m") \|\|
201	FileName.ends_with(Suffix: ".mm");
202	if (!Style.IncludeIsMainSourceRegex.empty()) {
203	llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex);
204	IsMainFile \|= MainFileRegex.match(String: FileName);
205	}
206	}
207
208	int IncludeCategoryManager::getIncludePriority(StringRef IncludeName,
209	bool CheckMainHeader) const {
210	int Ret = INT_MAX;
211	for (unsigned i = `0`, e = CategoryRegexs.size(); i != e; ++i)
212	if (CategoryRegexs [i].match(String: IncludeName)) {
213	Ret = Style.IncludeCategories [i].Priority;
214	break;
215	}
216	if (CheckMainHeader && IsMainFile && Ret > `0` && isMainHeader(IncludeName))
217	Ret = `0`;
218	return Ret;
219	}
220
221	int IncludeCategoryManager::getSortIncludePriority(StringRef IncludeName,
222	bool CheckMainHeader) const {
223	int Ret = INT_MAX;
224	for (unsigned i = `0`, e = CategoryRegexs.size(); i != e; ++i)
225	if (CategoryRegexs [i].match(String: IncludeName)) {
226	Ret = Style.IncludeCategories [i].SortPriority;
227	if (Ret == `0`)
228	Ret = Style.IncludeCategories [i].Priority;
229	break;
230	}
231	if (CheckMainHeader && IsMainFile && Ret > `0` && isMainHeader(IncludeName))
232	Ret = `0`;
233	return Ret;
234	}
235	bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const {
236	switch (Style.MainIncludeChar) {
237	case IncludeStyle::MICD_Quote:
238	if (!IncludeName.starts_with(Prefix: "\""))
239	return false;
240	break;
241	case IncludeStyle::MICD_AngleBracket:
242	if (!IncludeName.starts_with(Prefix: "<"))
243	return false;
244	break;
245	case IncludeStyle::MICD_Any:
246	break;
247	}
248
249	IncludeName =
250	IncludeName.drop_front(N: `1`).drop_back(N: `1`); // remove the surrounding "" or <>
251	// Not matchingStem: implementation files may have compound extensions but
252	// headers may not.
253	StringRef HeaderStem = llvm::sys::path::stem(path: IncludeName);
254	StringRef FileStem = llvm::sys::path::stem(path: FileName); // foo.cu for foo.cu.cc
255	StringRef MatchingFileStem = matchingStem(Path: FileName); // foo for foo.cu.cc
256	// main-header examples:
257	// 1) foo.h => foo.cc
258	// 2) foo.h => foo.cu.cc
259	// 3) foo.proto.h => foo.proto.cc
260	//
261	// non-main-header examples:
262	// 1) foo.h => bar.cc
263	// 2) foo.proto.h => foo.cc
264	StringRef Matching;
265	if (MatchingFileStem.starts_with_insensitive(Prefix: HeaderStem))
266	Matching = MatchingFileStem; // example 1), 2)
267	else if (FileStem.equals_insensitive(RHS: HeaderStem))
268	Matching = FileStem; // example 3)
269	if (!Matching.empty()) {
270	llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex,
271	llvm::Regex::IgnoreCase);
272	if (MainIncludeRegex.match(String: Matching))
273	return true;
274	}
275	return false;
276	}
277
278	const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern);
279
280	HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
281	const IncludeStyle &Style)
282	: FileName (FileName), Code (Code), FirstIncludeOffset(-`1`),
283	MinInsertOffset(
284	getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
285	MaxInsertOffset(MinInsertOffset +
286	getMaxHeaderInsertionOffset(
287	FileName, Code: Code.drop_front(N: MinInsertOffset), Style)),
288	MainIncludeFound(false),
289	Categories (Style, FileName) {
290	// Add 0 for main header and INT_MAX for headers that are not in any
291	// category.
292	Priorities = {`0`, INT_MAX};
293	for (const auto &Category : Style.IncludeCategories)
294	Priorities.insert(x: Category.Priority);
295	SmallVector<StringRef, `32`> Lines;
296	Code.drop_front(N: MinInsertOffset).split(A&: Lines, Separator: "\n");
297
298	unsigned Offset = MinInsertOffset;
299	unsigned NextLineOffset;
300	SmallVector<StringRef, `4`> Matches;
301	for (auto Line : Lines) {
302	NextLineOffset = std::min(a: Code.size(), b: Offset + Line.size() + `1`);
303	if (IncludeRegex.match(String: Line, Matches: &Matches)) {
304	// If this is the last line without trailing newline, we need to make
305	// sure we don't delete across the file boundary.
306	addExistingInclude(
307	IncludeToAdd: Include (Matches [`2`],
308	tooling::Range (
309	Offset, std::min(a: Line.size() + `1`, b: Code.size() - Offset)),
310	Matches [`1`] == "import" ? tooling::IncludeDirective::Import
311	: tooling::IncludeDirective::Include),
312	NextLineOffset);
313	}
314	Offset = NextLineOffset;
315	}
316
317	// Populate CategoryEndOfssets:
318	// - Ensure that CategoryEndOffset[Highest] is always populated.
319	// - If CategoryEndOffset[Priority] isn't set, use the next higher value
320	// that is set, up to CategoryEndOffset[Highest].
321	auto Highest = Priorities.begin();
322	auto [It, Inserted] = CategoryEndOffsets.try_emplace(k: *Highest);
323	if (Inserted)
324	It ->second = FirstIncludeOffset >= `0` ? FirstIncludeOffset : MinInsertOffset;
325	// By this point, CategoryEndOffset[Highest] is always set appropriately:
326	// - to an appropriate location before/after existing #includes, or
327	// - to right after the header guard, or
328	// - to the beginning of the file.
329	for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
330	if (CategoryEndOffsets.find(x: *I) == CategoryEndOffsets.end())
331	CategoryEndOffsets [I] = CategoryEndOffsets [std::prev(x: I)];
332	}
333
334	// \p Offset: the start of the line following this include directive.
335	void HeaderIncludes::addExistingInclude(Include IncludeToAdd,
336	unsigned NextLineOffset) {
337	auto &Incs = ExistingIncludes [trimInclude(IncludeName: IncludeToAdd.Name)];
338	Incs.push_back(x: std::move(IncludeToAdd));
339	auto &CurInclude = Incs.back();
340	// The header name with quotes or angle brackets.
341	// Only record the offset of current #include if we can insert after it.
342	if (CurInclude.R.getOffset() <= MaxInsertOffset) {
343	int Priority = Categories.getIncludePriority(
344	IncludeName: CurInclude.Name, /CheckMainHeader=/!MainIncludeFound);
345	if (Priority == `0`)
346	MainIncludeFound = true;
347	CategoryEndOffsets [Priority] = NextLineOffset;
348	IncludesByPriority [Priority].push_back(Elt: &CurInclude);
349	if (FirstIncludeOffset < `0`)
350	FirstIncludeOffset = CurInclude.R.getOffset();
351	}
352	}
353
354	std::optional<tooling::Replacement>
355	HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled,
356	IncludeDirective Directive) const {
357	assert(IncludeName == trimInclude(IncludeName));
358	// If a <header> ("header") already exists in code, "header" (<header>) with
359	// different quotation and/or directive will still be inserted.
360	// FIXME: figure out if this is the best behavior.
361	auto It = ExistingIncludes.find(Key: IncludeName);
362	if (It != ExistingIncludes.end()) {
363	for (const auto &Inc : It ->second)
364	if (Inc.Directive == Directive &&
365	((IsAngled && StringRef(Inc.Name).starts_with(Prefix: "<")) \|\|
366	(!IsAngled && StringRef(Inc.Name).starts_with(Prefix: "\""))))
367	return std::nullopt;
368	}
369	std::string Quoted =
370	std::string(llvm::formatv(Fmt: IsAngled ? "<{0}>" : "\"{0}\"", Vals&: IncludeName));
371	StringRef QuotedName = Quoted;
372	int Priority = Categories.getIncludePriority(
373	IncludeName: QuotedName, /CheckMainHeader=/!MainIncludeFound);
374	auto CatOffset = CategoryEndOffsets.find(x: Priority);
375	assert(CatOffset != CategoryEndOffsets.end());
376	unsigned InsertOffset = CatOffset ->second; // Fall back offset
377	auto Iter = IncludesByPriority.find(x: Priority);
378	if (Iter != IncludesByPriority.end()) {
379	for (const auto *Inc : Iter ->second) {
380	if (QuotedName < Inc->Name) {
381	InsertOffset = Inc->R.getOffset();
382	break;
383	}
384	}
385	}
386	assert(InsertOffset <= Code.size());
387	llvm::StringRef DirectiveSpelling =
388	Directive == IncludeDirective::Include ? "include" : "import";
389	std::string NewInclude =
390	llvm::formatv(Fmt: "#{0} {1}\n", Vals&: DirectiveSpelling, Vals&: QuotedName);
391	// When inserting headers at end of the code, also append '\n' to the code
392	// if it does not end with '\n'.
393	// FIXME: when inserting multiple #includes at the end of code, only one
394	// newline should be added.
395	if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != `'\n'`))
396	NewInclude = "\n" + NewInclude;
397	return tooling::Replacement (FileName, InsertOffset, `0`, NewInclude);
398	}
399
400	tooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName,
401	bool IsAngled) const {
402	assert(IncludeName == trimInclude(IncludeName));
403	tooling::Replacements Result;
404	auto Iter = ExistingIncludes.find(Key: IncludeName);
405	if (Iter == ExistingIncludes.end())
406	return Result;
407	for (const auto &Inc : Iter ->second) {
408	if ((IsAngled && StringRef(Inc.Name).starts_with(Prefix: "\"")) \|\|
409	(!IsAngled && StringRef(Inc.Name).starts_with(Prefix: "<")))
410	continue;
411	llvm::Error Err = Result.add(R: tooling::Replacement (
412	FileName, Inc.R.getOffset(), Inc.R.getLength(), ""));
413	if (Err) {
414	auto ErrMsg = "Unexpected conflicts in #include deletions: " +
415	llvm::toString(E: std::move(Err));
416	llvm_unreachable(ErrMsg.c_str());
417	}
418	}
419	return Result;
420	}
421
422	} // namespace tooling
423	} // namespace clang
424

Browse the source code of llvm_projects/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp