1//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is a utility class for instrumentation passes (like AddressSanitizer
10// or ThreadSanitizer) to avoid instrumenting some functions or global
11// variables, or to instrument some functions or global variables in a specific
12// way, based on a user-supplied list.
13//
14//===----------------------------------------------------------------------===//
15
16#include "llvm/Support/SpecialCaseList.h"
17#include "llvm/ADT/RadixTree.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/StringMap.h"
21#include "llvm/ADT/StringRef.h"
22#include "llvm/ADT/iterator_range.h"
23#include "llvm/Support/Compiler.h"
24#include "llvm/Support/GlobPattern.h"
25#include "llvm/Support/LineIterator.h"
26#include "llvm/Support/MemoryBuffer.h"
27#include "llvm/Support/Regex.h"
28#include "llvm/Support/VirtualFileSystem.h"
29#include "llvm/Support/raw_ostream.h"
30#include <memory>
31#include <stdio.h>
32#include <string>
33#include <system_error>
34#include <utility>
35#include <variant>
36#include <vector>
37
38namespace llvm {
39
40namespace {
41
42// Lagacy v1 matcher.
43class RegexMatcher {
44public:
45 Error insert(StringRef Pattern, unsigned LineNumber);
46 unsigned match(StringRef Query) const;
47
48private:
49 struct Reg {
50 Reg(StringRef Name, unsigned LineNo, Regex &&Rg)
51 : Name(Name), LineNo(LineNo), Rg(std::move(Rg)) {}
52 StringRef Name;
53 unsigned LineNo;
54 Regex Rg;
55 };
56
57 std::vector<Reg> RegExes;
58};
59
60class GlobMatcher {
61public:
62 Error insert(StringRef Pattern, unsigned LineNumber);
63 unsigned match(StringRef Query) const;
64
65private:
66 struct Glob {
67 Glob(StringRef Name, unsigned LineNo, GlobPattern &&Pattern)
68 : Name(Name), LineNo(LineNo), Pattern(std::move(Pattern)) {}
69 StringRef Name;
70 unsigned LineNo;
71 GlobPattern Pattern;
72 };
73
74 void LazyInit() const;
75
76 std::vector<GlobMatcher::Glob> Globs;
77
78 mutable RadixTree<iterator_range<StringRef::const_iterator>,
79 RadixTree<iterator_range<StringRef::const_reverse_iterator>,
80 SmallVector<int, 1>>>
81 PrefixSuffixToGlob;
82
83 mutable RadixTree<iterator_range<StringRef::const_iterator>,
84 SmallVector<int, 1>>
85 SubstrToGlob;
86
87 mutable bool Initialized = false;
88};
89
90/// Represents a set of patterns and their line numbers
91class Matcher {
92public:
93 Matcher(bool UseGlobs, bool RemoveDotSlash);
94
95 Error insert(StringRef Pattern, unsigned LineNumber);
96 unsigned match(StringRef Query) const;
97
98 bool matchAny(StringRef Query) const { return match(Query); }
99
100 std::variant<RegexMatcher, GlobMatcher> M;
101 bool RemoveDotSlash;
102};
103
104Error RegexMatcher::insert(StringRef Pattern, unsigned LineNumber) {
105 if (Pattern.empty())
106 return createStringError(EC: errc::invalid_argument,
107 S: "Supplied regex was blank");
108
109 // Replace * with .*
110 auto Regexp = Pattern.str();
111 for (size_t pos = 0; (pos = Regexp.find(c: '*', pos: pos)) != std::string::npos;
112 pos += strlen(s: ".*")) {
113 Regexp.replace(pos: pos, n1: strlen(s: "*"), s: ".*");
114 }
115
116 Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
117
118 // Check that the regexp is valid.
119 Regex CheckRE(Regexp);
120 std::string REError;
121 if (!CheckRE.isValid(Error&: REError))
122 return createStringError(EC: errc::invalid_argument, S: REError);
123
124 RegExes.emplace_back(args&: Pattern, args&: LineNumber, args: std::move(CheckRE));
125 return Error::success();
126}
127
128unsigned RegexMatcher::match(StringRef Query) const {
129 for (const auto &R : reverse(C: RegExes))
130 if (R.Rg.match(String: Query))
131 return R.LineNo;
132 return 0;
133}
134
135Error GlobMatcher::insert(StringRef Pattern, unsigned LineNumber) {
136 if (Pattern.empty())
137 return createStringError(EC: errc::invalid_argument, S: "Supplied glob was blank");
138
139 auto Res = GlobPattern::create(Pat: Pattern, /*MaxSubPatterns=*/1024);
140 if (auto Err = Res.takeError())
141 return Err;
142 Globs.emplace_back(args&: Pattern, args&: LineNumber, args: std::move(Res.get()));
143 return Error::success();
144}
145
146void GlobMatcher::LazyInit() const {
147 if (LLVM_LIKELY(Initialized))
148 return;
149 Initialized = true;
150 for (const auto &[Idx, G] : enumerate(First: Globs)) {
151 StringRef Prefix = G.Pattern.prefix();
152 StringRef Suffix = G.Pattern.suffix();
153
154 if (Suffix.empty() && Prefix.empty()) {
155 // If both prefix and suffix are empty put into special tree to search by
156 // substring in a middle.
157 StringRef Substr = G.Pattern.longest_substr();
158 if (!Substr.empty()) {
159 // But only if substring is not empty. Searching this tree is more
160 // expensive.
161 auto &V = SubstrToGlob.emplace(Key: Substr).first->second;
162 V.emplace_back(Args&: Idx);
163 continue;
164 }
165 }
166
167 auto &SToGlob = PrefixSuffixToGlob.emplace(Key: Prefix).first->second;
168 auto &V = SToGlob.emplace(Key: reverse(C&: Suffix)).first->second;
169 V.emplace_back(Args&: Idx);
170 }
171}
172
173unsigned GlobMatcher::match(StringRef Query) const {
174 LazyInit();
175
176 int Best = -1;
177 if (!PrefixSuffixToGlob.empty()) {
178 for (const auto &[_, SToGlob] : PrefixSuffixToGlob.find_prefixes(Key: Query)) {
179 for (const auto &[_, V] : SToGlob.find_prefixes(Key: reverse(C&: Query))) {
180 for (int Idx : reverse(C: V)) {
181 if (Best > Idx)
182 break;
183 const GlobMatcher::Glob &G = Globs[Idx];
184 if (G.Pattern.match(S: Query)) {
185 Best = Idx;
186 // As soon as we find a match in the vector, we can break for this
187 // vector, since the globs are already sorted by priority within the
188 // prefix group. However, we continue searching other prefix groups
189 // in the map, as they may contain a better match overall.
190 break;
191 }
192 }
193 }
194 }
195 }
196
197 if (!SubstrToGlob.empty()) {
198 // As we don't know when substring exactly starts, we will try all
199 // possibilities. In most cases search will fail on first characters.
200 for (StringRef Q = Query; !Q.empty(); Q = Q.drop_front()) {
201 for (const auto &[_, V] : SubstrToGlob.find_prefixes(Key: Q)) {
202 for (int Idx : reverse(C: V)) {
203 if (Best > Idx)
204 break;
205 const GlobMatcher::Glob &G = Globs[Idx];
206 if (G.Pattern.match(S: Query)) {
207 Best = Idx;
208 // As soon as we find a match in the vector, we can break for this
209 // vector, since the globs are already sorted by priority within the
210 // prefix group. However, we continue searching other prefix groups
211 // in the map, as they may contain a better match overall.
212 break;
213 }
214 }
215 }
216 }
217 }
218 return Best < 0 ? 0 : Globs[Best].LineNo;
219}
220
221Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)
222 : RemoveDotSlash(RemoveDotSlash) {
223 if (UseGlobs)
224 M.emplace<GlobMatcher>();
225 else
226 M.emplace<RegexMatcher>();
227}
228
229Error Matcher::insert(StringRef Pattern, unsigned LineNumber) {
230 return std::visit(visitor: [&](auto &V) { return V.insert(Pattern, LineNumber); }, variants&: M);
231}
232
233unsigned Matcher::match(StringRef Query) const {
234 if (RemoveDotSlash)
235 Query = llvm::sys::path::remove_leading_dotslash(path: Query);
236 return std::visit(visitor: [&](auto &V) -> unsigned { return V.match(Query); }, variants: M);
237}
238} // namespace
239
240class SpecialCaseList::Section::SectionImpl {
241public:
242 const Matcher *findMatcher(StringRef Prefix, StringRef Category) const;
243
244 using SectionEntries = StringMap<StringMap<Matcher>>;
245
246 explicit SectionImpl(bool UseGlobs)
247 : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false) {}
248
249 Matcher SectionMatcher;
250 SectionEntries Entries;
251};
252
253// TODO: Refactor this to return Expected<...>
254std::unique_ptr<SpecialCaseList>
255SpecialCaseList::create(const std::vector<std::string> &Paths,
256 llvm::vfs::FileSystem &FS, std::string &Error) {
257 std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
258 if (SCL->createInternal(Paths, VFS&: FS, Error))
259 return SCL;
260 return nullptr;
261}
262
263std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB,
264 std::string &Error) {
265 std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
266 if (SCL->createInternal(MB, Error))
267 return SCL;
268 return nullptr;
269}
270
271std::unique_ptr<SpecialCaseList>
272SpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
273 llvm::vfs::FileSystem &FS) {
274 std::string Error;
275 if (auto SCL = create(Paths, FS, Error))
276 return SCL;
277 report_fatal_error(reason: Twine(Error));
278}
279
280bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
281 vfs::FileSystem &VFS, std::string &Error) {
282 for (size_t i = 0; i < Paths.size(); ++i) {
283 const auto &Path = Paths[i];
284 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
285 VFS.getBufferForFile(Name: Path);
286 if (std::error_code EC = FileOrErr.getError()) {
287 Error = (Twine("can't open file '") + Path + "': " + EC.message()).str();
288 return false;
289 }
290 std::string ParseError;
291 if (!parse(FileIdx: i, MB: FileOrErr.get().get(), Error&: ParseError)) {
292 Error = (Twine("error parsing file '") + Path + "': " + ParseError).str();
293 return false;
294 }
295 }
296 return true;
297}
298
299bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
300 std::string &Error) {
301 if (!parse(FileIdx: 0, MB, Error))
302 return false;
303 return true;
304}
305
306Expected<SpecialCaseList::Section *>
307SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
308 unsigned LineNo, bool UseGlobs) {
309 SectionStr = SectionStr.copy(A&: StrAlloc);
310 Sections.emplace_back(args&: SectionStr, args&: FileNo, args&: UseGlobs);
311 auto &Section = Sections.back();
312
313 if (auto Err = Section.Impl->SectionMatcher.insert(Pattern: SectionStr, LineNumber: LineNo)) {
314 return createStringError(EC: errc::invalid_argument,
315 S: "malformed section at line " + Twine(LineNo) +
316 ": '" + SectionStr +
317 "': " + toString(E: std::move(Err)));
318 }
319
320 return &Section;
321}
322
323bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
324 std::string &Error) {
325 unsigned long long Version = 2;
326
327 StringRef Header = MB->getBuffer();
328 if (Header.consume_front(Prefix: "#!special-case-list-v"))
329 consumeUnsignedInteger(Str&: Header, Radix: 10, Result&: Version);
330
331 // In https://reviews.llvm.org/D154014 we added glob support and planned
332 // to remove regex support in patterns. We temporarily support the
333 // original behavior using regexes if "#!special-case-list-v1" is the
334 // first line of the file. For more details, see
335 // https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666
336 bool UseGlobs = Version > 1;
337
338 bool RemoveDotSlash = Version > 2;
339
340 auto ErrOrSection = addSection(SectionStr: "*", FileNo: FileIdx, LineNo: 1, UseGlobs: true);
341 if (auto Err = ErrOrSection.takeError()) {
342 Error = toString(E: std::move(Err));
343 return false;
344 }
345 Section::SectionImpl *CurrentImpl = ErrOrSection.get()->Impl.get();
346
347 // This is the current list of prefixes for all existing users matching file
348 // path. We may need parametrization in constructor in future.
349 constexpr StringRef PathPrefixes[] = {"src", "!src", "mainfile", "source"};
350
351 for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
352 !LineIt.is_at_eof(); LineIt++) {
353 unsigned LineNo = LineIt.line_number();
354 StringRef Line = LineIt->trim();
355 if (Line.empty())
356 continue;
357
358 // Save section names
359 if (Line.starts_with(Prefix: "[")) {
360 if (!Line.ends_with(Suffix: "]")) {
361 Error =
362 ("malformed section header on line " + Twine(LineNo) + ": " + Line)
363 .str();
364 return false;
365 }
366
367 auto ErrOrSection =
368 addSection(SectionStr: Line.drop_front().drop_back(), FileNo: FileIdx, LineNo, UseGlobs);
369 if (auto Err = ErrOrSection.takeError()) {
370 Error = toString(E: std::move(Err));
371 return false;
372 }
373 CurrentImpl = ErrOrSection.get()->Impl.get();
374 continue;
375 }
376
377 // Get our prefix and unparsed glob.
378 auto [Prefix, Postfix] = Line.split(Separator: ":");
379 if (Postfix.empty()) {
380 // Missing ':' in the line.
381 Error = ("malformed line " + Twine(LineNo) + ": '" + Line + "'").str();
382 return false;
383 }
384
385 auto [Pattern, Category] = Postfix.split(Separator: "=");
386 auto [It, _] = CurrentImpl->Entries[Prefix].try_emplace(
387 Key: Category, Args&: UseGlobs,
388 Args: RemoveDotSlash && llvm::is_contained(Range: PathPrefixes, Element: Prefix));
389 Pattern = Pattern.copy(A&: StrAlloc);
390 if (auto Err = It->second.insert(Pattern, LineNumber: LineNo)) {
391 Error =
392 (Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
393 Twine(LineNo) + ": '" + Pattern + "': " + toString(E: std::move(Err)))
394 .str();
395 return false;
396 }
397 }
398
399 return true;
400}
401
402SpecialCaseList::~SpecialCaseList() = default;
403
404bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix,
405 StringRef Query, StringRef Category) const {
406 auto [FileIdx, LineNo] = inSectionBlame(Section, Prefix, Query, Category);
407 return LineNo;
408}
409
410std::pair<unsigned, unsigned>
411SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
412 StringRef Query, StringRef Category) const {
413 for (const auto &S : reverse(C: Sections)) {
414 if (S.Impl->SectionMatcher.matchAny(Query: Section)) {
415 unsigned Blame = S.getLastMatch(Prefix, Query, Category);
416 if (Blame)
417 return {S.FileIdx, Blame};
418 }
419 }
420 return NotFound;
421}
422
423SpecialCaseList::Section::Section(StringRef Str, unsigned FileIdx,
424 bool UseGlobs)
425 : Name(Str), FileIdx(FileIdx),
426 Impl(std::make_unique<SectionImpl>(args&: UseGlobs)) {}
427
428SpecialCaseList::Section::Section(Section &&) = default;
429
430SpecialCaseList::Section::~Section() = default;
431
432bool SpecialCaseList::Section::matchName(StringRef Name) const {
433 return Impl->SectionMatcher.matchAny(Query: Name);
434}
435
436const Matcher *
437SpecialCaseList::Section::SectionImpl::findMatcher(StringRef Prefix,
438 StringRef Category) const {
439 SectionEntries::const_iterator I = Entries.find(Key: Prefix);
440 if (I == Entries.end())
441 return nullptr;
442 StringMap<Matcher>::const_iterator II = I->second.find(Key: Category);
443 if (II == I->second.end())
444 return nullptr;
445
446 return &II->second;
447}
448
449unsigned SpecialCaseList::Section::getLastMatch(StringRef Prefix,
450 StringRef Query,
451 StringRef Category) const {
452 if (const Matcher *M = Impl->findMatcher(Prefix, Category))
453 return M->match(Query);
454 return 0;
455}
456
457bool SpecialCaseList::Section::hasPrefix(StringRef Prefix) const {
458 return Impl->Entries.contains(Key: Prefix);
459}
460
461} // namespace llvm
462