1//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is a utility class for instrumentation passes (like AddressSanitizer
10// or ThreadSanitizer) to avoid instrumenting some functions or global
11// variables, or to instrument some functions or global variables in a specific
12// way, based on a user-supplied list.
13//
14//===----------------------------------------------------------------------===//
15
16#include "llvm/Support/SpecialCaseList.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/Support/LineIterator.h"
19#include "llvm/Support/MemoryBuffer.h"
20#include "llvm/Support/VirtualFileSystem.h"
21#include <stdio.h>
22#include <string>
23#include <system_error>
24#include <utility>
25
26namespace llvm {
27
28Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
29 bool UseGlobs) {
30 if (Pattern.empty())
31 return createStringError(EC: errc::invalid_argument,
32 S: Twine("Supplied ") +
33 (UseGlobs ? "glob" : "regex") + " was blank");
34
35 if (!UseGlobs) {
36 // Replace * with .*
37 auto Regexp = Pattern.str();
38 for (size_t pos = 0; (pos = Regexp.find(c: '*', pos: pos)) != std::string::npos;
39 pos += strlen(s: ".*")) {
40 Regexp.replace(pos: pos, n1: strlen(s: "*"), s: ".*");
41 }
42
43 Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
44
45 // Check that the regexp is valid.
46 Regex CheckRE(Regexp);
47 std::string REError;
48 if (!CheckRE.isValid(Error&: REError))
49 return createStringError(EC: errc::invalid_argument, S: REError);
50
51 RegExes.emplace_back(args: std::make_pair(
52 x: std::make_unique<Regex>(args: std::move(CheckRE)), y&: LineNumber));
53
54 return Error::success();
55 }
56
57 auto Glob = std::make_unique<Matcher::Glob>();
58 Glob->Name = Pattern.str();
59 Glob->LineNo = LineNumber;
60 // We must be sure to use the string in `Glob` rather than the provided
61 // reference which could be destroyed before match() is called
62 if (auto Err = GlobPattern::create(Pat: Glob->Name, /*MaxSubPatterns=*/1024)
63 .moveInto(Value&: Glob->Pattern))
64 return Err;
65 Globs.push_back(x: std::move(Glob));
66 return Error::success();
67}
68
69unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
70 for (const auto &Glob : reverse(C: Globs))
71 if (Glob->Pattern.match(S: Query))
72 return Glob->LineNo;
73 for (const auto &[Regex, LineNumber] : reverse(C: RegExes))
74 if (Regex->match(String: Query))
75 return LineNumber;
76 return 0;
77}
78
79// TODO: Refactor this to return Expected<...>
80std::unique_ptr<SpecialCaseList>
81SpecialCaseList::create(const std::vector<std::string> &Paths,
82 llvm::vfs::FileSystem &FS, std::string &Error) {
83 std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
84 if (SCL->createInternal(Paths, VFS&: FS, Error))
85 return SCL;
86 return nullptr;
87}
88
89std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB,
90 std::string &Error) {
91 std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
92 if (SCL->createInternal(MB, Error))
93 return SCL;
94 return nullptr;
95}
96
97std::unique_ptr<SpecialCaseList>
98SpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
99 llvm::vfs::FileSystem &FS) {
100 std::string Error;
101 if (auto SCL = create(Paths, FS, Error))
102 return SCL;
103 report_fatal_error(reason: Twine(Error));
104}
105
106bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
107 vfs::FileSystem &VFS, std::string &Error) {
108 for (size_t i = 0; i < Paths.size(); ++i) {
109 const auto &Path = Paths[i];
110 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
111 VFS.getBufferForFile(Name: Path);
112 if (std::error_code EC = FileOrErr.getError()) {
113 Error = (Twine("can't open file '") + Path + "': " + EC.message()).str();
114 return false;
115 }
116 std::string ParseError;
117 if (!parse(FileIdx: i, MB: FileOrErr.get().get(), Error&: ParseError)) {
118 Error = (Twine("error parsing file '") + Path + "': " + ParseError).str();
119 return false;
120 }
121 }
122 return true;
123}
124
125bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
126 std::string &Error) {
127 if (!parse(FileIdx: 0, MB, Error))
128 return false;
129 return true;
130}
131
132Expected<SpecialCaseList::Section *>
133SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
134 unsigned LineNo, bool UseGlobs) {
135 Sections.emplace_back(args&: SectionStr, args&: FileNo);
136 auto &Section = Sections.back();
137
138 if (auto Err = Section.SectionMatcher->insert(Pattern: SectionStr, LineNumber: LineNo, UseGlobs)) {
139 return createStringError(EC: errc::invalid_argument,
140 S: "malformed section at line " + Twine(LineNo) +
141 ": '" + SectionStr +
142 "': " + toString(E: std::move(Err)));
143 }
144
145 return &Section;
146}
147
148bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
149 std::string &Error) {
150 Section *CurrentSection;
151 if (auto Err = addSection(SectionStr: "*", FileNo: FileIdx, LineNo: 1).moveInto(Value&: CurrentSection)) {
152 Error = toString(E: std::move(Err));
153 return false;
154 }
155
156 // In https://reviews.llvm.org/D154014 we added glob support and planned to
157 // remove regex support in patterns. We temporarily support the original
158 // behavior using regexes if "#!special-case-list-v1" is the first line of the
159 // file. For more details, see
160 // https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666
161 bool UseGlobs = !MB->getBuffer().starts_with(Prefix: "#!special-case-list-v1\n");
162
163 for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
164 !LineIt.is_at_eof(); LineIt++) {
165 unsigned LineNo = LineIt.line_number();
166 StringRef Line = LineIt->trim();
167 if (Line.empty())
168 continue;
169
170 // Save section names
171 if (Line.starts_with(Prefix: "[")) {
172 if (!Line.ends_with(Suffix: "]")) {
173 Error =
174 ("malformed section header on line " + Twine(LineNo) + ": " + Line)
175 .str();
176 return false;
177 }
178
179 if (auto Err = addSection(SectionStr: Line.drop_front().drop_back(), FileNo: FileIdx, LineNo,
180 UseGlobs)
181 .moveInto(Value&: CurrentSection)) {
182 Error = toString(E: std::move(Err));
183 return false;
184 }
185 continue;
186 }
187
188 // Get our prefix and unparsed glob.
189 auto [Prefix, Postfix] = Line.split(Separator: ":");
190 if (Postfix.empty()) {
191 // Missing ':' in the line.
192 Error = ("malformed line " + Twine(LineNo) + ": '" + Line + "'").str();
193 return false;
194 }
195
196 auto [Pattern, Category] = Postfix.split(Separator: "=");
197 auto &Entry = CurrentSection->Entries[Prefix][Category];
198 if (auto Err = Entry.insert(Pattern, LineNumber: LineNo, UseGlobs)) {
199 Error =
200 (Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
201 Twine(LineNo) + ": '" + Pattern + "': " + toString(E: std::move(Err)))
202 .str();
203 return false;
204 }
205 }
206 return true;
207}
208
209SpecialCaseList::~SpecialCaseList() = default;
210
211bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix,
212 StringRef Query, StringRef Category) const {
213 auto [FileIdx, LineNo] = inSectionBlame(Section, Prefix, Query, Category);
214 return LineNo;
215}
216
217std::pair<unsigned, unsigned>
218SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
219 StringRef Query, StringRef Category) const {
220 for (const auto &S : reverse(C: Sections)) {
221 if (S.SectionMatcher->match(Query: Section)) {
222 unsigned Blame = inSectionBlame(Entries: S.Entries, Prefix, Query, Category);
223 if (Blame)
224 return {S.FileIdx, Blame};
225 }
226 }
227 return NotFound;
228}
229
230unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries,
231 StringRef Prefix, StringRef Query,
232 StringRef Category) const {
233 SectionEntries::const_iterator I = Entries.find(Key: Prefix);
234 if (I == Entries.end())
235 return 0;
236 StringMap<Matcher>::const_iterator II = I->second.find(Key: Category);
237 if (II == I->second.end())
238 return 0;
239
240 return II->getValue().match(Query);
241}
242
243} // namespace llvm
244