| 1 | //===-- SpecialCaseList.cpp - special case list for sanitizers ------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This is a utility class for instrumentation passes (like AddressSanitizer |
| 10 | // or ThreadSanitizer) to avoid instrumenting some functions or global |
| 11 | // variables, or to instrument some functions or global variables in a specific |
| 12 | // way, based on a user-supplied list. |
| 13 | // |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #include "llvm/Support/SpecialCaseList.h" |
| 17 | #include "llvm/ADT/STLExtras.h" |
| 18 | #include "llvm/Support/LineIterator.h" |
| 19 | #include "llvm/Support/MemoryBuffer.h" |
| 20 | #include "llvm/Support/VirtualFileSystem.h" |
| 21 | #include <stdio.h> |
| 22 | #include <string> |
| 23 | #include <system_error> |
| 24 | #include <utility> |
| 25 | |
| 26 | namespace llvm { |
| 27 | |
| 28 | Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber, |
| 29 | bool UseGlobs) { |
| 30 | if (Pattern.empty()) |
| 31 | return createStringError(EC: errc::invalid_argument, |
| 32 | S: Twine("Supplied " ) + |
| 33 | (UseGlobs ? "glob" : "regex" ) + " was blank" ); |
| 34 | |
| 35 | if (!UseGlobs) { |
| 36 | // Replace * with .* |
| 37 | auto Regexp = Pattern.str(); |
| 38 | for (size_t pos = 0; (pos = Regexp.find(c: '*', pos: pos)) != std::string::npos; |
| 39 | pos += strlen(s: ".*" )) { |
| 40 | Regexp.replace(pos: pos, n1: strlen(s: "*" ), s: ".*" ); |
| 41 | } |
| 42 | |
| 43 | Regexp = (Twine("^(" ) + StringRef(Regexp) + ")$" ).str(); |
| 44 | |
| 45 | // Check that the regexp is valid. |
| 46 | Regex CheckRE(Regexp); |
| 47 | std::string REError; |
| 48 | if (!CheckRE.isValid(Error&: REError)) |
| 49 | return createStringError(EC: errc::invalid_argument, S: REError); |
| 50 | |
| 51 | RegExes.emplace_back(args: std::make_pair( |
| 52 | x: std::make_unique<Regex>(args: std::move(CheckRE)), y&: LineNumber)); |
| 53 | |
| 54 | return Error::success(); |
| 55 | } |
| 56 | |
| 57 | auto Glob = std::make_unique<Matcher::Glob>(); |
| 58 | Glob->Name = Pattern.str(); |
| 59 | Glob->LineNo = LineNumber; |
| 60 | // We must be sure to use the string in `Glob` rather than the provided |
| 61 | // reference which could be destroyed before match() is called |
| 62 | if (auto Err = GlobPattern::create(Pat: Glob->Name, /*MaxSubPatterns=*/1024) |
| 63 | .moveInto(Value&: Glob->Pattern)) |
| 64 | return Err; |
| 65 | Globs.push_back(x: std::move(Glob)); |
| 66 | return Error::success(); |
| 67 | } |
| 68 | |
| 69 | unsigned SpecialCaseList::Matcher::match(StringRef Query) const { |
| 70 | for (const auto &Glob : reverse(C: Globs)) |
| 71 | if (Glob->Pattern.match(S: Query)) |
| 72 | return Glob->LineNo; |
| 73 | for (const auto &[Regex, LineNumber] : reverse(C: RegExes)) |
| 74 | if (Regex->match(String: Query)) |
| 75 | return LineNumber; |
| 76 | return 0; |
| 77 | } |
| 78 | |
| 79 | // TODO: Refactor this to return Expected<...> |
| 80 | std::unique_ptr<SpecialCaseList> |
| 81 | SpecialCaseList::create(const std::vector<std::string> &Paths, |
| 82 | llvm::vfs::FileSystem &FS, std::string &Error) { |
| 83 | std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList()); |
| 84 | if (SCL->createInternal(Paths, VFS&: FS, Error)) |
| 85 | return SCL; |
| 86 | return nullptr; |
| 87 | } |
| 88 | |
| 89 | std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB, |
| 90 | std::string &Error) { |
| 91 | std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList()); |
| 92 | if (SCL->createInternal(MB, Error)) |
| 93 | return SCL; |
| 94 | return nullptr; |
| 95 | } |
| 96 | |
| 97 | std::unique_ptr<SpecialCaseList> |
| 98 | SpecialCaseList::createOrDie(const std::vector<std::string> &Paths, |
| 99 | llvm::vfs::FileSystem &FS) { |
| 100 | std::string Error; |
| 101 | if (auto SCL = create(Paths, FS, Error)) |
| 102 | return SCL; |
| 103 | report_fatal_error(reason: Twine(Error)); |
| 104 | } |
| 105 | |
| 106 | bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths, |
| 107 | vfs::FileSystem &VFS, std::string &Error) { |
| 108 | for (size_t i = 0; i < Paths.size(); ++i) { |
| 109 | const auto &Path = Paths[i]; |
| 110 | ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = |
| 111 | VFS.getBufferForFile(Name: Path); |
| 112 | if (std::error_code EC = FileOrErr.getError()) { |
| 113 | Error = (Twine("can't open file '" ) + Path + "': " + EC.message()).str(); |
| 114 | return false; |
| 115 | } |
| 116 | std::string ParseError; |
| 117 | if (!parse(FileIdx: i, MB: FileOrErr.get().get(), Error&: ParseError)) { |
| 118 | Error = (Twine("error parsing file '" ) + Path + "': " + ParseError).str(); |
| 119 | return false; |
| 120 | } |
| 121 | } |
| 122 | return true; |
| 123 | } |
| 124 | |
| 125 | bool SpecialCaseList::createInternal(const MemoryBuffer *MB, |
| 126 | std::string &Error) { |
| 127 | if (!parse(FileIdx: 0, MB, Error)) |
| 128 | return false; |
| 129 | return true; |
| 130 | } |
| 131 | |
| 132 | Expected<SpecialCaseList::Section *> |
| 133 | SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo, |
| 134 | unsigned LineNo, bool UseGlobs) { |
| 135 | Sections.emplace_back(args&: SectionStr, args&: FileNo); |
| 136 | auto &Section = Sections.back(); |
| 137 | |
| 138 | if (auto Err = Section.SectionMatcher->insert(Pattern: SectionStr, LineNumber: LineNo, UseGlobs)) { |
| 139 | return createStringError(EC: errc::invalid_argument, |
| 140 | S: "malformed section at line " + Twine(LineNo) + |
| 141 | ": '" + SectionStr + |
| 142 | "': " + toString(E: std::move(Err))); |
| 143 | } |
| 144 | |
| 145 | return &Section; |
| 146 | } |
| 147 | |
| 148 | bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, |
| 149 | std::string &Error) { |
| 150 | Section *CurrentSection; |
| 151 | if (auto Err = addSection(SectionStr: "*" , FileNo: FileIdx, LineNo: 1).moveInto(Value&: CurrentSection)) { |
| 152 | Error = toString(E: std::move(Err)); |
| 153 | return false; |
| 154 | } |
| 155 | |
| 156 | // In https://reviews.llvm.org/D154014 we added glob support and planned to |
| 157 | // remove regex support in patterns. We temporarily support the original |
| 158 | // behavior using regexes if "#!special-case-list-v1" is the first line of the |
| 159 | // file. For more details, see |
| 160 | // https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666 |
| 161 | bool UseGlobs = !MB->getBuffer().starts_with(Prefix: "#!special-case-list-v1\n" ); |
| 162 | |
| 163 | for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); |
| 164 | !LineIt.is_at_eof(); LineIt++) { |
| 165 | unsigned LineNo = LineIt.line_number(); |
| 166 | StringRef Line = LineIt->trim(); |
| 167 | if (Line.empty()) |
| 168 | continue; |
| 169 | |
| 170 | // Save section names |
| 171 | if (Line.starts_with(Prefix: "[" )) { |
| 172 | if (!Line.ends_with(Suffix: "]" )) { |
| 173 | Error = |
| 174 | ("malformed section header on line " + Twine(LineNo) + ": " + Line) |
| 175 | .str(); |
| 176 | return false; |
| 177 | } |
| 178 | |
| 179 | if (auto Err = addSection(SectionStr: Line.drop_front().drop_back(), FileNo: FileIdx, LineNo, |
| 180 | UseGlobs) |
| 181 | .moveInto(Value&: CurrentSection)) { |
| 182 | Error = toString(E: std::move(Err)); |
| 183 | return false; |
| 184 | } |
| 185 | continue; |
| 186 | } |
| 187 | |
| 188 | // Get our prefix and unparsed glob. |
| 189 | auto [Prefix, Postfix] = Line.split(Separator: ":" ); |
| 190 | if (Postfix.empty()) { |
| 191 | // Missing ':' in the line. |
| 192 | Error = ("malformed line " + Twine(LineNo) + ": '" + Line + "'" ).str(); |
| 193 | return false; |
| 194 | } |
| 195 | |
| 196 | auto [Pattern, Category] = Postfix.split(Separator: "=" ); |
| 197 | auto &Entry = CurrentSection->Entries[Prefix][Category]; |
| 198 | if (auto Err = Entry.insert(Pattern, LineNumber: LineNo, UseGlobs)) { |
| 199 | Error = |
| 200 | (Twine("malformed " ) + (UseGlobs ? "glob" : "regex" ) + " in line " + |
| 201 | Twine(LineNo) + ": '" + Pattern + "': " + toString(E: std::move(Err))) |
| 202 | .str(); |
| 203 | return false; |
| 204 | } |
| 205 | } |
| 206 | return true; |
| 207 | } |
| 208 | |
| 209 | SpecialCaseList::~SpecialCaseList() = default; |
| 210 | |
| 211 | bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix, |
| 212 | StringRef Query, StringRef Category) const { |
| 213 | auto [FileIdx, LineNo] = inSectionBlame(Section, Prefix, Query, Category); |
| 214 | return LineNo; |
| 215 | } |
| 216 | |
| 217 | std::pair<unsigned, unsigned> |
| 218 | SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix, |
| 219 | StringRef Query, StringRef Category) const { |
| 220 | for (const auto &S : reverse(C: Sections)) { |
| 221 | if (S.SectionMatcher->match(Query: Section)) { |
| 222 | unsigned Blame = inSectionBlame(Entries: S.Entries, Prefix, Query, Category); |
| 223 | if (Blame) |
| 224 | return {S.FileIdx, Blame}; |
| 225 | } |
| 226 | } |
| 227 | return NotFound; |
| 228 | } |
| 229 | |
| 230 | unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries, |
| 231 | StringRef Prefix, StringRef Query, |
| 232 | StringRef Category) const { |
| 233 | SectionEntries::const_iterator I = Entries.find(Key: Prefix); |
| 234 | if (I == Entries.end()) |
| 235 | return 0; |
| 236 | StringMap<Matcher>::const_iterator II = I->second.find(Key: Category); |
| 237 | if (II == I->second.end()) |
| 238 | return 0; |
| 239 | |
| 240 | return II->getValue().match(Query); |
| 241 | } |
| 242 | |
| 243 | } // namespace llvm |
| 244 | |