| 1 | //===----------------- ItaniumManglingCanonicalizer.cpp -------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "llvm/ProfileData/ItaniumManglingCanonicalizer.h" |
| 10 | #include "llvm/ADT/DenseMap.h" |
| 11 | #include "llvm/ADT/FoldingSet.h" |
| 12 | #include "llvm/ADT/StringRef.h" |
| 13 | #include "llvm/Demangle/ItaniumDemangle.h" |
| 14 | #include "llvm/Support/Allocator.h" |
| 15 | |
| 16 | using namespace llvm; |
| 17 | using llvm::itanium_demangle::ForwardTemplateReference; |
| 18 | using llvm::itanium_demangle::Node; |
| 19 | using llvm::itanium_demangle::NodeKind; |
| 20 | |
| 21 | namespace { |
| 22 | struct FoldingSetNodeIDBuilder { |
| 23 | llvm::FoldingSetNodeID &ID; |
| 24 | void operator()(const Node *P) { ID.AddPointer(Ptr: P); } |
| 25 | void operator()(std::string_view Str) { |
| 26 | if (Str.empty()) |
| 27 | ID.AddString(String: {}); |
| 28 | else |
| 29 | ID.AddString(String: llvm::StringRef(&*Str.begin(), Str.size())); |
| 30 | } |
| 31 | template <typename T> |
| 32 | std::enable_if_t<std::is_integral_v<T> || std::is_enum_v<T>> operator()(T V) { |
| 33 | ID.AddInteger(I: (unsigned long long)V); |
| 34 | } |
| 35 | void operator()(itanium_demangle::NodeArray A) { |
| 36 | ID.AddInteger(I: A.size()); |
| 37 | for (const Node *N : A) |
| 38 | (*this)(N); |
| 39 | } |
| 40 | }; |
| 41 | |
| 42 | template<typename ...T> |
| 43 | void profileCtor(llvm::FoldingSetNodeID &ID, Node::Kind K, T ...V) { |
| 44 | FoldingSetNodeIDBuilder Builder = {.ID: ID}; |
| 45 | Builder(K); |
| 46 | int VisitInOrder[] = { |
| 47 | (Builder(V), 0) ..., |
| 48 | 0 // Avoid empty array if there are no arguments. |
| 49 | }; |
| 50 | (void)VisitInOrder; |
| 51 | } |
| 52 | |
| 53 | // FIXME: Convert this to a generic lambda when possible. |
| 54 | template<typename NodeT> struct ProfileSpecificNode { |
| 55 | FoldingSetNodeID &ID; |
| 56 | template<typename ...T> void operator()(T ...V) { |
| 57 | profileCtor(ID, NodeKind<NodeT>::Kind, V...); |
| 58 | } |
| 59 | }; |
| 60 | |
| 61 | struct ProfileNode { |
| 62 | FoldingSetNodeID &ID; |
| 63 | template<typename NodeT> void operator()(const NodeT *N) { |
| 64 | N->match(ProfileSpecificNode<NodeT>{ID}); |
| 65 | } |
| 66 | }; |
| 67 | |
| 68 | template<> void ProfileNode::operator()(const ForwardTemplateReference *N) { |
| 69 | llvm_unreachable("should never canonicalize a ForwardTemplateReference" ); |
| 70 | } |
| 71 | |
| 72 | void profileNode(llvm::FoldingSetNodeID &ID, const Node *N) { |
| 73 | N->visit(F: ProfileNode{.ID: ID}); |
| 74 | } |
| 75 | |
| 76 | class FoldingNodeAllocator { |
| 77 | class alignas(alignof(Node *)) : public llvm::FoldingSetNode { |
| 78 | public: |
| 79 | // 'Node' in this context names the injected-class-name of the base class. |
| 80 | itanium_demangle::Node *() { |
| 81 | return reinterpret_cast<itanium_demangle::Node *>(this + 1); |
| 82 | } |
| 83 | void (llvm::FoldingSetNodeID &ID) { profileNode(ID, N: getNode()); } |
| 84 | }; |
| 85 | |
| 86 | BumpPtrAllocator RawAlloc; |
| 87 | llvm::FoldingSet<NodeHeader> Nodes; |
| 88 | |
| 89 | public: |
| 90 | void reset() {} |
| 91 | |
| 92 | template <typename T, typename... Args> |
| 93 | std::pair<Node *, bool> getOrCreateNode(bool CreateNewNodes, Args &&... As) { |
| 94 | // FIXME: Don't canonicalize forward template references for now, because |
| 95 | // they contain state (the resolved template node) that's not known at their |
| 96 | // point of creation. |
| 97 | if (std::is_same<T, ForwardTemplateReference>::value) { |
| 98 | // Note that we don't use if-constexpr here and so we must still write |
| 99 | // this code in a generic form. |
| 100 | return {new (RawAlloc.Allocate(Size: sizeof(T), Alignment: alignof(T))) |
| 101 | T(std::forward<Args>(As)...), |
| 102 | true}; |
| 103 | } |
| 104 | |
| 105 | llvm::FoldingSetNodeID ID; |
| 106 | profileCtor(ID, NodeKind<T>::Kind, As...); |
| 107 | |
| 108 | void *InsertPos; |
| 109 | if (NodeHeader *Existing = Nodes.FindNodeOrInsertPos(ID, InsertPos)) |
| 110 | return {static_cast<T*>(Existing->getNode()), false}; |
| 111 | |
| 112 | if (!CreateNewNodes) |
| 113 | return {nullptr, true}; |
| 114 | |
| 115 | static_assert(alignof(T) <= alignof(NodeHeader), |
| 116 | "underaligned node header for specific node kind" ); |
| 117 | void *Storage = |
| 118 | RawAlloc.Allocate(Size: sizeof(NodeHeader) + sizeof(T), Alignment: alignof(NodeHeader)); |
| 119 | NodeHeader *New = new (Storage) NodeHeader; |
| 120 | T *Result = new (New->getNode()) T(std::forward<Args>(As)...); |
| 121 | Nodes.InsertNode(N: New, InsertPos); |
| 122 | return {Result, true}; |
| 123 | } |
| 124 | |
| 125 | template<typename T, typename... Args> |
| 126 | Node *makeNode(Args &&...As) { |
| 127 | return getOrCreateNode<T>(true, std::forward<Args>(As)...).first; |
| 128 | } |
| 129 | |
| 130 | void *allocateNodeArray(size_t sz) { |
| 131 | return RawAlloc.Allocate(Size: sizeof(Node *) * sz, Alignment: alignof(Node *)); |
| 132 | } |
| 133 | }; |
| 134 | |
| 135 | class CanonicalizerAllocator : public FoldingNodeAllocator { |
| 136 | Node *MostRecentlyCreated = nullptr; |
| 137 | Node *TrackedNode = nullptr; |
| 138 | bool TrackedNodeIsUsed = false; |
| 139 | bool CreateNewNodes = true; |
| 140 | llvm::SmallDenseMap<Node*, Node*, 32> Remappings; |
| 141 | |
| 142 | template<typename T, typename ...Args> Node *makeNodeSimple(Args &&...As) { |
| 143 | std::pair<Node *, bool> Result = |
| 144 | getOrCreateNode<T>(CreateNewNodes, std::forward<Args>(As)...); |
| 145 | if (Result.second) { |
| 146 | // Node is new. Make a note of that. |
| 147 | MostRecentlyCreated = Result.first; |
| 148 | } else if (Result.first) { |
| 149 | // Node is pre-existing; check if it's in our remapping table. |
| 150 | if (auto *N = Remappings.lookup(Val: Result.first)) { |
| 151 | Result.first = N; |
| 152 | assert(!Remappings.contains(Result.first) && |
| 153 | "should never need multiple remap steps" ); |
| 154 | } |
| 155 | if (Result.first == TrackedNode) |
| 156 | TrackedNodeIsUsed = true; |
| 157 | } |
| 158 | return Result.first; |
| 159 | } |
| 160 | |
| 161 | /// Helper to allow makeNode to be partially-specialized on T. |
| 162 | template<typename T> struct MakeNodeImpl { |
| 163 | CanonicalizerAllocator &Self; |
| 164 | template<typename ...Args> Node *make(Args &&...As) { |
| 165 | return Self.makeNodeSimple<T>(std::forward<Args>(As)...); |
| 166 | } |
| 167 | }; |
| 168 | |
| 169 | public: |
| 170 | template<typename T, typename ...Args> Node *makeNode(Args &&...As) { |
| 171 | return MakeNodeImpl<T>{*this}.make(std::forward<Args>(As)...); |
| 172 | } |
| 173 | |
| 174 | void reset() { MostRecentlyCreated = nullptr; } |
| 175 | |
| 176 | void setCreateNewNodes(bool CNN) { CreateNewNodes = CNN; } |
| 177 | |
| 178 | void addRemapping(Node *A, Node *B) { |
| 179 | // Note, we don't need to check whether B is also remapped, because if it |
| 180 | // was we would have already remapped it when building it. |
| 181 | Remappings.insert(KV: std::make_pair(x&: A, y&: B)); |
| 182 | } |
| 183 | |
| 184 | bool isMostRecentlyCreated(Node *N) const { return MostRecentlyCreated == N; } |
| 185 | |
| 186 | void trackUsesOf(Node *N) { |
| 187 | TrackedNode = N; |
| 188 | TrackedNodeIsUsed = false; |
| 189 | } |
| 190 | bool trackedNodeIsUsed() const { return TrackedNodeIsUsed; } |
| 191 | }; |
| 192 | |
| 193 | // FIXME: Also expand built-in substitutions? |
| 194 | |
| 195 | using CanonicalizingDemangler = |
| 196 | itanium_demangle::ManglingParser<CanonicalizerAllocator>; |
| 197 | } // namespace |
| 198 | |
| 199 | struct ItaniumManglingCanonicalizer::Impl { |
| 200 | CanonicalizingDemangler Demangler = {nullptr, nullptr}; |
| 201 | }; |
| 202 | |
| 203 | ItaniumManglingCanonicalizer::ItaniumManglingCanonicalizer() : P(new Impl) {} |
| 204 | ItaniumManglingCanonicalizer::~ItaniumManglingCanonicalizer() { delete P; } |
| 205 | |
| 206 | ItaniumManglingCanonicalizer::EquivalenceError |
| 207 | ItaniumManglingCanonicalizer::addEquivalence(FragmentKind Kind, StringRef First, |
| 208 | StringRef Second) { |
| 209 | auto &Alloc = P->Demangler.ASTAllocator; |
| 210 | Alloc.setCreateNewNodes(true); |
| 211 | |
| 212 | auto Parse = [&](StringRef Str) { |
| 213 | P->Demangler.reset(First_: Str.begin(), Last_: Str.end()); |
| 214 | Node *N = nullptr; |
| 215 | switch (Kind) { |
| 216 | // A <name>, with minor extensions to allow arbitrary namespace and |
| 217 | // template names that can't easily be written as <name>s. |
| 218 | case FragmentKind::Name: |
| 219 | // Very special case: allow "St" as a shorthand for "3std". It's not |
| 220 | // valid as a <name> mangling, but is nonetheless the most natural |
| 221 | // way to name the 'std' namespace. |
| 222 | if (Str.size() == 2 && P->Demangler.consumeIf(S: "St" )) |
| 223 | N = P->Demangler.make<itanium_demangle::NameType>(args: "std" ); |
| 224 | // We permit substitutions to name templates without their template |
| 225 | // arguments. This mostly just falls out, as almost all template names |
| 226 | // are valid as <name>s, but we also want to parse <substitution>s as |
| 227 | // <name>s, even though they're not. |
| 228 | else if (Str.starts_with(Prefix: "S" )) |
| 229 | // Parse the substitution and optional following template arguments. |
| 230 | N = P->Demangler.parseType(); |
| 231 | else |
| 232 | N = P->Demangler.parseName(); |
| 233 | break; |
| 234 | |
| 235 | // A <type>. |
| 236 | case FragmentKind::Type: |
| 237 | N = P->Demangler.parseType(); |
| 238 | break; |
| 239 | |
| 240 | // An <encoding>. |
| 241 | case FragmentKind::Encoding: |
| 242 | N = P->Demangler.parseEncoding(); |
| 243 | break; |
| 244 | } |
| 245 | |
| 246 | // If we have trailing junk, the mangling is invalid. |
| 247 | if (P->Demangler.numLeft() != 0) |
| 248 | N = nullptr; |
| 249 | |
| 250 | // If any node was created after N, then we cannot safely remap it because |
| 251 | // it might already be in use by another node. |
| 252 | return std::make_pair(x&: N, y: Alloc.isMostRecentlyCreated(N)); |
| 253 | }; |
| 254 | |
| 255 | Node *FirstNode, *SecondNode; |
| 256 | bool FirstIsNew, SecondIsNew; |
| 257 | |
| 258 | std::tie(args&: FirstNode, args&: FirstIsNew) = Parse(First); |
| 259 | if (!FirstNode) |
| 260 | return EquivalenceError::InvalidFirstMangling; |
| 261 | |
| 262 | Alloc.trackUsesOf(N: FirstNode); |
| 263 | std::tie(args&: SecondNode, args&: SecondIsNew) = Parse(Second); |
| 264 | if (!SecondNode) |
| 265 | return EquivalenceError::InvalidSecondMangling; |
| 266 | |
| 267 | // If they're already equivalent, there's nothing to do. |
| 268 | if (FirstNode == SecondNode) |
| 269 | return EquivalenceError::Success; |
| 270 | |
| 271 | if (FirstIsNew && !Alloc.trackedNodeIsUsed()) |
| 272 | Alloc.addRemapping(A: FirstNode, B: SecondNode); |
| 273 | else if (SecondIsNew) |
| 274 | Alloc.addRemapping(A: SecondNode, B: FirstNode); |
| 275 | else |
| 276 | return EquivalenceError::ManglingAlreadyUsed; |
| 277 | |
| 278 | return EquivalenceError::Success; |
| 279 | } |
| 280 | |
| 281 | static ItaniumManglingCanonicalizer::Key |
| 282 | parseMaybeMangledName(CanonicalizingDemangler &Demangler, StringRef Mangling, |
| 283 | bool CreateNewNodes) { |
| 284 | Demangler.ASTAllocator.setCreateNewNodes(CreateNewNodes); |
| 285 | Demangler.reset(First_: Mangling.begin(), Last_: Mangling.end()); |
| 286 | // Attempt demangling only for names that look like C++ mangled names. |
| 287 | // Otherwise, treat them as extern "C" names. We permit the latter to |
| 288 | // be remapped by (eg) |
| 289 | // encoding 6memcpy 7memmove |
| 290 | // consistent with how they are encoded as local-names inside a C++ mangling. |
| 291 | Node *N; |
| 292 | if (Mangling.starts_with(Prefix: "_Z" ) || Mangling.starts_with(Prefix: "__Z" ) || |
| 293 | Mangling.starts_with(Prefix: "___Z" ) || Mangling.starts_with(Prefix: "____Z" )) |
| 294 | N = Demangler.parse(); |
| 295 | else |
| 296 | N = Demangler.make<itanium_demangle::NameType>( |
| 297 | args: std::string_view(Mangling.data(), Mangling.size())); |
| 298 | return reinterpret_cast<ItaniumManglingCanonicalizer::Key>(N); |
| 299 | } |
| 300 | |
| 301 | ItaniumManglingCanonicalizer::Key |
| 302 | ItaniumManglingCanonicalizer::canonicalize(StringRef Mangling) { |
| 303 | return parseMaybeMangledName(Demangler&: P->Demangler, Mangling, CreateNewNodes: true); |
| 304 | } |
| 305 | |
| 306 | ItaniumManglingCanonicalizer::Key |
| 307 | ItaniumManglingCanonicalizer::lookup(StringRef Mangling) { |
| 308 | return parseMaybeMangledName(Demangler&: P->Demangler, Mangling, CreateNewNodes: false); |
| 309 | } |
| 310 | |