1 | //===----------------- ItaniumManglingCanonicalizer.cpp -------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/ProfileData/ItaniumManglingCanonicalizer.h" |
10 | #include "llvm/ADT/DenseMap.h" |
11 | #include "llvm/ADT/FoldingSet.h" |
12 | #include "llvm/ADT/StringRef.h" |
13 | #include "llvm/Demangle/ItaniumDemangle.h" |
14 | #include "llvm/Support/Allocator.h" |
15 | |
16 | using namespace llvm; |
17 | using llvm::itanium_demangle::ForwardTemplateReference; |
18 | using llvm::itanium_demangle::Node; |
19 | using llvm::itanium_demangle::NodeKind; |
20 | |
21 | namespace { |
22 | struct FoldingSetNodeIDBuilder { |
23 | llvm::FoldingSetNodeID &ID; |
24 | void operator()(const Node *P) { ID.AddPointer(Ptr: P); } |
25 | void operator()(std::string_view Str) { |
26 | if (Str.empty()) |
27 | ID.AddString(String: {}); |
28 | else |
29 | ID.AddString(String: llvm::StringRef(&*Str.begin(), Str.size())); |
30 | } |
31 | template <typename T> |
32 | std::enable_if_t<std::is_integral_v<T> || std::is_enum_v<T>> operator()(T V) { |
33 | ID.AddInteger(I: (unsigned long long)V); |
34 | } |
35 | void operator()(itanium_demangle::NodeArray A) { |
36 | ID.AddInteger(I: A.size()); |
37 | for (const Node *N : A) |
38 | (*this)(N); |
39 | } |
40 | }; |
41 | |
42 | template<typename ...T> |
43 | void profileCtor(llvm::FoldingSetNodeID &ID, Node::Kind K, T ...V) { |
44 | FoldingSetNodeIDBuilder Builder = {.ID: ID}; |
45 | Builder(K); |
46 | int VisitInOrder[] = { |
47 | (Builder(V), 0) ..., |
48 | 0 // Avoid empty array if there are no arguments. |
49 | }; |
50 | (void)VisitInOrder; |
51 | } |
52 | |
53 | // FIXME: Convert this to a generic lambda when possible. |
54 | template<typename NodeT> struct ProfileSpecificNode { |
55 | FoldingSetNodeID &ID; |
56 | template<typename ...T> void operator()(T ...V) { |
57 | profileCtor(ID, NodeKind<NodeT>::Kind, V...); |
58 | } |
59 | }; |
60 | |
61 | struct ProfileNode { |
62 | FoldingSetNodeID &ID; |
63 | template<typename NodeT> void operator()(const NodeT *N) { |
64 | N->match(ProfileSpecificNode<NodeT>{ID}); |
65 | } |
66 | }; |
67 | |
68 | template<> void ProfileNode::operator()(const ForwardTemplateReference *N) { |
69 | llvm_unreachable("should never canonicalize a ForwardTemplateReference" ); |
70 | } |
71 | |
72 | void profileNode(llvm::FoldingSetNodeID &ID, const Node *N) { |
73 | N->visit(F: ProfileNode{.ID: ID}); |
74 | } |
75 | |
76 | class FoldingNodeAllocator { |
77 | class alignas(alignof(Node *)) : public llvm::FoldingSetNode { |
78 | public: |
79 | // 'Node' in this context names the injected-class-name of the base class. |
80 | itanium_demangle::Node *() { |
81 | return reinterpret_cast<itanium_demangle::Node *>(this + 1); |
82 | } |
83 | void (llvm::FoldingSetNodeID &ID) { profileNode(ID, N: getNode()); } |
84 | }; |
85 | |
86 | BumpPtrAllocator RawAlloc; |
87 | llvm::FoldingSet<NodeHeader> Nodes; |
88 | |
89 | public: |
90 | void reset() {} |
91 | |
92 | template <typename T, typename... Args> |
93 | std::pair<Node *, bool> getOrCreateNode(bool CreateNewNodes, Args &&... As) { |
94 | // FIXME: Don't canonicalize forward template references for now, because |
95 | // they contain state (the resolved template node) that's not known at their |
96 | // point of creation. |
97 | if (std::is_same<T, ForwardTemplateReference>::value) { |
98 | // Note that we don't use if-constexpr here and so we must still write |
99 | // this code in a generic form. |
100 | return {new (RawAlloc.Allocate(Size: sizeof(T), Alignment: alignof(T))) |
101 | T(std::forward<Args>(As)...), |
102 | true}; |
103 | } |
104 | |
105 | llvm::FoldingSetNodeID ID; |
106 | profileCtor(ID, NodeKind<T>::Kind, As...); |
107 | |
108 | void *InsertPos; |
109 | if (NodeHeader *Existing = Nodes.FindNodeOrInsertPos(ID, InsertPos)) |
110 | return {static_cast<T*>(Existing->getNode()), false}; |
111 | |
112 | if (!CreateNewNodes) |
113 | return {nullptr, true}; |
114 | |
115 | static_assert(alignof(T) <= alignof(NodeHeader), |
116 | "underaligned node header for specific node kind" ); |
117 | void *Storage = |
118 | RawAlloc.Allocate(Size: sizeof(NodeHeader) + sizeof(T), Alignment: alignof(NodeHeader)); |
119 | NodeHeader *New = new (Storage) NodeHeader; |
120 | T *Result = new (New->getNode()) T(std::forward<Args>(As)...); |
121 | Nodes.InsertNode(N: New, InsertPos); |
122 | return {Result, true}; |
123 | } |
124 | |
125 | template<typename T, typename... Args> |
126 | Node *makeNode(Args &&...As) { |
127 | return getOrCreateNode<T>(true, std::forward<Args>(As)...).first; |
128 | } |
129 | |
130 | void *allocateNodeArray(size_t sz) { |
131 | return RawAlloc.Allocate(Size: sizeof(Node *) * sz, Alignment: alignof(Node *)); |
132 | } |
133 | }; |
134 | |
135 | class CanonicalizerAllocator : public FoldingNodeAllocator { |
136 | Node *MostRecentlyCreated = nullptr; |
137 | Node *TrackedNode = nullptr; |
138 | bool TrackedNodeIsUsed = false; |
139 | bool CreateNewNodes = true; |
140 | llvm::SmallDenseMap<Node*, Node*, 32> Remappings; |
141 | |
142 | template<typename T, typename ...Args> Node *makeNodeSimple(Args &&...As) { |
143 | std::pair<Node *, bool> Result = |
144 | getOrCreateNode<T>(CreateNewNodes, std::forward<Args>(As)...); |
145 | if (Result.second) { |
146 | // Node is new. Make a note of that. |
147 | MostRecentlyCreated = Result.first; |
148 | } else if (Result.first) { |
149 | // Node is pre-existing; check if it's in our remapping table. |
150 | if (auto *N = Remappings.lookup(Val: Result.first)) { |
151 | Result.first = N; |
152 | assert(!Remappings.contains(Result.first) && |
153 | "should never need multiple remap steps" ); |
154 | } |
155 | if (Result.first == TrackedNode) |
156 | TrackedNodeIsUsed = true; |
157 | } |
158 | return Result.first; |
159 | } |
160 | |
161 | /// Helper to allow makeNode to be partially-specialized on T. |
162 | template<typename T> struct MakeNodeImpl { |
163 | CanonicalizerAllocator &Self; |
164 | template<typename ...Args> Node *make(Args &&...As) { |
165 | return Self.makeNodeSimple<T>(std::forward<Args>(As)...); |
166 | } |
167 | }; |
168 | |
169 | public: |
170 | template<typename T, typename ...Args> Node *makeNode(Args &&...As) { |
171 | return MakeNodeImpl<T>{*this}.make(std::forward<Args>(As)...); |
172 | } |
173 | |
174 | void reset() { MostRecentlyCreated = nullptr; } |
175 | |
176 | void setCreateNewNodes(bool CNN) { CreateNewNodes = CNN; } |
177 | |
178 | void addRemapping(Node *A, Node *B) { |
179 | // Note, we don't need to check whether B is also remapped, because if it |
180 | // was we would have already remapped it when building it. |
181 | Remappings.insert(KV: std::make_pair(x&: A, y&: B)); |
182 | } |
183 | |
184 | bool isMostRecentlyCreated(Node *N) const { return MostRecentlyCreated == N; } |
185 | |
186 | void trackUsesOf(Node *N) { |
187 | TrackedNode = N; |
188 | TrackedNodeIsUsed = false; |
189 | } |
190 | bool trackedNodeIsUsed() const { return TrackedNodeIsUsed; } |
191 | }; |
192 | |
193 | // FIXME: Also expand built-in substitutions? |
194 | |
195 | using CanonicalizingDemangler = |
196 | itanium_demangle::ManglingParser<CanonicalizerAllocator>; |
197 | } // namespace |
198 | |
199 | struct ItaniumManglingCanonicalizer::Impl { |
200 | CanonicalizingDemangler Demangler = {nullptr, nullptr}; |
201 | }; |
202 | |
203 | ItaniumManglingCanonicalizer::ItaniumManglingCanonicalizer() : P(new Impl) {} |
204 | ItaniumManglingCanonicalizer::~ItaniumManglingCanonicalizer() { delete P; } |
205 | |
206 | ItaniumManglingCanonicalizer::EquivalenceError |
207 | ItaniumManglingCanonicalizer::addEquivalence(FragmentKind Kind, StringRef First, |
208 | StringRef Second) { |
209 | auto &Alloc = P->Demangler.ASTAllocator; |
210 | Alloc.setCreateNewNodes(true); |
211 | |
212 | auto Parse = [&](StringRef Str) { |
213 | P->Demangler.reset(First_: Str.begin(), Last_: Str.end()); |
214 | Node *N = nullptr; |
215 | switch (Kind) { |
216 | // A <name>, with minor extensions to allow arbitrary namespace and |
217 | // template names that can't easily be written as <name>s. |
218 | case FragmentKind::Name: |
219 | // Very special case: allow "St" as a shorthand for "3std". It's not |
220 | // valid as a <name> mangling, but is nonetheless the most natural |
221 | // way to name the 'std' namespace. |
222 | if (Str.size() == 2 && P->Demangler.consumeIf(S: "St" )) |
223 | N = P->Demangler.make<itanium_demangle::NameType>(args: "std" ); |
224 | // We permit substitutions to name templates without their template |
225 | // arguments. This mostly just falls out, as almost all template names |
226 | // are valid as <name>s, but we also want to parse <substitution>s as |
227 | // <name>s, even though they're not. |
228 | else if (Str.starts_with(Prefix: "S" )) |
229 | // Parse the substitution and optional following template arguments. |
230 | N = P->Demangler.parseType(); |
231 | else |
232 | N = P->Demangler.parseName(); |
233 | break; |
234 | |
235 | // A <type>. |
236 | case FragmentKind::Type: |
237 | N = P->Demangler.parseType(); |
238 | break; |
239 | |
240 | // An <encoding>. |
241 | case FragmentKind::Encoding: |
242 | N = P->Demangler.parseEncoding(); |
243 | break; |
244 | } |
245 | |
246 | // If we have trailing junk, the mangling is invalid. |
247 | if (P->Demangler.numLeft() != 0) |
248 | N = nullptr; |
249 | |
250 | // If any node was created after N, then we cannot safely remap it because |
251 | // it might already be in use by another node. |
252 | return std::make_pair(x&: N, y: Alloc.isMostRecentlyCreated(N)); |
253 | }; |
254 | |
255 | Node *FirstNode, *SecondNode; |
256 | bool FirstIsNew, SecondIsNew; |
257 | |
258 | std::tie(args&: FirstNode, args&: FirstIsNew) = Parse(First); |
259 | if (!FirstNode) |
260 | return EquivalenceError::InvalidFirstMangling; |
261 | |
262 | Alloc.trackUsesOf(N: FirstNode); |
263 | std::tie(args&: SecondNode, args&: SecondIsNew) = Parse(Second); |
264 | if (!SecondNode) |
265 | return EquivalenceError::InvalidSecondMangling; |
266 | |
267 | // If they're already equivalent, there's nothing to do. |
268 | if (FirstNode == SecondNode) |
269 | return EquivalenceError::Success; |
270 | |
271 | if (FirstIsNew && !Alloc.trackedNodeIsUsed()) |
272 | Alloc.addRemapping(A: FirstNode, B: SecondNode); |
273 | else if (SecondIsNew) |
274 | Alloc.addRemapping(A: SecondNode, B: FirstNode); |
275 | else |
276 | return EquivalenceError::ManglingAlreadyUsed; |
277 | |
278 | return EquivalenceError::Success; |
279 | } |
280 | |
281 | static ItaniumManglingCanonicalizer::Key |
282 | parseMaybeMangledName(CanonicalizingDemangler &Demangler, StringRef Mangling, |
283 | bool CreateNewNodes) { |
284 | Demangler.ASTAllocator.setCreateNewNodes(CreateNewNodes); |
285 | Demangler.reset(First_: Mangling.begin(), Last_: Mangling.end()); |
286 | // Attempt demangling only for names that look like C++ mangled names. |
287 | // Otherwise, treat them as extern "C" names. We permit the latter to |
288 | // be remapped by (eg) |
289 | // encoding 6memcpy 7memmove |
290 | // consistent with how they are encoded as local-names inside a C++ mangling. |
291 | Node *N; |
292 | if (Mangling.starts_with(Prefix: "_Z" ) || Mangling.starts_with(Prefix: "__Z" ) || |
293 | Mangling.starts_with(Prefix: "___Z" ) || Mangling.starts_with(Prefix: "____Z" )) |
294 | N = Demangler.parse(); |
295 | else |
296 | N = Demangler.make<itanium_demangle::NameType>( |
297 | args: std::string_view(Mangling.data(), Mangling.size())); |
298 | return reinterpret_cast<ItaniumManglingCanonicalizer::Key>(N); |
299 | } |
300 | |
301 | ItaniumManglingCanonicalizer::Key |
302 | ItaniumManglingCanonicalizer::canonicalize(StringRef Mangling) { |
303 | return parseMaybeMangledName(Demangler&: P->Demangler, Mangling, CreateNewNodes: true); |
304 | } |
305 | |
306 | ItaniumManglingCanonicalizer::Key |
307 | ItaniumManglingCanonicalizer::lookup(StringRef Mangling) { |
308 | return parseMaybeMangledName(Demangler&: P->Demangler, Mangling, CreateNewNodes: false); |
309 | } |
310 | |