1//===----------------- ItaniumManglingCanonicalizer.cpp -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/ProfileData/ItaniumManglingCanonicalizer.h"
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/FoldingSet.h"
12#include "llvm/ADT/StringRef.h"
13#include "llvm/Demangle/ItaniumDemangle.h"
14#include "llvm/Support/Allocator.h"
15
16using namespace llvm;
17using llvm::itanium_demangle::ForwardTemplateReference;
18using llvm::itanium_demangle::Node;
19using llvm::itanium_demangle::NodeKind;
20
21namespace {
22struct FoldingSetNodeIDBuilder {
23 llvm::FoldingSetNodeID &ID;
24 void operator()(const Node *P) { ID.AddPointer(Ptr: P); }
25 void operator()(std::string_view Str) {
26 if (Str.empty())
27 ID.AddString(String: {});
28 else
29 ID.AddString(String: llvm::StringRef(&*Str.begin(), Str.size()));
30 }
31 template <typename T>
32 std::enable_if_t<std::is_integral_v<T> || std::is_enum_v<T>> operator()(T V) {
33 ID.AddInteger(I: (unsigned long long)V);
34 }
35 void operator()(itanium_demangle::NodeArray A) {
36 ID.AddInteger(I: A.size());
37 for (const Node *N : A)
38 (*this)(N);
39 }
40};
41
42template<typename ...T>
43void profileCtor(llvm::FoldingSetNodeID &ID, Node::Kind K, T ...V) {
44 FoldingSetNodeIDBuilder Builder = {.ID: ID};
45 Builder(K);
46 int VisitInOrder[] = {
47 (Builder(V), 0) ...,
48 0 // Avoid empty array if there are no arguments.
49 };
50 (void)VisitInOrder;
51}
52
53// FIXME: Convert this to a generic lambda when possible.
54template<typename NodeT> struct ProfileSpecificNode {
55 FoldingSetNodeID &ID;
56 template<typename ...T> void operator()(T ...V) {
57 profileCtor(ID, NodeKind<NodeT>::Kind, V...);
58 }
59};
60
61struct ProfileNode {
62 FoldingSetNodeID &ID;
63 template<typename NodeT> void operator()(const NodeT *N) {
64 N->match(ProfileSpecificNode<NodeT>{ID});
65 }
66};
67
68template<> void ProfileNode::operator()(const ForwardTemplateReference *N) {
69 llvm_unreachable("should never canonicalize a ForwardTemplateReference");
70}
71
72void profileNode(llvm::FoldingSetNodeID &ID, const Node *N) {
73 N->visit(F: ProfileNode{.ID: ID});
74}
75
76class FoldingNodeAllocator {
77 class alignas(alignof(Node *)) NodeHeader : public llvm::FoldingSetNode {
78 public:
79 // 'Node' in this context names the injected-class-name of the base class.
80 itanium_demangle::Node *getNode() {
81 return reinterpret_cast<itanium_demangle::Node *>(this + 1);
82 }
83 void Profile(llvm::FoldingSetNodeID &ID) { profileNode(ID, N: getNode()); }
84 };
85
86 BumpPtrAllocator RawAlloc;
87 llvm::FoldingSet<NodeHeader> Nodes;
88
89public:
90 void reset() {}
91
92 template <typename T, typename... Args>
93 std::pair<Node *, bool> getOrCreateNode(bool CreateNewNodes, Args &&... As) {
94 // FIXME: Don't canonicalize forward template references for now, because
95 // they contain state (the resolved template node) that's not known at their
96 // point of creation.
97 if (std::is_same<T, ForwardTemplateReference>::value) {
98 // Note that we don't use if-constexpr here and so we must still write
99 // this code in a generic form.
100 return {new (RawAlloc.Allocate(Size: sizeof(T), Alignment: alignof(T)))
101 T(std::forward<Args>(As)...),
102 true};
103 }
104
105 llvm::FoldingSetNodeID ID;
106 profileCtor(ID, NodeKind<T>::Kind, As...);
107
108 void *InsertPos;
109 if (NodeHeader *Existing = Nodes.FindNodeOrInsertPos(ID, InsertPos))
110 return {static_cast<T*>(Existing->getNode()), false};
111
112 if (!CreateNewNodes)
113 return {nullptr, true};
114
115 static_assert(alignof(T) <= alignof(NodeHeader),
116 "underaligned node header for specific node kind");
117 void *Storage =
118 RawAlloc.Allocate(Size: sizeof(NodeHeader) + sizeof(T), Alignment: alignof(NodeHeader));
119 NodeHeader *New = new (Storage) NodeHeader;
120 T *Result = new (New->getNode()) T(std::forward<Args>(As)...);
121 Nodes.InsertNode(N: New, InsertPos);
122 return {Result, true};
123 }
124
125 template<typename T, typename... Args>
126 Node *makeNode(Args &&...As) {
127 return getOrCreateNode<T>(true, std::forward<Args>(As)...).first;
128 }
129
130 void *allocateNodeArray(size_t sz) {
131 return RawAlloc.Allocate(Size: sizeof(Node *) * sz, Alignment: alignof(Node *));
132 }
133};
134
135class CanonicalizerAllocator : public FoldingNodeAllocator {
136 Node *MostRecentlyCreated = nullptr;
137 Node *TrackedNode = nullptr;
138 bool TrackedNodeIsUsed = false;
139 bool CreateNewNodes = true;
140 llvm::SmallDenseMap<Node*, Node*, 32> Remappings;
141
142 template<typename T, typename ...Args> Node *makeNodeSimple(Args &&...As) {
143 std::pair<Node *, bool> Result =
144 getOrCreateNode<T>(CreateNewNodes, std::forward<Args>(As)...);
145 if (Result.second) {
146 // Node is new. Make a note of that.
147 MostRecentlyCreated = Result.first;
148 } else if (Result.first) {
149 // Node is pre-existing; check if it's in our remapping table.
150 if (auto *N = Remappings.lookup(Val: Result.first)) {
151 Result.first = N;
152 assert(!Remappings.contains(Result.first) &&
153 "should never need multiple remap steps");
154 }
155 if (Result.first == TrackedNode)
156 TrackedNodeIsUsed = true;
157 }
158 return Result.first;
159 }
160
161 /// Helper to allow makeNode to be partially-specialized on T.
162 template<typename T> struct MakeNodeImpl {
163 CanonicalizerAllocator &Self;
164 template<typename ...Args> Node *make(Args &&...As) {
165 return Self.makeNodeSimple<T>(std::forward<Args>(As)...);
166 }
167 };
168
169public:
170 template<typename T, typename ...Args> Node *makeNode(Args &&...As) {
171 return MakeNodeImpl<T>{*this}.make(std::forward<Args>(As)...);
172 }
173
174 void reset() { MostRecentlyCreated = nullptr; }
175
176 void setCreateNewNodes(bool CNN) { CreateNewNodes = CNN; }
177
178 void addRemapping(Node *A, Node *B) {
179 // Note, we don't need to check whether B is also remapped, because if it
180 // was we would have already remapped it when building it.
181 Remappings.insert(KV: std::make_pair(x&: A, y&: B));
182 }
183
184 bool isMostRecentlyCreated(Node *N) const { return MostRecentlyCreated == N; }
185
186 void trackUsesOf(Node *N) {
187 TrackedNode = N;
188 TrackedNodeIsUsed = false;
189 }
190 bool trackedNodeIsUsed() const { return TrackedNodeIsUsed; }
191};
192
193// FIXME: Also expand built-in substitutions?
194
195using CanonicalizingDemangler =
196 itanium_demangle::ManglingParser<CanonicalizerAllocator>;
197} // namespace
198
199struct ItaniumManglingCanonicalizer::Impl {
200 CanonicalizingDemangler Demangler = {nullptr, nullptr};
201};
202
203ItaniumManglingCanonicalizer::ItaniumManglingCanonicalizer() : P(new Impl) {}
204ItaniumManglingCanonicalizer::~ItaniumManglingCanonicalizer() { delete P; }
205
206ItaniumManglingCanonicalizer::EquivalenceError
207ItaniumManglingCanonicalizer::addEquivalence(FragmentKind Kind, StringRef First,
208 StringRef Second) {
209 auto &Alloc = P->Demangler.ASTAllocator;
210 Alloc.setCreateNewNodes(true);
211
212 auto Parse = [&](StringRef Str) {
213 P->Demangler.reset(First_: Str.begin(), Last_: Str.end());
214 Node *N = nullptr;
215 switch (Kind) {
216 // A <name>, with minor extensions to allow arbitrary namespace and
217 // template names that can't easily be written as <name>s.
218 case FragmentKind::Name:
219 // Very special case: allow "St" as a shorthand for "3std". It's not
220 // valid as a <name> mangling, but is nonetheless the most natural
221 // way to name the 'std' namespace.
222 if (Str.size() == 2 && P->Demangler.consumeIf(S: "St"))
223 N = P->Demangler.make<itanium_demangle::NameType>(args: "std");
224 // We permit substitutions to name templates without their template
225 // arguments. This mostly just falls out, as almost all template names
226 // are valid as <name>s, but we also want to parse <substitution>s as
227 // <name>s, even though they're not.
228 else if (Str.starts_with(Prefix: "S"))
229 // Parse the substitution and optional following template arguments.
230 N = P->Demangler.parseType();
231 else
232 N = P->Demangler.parseName();
233 break;
234
235 // A <type>.
236 case FragmentKind::Type:
237 N = P->Demangler.parseType();
238 break;
239
240 // An <encoding>.
241 case FragmentKind::Encoding:
242 N = P->Demangler.parseEncoding();
243 break;
244 }
245
246 // If we have trailing junk, the mangling is invalid.
247 if (P->Demangler.numLeft() != 0)
248 N = nullptr;
249
250 // If any node was created after N, then we cannot safely remap it because
251 // it might already be in use by another node.
252 return std::make_pair(x&: N, y: Alloc.isMostRecentlyCreated(N));
253 };
254
255 Node *FirstNode, *SecondNode;
256 bool FirstIsNew, SecondIsNew;
257
258 std::tie(args&: FirstNode, args&: FirstIsNew) = Parse(First);
259 if (!FirstNode)
260 return EquivalenceError::InvalidFirstMangling;
261
262 Alloc.trackUsesOf(N: FirstNode);
263 std::tie(args&: SecondNode, args&: SecondIsNew) = Parse(Second);
264 if (!SecondNode)
265 return EquivalenceError::InvalidSecondMangling;
266
267 // If they're already equivalent, there's nothing to do.
268 if (FirstNode == SecondNode)
269 return EquivalenceError::Success;
270
271 if (FirstIsNew && !Alloc.trackedNodeIsUsed())
272 Alloc.addRemapping(A: FirstNode, B: SecondNode);
273 else if (SecondIsNew)
274 Alloc.addRemapping(A: SecondNode, B: FirstNode);
275 else
276 return EquivalenceError::ManglingAlreadyUsed;
277
278 return EquivalenceError::Success;
279}
280
281static ItaniumManglingCanonicalizer::Key
282parseMaybeMangledName(CanonicalizingDemangler &Demangler, StringRef Mangling,
283 bool CreateNewNodes) {
284 Demangler.ASTAllocator.setCreateNewNodes(CreateNewNodes);
285 Demangler.reset(First_: Mangling.begin(), Last_: Mangling.end());
286 // Attempt demangling only for names that look like C++ mangled names.
287 // Otherwise, treat them as extern "C" names. We permit the latter to
288 // be remapped by (eg)
289 // encoding 6memcpy 7memmove
290 // consistent with how they are encoded as local-names inside a C++ mangling.
291 Node *N;
292 if (Mangling.starts_with(Prefix: "_Z") || Mangling.starts_with(Prefix: "__Z") ||
293 Mangling.starts_with(Prefix: "___Z") || Mangling.starts_with(Prefix: "____Z"))
294 N = Demangler.parse();
295 else
296 N = Demangler.make<itanium_demangle::NameType>(
297 args: std::string_view(Mangling.data(), Mangling.size()));
298 return reinterpret_cast<ItaniumManglingCanonicalizer::Key>(N);
299}
300
301ItaniumManglingCanonicalizer::Key
302ItaniumManglingCanonicalizer::canonicalize(StringRef Mangling) {
303 return parseMaybeMangledName(Demangler&: P->Demangler, Mangling, CreateNewNodes: true);
304}
305
306ItaniumManglingCanonicalizer::Key
307ItaniumManglingCanonicalizer::lookup(StringRef Mangling) {
308 return parseMaybeMangledName(Demangler&: P->Demangler, Mangling, CreateNewNodes: false);
309}
310