1 | //===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements a hash set that can be used to remove duplication of |
10 | // nodes in a graph. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/ADT/FoldingSet.h" |
15 | #include "llvm/ADT/StringRef.h" |
16 | #include "llvm/Support/Allocator.h" |
17 | #include "llvm/Support/ErrorHandling.h" |
18 | #include "llvm/Support/MathExtras.h" |
19 | #include "llvm/Support/SwapByteOrder.h" |
20 | #include <cassert> |
21 | #include <cstring> |
22 | using namespace llvm; |
23 | |
24 | //===----------------------------------------------------------------------===// |
25 | // FoldingSetNodeIDRef Implementation |
26 | |
27 | bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const { |
28 | if (Size != RHS.Size) return false; |
29 | return memcmp(s1: Data, s2: RHS.Data, n: Size*sizeof(*Data)) == 0; |
30 | } |
31 | |
32 | /// Used to compare the "ordering" of two nodes as defined by the |
33 | /// profiled bits and their ordering defined by memcmp(). |
34 | bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const { |
35 | if (Size != RHS.Size) |
36 | return Size < RHS.Size; |
37 | return memcmp(s1: Data, s2: RHS.Data, n: Size*sizeof(*Data)) < 0; |
38 | } |
39 | |
40 | //===----------------------------------------------------------------------===// |
41 | // FoldingSetNodeID Implementation |
42 | |
/// Add* - Add various data types to Bit data.
///
/// AddString - Append a string to the profile. The length is recorded first
/// (so different concatenations with the same total contents still produce
/// distinct IDs), then the characters are packed four per 32-bit word.
void FoldingSetNodeID::AddString(StringRef String) {
  unsigned Size = String.size();

  // Reserve up front: one word for the length plus one word per four
  // characters, rounded up.
  unsigned NumInserts = 1 + divideCeil(Size, 4);
  Bits.reserve(Bits.size() + NumInserts);

  Bits.push_back(Size);
  if (!Size) return;

  unsigned Units = Size / 4;
  unsigned Pos = 0;
  const unsigned *Base = (const unsigned*) String.data();

  // If the string is aligned do a bulk transfer.
  if (!((intptr_t)Base & 3)) {
    Bits.append(Base, Base + Units);
    // Overshoot the bulk-copied region by exactly 4 so the leftover switch
    // below sees the same Pos values the byte-wise path would produce.
    Pos = (Units + 1) * 4;
  } else {
    // Otherwise do it the hard way.
    // To be compatible with above bulk transfer, we need to take endianness
    // into account.
    static_assert(sys::IsBigEndianHost || sys::IsLittleEndianHost,
                  "Unexpected host endianness");
    if (sys::IsBigEndianHost) {
      for (Pos += 4; Pos <= Size; Pos += 4) {
        unsigned V = ((unsigned char)String[Pos - 4] << 24) |
                     ((unsigned char)String[Pos - 3] << 16) |
                     ((unsigned char)String[Pos - 2] << 8) |
                     (unsigned char)String[Pos - 1];
        Bits.push_back(V);
      }
    } else { // Little-endian host
      for (Pos += 4; Pos <= Size; Pos += 4) {
        unsigned V = ((unsigned char)String[Pos - 1] << 24) |
                     ((unsigned char)String[Pos - 2] << 16) |
                     ((unsigned char)String[Pos - 3] << 8) |
                     (unsigned char)String[Pos - 4];
        Bits.push_back(V);
      }
    }
  }

  // With the leftover bits.
  unsigned V = 0;
  // Pos will have overshot size by 4 - #bytes left over.
  // No need to take endianness into account here - this is always executed.
  // Each case falls through, accumulating one trailing byte per step.
  switch (Pos - Size) {
  case 1: V = (V << 8) | (unsigned char)String[Size - 3]; [[fallthrough]];
  case 2: V = (V << 8) | (unsigned char)String[Size - 2]; [[fallthrough]];
  case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
  default: return; // Nothing left.
  }

  Bits.push_back(V);
}
100 | |
101 | // AddNodeID - Adds the Bit data of another ID to *this. |
102 | void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { |
103 | Bits.append(in_start: ID.Bits.begin(), in_end: ID.Bits.end()); |
104 | } |
105 | |
106 | /// operator== - Used to compare two nodes to each other. |
107 | /// |
108 | bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const { |
109 | return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); |
110 | } |
111 | |
112 | /// operator== - Used to compare two nodes to each other. |
113 | /// |
114 | bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const { |
115 | return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS; |
116 | } |
117 | |
118 | /// Used to compare the "ordering" of two nodes as defined by the |
119 | /// profiled bits and their ordering defined by memcmp(). |
120 | bool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS) const { |
121 | return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); |
122 | } |
123 | |
124 | bool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const { |
125 | return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS; |
126 | } |
127 | |
128 | /// Intern - Copy this node's data to a memory region allocated from the |
129 | /// given allocator and return a FoldingSetNodeIDRef describing the |
130 | /// interned data. |
131 | FoldingSetNodeIDRef |
132 | FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { |
133 | unsigned *New = Allocator.Allocate<unsigned>(Num: Bits.size()); |
134 | std::uninitialized_copy(first: Bits.begin(), last: Bits.end(), result: New); |
135 | return FoldingSetNodeIDRef(New, Bits.size()); |
136 | } |
137 | |
138 | //===----------------------------------------------------------------------===// |
139 | /// Helper functions for FoldingSetBase. |
140 | |
141 | /// GetNextPtr - In order to save space, each bucket is a |
142 | /// singly-linked-list. In order to make deletion more efficient, we make |
143 | /// the list circular, so we can delete a node without computing its hash. |
144 | /// The problem with this is that the start of the hash buckets are not |
145 | /// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: |
146 | /// use GetBucketPtr when this happens. |
147 | static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) { |
148 | // The low bit is set if this is the pointer back to the bucket. |
149 | if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) |
150 | return nullptr; |
151 | |
152 | return static_cast<FoldingSetBase::Node*>(NextInBucketPtr); |
153 | } |
154 | |
155 | |
/// GetBucketPtr - Provides a casting of a bucket pointer for isNode
/// testing.
static void **GetBucketPtr(void *NextInBucketPtr) {
  intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr);
  assert((Ptr & 1) && "Not a bucket pointer");
  // Strip the low tag bit to recover the real bucket address.
  return reinterpret_cast<void**>(Ptr & ~intptr_t(1));
}
162 | |
/// GetBucketFor - Hash the specified node ID and return the hash bucket for
/// the specified ID.
static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
  // NumBuckets is always a power of 2, so masking with NumBuckets-1 is the
  // modulo that selects a bucket index.
  return &Buckets[Hash & (NumBuckets - 1)];
}
170 | |
171 | /// AllocateBuckets - Allocated initialized bucket memory. |
172 | static void **AllocateBuckets(unsigned NumBuckets) { |
173 | void **Buckets = static_cast<void**>(safe_calloc(Count: NumBuckets + 1, |
174 | Sz: sizeof(void*))); |
175 | // Set the very last bucket to be a non-null "pointer". |
176 | Buckets[NumBuckets] = reinterpret_cast<void*>(-1); |
177 | return Buckets; |
178 | } |
179 | |
180 | //===----------------------------------------------------------------------===// |
181 | // FoldingSetBase Implementation |
182 | |
183 | FoldingSetBase::FoldingSetBase(unsigned Log2InitSize) { |
184 | assert(5 < Log2InitSize && Log2InitSize < 32 && |
185 | "Initial hash table size out of range" ); |
186 | NumBuckets = 1 << Log2InitSize; |
187 | Buckets = AllocateBuckets(NumBuckets); |
188 | NumNodes = 0; |
189 | } |
190 | |
// Move construction: steal the other set's table wholesale.
FoldingSetBase::FoldingSetBase(FoldingSetBase &&Arg)
    : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) {
  // Leave the source in a valid, empty moved-from state so its destructor's
  // free(Buckets) is a harmless free(nullptr).
  Arg.Buckets = nullptr;
  Arg.NumBuckets = 0;
  Arg.NumNodes = 0;
}
197 | |
198 | FoldingSetBase &FoldingSetBase::operator=(FoldingSetBase &&RHS) { |
199 | free(ptr: Buckets); // This may be null if the set is in a moved-from state. |
200 | Buckets = RHS.Buckets; |
201 | NumBuckets = RHS.NumBuckets; |
202 | NumNodes = RHS.NumNodes; |
203 | RHS.Buckets = nullptr; |
204 | RHS.NumBuckets = 0; |
205 | RHS.NumNodes = 0; |
206 | return *this; |
207 | } |
208 | |
FoldingSetBase::~FoldingSetBase() {
  // Buckets comes from safe_calloc (malloc family) and may be null if this
  // set was moved from; the nodes themselves are not owned by the set.
  free(Buckets);
}
212 | |
213 | void FoldingSetBase::clear() { |
214 | // Set all but the last bucket to null pointers. |
215 | memset(s: Buckets, c: 0, n: NumBuckets*sizeof(void*)); |
216 | |
217 | // Set the very last bucket to be a non-null "pointer". |
218 | Buckets[NumBuckets] = reinterpret_cast<void*>(-1); |
219 | |
220 | // Reset the node count to zero. |
221 | NumNodes = 0; |
222 | } |
223 | |
// GrowBucketCount - Replace the bucket table with a strictly larger
// power-of-two table and rehash every node into it.
void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount,
                                     const FoldingSetInfo &Info) {
  assert((NewBucketCount > NumBuckets) &&
         "Can't shrink a folding set with GrowBucketCount");
  assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!");
  void **OldBuckets = Buckets;
  unsigned OldNumBuckets = NumBuckets;

  // Clear out new buckets.
  Buckets = AllocateBuckets(NewBucketCount);
  // Set NumBuckets only if allocation of new buckets was successful.
  NumBuckets = NewBucketCount;
  // InsertNode below re-counts each node as it is rehashed.
  NumNodes = 0;

  // Walk the old buckets, rehashing nodes into their new place.
  FoldingSetNodeID TempID;
  for (unsigned i = 0; i != OldNumBuckets; ++i) {
    void *Probe = OldBuckets[i];
    if (!Probe) continue;
    // GetNextPtr returns null at the tagged back-pointer that closes the
    // circular bucket list.
    while (Node *NodeInBucket = GetNextPtr(Probe)) {
      // Figure out the next link, remove NodeInBucket from the old link.
      Probe = NodeInBucket->getNextInBucket();
      NodeInBucket->SetNextInBucket(nullptr);

      // Insert the node into the new bucket, after recomputing the hash.
      InsertNode(NodeInBucket,
                 GetBucketFor(Info.ComputeNodeHash(this, NodeInBucket, TempID),
                              Buckets, NumBuckets),
                 Info);
      TempID.clear();
    }
  }

  free(OldBuckets);
}
259 | |
/// GrowHashTable - Double the size of the hash table and rehash everything.
///
void FoldingSetBase::GrowHashTable(const FoldingSetInfo &Info) {
  // Doubling preserves the power-of-two invariant GrowBucketCount asserts.
  GrowBucketCount(NumBuckets * 2, Info);
}
265 | |
266 | void FoldingSetBase::reserve(unsigned EltCount, const FoldingSetInfo &Info) { |
267 | // This will give us somewhere between EltCount / 2 and |
268 | // EltCount buckets. This puts us in the load factor |
269 | // range of 1.0 - 2.0. |
270 | if(EltCount < capacity()) |
271 | return; |
272 | GrowBucketCount(NewBucketCount: llvm::bit_floor(Value: EltCount), Info); |
273 | } |
274 | |
275 | /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, |
276 | /// return it. If not, return the insertion token that will make insertion |
277 | /// faster. |
278 | FoldingSetBase::Node *FoldingSetBase::FindNodeOrInsertPos( |
279 | const FoldingSetNodeID &ID, void *&InsertPos, const FoldingSetInfo &Info) { |
280 | unsigned IDHash = ID.ComputeHash(); |
281 | void **Bucket = GetBucketFor(Hash: IDHash, Buckets, NumBuckets); |
282 | void *Probe = *Bucket; |
283 | |
284 | InsertPos = nullptr; |
285 | |
286 | FoldingSetNodeID TempID; |
287 | while (Node *NodeInBucket = GetNextPtr(NextInBucketPtr: Probe)) { |
288 | if (Info.NodeEquals(this, NodeInBucket, ID, IDHash, TempID)) |
289 | return NodeInBucket; |
290 | TempID.clear(); |
291 | |
292 | Probe = NodeInBucket->getNextInBucket(); |
293 | } |
294 | |
295 | // Didn't find the node, return null with the bucket as the InsertPos. |
296 | InsertPos = Bucket; |
297 | return nullptr; |
298 | } |
299 | |
300 | /// InsertNode - Insert the specified node into the folding set, knowing that it |
301 | /// is not already in the map. InsertPos must be obtained from |
302 | /// FindNodeOrInsertPos. |
303 | void FoldingSetBase::InsertNode(Node *N, void *InsertPos, |
304 | const FoldingSetInfo &Info) { |
305 | assert(!N->getNextInBucket()); |
306 | // Do we need to grow the hashtable? |
307 | if (NumNodes+1 > capacity()) { |
308 | GrowHashTable(Info); |
309 | FoldingSetNodeID TempID; |
310 | InsertPos = GetBucketFor(Hash: Info.ComputeNodeHash(this, N, TempID), Buckets, |
311 | NumBuckets); |
312 | } |
313 | |
314 | ++NumNodes; |
315 | |
316 | /// The insert position is actually a bucket pointer. |
317 | void **Bucket = static_cast<void**>(InsertPos); |
318 | |
319 | void *Next = *Bucket; |
320 | |
321 | // If this is the first insertion into this bucket, its next pointer will be |
322 | // null. Pretend as if it pointed to itself, setting the low bit to indicate |
323 | // that it is a pointer to the bucket. |
324 | if (!Next) |
325 | Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1); |
326 | |
327 | // Set the node's next pointer, and make the bucket point to the node. |
328 | N->SetNextInBucket(Next); |
329 | *Bucket = N; |
330 | } |
331 | |
/// RemoveNode - Remove a node from the folding set, returning true if one was
/// removed or false if the node was not in the folding set.
bool FoldingSetBase::RemoveNode(Node *N) {
  // Because each bucket is a circular list, we don't need to compute N's hash
  // to remove it.
  void *Ptr = N->getNextInBucket();
  if (!Ptr) return false;  // Not in folding set.

  --NumNodes;
  N->SetNextInBucket(nullptr);

  // Remember what N originally pointed to, either a bucket or another node.
  void *NodeNextPtr = Ptr;

  // Chase around the list until we find the node (or bucket) which points to N.
  while (true) {
    if (Node *NodeInBucket = GetNextPtr(Ptr)) {
      // Advance pointer.
      Ptr = NodeInBucket->getNextInBucket();

      // We found a node that points to N, change it to point to N's next node,
      // removing N from the list.
      if (Ptr == N) {
        NodeInBucket->SetNextInBucket(NodeNextPtr);
        return true;
      }
    } else {
      // Ptr is the tagged pointer back to the bucket head; follow it to the
      // first node in the bucket.
      void **Bucket = GetBucketPtr(Ptr);
      Ptr = *Bucket;

      // If we found that the bucket points to N, update the bucket to point to
      // whatever is next.
      if (Ptr == N) {
        *Bucket = NodeNextPtr;
        return true;
      }
    }
  }
}
371 | |
372 | /// GetOrInsertNode - If there is an existing simple Node exactly |
373 | /// equal to the specified node, return it. Otherwise, insert 'N' and it |
374 | /// instead. |
375 | FoldingSetBase::Node * |
376 | FoldingSetBase::GetOrInsertNode(FoldingSetBase::Node *N, |
377 | const FoldingSetInfo &Info) { |
378 | FoldingSetNodeID ID; |
379 | Info.GetNodeProfile(this, N, ID); |
380 | void *IP; |
381 | if (Node *E = FindNodeOrInsertPos(ID, InsertPos&: IP, Info)) |
382 | return E; |
383 | InsertNode(N, InsertPos: IP, Info); |
384 | return N; |
385 | } |
386 | |
387 | //===----------------------------------------------------------------------===// |
388 | // FoldingSetIteratorImpl Implementation |
389 | |
390 | FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) { |
391 | // Skip to the first non-null non-self-cycle bucket. |
392 | while (*Bucket != reinterpret_cast<void*>(-1) && |
393 | (!*Bucket || !GetNextPtr(NextInBucketPtr: *Bucket))) |
394 | ++Bucket; |
395 | |
396 | NodePtr = static_cast<FoldingSetNode*>(*Bucket); |
397 | } |
398 | |
399 | void FoldingSetIteratorImpl::advance() { |
400 | // If there is another link within this bucket, go to it. |
401 | void *Probe = NodePtr->getNextInBucket(); |
402 | |
403 | if (FoldingSetNode *NextNodeInBucket = GetNextPtr(NextInBucketPtr: Probe)) |
404 | NodePtr = NextNodeInBucket; |
405 | else { |
406 | // Otherwise, this is the last link in this bucket. |
407 | void **Bucket = GetBucketPtr(NextInBucketPtr: Probe); |
408 | |
409 | // Skip to the next non-null non-self-cycle bucket. |
410 | do { |
411 | ++Bucket; |
412 | } while (*Bucket != reinterpret_cast<void*>(-1) && |
413 | (!*Bucket || !GetNextPtr(NextInBucketPtr: *Bucket))); |
414 | |
415 | NodePtr = static_cast<FoldingSetNode*>(*Bucket); |
416 | } |
417 | } |
418 | |
419 | //===----------------------------------------------------------------------===// |
420 | // FoldingSetBucketIteratorImpl Implementation |
421 | |
422 | FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) { |
423 | Ptr = (!*Bucket || !GetNextPtr(NextInBucketPtr: *Bucket)) ? (void*) Bucket : *Bucket; |
424 | } |
425 | |