1 | //===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements a hash set that can be used to remove duplication of |
10 | // nodes in a graph. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/ADT/FoldingSet.h" |
15 | #include "llvm/ADT/StringRef.h" |
16 | #include "llvm/Support/Allocator.h" |
17 | #include "llvm/Support/ErrorHandling.h" |
18 | #include "llvm/Support/MathExtras.h" |
19 | #include "llvm/Support/SwapByteOrder.h" |
20 | #include <cassert> |
21 | #include <cstring> |
22 | using namespace llvm; |
23 | |
24 | //===----------------------------------------------------------------------===// |
25 | // FoldingSetNodeIDRef Implementation |
26 | |
27 | bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const { |
28 | if (Size != RHS.Size) return false; |
29 | return memcmp(s1: Data, s2: RHS.Data, n: Size*sizeof(*Data)) == 0; |
30 | } |
31 | |
32 | /// Used to compare the "ordering" of two nodes as defined by the |
33 | /// profiled bits and their ordering defined by memcmp(). |
34 | bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const { |
35 | if (Size != RHS.Size) |
36 | return Size < RHS.Size; |
37 | return memcmp(s1: Data, s2: RHS.Data, n: Size*sizeof(*Data)) < 0; |
38 | } |
39 | |
40 | //===----------------------------------------------------------------------===// |
41 | // FoldingSetNodeID Implementation |
42 | |
/// Add* - Add various data types to Bit data.
///
/// AddString - Append a string to the profile. The length is recorded first
/// (so different concatenations with the same total contents still produce
/// distinct IDs), then the characters are packed four per 32-bit word.
void FoldingSetNodeID::AddString(StringRef String) {
  unsigned Size = String.size();

  // Reserve up front: one word for the length plus one word per four
  // characters, rounded up.
  unsigned NumInserts = 1 + divideCeil(Size, 4);
  Bits.reserve(Bits.size() + NumInserts);

  Bits.push_back(Size);
  if (!Size) return;

  unsigned Units = Size / 4;
  unsigned Pos = 0;
  const unsigned *Base = (const unsigned*) String.data();

  // If the string is aligned do a bulk transfer.
  if (!((intptr_t)Base & 3)) {
    Bits.append(Base, Base + Units);
    // Overshoot the bulk-copied region by exactly 4 so the leftover switch
    // below sees the same Pos values the byte-wise path would produce.
    Pos = (Units + 1) * 4;
  } else {
    // Otherwise do it the hard way.
    // To be compatible with above bulk transfer, we need to take endianness
    // into account.
    static_assert(sys::IsBigEndianHost || sys::IsLittleEndianHost,
                  "Unexpected host endianness");
    if (sys::IsBigEndianHost) {
      for (Pos += 4; Pos <= Size; Pos += 4) {
        unsigned V = ((unsigned char)String[Pos - 4] << 24) |
                     ((unsigned char)String[Pos - 3] << 16) |
                     ((unsigned char)String[Pos - 2] << 8) |
                     (unsigned char)String[Pos - 1];
        Bits.push_back(V);
      }
    } else { // Little-endian host
      for (Pos += 4; Pos <= Size; Pos += 4) {
        unsigned V = ((unsigned char)String[Pos - 1] << 24) |
                     ((unsigned char)String[Pos - 2] << 16) |
                     ((unsigned char)String[Pos - 3] << 8) |
                     (unsigned char)String[Pos - 4];
        Bits.push_back(V);
      }
    }
  }

  // With the leftover bits.
  unsigned V = 0;
  // Pos will have overshot size by 4 - #bytes left over.
  // No need to take endianness into account here - this is always executed.
  // Each case falls through, accumulating one trailing byte per step.
  switch (Pos - Size) {
  case 1: V = (V << 8) | (unsigned char)String[Size - 3]; [[fallthrough]];
  case 2: V = (V << 8) | (unsigned char)String[Size - 2]; [[fallthrough]];
  case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
  default: return; // Nothing left.
  }

  Bits.push_back(V);
}
100 | |
101 | // AddNodeID - Adds the Bit data of another ID to *this. |
102 | void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { |
103 | Bits.append(in_start: ID.Bits.begin(), in_end: ID.Bits.end()); |
104 | } |
105 | |
106 | /// operator== - Used to compare two nodes to each other. |
107 | /// |
108 | bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const { |
109 | return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); |
110 | } |
111 | |
112 | /// operator== - Used to compare two nodes to each other. |
113 | /// |
114 | bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const { |
115 | return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS; |
116 | } |
117 | |
118 | /// Used to compare the "ordering" of two nodes as defined by the |
119 | /// profiled bits and their ordering defined by memcmp(). |
120 | bool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS) const { |
121 | return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); |
122 | } |
123 | |
124 | bool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const { |
125 | return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS; |
126 | } |
127 | |
128 | /// Intern - Copy this node's data to a memory region allocated from the |
129 | /// given allocator and return a FoldingSetNodeIDRef describing the |
130 | /// interned data. |
131 | FoldingSetNodeIDRef |
132 | FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { |
133 | unsigned *New = Allocator.Allocate<unsigned>(Num: Bits.size()); |
134 | std::uninitialized_copy(first: Bits.begin(), last: Bits.end(), result: New); |
135 | return FoldingSetNodeIDRef(New, Bits.size()); |
136 | } |
137 | |
138 | //===----------------------------------------------------------------------===// |
139 | /// Helper functions for FoldingSetBase. |
140 | |
141 | /// GetNextPtr - In order to save space, each bucket is a |
142 | /// singly-linked-list. In order to make deletion more efficient, we make |
143 | /// the list circular, so we can delete a node without computing its hash. |
144 | /// The problem with this is that the start of the hash buckets are not |
145 | /// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: |
146 | /// use GetBucketPtr when this happens. |
147 | static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) { |
148 | // The low bit is set if this is the pointer back to the bucket. |
149 | if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) |
150 | return nullptr; |
151 | |
152 | return static_cast<FoldingSetBase::Node*>(NextInBucketPtr); |
153 | } |
154 | |
155 | |
/// GetBucketPtr - Provides a casting of a bucket pointer for isNode
/// testing.
static void **GetBucketPtr(void *NextInBucketPtr) {
  intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr);
  assert((Ptr & 1) && "Not a bucket pointer");
  // Strip the low tag bit to recover the real bucket address.
  return reinterpret_cast<void**>(Ptr & ~intptr_t(1));
}
162 | |
/// GetBucketFor - Hash the specified node ID and return the hash bucket for
/// the specified ID.
static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
  // NumBuckets is always a power of 2, so masking with NumBuckets-1 is the
  // modulo that selects a bucket index.
  return &Buckets[Hash & (NumBuckets - 1)];
}
170 | |
171 | /// AllocateBuckets - Allocated initialized bucket memory. |
172 | static void **AllocateBuckets(unsigned NumBuckets) { |
173 | void **Buckets = static_cast<void**>(safe_calloc(Count: NumBuckets + 1, |
174 | Sz: sizeof(void*))); |
175 | // Set the very last bucket to be a non-null "pointer". |
176 | Buckets[NumBuckets] = reinterpret_cast<void*>(-1); |
177 | return Buckets; |
178 | } |
179 | |
180 | //===----------------------------------------------------------------------===// |
181 | // FoldingSetBase Implementation |
182 | |
183 | FoldingSetBase::FoldingSetBase(unsigned Log2InitSize) { |
184 | assert(5 < Log2InitSize && Log2InitSize < 32 && |
185 | "Initial hash table size out of range" ); |
186 | NumBuckets = 1 << Log2InitSize; |
187 | Buckets = AllocateBuckets(NumBuckets); |
188 | NumNodes = 0; |
189 | } |
190 | |
// Move construction: steal the other set's table wholesale.
FoldingSetBase::FoldingSetBase(FoldingSetBase &&Arg)
    : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) {
  // Leave the source in a valid, empty moved-from state so its destructor's
  // free(Buckets) is a harmless free(nullptr).
  Arg.Buckets = nullptr;
  Arg.NumBuckets = 0;
  Arg.NumNodes = 0;
}
197 | |
198 | FoldingSetBase &FoldingSetBase::operator=(FoldingSetBase &&RHS) { |
199 | free(ptr: Buckets); // This may be null if the set is in a moved-from state. |
200 | Buckets = RHS.Buckets; |
201 | NumBuckets = RHS.NumBuckets; |
202 | NumNodes = RHS.NumNodes; |
203 | RHS.Buckets = nullptr; |
204 | RHS.NumBuckets = 0; |
205 | RHS.NumNodes = 0; |
206 | return *this; |
207 | } |
208 | |
FoldingSetBase::~FoldingSetBase() {
  // Buckets comes from safe_calloc (malloc family) and may be null if this
  // set was moved from; the nodes themselves are not owned by the set.
  free(Buckets);
}
212 | |
213 | void FoldingSetBase::clear() { |
214 | // Set all but the last bucket to null pointers. |
215 | memset(s: Buckets, c: 0, n: NumBuckets*sizeof(void*)); |
216 | |
217 | // Set the very last bucket to be a non-null "pointer". |
218 | Buckets[NumBuckets] = reinterpret_cast<void*>(-1); |
219 | |
220 | // Reset the node count to zero. |
221 | NumNodes = 0; |
222 | } |
223 | |
// GrowBucketCount - Replace the bucket table with a strictly larger
// power-of-two table and rehash every node into it.
void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount,
                                     const FoldingSetInfo &Info) {
  assert((NewBucketCount > NumBuckets) &&
         "Can't shrink a folding set with GrowBucketCount");
  assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!");
  void **OldBuckets = Buckets;
  unsigned OldNumBuckets = NumBuckets;

  // Clear out new buckets.
  Buckets = AllocateBuckets(NewBucketCount);
  // Set NumBuckets only if allocation of new buckets was successful.
  NumBuckets = NewBucketCount;
  // InsertNode below re-counts each node as it is rehashed.
  NumNodes = 0;

  // Walk the old buckets, rehashing nodes into their new place.
  FoldingSetNodeID TempID;
  for (unsigned i = 0; i != OldNumBuckets; ++i) {
    void *Probe = OldBuckets[i];
    if (!Probe) continue;
    // GetNextPtr returns null at the tagged back-pointer that closes the
    // circular bucket list.
    while (Node *NodeInBucket = GetNextPtr(Probe)) {
      // Figure out the next link, remove NodeInBucket from the old link.
      Probe = NodeInBucket->getNextInBucket();
      NodeInBucket->SetNextInBucket(nullptr);

      // Insert the node into the new bucket, after recomputing the hash.
      InsertNode(NodeInBucket,
                 GetBucketFor(Info.ComputeNodeHash(this, NodeInBucket, TempID),
                              Buckets, NumBuckets),
                 Info);
      TempID.clear();
    }
  }

  free(OldBuckets);
}
259 | |
/// GrowHashTable - Double the size of the hash table and rehash everything.
///
void FoldingSetBase::GrowHashTable(const FoldingSetInfo &Info) {
  // Doubling preserves the power-of-two invariant GrowBucketCount asserts.
  GrowBucketCount(NumBuckets * 2, Info);
}
265 | |
266 | void FoldingSetBase::reserve(unsigned EltCount, const FoldingSetInfo &Info) { |
267 | // This will give us somewhere between EltCount / 2 and |
268 | // EltCount buckets. This puts us in the load factor |
269 | // range of 1.0 - 2.0. |
270 | if(EltCount < capacity()) |
271 | return; |
272 | GrowBucketCount(NewBucketCount: llvm::bit_floor(Value: EltCount), Info); |
273 | } |
274 | |
275 | /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, |
276 | /// return it. If not, return the insertion token that will make insertion |
277 | /// faster. |
278 | FoldingSetBase::Node *FoldingSetBase::FindNodeOrInsertPos( |
279 | const FoldingSetNodeID &ID, void *&InsertPos, const FoldingSetInfo &Info) { |
280 | unsigned IDHash = ID.ComputeHash(); |
281 | void **Bucket = GetBucketFor(Hash: IDHash, Buckets, NumBuckets); |
282 | void *Probe = *Bucket; |
283 | |
284 | InsertPos = nullptr; |
285 | |
286 | FoldingSetNodeID TempID; |
287 | while (Node *NodeInBucket = GetNextPtr(NextInBucketPtr: Probe)) { |
288 | if (Info.NodeEquals(this, NodeInBucket, ID, IDHash, TempID)) |
289 | return NodeInBucket; |
290 | TempID.clear(); |
291 | |
292 | Probe = NodeInBucket->getNextInBucket(); |
293 | } |
294 | |
295 | // Didn't find the node, return null with the bucket as the InsertPos. |
296 | InsertPos = Bucket; |
297 | return nullptr; |
298 | } |
299 | |
300 | /// InsertNode - Insert the specified node into the folding set, knowing that it |
301 | /// is not already in the map. InsertPos must be obtained from |
302 | /// FindNodeOrInsertPos. |
303 | void FoldingSetBase::InsertNode(Node *N, void *InsertPos, |
304 | const FoldingSetInfo &Info) { |
305 | assert(!N->getNextInBucket()); |
306 | // Do we need to grow the hashtable? |
307 | if (NumNodes+1 > capacity()) { |
308 | GrowHashTable(Info); |
309 | FoldingSetNodeID TempID; |
310 | InsertPos = GetBucketFor(Hash: Info.ComputeNodeHash(this, N, TempID), Buckets, |
311 | NumBuckets); |
312 | } |
313 | |
314 | ++NumNodes; |
315 | |
316 | /// The insert position is actually a bucket pointer. |
317 | void **Bucket = static_cast<void**>(InsertPos); |
318 | |
319 | void *Next = *Bucket; |
320 | |
321 | // If this is the first insertion into this bucket, its next pointer will be |
322 | // null. Pretend as if it pointed to itself, setting the low bit to indicate |
323 | // that it is a pointer to the bucket. |
324 | if (!Next) |
325 | Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1); |
326 | |
327 | // Set the node's next pointer, and make the bucket point to the node. |
328 | N->SetNextInBucket(Next); |
329 | *Bucket = N; |
330 | } |
331 | |
/// RemoveNode - Remove a node from the folding set, returning true if one was
/// removed or false if the node was not in the folding set.
bool FoldingSetBase::RemoveNode(Node *N) {
  // Because each bucket is a circular list, we don't need to compute N's hash
  // to remove it.
  void *Ptr = N->getNextInBucket();
  if (!Ptr) return false;  // Not in folding set.

  --NumNodes;
  N->SetNextInBucket(nullptr);

  // Remember what N originally pointed to, either a bucket or another node.
  void *NodeNextPtr = Ptr;

  // Chase around the list until we find the node (or bucket) which points to N.
  while (true) {
    if (Node *NodeInBucket = GetNextPtr(Ptr)) {
      // Advance pointer.
      Ptr = NodeInBucket->getNextInBucket();

      // We found a node that points to N, change it to point to N's next node,
      // removing N from the list.
      if (Ptr == N) {
        NodeInBucket->SetNextInBucket(NodeNextPtr);
        return true;
      }
    } else {
      // Ptr is the tagged pointer back to the bucket head; follow it to the
      // first node in the bucket.
      void **Bucket = GetBucketPtr(Ptr);
      Ptr = *Bucket;

      // If we found that the bucket points to N, update the bucket to point to
      // whatever is next.
      if (Ptr == N) {
        *Bucket = NodeNextPtr;
        return true;
      }
    }
  }
}
371 | |
372 | /// GetOrInsertNode - If there is an existing simple Node exactly |
373 | /// equal to the specified node, return it. Otherwise, insert 'N' and it |
374 | /// instead. |
375 | FoldingSetBase::Node * |
376 | FoldingSetBase::GetOrInsertNode(FoldingSetBase::Node *N, |
377 | const FoldingSetInfo &Info) { |
378 | FoldingSetNodeID ID; |
379 | Info.GetNodeProfile(this, N, ID); |
380 | void *IP; |
381 | if (Node *E = FindNodeOrInsertPos(ID, InsertPos&: IP, Info)) |
382 | return E; |
383 | InsertNode(N, InsertPos: IP, Info); |
384 | return N; |
385 | } |
386 | |
387 | //===----------------------------------------------------------------------===// |
388 | // FoldingSetIteratorImpl Implementation |
389 | |
390 | FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) { |
391 | // Skip to the first non-null non-self-cycle bucket. |
392 | while (*Bucket != reinterpret_cast<void*>(-1) && |
393 | (!*Bucket || !GetNextPtr(NextInBucketPtr: *Bucket))) |
394 | ++Bucket; |
395 | |
396 | NodePtr = static_cast<FoldingSetNode*>(*Bucket); |
397 | } |
398 | |
399 | void FoldingSetIteratorImpl::advance() { |
400 | // If there is another link within this bucket, go to it. |
401 | void *Probe = NodePtr->getNextInBucket(); |
402 | |
403 | if (FoldingSetNode *NextNodeInBucket = GetNextPtr(NextInBucketPtr: Probe)) |
404 | NodePtr = NextNodeInBucket; |
405 | else { |
406 | // Otherwise, this is the last link in this bucket. |
407 | void **Bucket = GetBucketPtr(NextInBucketPtr: Probe); |
408 | |
409 | // Skip to the next non-null non-self-cycle bucket. |
410 | do { |
411 | ++Bucket; |
412 | } while (*Bucket != reinterpret_cast<void*>(-1) && |
413 | (!*Bucket || !GetNextPtr(NextInBucketPtr: *Bucket))); |
414 | |
415 | NodePtr = static_cast<FoldingSetNode*>(*Bucket); |
416 | } |
417 | } |
418 | |
419 | //===----------------------------------------------------------------------===// |
420 | // FoldingSetBucketIteratorImpl Implementation |
421 | |
422 | FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) { |
423 | Ptr = (!*Bucket || !GetNextPtr(NextInBucketPtr: *Bucket)) ? (void*) Bucket : *Bucket; |
424 | } |
425 | |