| 1 | //===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements a hash set that can be used to remove duplication of |
| 10 | // nodes in a graph. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "llvm/ADT/FoldingSet.h" |
| 15 | #include "llvm/ADT/StringRef.h" |
| 16 | #include "llvm/Support/Allocator.h" |
| 17 | #include "llvm/Support/ErrorHandling.h" |
| 18 | #include "llvm/Support/MathExtras.h" |
| 19 | #include "llvm/Support/SwapByteOrder.h" |
| 20 | #include <cassert> |
| 21 | #include <cstring> |
| 22 | using namespace llvm; |
| 23 | |
| 24 | //===----------------------------------------------------------------------===// |
| 25 | // FoldingSetNodeIDRef Implementation |
| 26 | |
| 27 | bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const { |
| 28 | if (Size != RHS.Size) return false; |
| 29 | return memcmp(s1: Data, s2: RHS.Data, n: Size*sizeof(*Data)) == 0; |
| 30 | } |
| 31 | |
| 32 | /// Used to compare the "ordering" of two nodes as defined by the |
| 33 | /// profiled bits and their ordering defined by memcmp(). |
| 34 | bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const { |
| 35 | if (Size != RHS.Size) |
| 36 | return Size < RHS.Size; |
| 37 | return memcmp(s1: Data, s2: RHS.Data, n: Size*sizeof(*Data)) < 0; |
| 38 | } |
| 39 | |
| 40 | //===----------------------------------------------------------------------===// |
| 41 | // FoldingSetNodeID Implementation |
| 42 | |
| 43 | /// Add* - Add various data types to Bit data. |
| 44 | /// |
| 45 | void FoldingSetNodeID::AddString(StringRef String) { |
| 46 | unsigned Size = String.size(); |
| 47 | |
| 48 | unsigned NumInserts = 1 + divideCeil(Numerator: Size, Denominator: 4); |
| 49 | Bits.reserve(N: Bits.size() + NumInserts); |
| 50 | |
| 51 | Bits.push_back(Elt: Size); |
| 52 | if (!Size) return; |
| 53 | |
| 54 | unsigned Units = Size / 4; |
| 55 | unsigned Pos = 0; |
| 56 | const unsigned *Base = (const unsigned*) String.data(); |
| 57 | |
| 58 | // If the string is aligned do a bulk transfer. |
| 59 | if (!((intptr_t)Base & 3)) { |
| 60 | Bits.append(in_start: Base, in_end: Base + Units); |
| 61 | Pos = (Units + 1) * 4; |
| 62 | } else { |
| 63 | // Otherwise do it the hard way. |
| 64 | // To be compatible with above bulk transfer, we need to take endianness |
| 65 | // into account. |
| 66 | static_assert(sys::IsBigEndianHost || sys::IsLittleEndianHost, |
| 67 | "Unexpected host endianness" ); |
| 68 | if (sys::IsBigEndianHost) { |
| 69 | for (Pos += 4; Pos <= Size; Pos += 4) { |
| 70 | unsigned V = ((unsigned char)String[Pos - 4] << 24) | |
| 71 | ((unsigned char)String[Pos - 3] << 16) | |
| 72 | ((unsigned char)String[Pos - 2] << 8) | |
| 73 | (unsigned char)String[Pos - 1]; |
| 74 | Bits.push_back(Elt: V); |
| 75 | } |
| 76 | } else { // Little-endian host |
| 77 | for (Pos += 4; Pos <= Size; Pos += 4) { |
| 78 | unsigned V = ((unsigned char)String[Pos - 1] << 24) | |
| 79 | ((unsigned char)String[Pos - 2] << 16) | |
| 80 | ((unsigned char)String[Pos - 3] << 8) | |
| 81 | (unsigned char)String[Pos - 4]; |
| 82 | Bits.push_back(Elt: V); |
| 83 | } |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | // With the leftover bits. |
| 88 | unsigned V = 0; |
| 89 | // Pos will have overshot size by 4 - #bytes left over. |
| 90 | // No need to take endianness into account here - this is always executed. |
| 91 | switch (Pos - Size) { |
| 92 | case 1: V = (V << 8) | (unsigned char)String[Size - 3]; [[fallthrough]]; |
| 93 | case 2: V = (V << 8) | (unsigned char)String[Size - 2]; [[fallthrough]]; |
| 94 | case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break; |
| 95 | default: return; // Nothing left. |
| 96 | } |
| 97 | |
| 98 | Bits.push_back(Elt: V); |
| 99 | } |
| 100 | |
| 101 | // AddNodeID - Adds the Bit data of another ID to *this. |
| 102 | void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { |
| 103 | Bits.append(in_start: ID.Bits.begin(), in_end: ID.Bits.end()); |
| 104 | } |
| 105 | |
| 106 | /// operator== - Used to compare two nodes to each other. |
| 107 | /// |
| 108 | bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const { |
| 109 | return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); |
| 110 | } |
| 111 | |
| 112 | /// operator== - Used to compare two nodes to each other. |
| 113 | /// |
| 114 | bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const { |
| 115 | return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS; |
| 116 | } |
| 117 | |
| 118 | /// Used to compare the "ordering" of two nodes as defined by the |
| 119 | /// profiled bits and their ordering defined by memcmp(). |
| 120 | bool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS) const { |
| 121 | return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); |
| 122 | } |
| 123 | |
| 124 | bool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const { |
| 125 | return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS; |
| 126 | } |
| 127 | |
| 128 | /// Intern - Copy this node's data to a memory region allocated from the |
| 129 | /// given allocator and return a FoldingSetNodeIDRef describing the |
| 130 | /// interned data. |
| 131 | FoldingSetNodeIDRef |
| 132 | FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { |
| 133 | unsigned *New = Allocator.Allocate<unsigned>(Num: Bits.size()); |
| 134 | llvm::uninitialized_copy(Src: Bits, Dst: New); |
| 135 | return FoldingSetNodeIDRef(New, Bits.size()); |
| 136 | } |
| 137 | |
| 138 | //===----------------------------------------------------------------------===// |
| 139 | /// Helper functions for FoldingSetBase. |
| 140 | |
| 141 | /// GetNextPtr - In order to save space, each bucket is a |
| 142 | /// singly-linked-list. In order to make deletion more efficient, we make |
| 143 | /// the list circular, so we can delete a node without computing its hash. |
| 144 | /// The problem with this is that the start of the hash buckets are not |
| 145 | /// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: |
| 146 | /// use GetBucketPtr when this happens. |
| 147 | static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) { |
| 148 | // The low bit is set if this is the pointer back to the bucket. |
| 149 | if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) |
| 150 | return nullptr; |
| 151 | |
| 152 | return static_cast<FoldingSetBase::Node*>(NextInBucketPtr); |
| 153 | } |
| 154 | |
| 155 | |
/// GetBucketPtr - Recover the bucket address from a link that points back to
/// its bucket. Such a link has its low bit set (asserted here); the result is
/// the same address with that bit cleared.
static void **GetBucketPtr(void *NextInBucketPtr) {
  const intptr_t Tagged = reinterpret_cast<intptr_t>(NextInBucketPtr);
  assert((Tagged & 1) && "Not a bucket pointer");
  const intptr_t Untagged = Tagged & ~intptr_t(1);
  return reinterpret_cast<void **>(Untagged);
}
| 162 | |
/// GetBucketFor - Map a hash value to its slot in the bucket array and return
/// the address of that slot.
static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
  // NumBuckets is always a power of 2, so masking selects a valid index.
  const unsigned Mask = NumBuckets - 1;
  return &Buckets[Hash & Mask];
}
| 170 | |
| 171 | /// AllocateBuckets - Allocated initialized bucket memory. |
| 172 | static void **AllocateBuckets(unsigned NumBuckets) { |
| 173 | void **Buckets = static_cast<void**>(safe_calloc(Count: NumBuckets + 1, |
| 174 | Sz: sizeof(void*))); |
| 175 | // Set the very last bucket to be a non-null "pointer". |
| 176 | Buckets[NumBuckets] = reinterpret_cast<void*>(-1); |
| 177 | return Buckets; |
| 178 | } |
| 179 | |
| 180 | //===----------------------------------------------------------------------===// |
| 181 | // FoldingSetBase Implementation |
| 182 | |
| 183 | FoldingSetBase::FoldingSetBase(unsigned Log2InitSize) { |
| 184 | assert(5 < Log2InitSize && Log2InitSize < 32 && |
| 185 | "Initial hash table size out of range" ); |
| 186 | NumBuckets = 1 << Log2InitSize; |
| 187 | Buckets = AllocateBuckets(NumBuckets); |
| 188 | NumNodes = 0; |
| 189 | } |
| 190 | |
| 191 | FoldingSetBase::FoldingSetBase(FoldingSetBase &&Arg) |
| 192 | : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) { |
| 193 | Arg.Buckets = nullptr; |
| 194 | Arg.NumBuckets = 0; |
| 195 | Arg.NumNodes = 0; |
| 196 | } |
| 197 | |
| 198 | FoldingSetBase &FoldingSetBase::operator=(FoldingSetBase &&RHS) { |
| 199 | free(ptr: Buckets); // This may be null if the set is in a moved-from state. |
| 200 | Buckets = RHS.Buckets; |
| 201 | NumBuckets = RHS.NumBuckets; |
| 202 | NumNodes = RHS.NumNodes; |
| 203 | RHS.Buckets = nullptr; |
| 204 | RHS.NumBuckets = 0; |
| 205 | RHS.NumNodes = 0; |
| 206 | return *this; |
| 207 | } |
| 208 | |
| 209 | FoldingSetBase::~FoldingSetBase() { |
| 210 | free(ptr: Buckets); |
| 211 | } |
| 212 | |
| 213 | void FoldingSetBase::clear() { |
| 214 | // Set all but the last bucket to null pointers. |
| 215 | memset(s: Buckets, c: 0, n: NumBuckets*sizeof(void*)); |
| 216 | |
| 217 | // Set the very last bucket to be a non-null "pointer". |
| 218 | Buckets[NumBuckets] = reinterpret_cast<void*>(-1); |
| 219 | |
| 220 | // Reset the node count to zero. |
| 221 | NumNodes = 0; |
| 222 | } |
| 223 | |
| 224 | void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount, |
| 225 | const FoldingSetInfo &Info) { |
| 226 | assert((NewBucketCount > NumBuckets) && |
| 227 | "Can't shrink a folding set with GrowBucketCount" ); |
| 228 | assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!" ); |
| 229 | void **OldBuckets = Buckets; |
| 230 | unsigned OldNumBuckets = NumBuckets; |
| 231 | |
| 232 | // Clear out new buckets. |
| 233 | Buckets = AllocateBuckets(NumBuckets: NewBucketCount); |
| 234 | // Set NumBuckets only if allocation of new buckets was successful. |
| 235 | NumBuckets = NewBucketCount; |
| 236 | NumNodes = 0; |
| 237 | |
| 238 | // Walk the old buckets, rehashing nodes into their new place. |
| 239 | FoldingSetNodeID TempID; |
| 240 | for (unsigned i = 0; i != OldNumBuckets; ++i) { |
| 241 | void *Probe = OldBuckets[i]; |
| 242 | if (!Probe) continue; |
| 243 | while (Node *NodeInBucket = GetNextPtr(NextInBucketPtr: Probe)) { |
| 244 | // Figure out the next link, remove NodeInBucket from the old link. |
| 245 | Probe = NodeInBucket->getNextInBucket(); |
| 246 | NodeInBucket->SetNextInBucket(nullptr); |
| 247 | |
| 248 | // Insert the node into the new bucket, after recomputing the hash. |
| 249 | InsertNode(N: NodeInBucket, |
| 250 | InsertPos: GetBucketFor(Hash: Info.ComputeNodeHash(this, NodeInBucket, TempID), |
| 251 | Buckets, NumBuckets), |
| 252 | Info); |
| 253 | TempID.clear(); |
| 254 | } |
| 255 | } |
| 256 | |
| 257 | free(ptr: OldBuckets); |
| 258 | } |
| 259 | |
| 260 | /// GrowHashTable - Double the size of the hash table and rehash everything. |
| 261 | /// |
| 262 | void FoldingSetBase::GrowHashTable(const FoldingSetInfo &Info) { |
| 263 | GrowBucketCount(NewBucketCount: NumBuckets * 2, Info); |
| 264 | } |
| 265 | |
| 266 | void FoldingSetBase::reserve(unsigned EltCount, const FoldingSetInfo &Info) { |
| 267 | // This will give us somewhere between EltCount / 2 and |
| 268 | // EltCount buckets. This puts us in the load factor |
| 269 | // range of 1.0 - 2.0. |
| 270 | if(EltCount < capacity()) |
| 271 | return; |
| 272 | GrowBucketCount(NewBucketCount: llvm::bit_floor(Value: EltCount), Info); |
| 273 | } |
| 274 | |
| 275 | /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, |
| 276 | /// return it. If not, return the insertion token that will make insertion |
| 277 | /// faster. |
| 278 | FoldingSetBase::Node *FoldingSetBase::FindNodeOrInsertPos( |
| 279 | const FoldingSetNodeID &ID, void *&InsertPos, const FoldingSetInfo &Info) { |
| 280 | unsigned IDHash = ID.ComputeHash(); |
| 281 | void **Bucket = GetBucketFor(Hash: IDHash, Buckets, NumBuckets); |
| 282 | void *Probe = *Bucket; |
| 283 | |
| 284 | InsertPos = nullptr; |
| 285 | |
| 286 | FoldingSetNodeID TempID; |
| 287 | while (Node *NodeInBucket = GetNextPtr(NextInBucketPtr: Probe)) { |
| 288 | if (Info.NodeEquals(this, NodeInBucket, ID, IDHash, TempID)) |
| 289 | return NodeInBucket; |
| 290 | TempID.clear(); |
| 291 | |
| 292 | Probe = NodeInBucket->getNextInBucket(); |
| 293 | } |
| 294 | |
| 295 | // Didn't find the node, return null with the bucket as the InsertPos. |
| 296 | InsertPos = Bucket; |
| 297 | return nullptr; |
| 298 | } |
| 299 | |
| 300 | /// InsertNode - Insert the specified node into the folding set, knowing that it |
| 301 | /// is not already in the map. InsertPos must be obtained from |
| 302 | /// FindNodeOrInsertPos. |
| 303 | void FoldingSetBase::InsertNode(Node *N, void *InsertPos, |
| 304 | const FoldingSetInfo &Info) { |
| 305 | assert(!N->getNextInBucket()); |
| 306 | // Do we need to grow the hashtable? |
| 307 | if (NumNodes+1 > capacity()) { |
| 308 | GrowHashTable(Info); |
| 309 | FoldingSetNodeID TempID; |
| 310 | InsertPos = GetBucketFor(Hash: Info.ComputeNodeHash(this, N, TempID), Buckets, |
| 311 | NumBuckets); |
| 312 | } |
| 313 | |
| 314 | ++NumNodes; |
| 315 | |
| 316 | /// The insert position is actually a bucket pointer. |
| 317 | void **Bucket = static_cast<void**>(InsertPos); |
| 318 | |
| 319 | void *Next = *Bucket; |
| 320 | |
| 321 | // If this is the first insertion into this bucket, its next pointer will be |
| 322 | // null. Pretend as if it pointed to itself, setting the low bit to indicate |
| 323 | // that it is a pointer to the bucket. |
| 324 | if (!Next) |
| 325 | Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1); |
| 326 | |
| 327 | // Set the node's next pointer, and make the bucket point to the node. |
| 328 | N->SetNextInBucket(Next); |
| 329 | *Bucket = N; |
| 330 | } |
| 331 | |
| 332 | /// RemoveNode - Remove a node from the folding set, returning true if one was |
| 333 | /// removed or false if the node was not in the folding set. |
| 334 | bool FoldingSetBase::RemoveNode(Node *N) { |
| 335 | // Because each bucket is a circular list, we don't need to compute N's hash |
| 336 | // to remove it. |
| 337 | void *Ptr = N->getNextInBucket(); |
| 338 | if (!Ptr) return false; // Not in folding set. |
| 339 | |
| 340 | --NumNodes; |
| 341 | N->SetNextInBucket(nullptr); |
| 342 | |
| 343 | // Remember what N originally pointed to, either a bucket or another node. |
| 344 | void *NodeNextPtr = Ptr; |
| 345 | |
| 346 | // Chase around the list until we find the node (or bucket) which points to N. |
| 347 | while (true) { |
| 348 | if (Node *NodeInBucket = GetNextPtr(NextInBucketPtr: Ptr)) { |
| 349 | // Advance pointer. |
| 350 | Ptr = NodeInBucket->getNextInBucket(); |
| 351 | |
| 352 | // We found a node that points to N, change it to point to N's next node, |
| 353 | // removing N from the list. |
| 354 | if (Ptr == N) { |
| 355 | NodeInBucket->SetNextInBucket(NodeNextPtr); |
| 356 | return true; |
| 357 | } |
| 358 | } else { |
| 359 | void **Bucket = GetBucketPtr(NextInBucketPtr: Ptr); |
| 360 | Ptr = *Bucket; |
| 361 | |
| 362 | // If we found that the bucket points to N, update the bucket to point to |
| 363 | // whatever is next. |
| 364 | if (Ptr == N) { |
| 365 | *Bucket = NodeNextPtr; |
| 366 | return true; |
| 367 | } |
| 368 | } |
| 369 | } |
| 370 | } |
| 371 | |
| 372 | /// GetOrInsertNode - If there is an existing simple Node exactly |
| 373 | /// equal to the specified node, return it. Otherwise, insert 'N' and it |
| 374 | /// instead. |
| 375 | FoldingSetBase::Node * |
| 376 | FoldingSetBase::GetOrInsertNode(FoldingSetBase::Node *N, |
| 377 | const FoldingSetInfo &Info) { |
| 378 | FoldingSetNodeID ID; |
| 379 | Info.GetNodeProfile(this, N, ID); |
| 380 | void *IP; |
| 381 | if (Node *E = FindNodeOrInsertPos(ID, InsertPos&: IP, Info)) |
| 382 | return E; |
| 383 | InsertNode(N, InsertPos: IP, Info); |
| 384 | return N; |
| 385 | } |
| 386 | |
| 387 | //===----------------------------------------------------------------------===// |
| 388 | // FoldingSetIteratorImpl Implementation |
| 389 | |
| 390 | FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) { |
| 391 | // Skip to the first non-null non-self-cycle bucket. |
| 392 | while (*Bucket != reinterpret_cast<void*>(-1) && |
| 393 | (!*Bucket || !GetNextPtr(NextInBucketPtr: *Bucket))) |
| 394 | ++Bucket; |
| 395 | |
| 396 | NodePtr = static_cast<FoldingSetNode*>(*Bucket); |
| 397 | } |
| 398 | |
| 399 | void FoldingSetIteratorImpl::advance() { |
| 400 | // If there is another link within this bucket, go to it. |
| 401 | void *Probe = NodePtr->getNextInBucket(); |
| 402 | |
| 403 | if (FoldingSetNode *NextNodeInBucket = GetNextPtr(NextInBucketPtr: Probe)) |
| 404 | NodePtr = NextNodeInBucket; |
| 405 | else { |
| 406 | // Otherwise, this is the last link in this bucket. |
| 407 | void **Bucket = GetBucketPtr(NextInBucketPtr: Probe); |
| 408 | |
| 409 | // Skip to the next non-null non-self-cycle bucket. |
| 410 | do { |
| 411 | ++Bucket; |
| 412 | } while (*Bucket != reinterpret_cast<void*>(-1) && |
| 413 | (!*Bucket || !GetNextPtr(NextInBucketPtr: *Bucket))); |
| 414 | |
| 415 | NodePtr = static_cast<FoldingSetNode*>(*Bucket); |
| 416 | } |
| 417 | } |
| 418 | |
| 419 | //===----------------------------------------------------------------------===// |
| 420 | // FoldingSetBucketIteratorImpl Implementation |
| 421 | |
| 422 | FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) { |
| 423 | Ptr = (!*Bucket || !GetNextPtr(NextInBucketPtr: *Bucket)) ? (void*) Bucket : *Bucket; |
| 424 | } |
| 425 | |