1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file Implements OnDiskTrieRawHashMap.
10///
11//===----------------------------------------------------------------------===//
12
13#include "llvm/CAS/OnDiskTrieRawHashMap.h"
14#include "DatabaseFile.h"
15#include "llvm/ADT/StringExtras.h"
16#include "llvm/ADT/TrieHashIndexGenerator.h"
17#include "llvm/CAS/MappedFileRegionArena.h"
18#include "llvm/CAS/OnDiskCASLogger.h"
19#include "llvm/Config/llvm-config.h"
20#include "llvm/Support/ThreadPool.h"
21#include "llvm/Support/Threading.h"
22#include "llvm/Support/raw_ostream.h"
23
24using namespace llvm;
25using namespace llvm::cas;
26using namespace llvm::cas::ondisk;
27
28#if LLVM_ENABLE_ONDISK_CAS
29
30//===----------------------------------------------------------------------===//
31// TrieRawHashMap data structures.
32//===----------------------------------------------------------------------===//
33
34namespace {
35
36class SubtrieHandle;
37class TrieRawHashMapHandle;
38class TrieVisitor;
39
40/// A value stored in the slots inside a SubTrie. A stored value can either be a
41/// subtrie (encoded after negation) which is the file offset to another
42/// subtrie, or it can be a fileset to a DataRecord.
43class SubtrieSlotValue {
44public:
45 explicit operator bool() const { return !isEmpty(); }
46 bool isEmpty() const { return !Offset; }
47 bool isData() const { return Offset > 0; }
48 bool isSubtrie() const { return Offset < 0; }
49 uint64_t asData() const {
50 assert(isData());
51 return Offset;
52 }
53 uint64_t asSubtrie() const {
54 assert(isSubtrie());
55 return -Offset;
56 }
57
58 FileOffset asSubtrieFileOffset() const { return FileOffset(asSubtrie()); }
59
60 FileOffset asDataFileOffset() const { return FileOffset(asData()); }
61
62 int64_t getRawOffset() const { return Offset; }
63
64 static SubtrieSlotValue getDataOffset(int64_t Offset) {
65 return SubtrieSlotValue(Offset);
66 }
67
68 static SubtrieSlotValue getSubtrieOffset(int64_t Offset) {
69 return SubtrieSlotValue(-Offset);
70 }
71
72 static SubtrieSlotValue getDataOffset(FileOffset Offset) {
73 return getDataOffset(Offset: Offset.get());
74 }
75
76 static SubtrieSlotValue getSubtrieOffset(FileOffset Offset) {
77 return getDataOffset(Offset: Offset.get());
78 }
79
80 static SubtrieSlotValue getFromSlot(std::atomic<int64_t> &Slot) {
81 return SubtrieSlotValue(Slot.load());
82 }
83
84 SubtrieSlotValue() = default;
85
86private:
87 friend class SubtrieHandle;
88 explicit SubtrieSlotValue(int64_t Offset) : Offset(Offset) {}
89 int64_t Offset = 0;
90};
91
92/// Subtrie layout:
93/// - 2-bytes: StartBit
94/// - 1-bytes: NumBits=lg(num-slots)
95/// - 5-bytes: 0-pad
96/// - <slots>
97class SubtrieHandle {
98public:
99 struct Header {
100 /// The bit this subtrie starts on.
101 uint16_t StartBit;
102
103 /// The number of bits this subtrie handles. It has 2^NumBits slots.
104 uint8_t NumBits;
105
106 /// 0-pad to 8B.
107 uint8_t ZeroPad1B;
108 uint32_t ZeroPad4B;
109 };
110
111 /// Slot storage:
112 /// - zero: Empty
113 /// - positive: RecordOffset
114 /// - negative: SubtrieOffset
115 using SlotT = std::atomic<int64_t>;
116
117 static int64_t getSlotsSize(uint32_t NumBits) {
118 return sizeof(int64_t) * (1ull << NumBits);
119 }
120
121 static int64_t getSize(uint32_t NumBits) {
122 return sizeof(SubtrieHandle::Header) + getSlotsSize(NumBits);
123 }
124
125 int64_t getSize() const { return getSize(NumBits: H->NumBits); }
126 size_t getNumSlots() const { return Slots.size(); }
127
128 SubtrieSlotValue load(size_t I) const {
129 return SubtrieSlotValue(Slots[I].load());
130 }
131 void store(size_t I, SubtrieSlotValue V) {
132 return Slots[I].store(i: V.getRawOffset());
133 }
134
135 void printHash(raw_ostream &OS, ArrayRef<uint8_t> Bytes) const;
136
137 /// Return None on success, or the existing offset on failure.
138 bool compare_exchange_strong(size_t I, SubtrieSlotValue &Expected,
139 SubtrieSlotValue New) {
140 SubtrieSlotValue SaveExpected(Expected);
141 bool Result = Slots[I].compare_exchange_strong(i1&: Expected.Offset, i2: New.Offset);
142 if (Logger)
143 Logger->logSubtrieHandleCmpXchg(Region: Region->data(), Trie: getOffset().Offset, SlotI: I,
144 Expected: SaveExpected.Offset, New: New.Offset,
145 Previous: Expected.Offset);
146 return Result;
147 }
148
149 /// Sink \p V from \p I in this subtrie down to \p NewI in a new subtrie with
150 /// \p NumSubtrieBits.
151 ///
152 /// \p UnusedSubtrie maintains a 1-item "free" list of unused subtries. If a
153 /// new subtrie is created that isn't used because of a lost race, then it If
154 /// it's already valid, it should be used instead of allocating a new one.
155 /// should be returned as an out parameter to be passed back in the future.
156 /// If it's already valid, it should be used instead of allocating a new one.
157 ///
158 /// Returns the subtrie that now lives at \p I.
159 Expected<SubtrieHandle> sink(size_t I, SubtrieSlotValue V,
160 MappedFileRegionArena &Alloc,
161 size_t NumSubtrieBits,
162 SubtrieHandle &UnusedSubtrie, size_t NewI);
163
164 /// Only safe if the subtrie is empty.
165 void reinitialize(uint32_t StartBit, uint32_t NumBits);
166
167 SubtrieSlotValue getOffset() const {
168 return SubtrieSlotValue::getSubtrieOffset(
169 Offset: reinterpret_cast<const char *>(H) - Region->data());
170 }
171
172 FileOffset getFileOffset() const { return getOffset().asSubtrieFileOffset(); }
173
174 explicit operator bool() const { return H; }
175
176 Header &getHeader() const { return *H; }
177 uint32_t getStartBit() const { return H->StartBit; }
178 uint32_t getNumBits() const { return H->NumBits; }
179
180 static Expected<SubtrieHandle> create(MappedFileRegionArena &Alloc,
181 uint32_t StartBit, uint32_t NumBits,
182 OnDiskCASLogger *Logger);
183
184 static SubtrieHandle getFromFileOffset(MappedFileRegion &Region,
185 FileOffset Offset,
186 OnDiskCASLogger *Logger) {
187 return SubtrieHandle(Region, SubtrieSlotValue::getSubtrieOffset(Offset),
188 Logger);
189 }
190
191 SubtrieHandle() = default;
192 SubtrieHandle(MappedFileRegion &Region, Header &H, OnDiskCASLogger *Logger)
193 : Region(&Region), H(&H), Slots(getSlots(H)), Logger(Logger) {}
194 SubtrieHandle(MappedFileRegion &Region, SubtrieSlotValue Offset,
195 OnDiskCASLogger *Logger)
196 : SubtrieHandle(
197 Region,
198 *reinterpret_cast<Header *>(Region.data() + Offset.asSubtrie()),
199 Logger) {}
200
201private:
202 MappedFileRegion *Region = nullptr;
203 Header *H = nullptr;
204 MutableArrayRef<SlotT> Slots;
205 OnDiskCASLogger *Logger = nullptr;
206
207 static MutableArrayRef<SlotT> getSlots(Header &H) {
208 return MutableArrayRef(reinterpret_cast<SlotT *>(&H + 1),
209 1ull << H.NumBits);
210 }
211};
212
213/// Handle for a TrieRawHashMap table.
214///
215/// TrieRawHashMap table layout:
216/// - [8-bytes: Generic table header]
217/// - 1-byte: NumSubtrieBits
218/// - 1-byte: Flags (not used yet)
219/// - 2-bytes: NumHashBits
220/// - 4-bytes: RecordDataSize (in bytes)
221/// - 8-bytes: RootTrieOffset
222/// - 8-bytes: AllocatorOffset (reserved for implementing free lists)
223/// - <name> '\0'
224///
225/// Record layout:
226/// - <hash>
227/// - <data>
228class TrieRawHashMapHandle {
229public:
230 static constexpr TableHandle::TableKind Kind =
231 TableHandle::TableKind::TrieRawHashMap;
232
233 struct Header {
234 TableHandle::Header GenericHeader;
235 uint8_t NumSubtrieBits;
236 uint8_t Flags; ///< None used yet.
237 uint16_t NumHashBits;
238 uint32_t RecordDataSize;
239 std::atomic<int64_t> RootTrieOffset;
240 std::atomic<int64_t> AllocatorOffset;
241 };
242
243 operator TableHandle() const {
244 if (!H)
245 return TableHandle();
246 return TableHandle(*Region, H->GenericHeader);
247 }
248
249 struct RecordData {
250 OnDiskTrieRawHashMap::ValueProxy Proxy;
251 SubtrieSlotValue Offset;
252 FileOffset getFileOffset() const { return Offset.asDataFileOffset(); }
253 };
254
255 enum Limits : size_t {
256 /// Seems like 65528 hash bits ought to be enough.
257 MaxNumHashBytes = UINT16_MAX >> 3,
258 MaxNumHashBits = MaxNumHashBytes << 3,
259
260 /// 2^16 bits in a trie is 65536 slots. This restricts us to a 16-bit
261 /// index. This many slots is suspicously large anyway.
262 MaxNumRootBits = 16,
263
264 /// 2^10 bits in a trie is 1024 slots. This many slots seems suspiciously
265 /// large for subtries.
266 MaxNumSubtrieBits = 10,
267 };
268
269 static constexpr size_t getNumHashBytes(size_t NumHashBits) {
270 assert(NumHashBits % 8 == 0);
271 return NumHashBits / 8;
272 }
273 static constexpr size_t getRecordSize(size_t RecordDataSize,
274 size_t NumHashBits) {
275 return RecordDataSize + getNumHashBytes(NumHashBits);
276 }
277
278 RecordData getRecord(SubtrieSlotValue Offset);
279 Expected<RecordData> createRecord(MappedFileRegionArena &Alloc,
280 ArrayRef<uint8_t> Hash);
281
282 explicit operator bool() const { return H; }
283 const Header &getHeader() const { return *H; }
284 SubtrieHandle getRoot() const;
285 Expected<SubtrieHandle> getOrCreateRoot(MappedFileRegionArena &Alloc);
286 MappedFileRegion &getRegion() const { return *Region; }
287
288 size_t getFlags() const { return H->Flags; }
289 size_t getNumSubtrieBits() const { return H->NumSubtrieBits; }
290 size_t getNumHashBits() const { return H->NumHashBits; }
291 size_t getNumHashBytes() const { return getNumHashBytes(NumHashBits: H->NumHashBits); }
292 size_t getRecordDataSize() const { return H->RecordDataSize; }
293 size_t getRecordSize() const {
294 return getRecordSize(RecordDataSize: H->RecordDataSize, NumHashBits: H->NumHashBits);
295 }
296
297 TrieHashIndexGenerator getIndexGen(SubtrieHandle Root,
298 ArrayRef<uint8_t> Hash) {
299 assert(Root.getStartBit() == 0);
300 assert(getNumHashBytes() == Hash.size());
301 assert(getNumHashBits() == Hash.size() * 8);
302 return TrieHashIndexGenerator{.NumRootBits: Root.getNumBits(), .NumSubtrieBits: getNumSubtrieBits(), .Bytes: Hash};
303 }
304
305 static Expected<TrieRawHashMapHandle>
306 create(MappedFileRegionArena &Alloc, StringRef Name,
307 std::optional<uint64_t> NumRootBits, uint64_t NumSubtrieBits,
308 uint64_t NumHashBits, uint64_t RecordDataSize,
309 std::shared_ptr<OnDiskCASLogger> Logger);
310
311 void
312 print(raw_ostream &OS,
313 function_ref<void(ArrayRef<char>)> PrintRecordData = nullptr) const;
314
315 Error validate(
316 function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
317 RecordVerifier) const;
318 TrieRawHashMapHandle() = default;
319 TrieRawHashMapHandle(MappedFileRegion &Region, Header &H,
320 std::shared_ptr<OnDiskCASLogger> Logger = nullptr)
321 : Region(&Region), H(&H), Logger(std::move(Logger)) {}
322 TrieRawHashMapHandle(MappedFileRegion &Region, intptr_t HeaderOffset,
323 std::shared_ptr<OnDiskCASLogger> Logger = nullptr)
324 : TrieRawHashMapHandle(
325 Region, *reinterpret_cast<Header *>(Region.data() + HeaderOffset),
326 std::move(Logger)) {}
327
328 OnDiskCASLogger *getLogger() const { return Logger.get(); }
329 void setLogger(std::shared_ptr<OnDiskCASLogger> Logger) {
330 this->Logger = std::move(Logger);
331 }
332
333private:
334 MappedFileRegion *Region = nullptr;
335 Header *H = nullptr;
336 std::shared_ptr<OnDiskCASLogger> Logger;
337};
338
339} // end anonymous namespace
340
341struct OnDiskTrieRawHashMap::ImplType {
342 DatabaseFile File;
343 TrieRawHashMapHandle Trie;
344};
345
346Expected<SubtrieHandle> SubtrieHandle::create(MappedFileRegionArena &Alloc,
347 uint32_t StartBit,
348 uint32_t NumBits,
349 OnDiskCASLogger *Logger) {
350 assert(StartBit <= TrieRawHashMapHandle::MaxNumHashBits);
351 assert(NumBits <= UINT8_MAX);
352 assert(NumBits <= TrieRawHashMapHandle::MaxNumRootBits);
353
354 auto Mem = Alloc.allocate(AllocSize: getSize(NumBits));
355 if (LLVM_UNLIKELY(!Mem))
356 return Mem.takeError();
357 auto *H =
358 new (*Mem) SubtrieHandle::Header{.StartBit: (uint16_t)StartBit, .NumBits: (uint8_t)NumBits,
359 /*ZeroPad1B=*/0, /*ZeroPad4B=*/0};
360 SubtrieHandle S(Alloc.getRegion(), *H, Logger);
361 for (auto I = S.Slots.begin(), E = S.Slots.end(); I != E; ++I)
362 new (I) SlotT(0);
363
364 if (Logger)
365 Logger->logSubtrieHandleCreate(Region: Alloc.data(), Trie: S.getOffset().Offset, StartBit,
366 NumBits);
367 return S;
368}
369
370SubtrieHandle TrieRawHashMapHandle::getRoot() const {
371 if (int64_t Root = H->RootTrieOffset)
372 return SubtrieHandle(getRegion(), SubtrieSlotValue::getSubtrieOffset(Offset: Root),
373 Logger.get());
374 return SubtrieHandle();
375}
376
377Expected<SubtrieHandle>
378TrieRawHashMapHandle::getOrCreateRoot(MappedFileRegionArena &Alloc) {
379 assert(&Alloc.getRegion() == &getRegion());
380 if (SubtrieHandle Root = getRoot())
381 return Root;
382
383 int64_t Race = 0;
384 auto LazyRoot =
385 SubtrieHandle::create(Alloc, StartBit: 0, NumBits: H->NumSubtrieBits, Logger: Logger.get());
386 if (LLVM_UNLIKELY(!LazyRoot))
387 return LazyRoot.takeError();
388
389 if (H->RootTrieOffset.compare_exchange_strong(
390 i1&: Race, i2: LazyRoot->getOffset().asSubtrie()),
391 Logger.get())
392 return *LazyRoot;
393
394 // There was a race. Return the other root.
395 //
396 // TODO: Avoid leaking the lazy root by storing it in an allocator.
397 return SubtrieHandle(getRegion(), SubtrieSlotValue::getSubtrieOffset(Offset: Race),
398 Logger.get());
399}
400
401Expected<TrieRawHashMapHandle>
402TrieRawHashMapHandle::create(MappedFileRegionArena &Alloc, StringRef Name,
403 std::optional<uint64_t> NumRootBits,
404 uint64_t NumSubtrieBits, uint64_t NumHashBits,
405 uint64_t RecordDataSize,
406 std::shared_ptr<OnDiskCASLogger> Logger) {
407 // Allocate.
408 auto Offset = Alloc.allocateOffset(AllocSize: sizeof(Header) + Name.size() + 1);
409 if (LLVM_UNLIKELY(!Offset))
410 return Offset.takeError();
411
412 // Construct the header and the name.
413 assert(Name.size() <= UINT16_MAX && "Expected smaller table name");
414 assert(NumSubtrieBits <= UINT8_MAX && "Expected valid subtrie bits");
415 assert(NumHashBits <= UINT16_MAX && "Expected valid hash size");
416 assert(RecordDataSize <= UINT32_MAX && "Expected smaller table name");
417 auto *H = new (Alloc.getRegion().data() + *Offset)
418 Header{.GenericHeader: {.Kind: TableHandle::TableKind::TrieRawHashMap, .NameSize: (uint16_t)Name.size(),
419 .NameRelOffset: (uint32_t)sizeof(Header)},
420 .NumSubtrieBits: (uint8_t)NumSubtrieBits,
421 /*Flags=*/0,
422 .NumHashBits: (uint16_t)NumHashBits,
423 .RecordDataSize: (uint32_t)RecordDataSize,
424 /*RootTrieOffset=*/{0},
425 /*AllocatorOffset=*/{0}};
426 char *NameStorage = reinterpret_cast<char *>(H + 1);
427 llvm::copy(Range&: Name, Out: NameStorage);
428 NameStorage[Name.size()] = 0;
429
430 // Construct a root trie, if requested.
431 TrieRawHashMapHandle Trie(Alloc.getRegion(), *H, Logger);
432 auto Sub = SubtrieHandle::create(Alloc, StartBit: 0, NumBits: *NumRootBits, Logger: Logger.get());
433 if (LLVM_UNLIKELY(!Sub))
434 return Sub.takeError();
435 if (NumRootBits)
436 H->RootTrieOffset = Sub->getOffset().asSubtrie();
437 return Trie;
438}
439
440TrieRawHashMapHandle::RecordData
441TrieRawHashMapHandle::getRecord(SubtrieSlotValue Offset) {
442 char *Begin = Region->data() + Offset.asData();
443 OnDiskTrieRawHashMap::ValueProxy Proxy;
444 Proxy.Data = MutableArrayRef(Begin, getRecordDataSize());
445 Proxy.Hash = ArrayRef(reinterpret_cast<const uint8_t *>(Proxy.Data.end()),
446 getNumHashBytes());
447 return RecordData{.Proxy: Proxy, .Offset: Offset};
448}
449
450Expected<TrieRawHashMapHandle::RecordData>
451TrieRawHashMapHandle::createRecord(MappedFileRegionArena &Alloc,
452 ArrayRef<uint8_t> Hash) {
453 assert(&Alloc.getRegion() == Region);
454 assert(Hash.size() == getNumHashBytes());
455 auto Offset = Alloc.allocateOffset(AllocSize: getRecordSize());
456 if (LLVM_UNLIKELY(!Offset))
457 return Offset.takeError();
458
459 RecordData Record = getRecord(Offset: SubtrieSlotValue::getDataOffset(Offset: *Offset));
460 llvm::copy(Range&: Hash, Out: const_cast<uint8_t *>(Record.Proxy.Hash.begin()));
461
462 if (Logger)
463 Logger->logHashMappedTrieHandleCreateRecord(
464 Region: Alloc.data(), TrieOffset: Record.Offset.getRawOffset(), Hash);
465
466 return Record;
467}
468
469Expected<OnDiskTrieRawHashMap::ConstOnDiskPtr>
470OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const {
471 // Check alignment.
472 if (!isAligned(Lhs: MappedFileRegionArena::getAlign(), SizeInBytes: Offset.get()))
473 return createStringError(EC: make_error_code(e: std::errc::protocol_error),
474 S: "unaligned file offset at 0x" +
475 utohexstr(X: Offset.get(), /*LowerCase=*/true));
476
477 // Check bounds.
478 //
479 // Note: There's no potential overflow when using \c uint64_t because Offset
480 // is in valid offset range and the record size is in \c [0,UINT32_MAX].
481 if (!validOffset(Offset) ||
482 Offset.get() + Impl->Trie.getRecordSize() > Impl->File.getAlloc().size())
483 return createStringError(EC: make_error_code(e: std::errc::protocol_error),
484 S: "file offset too large: 0x" +
485 utohexstr(X: Offset.get(), /*LowerCase=*/true));
486
487 // Looks okay...
488 TrieRawHashMapHandle::RecordData D =
489 Impl->Trie.getRecord(Offset: SubtrieSlotValue::getDataOffset(Offset));
490 return ConstOnDiskPtr(D.Proxy, D.getFileOffset());
491}
492
493OnDiskTrieRawHashMap::ConstOnDiskPtr
494OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const {
495 TrieRawHashMapHandle Trie = Impl->Trie;
496 assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash");
497
498 SubtrieHandle S = Trie.getRoot();
499 if (!S)
500 return ConstOnDiskPtr();
501
502 TrieHashIndexGenerator IndexGen = Trie.getIndexGen(Root: S, Hash);
503 size_t Index = IndexGen.next();
504 for (;;) {
505 // Try to set the content.
506 SubtrieSlotValue V = S.load(I: Index);
507 if (!V)
508 return ConstOnDiskPtr();
509
510 // Check for an exact match.
511 if (V.isData()) {
512 TrieRawHashMapHandle::RecordData D = Trie.getRecord(Offset: V);
513 return D.Proxy.Hash == Hash ? ConstOnDiskPtr(D.Proxy, D.getFileOffset())
514 : ConstOnDiskPtr();
515 }
516
517 Index = IndexGen.next();
518 S = SubtrieHandle(Trie.getRegion(), V, Trie.getLogger());
519 }
520}
521
522/// Only safe if the subtrie is empty.
523void SubtrieHandle::reinitialize(uint32_t StartBit, uint32_t NumBits) {
524 assert(StartBit > H->StartBit);
525 assert(NumBits <= H->NumBits);
526 // Ideally would also assert that all slots are empty, but that's expensive.
527
528 H->StartBit = StartBit;
529 H->NumBits = NumBits;
530}
531
532Expected<OnDiskTrieRawHashMap::OnDiskPtr>
533OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash,
534 LazyInsertOnConstructCB OnConstruct,
535 LazyInsertOnLeakCB OnLeak) {
536 TrieRawHashMapHandle Trie = Impl->Trie;
537 assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash");
538
539 MappedFileRegionArena &Alloc = Impl->File.getAlloc();
540 std::optional<SubtrieHandle> S;
541 auto Err = Trie.getOrCreateRoot(Alloc).moveInto(Value&: S);
542 if (LLVM_UNLIKELY(Err))
543 return std::move(Err);
544
545 TrieHashIndexGenerator IndexGen = Trie.getIndexGen(Root: *S, Hash);
546 size_t Index = IndexGen.next();
547
548 // Walk through the hash bytes and insert into correct trie position.
549 std::optional<TrieRawHashMapHandle::RecordData> NewRecord;
550 SubtrieHandle UnusedSubtrie;
551 for (;;) {
552 SubtrieSlotValue Existing = S->load(I: Index);
553
554 // Try to set it, if it's empty.
555 if (!Existing) {
556 if (!NewRecord) {
557 auto Err = Trie.createRecord(Alloc, Hash).moveInto(Value&: NewRecord);
558 if (LLVM_UNLIKELY(Err))
559 return std::move(Err);
560 if (OnConstruct)
561 OnConstruct(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy);
562 }
563
564 if (S->compare_exchange_strong(I: Index, Expected&: Existing, New: NewRecord->Offset))
565 return OnDiskPtr(NewRecord->Proxy,
566 NewRecord->Offset.asDataFileOffset());
567
568 // Race means that Existing is no longer empty; fall through...
569 }
570
571 if (Existing.isSubtrie()) {
572 S = SubtrieHandle(Trie.getRegion(), Existing, Trie.getLogger());
573 Index = IndexGen.next();
574 continue;
575 }
576
577 // Check for an exact match.
578 TrieRawHashMapHandle::RecordData ExistingRecord = Trie.getRecord(Offset: Existing);
579 if (ExistingRecord.Proxy.Hash == Hash) {
580 if (NewRecord && OnLeak)
581 OnLeak(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy,
582 ExistingRecord.Offset.asDataFileOffset(), ExistingRecord.Proxy);
583 return OnDiskPtr(ExistingRecord.Proxy,
584 ExistingRecord.Offset.asDataFileOffset());
585 }
586
587 // Sink the existing content as long as the indexes match.
588 for (;;) {
589 size_t NextIndex = IndexGen.next();
590 size_t NewIndexForExistingContent =
591 IndexGen.getCollidingBits(CollidingBits: ExistingRecord.Proxy.Hash);
592
593 auto Err = S->sink(I: Index, V: Existing, Alloc, NumSubtrieBits: IndexGen.getNumBits(),
594 UnusedSubtrie, NewI: NewIndexForExistingContent)
595 .moveInto(Value&: S);
596 if (LLVM_UNLIKELY(Err))
597 return std::move(Err);
598 Index = NextIndex;
599
600 // Found the difference.
601 if (NextIndex != NewIndexForExistingContent)
602 break;
603 }
604 }
605}
606
607Expected<SubtrieHandle> SubtrieHandle::sink(size_t I, SubtrieSlotValue V,
608 MappedFileRegionArena &Alloc,
609 size_t NumSubtrieBits,
610 SubtrieHandle &UnusedSubtrie,
611 size_t NewI) {
612 std::optional<SubtrieHandle> NewS;
613 if (UnusedSubtrie) {
614 // Steal UnusedSubtrie and initialize it.
615 NewS.emplace();
616 std::swap(a&: *NewS, b&: UnusedSubtrie);
617 NewS->reinitialize(StartBit: getStartBit() + getNumBits(), NumBits: NumSubtrieBits);
618 } else {
619 // Allocate a new, empty subtrie.
620 auto Err = SubtrieHandle::create(Alloc, StartBit: getStartBit() + getNumBits(),
621 NumBits: NumSubtrieBits, Logger)
622 .moveInto(Value&: NewS);
623 if (LLVM_UNLIKELY(Err))
624 return std::move(Err);
625 }
626
627 NewS->store(I: NewI, V);
628 if (compare_exchange_strong(I, Expected&: V, New: NewS->getOffset()))
629 return *NewS; // Success!
630
631 // Raced.
632 assert(V.isSubtrie() && "Expected racing sink() to add a subtrie");
633
634 // Wipe out the new slot so NewS can be reused and set the out parameter.
635 NewS->store(I: NewI, V: SubtrieSlotValue());
636 UnusedSubtrie = *NewS;
637
638 // Return the subtrie added by the concurrent sink() call.
639 return SubtrieHandle(Alloc.getRegion(), V, Logger);
640}
641
642void OnDiskTrieRawHashMap::print(
643 raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
644 Impl->Trie.print(OS, PrintRecordData);
645}
646
647Error OnDiskTrieRawHashMap::validate(
648 function_ref<Error(FileOffset, ConstValueProxy)> RecordVerifier) const {
649 return Impl->Trie.validate(RecordVerifier);
650}
651
652// Helper function that prints hexdigit and have a sub-byte starting position.
653static void printHexDigits(raw_ostream &OS, ArrayRef<uint8_t> Bytes,
654 size_t StartBit, size_t NumBits) {
655 assert(StartBit % 4 == 0);
656 assert(NumBits % 4 == 0);
657 for (size_t I = StartBit, E = StartBit + NumBits; I != E; I += 4) {
658 uint8_t HexPair = Bytes[I / 8];
659 uint8_t HexDigit = I % 8 == 0 ? HexPair >> 4 : HexPair & 0xf;
660 OS << hexdigit(X: HexDigit, /*LowerCase=*/true);
661 }
662}
663
664static void printBits(raw_ostream &OS, ArrayRef<uint8_t> Bytes, size_t StartBit,
665 size_t NumBits) {
666 assert(StartBit + NumBits <= Bytes.size() * 8u);
667 for (size_t I = StartBit, E = StartBit + NumBits; I != E; ++I) {
668 uint8_t Byte = Bytes[I / 8];
669 size_t ByteOffset = I % 8;
670 if (size_t ByteShift = 8 - ByteOffset - 1)
671 Byte >>= ByteShift;
672 OS << (Byte & 0x1 ? '1' : '0');
673 }
674}
675
676void SubtrieHandle::printHash(raw_ostream &OS, ArrayRef<uint8_t> Bytes) const {
677 // afb[1c:00*01110*0]def
678 size_t EndBit = getStartBit() + getNumBits();
679 size_t HashEndBit = Bytes.size() * 8u;
680
681 size_t FirstBinaryBit = getStartBit() & ~0x3u;
682 printHexDigits(OS, Bytes, StartBit: 0, NumBits: FirstBinaryBit);
683
684 size_t LastBinaryBit = (EndBit + 3u) & ~0x3u;
685 OS << "[";
686 printBits(OS, Bytes, StartBit: FirstBinaryBit, NumBits: LastBinaryBit - FirstBinaryBit);
687 OS << "]";
688
689 printHexDigits(OS, Bytes, StartBit: LastBinaryBit, NumBits: HashEndBit - LastBinaryBit);
690}
691
692static void appendIndexBits(std::string &Prefix, size_t Index,
693 size_t NumSlots) {
694 std::string Bits;
695 for (size_t NumBits = 1u; NumBits < NumSlots; NumBits <<= 1) {
696 Bits.push_back(c: '0' + (Index & 0x1));
697 Index >>= 1;
698 }
699 for (char Ch : llvm::reverse(C&: Bits))
700 Prefix += Ch;
701}
702
703static void printPrefix(raw_ostream &OS, StringRef Prefix) {
704 while (Prefix.size() >= 4) {
705 uint8_t Digit;
706 bool ErrorParsingBinary = Prefix.take_front(N: 4).getAsInteger(Radix: 2, Result&: Digit);
707 assert(!ErrorParsingBinary);
708 (void)ErrorParsingBinary;
709 OS << hexdigit(X: Digit, /*LowerCase=*/true);
710 Prefix = Prefix.drop_front(N: 4);
711 }
712 if (!Prefix.empty())
713 OS << "[" << Prefix << "]";
714}
715
716LLVM_DUMP_METHOD void OnDiskTrieRawHashMap::dump() const { print(OS&: dbgs()); }
717
718static Expected<size_t> checkParameter(StringRef Label, size_t Max,
719 std::optional<size_t> Value,
720 std::optional<size_t> Default,
721 StringRef Path, StringRef TableName) {
722 assert(Value || Default);
723 assert(!Default || *Default <= Max);
724 if (!Value)
725 return *Default;
726
727 if (*Value <= Max)
728 return *Value;
729 return createTableConfigError(
730 ErrC: std::errc::argument_out_of_domain, Path, TableName,
731 Msg: "invalid " + Label + ": " + Twine(*Value) + " (max: " + Twine(Max) + ")");
732}
733
734size_t OnDiskTrieRawHashMap::size() const { return Impl->File.size(); }
735size_t OnDiskTrieRawHashMap::capacity() const {
736 return Impl->File.getRegion().size();
737}
738
739Expected<OnDiskTrieRawHashMap>
740OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine,
741 size_t NumHashBits, uint64_t DataSize,
742 uint64_t MaxFileSize,
743 std::optional<uint64_t> NewFileInitialSize,
744 std::shared_ptr<OnDiskCASLogger> Logger,
745 std::optional<size_t> NewTableNumRootBits,
746 std::optional<size_t> NewTableNumSubtrieBits) {
747 SmallString<128> PathStorage;
748 StringRef Path = PathTwine.toStringRef(Out&: PathStorage);
749 SmallString<128> TrieNameStorage;
750 StringRef TrieName = TrieNameTwine.toStringRef(Out&: TrieNameStorage);
751
752 constexpr size_t DefaultNumRootBits = 10;
753 constexpr size_t DefaultNumSubtrieBits = 6;
754
755 size_t NumRootBits;
756 if (Error E = checkParameter(
757 Label: "root bits", Max: TrieRawHashMapHandle::MaxNumRootBits,
758 Value: NewTableNumRootBits, Default: DefaultNumRootBits, Path, TableName: TrieName)
759 .moveInto(Value&: NumRootBits))
760 return std::move(E);
761
762 size_t NumSubtrieBits;
763 if (Error E = checkParameter(Label: "subtrie bits",
764 Max: TrieRawHashMapHandle::MaxNumSubtrieBits,
765 Value: NewTableNumSubtrieBits, Default: DefaultNumSubtrieBits,
766 Path, TableName: TrieName)
767 .moveInto(Value&: NumSubtrieBits))
768 return std::move(E);
769
770 size_t NumHashBytes = NumHashBits >> 3;
771 if (Error E =
772 checkParameter(Label: "hash size", Max: TrieRawHashMapHandle::MaxNumHashBits,
773 Value: NumHashBits, Default: std::nullopt, Path, TableName: TrieName)
774 .takeError())
775 return std::move(E);
776 assert(NumHashBits == NumHashBytes << 3 &&
777 "Expected hash size to be byte-aligned");
778 if (NumHashBits != NumHashBytes << 3)
779 return createTableConfigError(
780 ErrC: std::errc::argument_out_of_domain, Path, TableName: TrieName,
781 Msg: "invalid hash size: " + Twine(NumHashBits) + " (not byte-aligned)");
782
783 // Constructor for if the file doesn't exist.
784 auto NewDBConstructor = [&](DatabaseFile &DB) -> Error {
785 auto Trie = TrieRawHashMapHandle::create(Alloc&: DB.getAlloc(), Name: TrieName,
786 NumRootBits, NumSubtrieBits,
787 NumHashBits, RecordDataSize: DataSize, Logger);
788 if (LLVM_UNLIKELY(!Trie))
789 return Trie.takeError();
790
791 return DB.addTable(Table: *Trie);
792 };
793
794 // Get or create the file.
795 Expected<DatabaseFile> File =
796 DatabaseFile::create(Path, Capacity: MaxFileSize, Logger, NewDBConstructor);
797 if (!File)
798 return File.takeError();
799
800 // Find the trie and validate it.
801 std::optional<TableHandle> Table = File->findTable(Name: TrieName);
802 if (!Table)
803 return createTableConfigError(ErrC: std::errc::argument_out_of_domain, Path,
804 TableName: TrieName, Msg: "table not found");
805 if (Error E = checkTable(Label: "table kind", Expected: (size_t)TrieRawHashMapHandle::Kind,
806 Observed: (size_t)Table->getHeader().Kind, Path, TrieName))
807 return std::move(E);
808 auto Trie = Table->cast<TrieRawHashMapHandle>();
809 Trie.setLogger(Logger);
810 assert(Trie && "Already checked the kind");
811
812 // Check the hash and data size.
813 if (Error E = checkTable(Label: "hash size", Expected: NumHashBits, Observed: Trie.getNumHashBits(),
814 Path, TrieName))
815 return std::move(E);
816 if (Error E = checkTable(Label: "data size", Expected: DataSize, Observed: Trie.getRecordDataSize(),
817 Path, TrieName))
818 return std::move(E);
819
820 // No flags supported right now. Either corrupt, or coming from a future
821 // writer.
822 if (size_t Flags = Trie.getFlags())
823 return createTableConfigError(ErrC: std::errc::invalid_argument, Path, TableName: TrieName,
824 Msg: "unsupported flags: " + Twine(Flags));
825
826 // Success.
827 OnDiskTrieRawHashMap::ImplType Impl{.File: DatabaseFile(std::move(*File)), .Trie: Trie};
828 return OnDiskTrieRawHashMap(std::make_unique<ImplType>(args: std::move(Impl)));
829}
830
831static Error createInvalidTrieError(uint64_t Offset, const Twine &Msg) {
832 return createStringError(EC: make_error_code(e: std::errc::protocol_error),
833 S: "invalid trie at 0x" +
834 utohexstr(X: Offset, /*LowerCase=*/true) + ": " +
835 Msg);
836}
837
838//===----------------------------------------------------------------------===//
839// TrieVisitor data structures.
840//===----------------------------------------------------------------------===//
841
842namespace {
843/// A multi-threaded vistior to traverse the Trie.
844///
845/// TODO: add more sanity checks that isn't just plain data corruption. For
846/// example, some ill-formed data can be constructed to form a cycle using
847/// Sub-Tries and it can lead to inifinite loop when visiting (or inserting
848/// data).
849class TrieVisitor {
850public:
851 TrieVisitor(TrieRawHashMapHandle Trie, unsigned ThreadCount = 0,
852 unsigned ErrorLimit = 50)
853 : Trie(Trie), ErrorLimit(ErrorLimit),
854 Threads(hardware_concurrency(ThreadCount)) {}
855 virtual ~TrieVisitor() = default;
856 Error visit();
857
858private:
859 // Virtual method to implement the action when visiting a sub-trie.
860 virtual Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) {
861 return Error::success();
862 }
863
864 // Virtual method to implement the action when visiting a slot in a trie node.
865 virtual Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
866 SubtrieSlotValue Slot) {
867 return Error::success();
868 }
869
870protected:
871 TrieRawHashMapHandle Trie;
872
873private:
874 Error traverseTrieNode(SubtrieHandle Node, StringRef Prefix);
875
876 Error validateSubTrie(SubtrieHandle Node, bool IsRoot);
877
878 // Helper function to capture errors when visiting the trie nodes.
879 void addError(Error NewError) {
880 assert(NewError && "not an error");
881 std::lock_guard<std::mutex> ErrorLock(Lock);
882 if (NumError >= ErrorLimit) {
883 // Too many errors.
884 consumeError(Err: std::move(NewError));
885 return;
886 }
887
888 if (Err)
889 Err = joinErrors(E1: std::move(*Err), E2: std::move(NewError));
890 else
891 Err = std::move(NewError);
892 NumError++;
893 }
894
895 bool tooManyErrors() {
896 std::lock_guard<std::mutex> ErrorLock(Lock);
897 return (bool)Err && NumError >= ErrorLimit;
898 }
899
900 const unsigned ErrorLimit;
901 std::optional<Error> Err;
902 unsigned NumError = 0;
903 std::mutex Lock;
904 DefaultThreadPool Threads;
905};
906
907/// A visitor that traverse and print the Trie.
908class TriePrinter : public TrieVisitor {
909public:
910 TriePrinter(TrieRawHashMapHandle Trie, raw_ostream &OS,
911 function_ref<void(ArrayRef<char>)> PrintRecordData)
912 : TrieVisitor(Trie, /*ThreadCount=*/1), OS(OS),
913 PrintRecordData(PrintRecordData) {}
914
915 Error printRecords() {
916 if (Records.empty())
917 return Error::success();
918
919 OS << "records\n";
920 llvm::sort(C&: Records);
921 for (int64_t Offset : Records) {
922 TrieRawHashMapHandle::RecordData Record =
923 Trie.getRecord(Offset: SubtrieSlotValue::getDataOffset(Offset));
924 if (auto Err = printRecord(Record))
925 return Err;
926 }
927 return Error::success();
928 }
929
930 Error printRecord(TrieRawHashMapHandle::RecordData &Record) {
931 OS << "- addr=" << (void *)Record.getFileOffset().get() << " ";
932 if (PrintRecordData) {
933 PrintRecordData(Record.Proxy.Data);
934 } else {
935 OS << "bytes=";
936 ArrayRef<uint8_t> Data(
937 reinterpret_cast<const uint8_t *>(Record.Proxy.Data.data()),
938 Record.Proxy.Data.size());
939 printHexDigits(OS, Bytes: Data, StartBit: 0, NumBits: Data.size() * 8);
940 }
941 OS << "\n";
942 return Error::success();
943 }
944
945 Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) override {
946 if (Prefix.empty()) {
947 OS << "root";
948 } else {
949 OS << "subtrie=";
950 printPrefix(OS, Prefix);
951 }
952
953 OS << " addr="
954 << (void *)(reinterpret_cast<const char *>(&SubTrie.getHeader()) -
955 Trie.getRegion().data());
956 OS << " num-slots=" << SubTrie.getNumSlots() << "\n";
957 return Error::success();
958 }
959
960 Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
961 SubtrieSlotValue Slot) override {
962 OS << "- index=";
963 for (size_t Pad : {10, 100, 1000})
964 if (I < Pad && Subtrie.getNumSlots() >= Pad)
965 OS << "0";
966 OS << I << " ";
967 if (Slot.isSubtrie()) {
968 OS << "addr=" << (void *)Slot.asSubtrie();
969 OS << " subtrie=";
970 printPrefix(OS, Prefix);
971 OS << "\n";
972 return Error::success();
973 }
974 TrieRawHashMapHandle::RecordData Record = Trie.getRecord(Offset: Slot);
975 OS << "addr=" << (void *)Record.getFileOffset().get();
976 OS << " content=";
977 Subtrie.printHash(OS, Bytes: Record.Proxy.Hash);
978 OS << "\n";
979 Records.push_back(Elt: Slot.asData());
980 return Error::success();
981 }
982
983private:
984 raw_ostream &OS;
985 function_ref<void(ArrayRef<char>)> PrintRecordData;
986 SmallVector<int64_t> Records;
987};
988
989/// TrieVerifier that adds additional verification on top of the basic visitor.
990class TrieVerifier : public TrieVisitor {
991public:
992 TrieVerifier(
993 TrieRawHashMapHandle Trie,
994 function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
995 RecordVerifier)
996 : TrieVisitor(Trie), RecordVerifier(RecordVerifier) {}
997
998private:
999 Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) final {
1000 return Error::success();
1001 }
1002
1003 Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
1004 SubtrieSlotValue Slot) final {
1005 if (RecordVerifier && Slot.isData()) {
1006 if (!isAligned(Lhs: MappedFileRegionArena::getAlign(), SizeInBytes: Slot.asData()))
1007 return createInvalidTrieError(Offset: Slot.asData(), Msg: "mis-aligned data entry");
1008
1009 TrieRawHashMapHandle::RecordData Record =
1010 Trie.getRecord(Offset: SubtrieSlotValue::getDataOffset(Offset: Slot.asData()));
1011 return RecordVerifier(Slot.asDataFileOffset(),
1012 OnDiskTrieRawHashMap::ConstValueProxy{
1013 Record.Proxy.Hash, Record.Proxy.Data});
1014 }
1015 return Error::success();
1016 }
1017
1018 function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
1019 RecordVerifier;
1020};
1021} // namespace
1022
1023Error TrieVisitor::visit() {
1024 auto Root = Trie.getRoot();
1025 if (!Root)
1026 return Error::success();
1027
1028 if (auto Err = validateSubTrie(Node: Root, /*IsRoot=*/true))
1029 return Err;
1030
1031 if (auto Err = visitSubTrie(Prefix: "", SubTrie: Root))
1032 return Err;
1033
1034 SmallVector<SubtrieHandle> Subs;
1035 SmallVector<std::string> Prefixes;
1036 const size_t NumSlots = Root.getNumSlots();
1037 for (size_t I = 0, E = NumSlots; I != E; ++I) {
1038 SubtrieSlotValue Slot = Root.load(I);
1039 if (!Slot)
1040 continue;
1041 uint64_t Offset = Slot.isSubtrie() ? Slot.asSubtrie() : Slot.asData();
1042 if (Offset >= (uint64_t)Trie.getRegion().size())
1043 return createInvalidTrieError(Offset, Msg: "slot points out of bound");
1044 std::string SubtriePrefix;
1045 appendIndexBits(Prefix&: SubtriePrefix, Index: I, NumSlots);
1046 if (Slot.isSubtrie()) {
1047 SubtrieHandle S(Trie.getRegion(), Slot, Trie.getLogger());
1048 Subs.push_back(Elt: S);
1049 Prefixes.push_back(Elt: SubtriePrefix);
1050 }
1051 if (auto Err = visitSlot(I, Subtrie: Root, Prefix: SubtriePrefix, Slot))
1052 return Err;
1053 }
1054
1055 for (size_t I = 0, E = Subs.size(); I != E; ++I) {
1056 Threads.async(
1057 F: [&](unsigned Idx) {
1058 // Don't run if there is an error already.
1059 if (tooManyErrors())
1060 return;
1061 if (auto Err = traverseTrieNode(Node: Subs[Idx], Prefix: Prefixes[Idx]))
1062 addError(NewError: std::move(Err));
1063 },
1064 ArgList&: I);
1065 }
1066
1067 Threads.wait();
1068 if (Err)
1069 return std::move(*Err);
1070 return Error::success();
1071}
1072
1073Error TrieVisitor::validateSubTrie(SubtrieHandle Node, bool IsRoot) {
1074 char *Addr = reinterpret_cast<char *>(&Node.getHeader());
1075 const int64_t Offset = Node.getFileOffset().get();
1076 if (Addr + Node.getSize() >=
1077 Trie.getRegion().data() + Trie.getRegion().size())
1078 return createInvalidTrieError(Offset, Msg: "subtrie node spans out of bound");
1079
1080 if (!IsRoot &&
1081 Node.getStartBit() + Node.getNumBits() > Trie.getNumHashBits()) {
1082 return createInvalidTrieError(Offset,
1083 Msg: "subtrie represents too many hash bits");
1084 }
1085
1086 if (IsRoot) {
1087 if (Node.getStartBit() != 0)
1088 return createInvalidTrieError(Offset,
1089 Msg: "root node doesn't start at 0 index");
1090
1091 return Error::success();
1092 }
1093
1094 if (Node.getNumBits() > Trie.getNumSubtrieBits())
1095 return createInvalidTrieError(Offset, Msg: "subtrie has wrong number of slots");
1096
1097 return Error::success();
1098}
1099
1100Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) {
1101 if (auto Err = validateSubTrie(Node, /*IsRoot=*/false))
1102 return Err;
1103
1104 if (auto Err = visitSubTrie(Prefix, SubTrie: Node))
1105 return Err;
1106
1107 SmallVector<SubtrieHandle> Subs;
1108 SmallVector<std::string> Prefixes;
1109 const size_t NumSlots = Node.getNumSlots();
1110 for (size_t I = 0, E = NumSlots; I != E; ++I) {
1111 SubtrieSlotValue Slot = Node.load(I);
1112 if (!Slot)
1113 continue;
1114 uint64_t Offset = Slot.isSubtrie() ? Slot.asSubtrie() : Slot.asData();
1115 if (Offset >= (uint64_t)Trie.getRegion().size())
1116 return createInvalidTrieError(Offset, Msg: "slot points out of bound");
1117 std::string SubtriePrefix = Prefix.str();
1118 appendIndexBits(Prefix&: SubtriePrefix, Index: I, NumSlots);
1119 if (Slot.isSubtrie()) {
1120 SubtrieHandle S(Trie.getRegion(), Slot, Trie.getLogger());
1121 Subs.push_back(Elt: S);
1122 Prefixes.push_back(Elt: SubtriePrefix);
1123 }
1124 if (auto Err = visitSlot(I, Subtrie: Node, Prefix: SubtriePrefix, Slot))
1125 return Err;
1126 }
1127 for (size_t I = 0, E = Subs.size(); I != E; ++I)
1128 if (auto Err = traverseTrieNode(Node: Subs[I], Prefix: Prefixes[I]))
1129 return Err;
1130
1131 return Error::success();
1132}
1133
1134void TrieRawHashMapHandle::print(
1135 raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
1136 OS << "hash-num-bits=" << getNumHashBits()
1137 << " hash-size=" << getNumHashBytes()
1138 << " record-data-size=" << getRecordDataSize() << "\n";
1139
1140 TriePrinter Printer(*this, OS, PrintRecordData);
1141 if (auto Err = Printer.visit())
1142 OS << "error: " << toString(E: std::move(Err)) << "\n";
1143
1144 if (auto Err = Printer.printRecords())
1145 OS << "error: " << toString(E: std::move(Err)) << "\n";
1146}
1147
1148Error TrieRawHashMapHandle::validate(
1149 function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
1150 RecordVerifier) const {
1151 // Use the base TrieVisitor to identify the errors inside trie first.
1152 TrieVisitor BasicVerifier(*this);
1153 if (auto Err = BasicVerifier.visit())
1154 return Err;
1155
1156 // If the trie data structure is sound, do a second pass to verify data and
1157 // verifier function can assume the index is correct. However, there can be
1158 // newly added bad entries that can still produce error.
1159 TrieVerifier Verifier(*this, RecordVerifier);
1160 return Verifier.visit();
1161}
1162
1163#else // !LLVM_ENABLE_ONDISK_CAS
1164
1165struct OnDiskTrieRawHashMap::ImplType {};
1166
1167Expected<OnDiskTrieRawHashMap>
1168OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine,
1169 size_t NumHashBits, uint64_t DataSize,
1170 uint64_t MaxFileSize,
1171 std::optional<uint64_t> NewFileInitialSize,
1172 std::shared_ptr<OnDiskCASLogger> Logger,
1173 std::optional<size_t> NewTableNumRootBits,
1174 std::optional<size_t> NewTableNumSubtrieBits) {
1175 return createStringError(make_error_code(std::errc::not_supported),
1176 "OnDiskTrieRawHashMap is not supported");
1177}
1178
1179Expected<OnDiskTrieRawHashMap::OnDiskPtr>
1180OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash,
1181 LazyInsertOnConstructCB OnConstruct,
1182 LazyInsertOnLeakCB OnLeak) {
1183 return createStringError(make_error_code(std::errc::not_supported),
1184 "OnDiskTrieRawHashMap is not supported");
1185}
1186
1187Expected<OnDiskTrieRawHashMap::ConstOnDiskPtr>
1188OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const {
1189 return createStringError(make_error_code(std::errc::not_supported),
1190 "OnDiskTrieRawHashMap is not supported");
1191}
1192
1193OnDiskTrieRawHashMap::ConstOnDiskPtr
1194OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const {
1195 return ConstOnDiskPtr();
1196}
1197
1198void OnDiskTrieRawHashMap::print(
1199 raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
1200}
1201
1202Error OnDiskTrieRawHashMap::validate(
1203 function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
1204 RecordVerifier) const {
1205 return createStringError(make_error_code(std::errc::not_supported),
1206 "OnDiskTrieRawHashMap is not supported");
1207}
1208
1209size_t OnDiskTrieRawHashMap::size() const { return 0; }
1210size_t OnDiskTrieRawHashMap::capacity() const { return 0; }
1211
1212#endif // LLVM_ENABLE_ONDISK_CAS
1213
1214OnDiskTrieRawHashMap::OnDiskTrieRawHashMap(std::unique_ptr<ImplType> Impl)
1215 : Impl(std::move(Impl)) {}
1216OnDiskTrieRawHashMap::OnDiskTrieRawHashMap(OnDiskTrieRawHashMap &&RHS) =
1217 default;
1218OnDiskTrieRawHashMap &
1219OnDiskTrieRawHashMap::operator=(OnDiskTrieRawHashMap &&RHS) = default;
1220OnDiskTrieRawHashMap::~OnDiskTrieRawHashMap() = default;
1221