1//===- InMemoryCAS.cpp ------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "BuiltinCAS.h"
10#include "llvm/ADT/LazyAtomicPointer.h"
11#include "llvm/ADT/PointerIntPair.h"
12#include "llvm/ADT/TrieRawHashMap.h"
13#include "llvm/Support/Allocator.h"
14#include "llvm/Support/Casting.h"
15#include "llvm/Support/ThreadSafeAllocator.h"
16#include "llvm/Support/TrailingObjects.h"
17
18using namespace llvm;
19using namespace llvm::cas;
20using namespace llvm::cas::builtin;
21
22namespace {
23
24class InMemoryObject;
25
26/// Index of referenced IDs (map: Hash -> InMemoryObject*). Uses
27/// LazyAtomicPointer to coordinate creation of objects.
28using InMemoryIndexT =
29 ThreadSafeTrieRawHashMap<LazyAtomicPointer<const InMemoryObject>,
30 sizeof(HashType)>;
31
32/// Values in \a InMemoryIndexT. \a InMemoryObject's point at this to access
33/// their hash.
34using InMemoryIndexValueT = InMemoryIndexT::value_type;
35
36/// Builtin InMemory CAS that stores CAS object in the memory.
37class InMemoryObject {
38public:
39 enum class Kind {
40 /// Node with refs and data.
41 RefNode,
42
43 /// Node with refs and data co-allocated.
44 InlineNode,
45
46 Max = InlineNode,
47 };
48
49 Kind getKind() const { return IndexAndKind.getInt(); }
50 const InMemoryIndexValueT &getIndex() const {
51 assert(IndexAndKind.getPointer());
52 return *IndexAndKind.getPointer();
53 }
54
55 ArrayRef<uint8_t> getHash() const { return getIndex().Hash; }
56
57 InMemoryObject() = delete;
58 InMemoryObject(InMemoryObject &&) = delete;
59 InMemoryObject(const InMemoryObject &) = delete;
60 InMemoryObject &operator=(const InMemoryObject &) = delete;
61 InMemoryObject &operator=(InMemoryObject &&) = delete;
62 virtual ~InMemoryObject() = default;
63
64protected:
65 InMemoryObject(Kind K, const InMemoryIndexValueT &I) : IndexAndKind(&I, K) {}
66
67private:
68 enum Counts : int {
69 NumKindBits = 2,
70 };
71 PointerIntPair<const InMemoryIndexValueT *, NumKindBits, Kind> IndexAndKind;
72 static_assert((1U << NumKindBits) <= alignof(InMemoryIndexValueT),
73 "Kind will clobber pointer");
74 static_assert(((int)Kind::Max >> NumKindBits) == 0, "Kind will be truncated");
75
76public:
77 ArrayRef<char> getData() const;
78
79 ArrayRef<const InMemoryObject *> getRefs() const;
80};
81
82class InMemoryRefObject final : public InMemoryObject {
83public:
84 static constexpr Kind KindValue = Kind::RefNode;
85 static bool classof(const InMemoryObject *O) {
86 return O->getKind() == KindValue;
87 }
88
89 ArrayRef<const InMemoryObject *> getRefsImpl() const { return Refs; }
90 ArrayRef<const InMemoryObject *> getRefs() const { return Refs; }
91 ArrayRef<char> getDataImpl() const { return Data; }
92 ArrayRef<char> getData() const { return Data; }
93
94 static InMemoryRefObject &create(function_ref<void *(size_t Size)> Allocate,
95 const InMemoryIndexValueT &I,
96 ArrayRef<const InMemoryObject *> Refs,
97 ArrayRef<char> Data) {
98 void *Mem = Allocate(sizeof(InMemoryRefObject));
99 return *new (Mem) InMemoryRefObject(I, Refs, Data);
100 }
101
102private:
103 InMemoryRefObject(const InMemoryIndexValueT &I,
104 ArrayRef<const InMemoryObject *> Refs, ArrayRef<char> Data)
105 : InMemoryObject(KindValue, I), Refs(Refs), Data(Data) {
106 assert(isAddrAligned(Align(8), this) && "Expected 8-byte alignment");
107 assert(isAddrAligned(Align(8), Data.data()) && "Expected 8-byte alignment");
108 assert(*Data.end() == 0 && "Expected null-termination");
109 }
110
111 ArrayRef<const InMemoryObject *> Refs;
112 ArrayRef<char> Data;
113};
114
115class InMemoryInlineObject final
116 : public InMemoryObject,
117 public TrailingObjects<InMemoryInlineObject, const InMemoryObject *,
118 char> {
119public:
120 static constexpr Kind KindValue = Kind::InlineNode;
121 static bool classof(const InMemoryObject *O) {
122 return O->getKind() == KindValue;
123 }
124
125 ArrayRef<const InMemoryObject *> getRefs() const { return getRefsImpl(); }
126 ArrayRef<const InMemoryObject *> getRefsImpl() const {
127 return ArrayRef(getTrailingObjects<const InMemoryObject *>(), NumRefs);
128 }
129
130 ArrayRef<char> getData() const { return getDataImpl(); }
131 ArrayRef<char> getDataImpl() const {
132 return ArrayRef(getTrailingObjects<char>(), DataSize);
133 }
134
135 static InMemoryInlineObject &
136 create(function_ref<void *(size_t Size)> Allocate,
137 const InMemoryIndexValueT &I, ArrayRef<const InMemoryObject *> Refs,
138 ArrayRef<char> Data) {
139 void *Mem = Allocate(sizeof(InMemoryInlineObject) +
140 sizeof(uintptr_t) * Refs.size() + Data.size() + 1);
141 return *new (Mem) InMemoryInlineObject(I, Refs, Data);
142 }
143
144 size_t numTrailingObjects(OverloadToken<const InMemoryObject *>) const {
145 return NumRefs;
146 }
147
148private:
149 InMemoryInlineObject(const InMemoryIndexValueT &I,
150 ArrayRef<const InMemoryObject *> Refs,
151 ArrayRef<char> Data)
152 : InMemoryObject(KindValue, I), NumRefs(Refs.size()),
153 DataSize(Data.size()) {
154 auto *BeginRefs = reinterpret_cast<const InMemoryObject **>(this + 1);
155 llvm::copy(Range&: Refs, Out: BeginRefs);
156 auto *BeginData = reinterpret_cast<char *>(BeginRefs + NumRefs);
157 llvm::copy(Range&: Data, Out: BeginData);
158 BeginData[Data.size()] = 0;
159 }
160 uint32_t NumRefs;
161 uint32_t DataSize;
162};
163
164/// In-memory CAS database and action cache (the latter should be separated).
165class InMemoryCAS : public BuiltinCAS {
166public:
167 Expected<ObjectRef> storeImpl(ArrayRef<uint8_t> ComputedHash,
168 ArrayRef<ObjectRef> Refs,
169 ArrayRef<char> Data) final;
170
171 Expected<ObjectRef>
172 storeFromNullTerminatedRegion(ArrayRef<uint8_t> ComputedHash,
173 sys::fs::mapped_file_region Map) override;
174
175 CASID getID(const InMemoryIndexValueT &I) const {
176 StringRef Hash = toStringRef(Input: I.Hash);
177 return CASID::create(Context: &getContext(), Hash);
178 }
179 CASID getID(const InMemoryObject &O) const { return getID(I: O.getIndex()); }
180
181 ObjectHandle getObjectHandle(const InMemoryObject &Node) const {
182 assert(!(reinterpret_cast<uintptr_t>(&Node) & 0x1ULL));
183 return makeObjectHandle(InternalRef: reinterpret_cast<uintptr_t>(&Node));
184 }
185
186 Expected<std::optional<ObjectHandle>> loadIfExists(ObjectRef Ref) override {
187 return getObjectHandle(Node: asInMemoryObject(Ref));
188 }
189
190 InMemoryIndexValueT &indexHash(ArrayRef<uint8_t> Hash) {
191 return *Index.insertLazy(
192 Hash, OnConstruct: [](auto ValueConstructor) { ValueConstructor.emplace(nullptr); });
193 }
194
195 /// TODO: Consider callers to actually do an insert and to return a handle to
196 /// the slot in the trie.
197 const InMemoryObject *getInMemoryObject(CASID ID) const {
198 assert(ID.getContext().getHashSchemaIdentifier() ==
199 getContext().getHashSchemaIdentifier() &&
200 "Expected ID from same hash schema");
201 if (InMemoryIndexT::const_pointer P = Index.find(Hash: ID.getHash()))
202 return P->Data;
203 return nullptr;
204 }
205
206 const InMemoryObject &getInMemoryObject(ObjectHandle OH) const {
207 return *reinterpret_cast<const InMemoryObject *>(
208 (uintptr_t)OH.getInternalRef(ExpectedCAS: *this));
209 }
210
211 const InMemoryObject &asInMemoryObject(ReferenceBase Ref) const {
212 uintptr_t P = Ref.getInternalRef(ExpectedCAS: *this);
213 return *reinterpret_cast<const InMemoryObject *>(P);
214 }
215 ObjectRef toReference(const InMemoryObject &O) const {
216 return makeObjectRef(InternalRef: reinterpret_cast<uintptr_t>(&O));
217 }
218
219 CASID getID(ObjectRef Ref) const final { return getIDImpl(Ref); }
220 CASID getIDImpl(ReferenceBase Ref) const {
221 return getID(O: asInMemoryObject(Ref));
222 }
223
224 std::optional<ObjectRef> getReference(const CASID &ID) const final {
225 if (const InMemoryObject *Object = getInMemoryObject(ID))
226 return toReference(O: *Object);
227 return std::nullopt;
228 }
229
230 Expected<bool> isMaterialized(ObjectRef Ref) const final { return true; }
231
232 ArrayRef<char> getDataConst(ObjectHandle Node) const final {
233 return cast<InMemoryObject>(Val: asInMemoryObject(Ref: Node)).getData();
234 }
235
236 void print(raw_ostream &OS) const final;
237
238 Error validate(bool CheckHash) const final {
239 return createStringError(Fmt: "InMemoryCAS doesn't support validate()");
240 }
241
242 InMemoryCAS() = default;
243
244private:
245 size_t getNumRefs(ObjectHandle Node) const final {
246 return getInMemoryObject(OH: Node).getRefs().size();
247 }
248 ObjectRef readRef(ObjectHandle Node, size_t I) const final {
249 return toReference(O: *getInMemoryObject(OH: Node).getRefs()[I]);
250 }
251 Error forEachRef(ObjectHandle Node,
252 function_ref<Error(ObjectRef)> Callback) const final;
253
254 /// Index of referenced IDs (map: Hash -> InMemoryObject*). Mapped to nullptr
255 /// as a convenient way to store hashes.
256 ///
257 /// - Insert nullptr on lookups.
258 /// - InMemoryObject points back to here.
259 InMemoryIndexT Index;
260
261 ThreadSafeAllocator<BumpPtrAllocator> Objects;
262 ThreadSafeAllocator<SpecificBumpPtrAllocator<sys::fs::mapped_file_region>>
263 MemoryMaps;
264};
265
266} // end anonymous namespace
267
268ArrayRef<char> InMemoryObject::getData() const {
269 if (auto *Derived = dyn_cast<InMemoryRefObject>(Val: this))
270 return Derived->getDataImpl();
271 return cast<InMemoryInlineObject>(Val: this)->getDataImpl();
272}
273
274ArrayRef<const InMemoryObject *> InMemoryObject::getRefs() const {
275 if (auto *Derived = dyn_cast<InMemoryRefObject>(Val: this))
276 return Derived->getRefsImpl();
277 return cast<InMemoryInlineObject>(Val: this)->getRefsImpl();
278}
279
280void InMemoryCAS::print(raw_ostream &OS) const {}
281
282Expected<ObjectRef>
283InMemoryCAS::storeFromNullTerminatedRegion(ArrayRef<uint8_t> ComputedHash,
284 sys::fs::mapped_file_region Map) {
285 // Look up the hash in the index, initializing to nullptr if it's new.
286 ArrayRef<char> Data(Map.data(), Map.size());
287 auto &I = indexHash(Hash: ComputedHash);
288
289 // Load or generate.
290 auto Allocator = [&](size_t Size) -> void * {
291 return Objects.Allocate(Size, Align: alignof(InMemoryObject));
292 };
293 auto Generator = [&]() -> const InMemoryObject * {
294 return &InMemoryRefObject::create(Allocate: Allocator, I, Refs: {}, Data);
295 };
296 const InMemoryObject &Node =
297 cast<InMemoryObject>(Val: I.Data.loadOrGenerate(Generator));
298
299 // Save Map if the winning node uses it.
300 if (auto *RefNode = dyn_cast<InMemoryRefObject>(Val: &Node))
301 if (RefNode->getData().data() == Map.data())
302 new (MemoryMaps.Allocate(N: 1)) sys::fs::mapped_file_region(std::move(Map));
303
304 return toReference(O: Node);
305}
306
307Expected<ObjectRef> InMemoryCAS::storeImpl(ArrayRef<uint8_t> ComputedHash,
308 ArrayRef<ObjectRef> Refs,
309 ArrayRef<char> Data) {
310 // Look up the hash in the index, initializing to nullptr if it's new.
311 auto &I = indexHash(Hash: ComputedHash);
312
313 // Create the node.
314 SmallVector<const InMemoryObject *> InternalRefs;
315 for (ObjectRef Ref : Refs)
316 InternalRefs.push_back(Elt: &asInMemoryObject(Ref));
317 auto Allocator = [&](size_t Size) -> void * {
318 return Objects.Allocate(Size, Align: alignof(InMemoryObject));
319 };
320 auto Generator = [&]() -> const InMemoryObject * {
321 return &InMemoryInlineObject::create(Allocate: Allocator, I, Refs: InternalRefs, Data);
322 };
323 return toReference(O: cast<InMemoryObject>(Val: I.Data.loadOrGenerate(Generator)));
324}
325
326Error InMemoryCAS::forEachRef(ObjectHandle Handle,
327 function_ref<Error(ObjectRef)> Callback) const {
328 auto &Node = getInMemoryObject(OH: Handle);
329 for (const InMemoryObject *Ref : Node.getRefs())
330 if (Error E = Callback(toReference(O: *Ref)))
331 return E;
332 return Error::success();
333}
334
335std::unique_ptr<ObjectStore> cas::createInMemoryCAS() {
336 return std::make_unique<InMemoryCAS>();
337}
338