1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/CAS/ObjectStore.h"
10#include "llvm/ADT/DenseSet.h"
11#include "llvm/Support/Debug.h"
12#include "llvm/Support/Errc.h"
13#include "llvm/Support/FileSystem.h"
14#include "llvm/Support/MemoryBuffer.h"
15#include <deque>
16
17using namespace llvm;
18using namespace llvm::cas;
19
20void CASContext::anchor() {}
21void ObjectStore::anchor() {}
22
23LLVM_DUMP_METHOD void CASID::dump() const { print(OS&: dbgs()); }
24LLVM_DUMP_METHOD void ObjectStore::dump() const { print(dbgs()); }
25LLVM_DUMP_METHOD void ObjectRef::dump() const { print(OS&: dbgs()); }
26LLVM_DUMP_METHOD void ObjectHandle::dump() const { print(OS&: dbgs()); }
27
28std::string CASID::toString() const {
29 std::string S;
30 raw_string_ostream(S) << *this;
31 return S;
32}
33
34static void printReferenceBase(raw_ostream &OS, StringRef Kind,
35 uint64_t InternalRef, std::optional<CASID> ID) {
36 OS << Kind << "=" << InternalRef;
37 if (ID)
38 OS << "[" << *ID << "]";
39}
40
41void ReferenceBase::print(raw_ostream &OS, const ObjectHandle &This) const {
42 assert(this == &This);
43 printReferenceBase(OS, Kind: "object-handle", InternalRef, ID: std::nullopt);
44}
45
46void ReferenceBase::print(raw_ostream &OS, const ObjectRef &This) const {
47 assert(this == &This);
48
49 std::optional<CASID> ID;
50#if LLVM_ENABLE_ABI_BREAKING_CHECKS
51 if (CAS)
52 ID = CAS->getID(This);
53#endif
54 printReferenceBase(OS, Kind: "object-ref", InternalRef, ID);
55}
56
57Expected<ObjectHandle> ObjectStore::load(ObjectRef Ref) {
58 std::optional<ObjectHandle> Handle;
59 if (Error E = loadIfExists(Ref).moveInto(Value&: Handle))
60 return std::move(E);
61 if (!Handle)
62 return createStringError(EC: errc::invalid_argument,
63 S: "missing object '" + getID(Ref).toString() + "'");
64 return *Handle;
65}
66
67std::unique_ptr<MemoryBuffer>
68ObjectStore::getMemoryBuffer(ObjectHandle Node, StringRef Name,
69 bool RequiresNullTerminator) {
70 return MemoryBuffer::getMemBuffer(
71 InputData: toStringRef(Input: getData(Node, RequiresNullTerminator)), BufferName: Name,
72 RequiresNullTerminator);
73}
74
75void ObjectStore::readRefs(ObjectHandle Node,
76 SmallVectorImpl<ObjectRef> &Refs) const {
77 consumeError(Err: forEachRef(Node, Callback: [&Refs](ObjectRef Ref) -> Error {
78 Refs.push_back(Elt: Ref);
79 return Error::success();
80 }));
81}
82
83Expected<ObjectProxy> ObjectStore::getProxy(const CASID &ID) {
84 std::optional<ObjectRef> Ref = getReference(ID);
85 if (!Ref)
86 return createUnknownObjectError(ID);
87
88 return getProxy(Ref: *Ref);
89}
90
91Expected<ObjectProxy> ObjectStore::getProxy(ObjectRef Ref) {
92 std::optional<ObjectHandle> H;
93 if (Error E = load(Ref).moveInto(Value&: H))
94 return std::move(E);
95
96 return ObjectProxy::load(CAS&: *this, Ref, Node: *H);
97}
98
99Expected<std::optional<ObjectProxy>>
100ObjectStore::getProxyIfExists(ObjectRef Ref) {
101 std::optional<ObjectHandle> H;
102 if (Error E = loadIfExists(Ref).moveInto(Value&: H))
103 return std::move(E);
104 if (!H)
105 return std::nullopt;
106 return ObjectProxy::load(CAS&: *this, Ref, Node: *H);
107}
108
109Error ObjectStore::createUnknownObjectError(const CASID &ID) {
110 return createStringError(EC: std::make_error_code(e: std::errc::invalid_argument),
111 S: "unknown object '" + ID.toString() + "'");
112}
113
114Expected<ObjectProxy> ObjectStore::createProxy(ArrayRef<ObjectRef> Refs,
115 StringRef Data) {
116 Expected<ObjectRef> Ref = store(Refs, Data: arrayRefFromStringRef<char>(Input: Data));
117 if (!Ref)
118 return Ref.takeError();
119 return getProxy(Ref: *Ref);
120}
121
122Expected<ObjectRef>
123ObjectStore::storeFromOpenFileImpl(sys::fs::file_t FD,
124 std::optional<sys::fs::file_status> Status) {
125 // TODO: For the on-disk CAS implementation use cloning to store it as a
126 // standalone file if the file-system supports it and the file is large.
127 uint64_t Size = Status ? Status->getSize() : -1;
128 auto Buffer = MemoryBuffer::getOpenFile(FD, /*Filename=*/"", FileSize: Size);
129 if (!Buffer)
130 return errorCodeToError(EC: Buffer.getError());
131
132 return store(Refs: {}, Data: arrayRefFromStringRef<char>(Input: (*Buffer)->getBuffer()));
133}
134
135Error ObjectStore::validateTree(ObjectRef Root) {
136 SmallDenseSet<ObjectRef> ValidatedRefs;
137 SmallVector<ObjectRef, 16> RefsToValidate;
138 RefsToValidate.push_back(Elt: Root);
139
140 while (!RefsToValidate.empty()) {
141 ObjectRef Ref = RefsToValidate.pop_back_val();
142 auto [I, Inserted] = ValidatedRefs.insert(V: Ref);
143 if (!Inserted)
144 continue; // already validated.
145 if (Error E = validateObject(ID: getID(Ref)))
146 return E;
147 Expected<ObjectHandle> Obj = load(Ref);
148 if (!Obj)
149 return Obj.takeError();
150 if (Error E = forEachRef(Node: *Obj, Callback: [&RefsToValidate](ObjectRef R) -> Error {
151 RefsToValidate.push_back(Elt: R);
152 return Error::success();
153 }))
154 return E;
155 }
156 return Error::success();
157}
158
159Expected<ObjectRef> ObjectStore::importObject(ObjectStore &Upstream,
160 ObjectRef Other) {
161 // Copy the full CAS tree from upstream with depth-first ordering to ensure
162 // all the child nodes are available in downstream CAS before inserting
163 // current object. This uses a similar algorithm as
164 // `OnDiskGraphDB::importFullTree` but doesn't assume the upstream CAS schema
165 // so it can be used to import from any other ObjectStore reguardless of the
166 // CAS schema.
167
168 // There is no work to do if importing from self.
169 if (this == &Upstream)
170 return Other;
171
172 /// Keeps track of the state of visitation for current node and all of its
173 /// parents. Upstream Cursor holds information only from upstream CAS.
174 struct UpstreamCursor {
175 ObjectRef Ref;
176 ObjectHandle Node;
177 size_t RefsCount;
178 std::deque<ObjectRef> Refs;
179 };
180 SmallVector<UpstreamCursor, 16> CursorStack;
181 /// PrimaryNodeStack holds the ObjectRef of the current CAS, with nodes either
182 /// just stored in the CAS or nodes already exists in the current CAS.
183 SmallVector<ObjectRef, 128> PrimaryRefStack;
184 /// A map from upstream ObjectRef to current ObjectRef.
185 llvm::DenseMap<ObjectRef, ObjectRef> CreatedObjects;
186
187 auto enqueueNode = [&](ObjectRef Ref, ObjectHandle Node) {
188 unsigned NumRefs = Upstream.getNumRefs(Node);
189 std::deque<ObjectRef> Refs;
190 for (unsigned I = 0; I < NumRefs; ++I)
191 Refs.push_back(x: Upstream.readRef(Node, I));
192
193 CursorStack.push_back(Elt: {.Ref: Ref, .Node: Node, .RefsCount: NumRefs, .Refs: std::move(Refs)});
194 };
195
196 auto UpstreamHandle = Upstream.load(Ref: Other);
197 if (!UpstreamHandle)
198 return UpstreamHandle.takeError();
199 enqueueNode(Other, *UpstreamHandle);
200
201 while (!CursorStack.empty()) {
202 UpstreamCursor &Cur = CursorStack.back();
203 if (Cur.Refs.empty()) {
204 // Copy the node data into the primary store.
205 // The bottom of \p PrimaryRefStack contains the ObjectRef for the
206 // current node.
207 assert(PrimaryRefStack.size() >= Cur.RefsCount);
208 auto Refs = ArrayRef(PrimaryRefStack)
209 .slice(N: PrimaryRefStack.size() - Cur.RefsCount);
210 auto NewNode = store(Refs, Data: Upstream.getData(Node: Cur.Node));
211 if (!NewNode)
212 return NewNode.takeError();
213
214 // Remove the current node and its IDs from the stack.
215 PrimaryRefStack.truncate(N: PrimaryRefStack.size() - Cur.RefsCount);
216
217 // Push new node into created objects.
218 PrimaryRefStack.push_back(Elt: *NewNode);
219 CreatedObjects.try_emplace(Key: Cur.Ref, Args&: *NewNode);
220
221 // Pop the cursor in the end after all uses.
222 CursorStack.pop_back();
223 continue;
224 }
225
226 // Check if the node exists already.
227 auto CurrentID = Cur.Refs.front();
228 Cur.Refs.pop_front();
229 auto Ref = CreatedObjects.find(Val: CurrentID);
230 if (Ref != CreatedObjects.end()) {
231 // If exists already, just need to enqueue the primary node.
232 PrimaryRefStack.push_back(Elt: Ref->second);
233 continue;
234 }
235
236 // Load child.
237 auto PrimaryID = Upstream.load(Ref: CurrentID);
238 if (LLVM_UNLIKELY(!PrimaryID))
239 return PrimaryID.takeError();
240
241 enqueueNode(CurrentID, *PrimaryID);
242 }
243
244 assert(PrimaryRefStack.size() == 1);
245 return PrimaryRefStack.front();
246}
247
248std::unique_ptr<MemoryBuffer>
249ObjectProxy::getMemoryBuffer(StringRef Name,
250 bool RequiresNullTerminator) const {
251 return CAS->getMemoryBuffer(Node: H, Name, RequiresNullTerminator);
252}
253