1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/CAS/ObjectStore.h"
10#include "llvm/ADT/DenseSet.h"
11#include "llvm/ADT/ScopeExit.h"
12#include "llvm/Support/Debug.h"
13#include "llvm/Support/Errc.h"
14#include "llvm/Support/FileSystem.h"
15#include "llvm/Support/IOSandbox.h"
16#include "llvm/Support/MemoryBuffer.h"
17#include "llvm/Support/Path.h"
18#include <deque>
19
20using namespace llvm;
21using namespace llvm::cas;
22
23void CASContext::anchor() {}
24void ObjectStore::anchor() {}
25
26LLVM_DUMP_METHOD void CASID::dump() const { print(OS&: dbgs()); }
27LLVM_DUMP_METHOD void ObjectStore::dump() const { print(dbgs()); }
28LLVM_DUMP_METHOD void ObjectRef::dump() const { print(OS&: dbgs()); }
29LLVM_DUMP_METHOD void ObjectHandle::dump() const { print(OS&: dbgs()); }
30
31std::string CASID::toString() const {
32 std::string S;
33 raw_string_ostream(S) << *this;
34 return S;
35}
36
37static void printReferenceBase(raw_ostream &OS, StringRef Kind,
38 uint64_t InternalRef, std::optional<CASID> ID) {
39 OS << Kind << "=" << InternalRef;
40 if (ID)
41 OS << "[" << *ID << "]";
42}
43
44void ReferenceBase::print(raw_ostream &OS, const ObjectHandle &This) const {
45 assert(this == &This);
46 printReferenceBase(OS, Kind: "object-handle", InternalRef, ID: std::nullopt);
47}
48
49void ReferenceBase::print(raw_ostream &OS, const ObjectRef &This) const {
50 assert(this == &This);
51
52 std::optional<CASID> ID;
53#if LLVM_ENABLE_ABI_BREAKING_CHECKS
54 if (CAS)
55 ID = CAS->getID(This);
56#endif
57 printReferenceBase(OS, Kind: "object-ref", InternalRef, ID);
58}
59
60Expected<ObjectHandle> ObjectStore::load(ObjectRef Ref) {
61 std::optional<ObjectHandle> Handle;
62 if (Error E = loadIfExists(Ref).moveInto(Value&: Handle))
63 return std::move(E);
64 if (!Handle)
65 return createStringError(EC: errc::invalid_argument,
66 S: "missing object '" + getID(Ref).toString() + "'");
67 return *Handle;
68}
69
70std::unique_ptr<MemoryBuffer>
71ObjectStore::getMemoryBuffer(ObjectHandle Node, StringRef Name,
72 bool RequiresNullTerminator) {
73 return MemoryBuffer::getMemBuffer(
74 InputData: toStringRef(Input: getData(Node, RequiresNullTerminator)), BufferName: Name,
75 RequiresNullTerminator);
76}
77
78void ObjectStore::readRefs(ObjectHandle Node,
79 SmallVectorImpl<ObjectRef> &Refs) const {
80 consumeError(Err: forEachRef(Node, Callback: [&Refs](ObjectRef Ref) -> Error {
81 Refs.push_back(Elt: Ref);
82 return Error::success();
83 }));
84}
85
86Expected<ObjectProxy> ObjectStore::getProxy(const CASID &ID) {
87 std::optional<ObjectRef> Ref = getReference(ID);
88 if (!Ref)
89 return createUnknownObjectError(ID);
90
91 return getProxy(Ref: *Ref);
92}
93
94Expected<ObjectProxy> ObjectStore::getProxy(ObjectRef Ref) {
95 std::optional<ObjectHandle> H;
96 if (Error E = load(Ref).moveInto(Value&: H))
97 return std::move(E);
98
99 return ObjectProxy::load(CAS&: *this, Ref, Node: *H);
100}
101
102Expected<std::optional<ObjectProxy>>
103ObjectStore::getProxyIfExists(ObjectRef Ref) {
104 std::optional<ObjectHandle> H;
105 if (Error E = loadIfExists(Ref).moveInto(Value&: H))
106 return std::move(E);
107 if (!H)
108 return std::nullopt;
109 return ObjectProxy::load(CAS&: *this, Ref, Node: *H);
110}
111
112Error ObjectStore::createUnknownObjectError(const CASID &ID) {
113 return createStringError(EC: std::make_error_code(e: std::errc::invalid_argument),
114 S: "unknown object '" + ID.toString() + "'");
115}
116
117Expected<ObjectProxy> ObjectStore::createProxy(ArrayRef<ObjectRef> Refs,
118 StringRef Data) {
119 Expected<ObjectRef> Ref = store(Refs, Data: arrayRefFromStringRef<char>(Input: Data));
120 if (!Ref)
121 return Ref.takeError();
122 return getProxy(Ref: *Ref);
123}
124
125Expected<ObjectRef>
126ObjectStore::storeFromOpenFileImpl(sys::fs::file_t FD,
127 std::optional<sys::fs::file_status> Status) {
128 // TODO: For the on-disk CAS implementation use cloning to store it as a
129 // standalone file if the file-system supports it and the file is large.
130 uint64_t Size = Status ? Status->getSize() : -1;
131 auto Buffer = MemoryBuffer::getOpenFile(FD, /*Filename=*/"", FileSize: Size);
132 if (!Buffer)
133 return errorCodeToError(EC: Buffer.getError());
134
135 return store(Refs: {}, Data: arrayRefFromStringRef<char>(Input: (*Buffer)->getBuffer()));
136}
137
138Expected<ObjectRef> ObjectStore::storeFromFile(StringRef Path) {
139 auto BypassSandbox = sys::sandbox::scopedDisable();
140
141 sys::fs::file_t FD;
142 if (Error E = sys::fs::openNativeFileForRead(Name: Path).moveInto(Value&: FD))
143 return E;
144 auto CloseFile = scope_exit([&FD] { sys::fs::closeFile(F&: FD); });
145 return storeFromOpenFile(FD);
146}
147
148Error ObjectStore::exportDataToFile(ObjectHandle Node, StringRef Path) const {
149 auto BypassSandbox = sys::sandbox::scopedDisable();
150
151 SmallString<256> TmpPath;
152 SmallString<256> Model;
153 Model += sys::path::parent_path(path: Path);
154 sys::path::append(path&: Model, a: "%%%%%%%.tmp");
155 if (std::error_code EC = sys::fs::createUniqueFile(Model, ResultPath&: TmpPath))
156 return createFileError(F: Model, EC);
157 auto RemoveTmpFile = scope_exit([&] {
158 if (!TmpPath.empty())
159 sys::fs::remove(path: TmpPath);
160 });
161
162 ArrayRef<char> Data = getData(Node);
163 std::error_code EC;
164 raw_fd_ostream FS(TmpPath, EC);
165 if (EC)
166 return createFileError(F: TmpPath, EC);
167 FS.write(Ptr: Data.begin(), Size: Data.size());
168 FS.close();
169 if (FS.has_error())
170 return createFileError(F: TmpPath, EC: FS.error());
171
172 if (std::error_code EC = sys::fs::rename(from: TmpPath, to: Path))
173 return createFileError(F: Path, EC);
174 TmpPath.clear();
175
176 return Error::success();
177}
178
179Error ObjectStore::validateTree(ObjectRef Root) {
180 SmallDenseSet<ObjectRef> ValidatedRefs;
181 SmallVector<ObjectRef, 16> RefsToValidate;
182 RefsToValidate.push_back(Elt: Root);
183
184 while (!RefsToValidate.empty()) {
185 ObjectRef Ref = RefsToValidate.pop_back_val();
186 auto [I, Inserted] = ValidatedRefs.insert(V: Ref);
187 if (!Inserted)
188 continue; // already validated.
189 if (Error E = validateObject(ID: getID(Ref)))
190 return E;
191 Expected<ObjectHandle> Obj = load(Ref);
192 if (!Obj)
193 return Obj.takeError();
194 if (Error E = forEachRef(Node: *Obj, Callback: [&RefsToValidate](ObjectRef R) -> Error {
195 RefsToValidate.push_back(Elt: R);
196 return Error::success();
197 }))
198 return E;
199 }
200 return Error::success();
201}
202
203Expected<ObjectRef> ObjectStore::importObject(ObjectStore &Upstream,
204 ObjectRef Other) {
205 // Copy the full CAS tree from upstream with depth-first ordering to ensure
206 // all the child nodes are available in downstream CAS before inserting
207 // current object. This uses a similar algorithm as
208 // `OnDiskGraphDB::importFullTree` but doesn't assume the upstream CAS schema
209 // so it can be used to import from any other ObjectStore reguardless of the
210 // CAS schema.
211
212 // There is no work to do if importing from self.
213 if (this == &Upstream)
214 return Other;
215
216 /// Keeps track of the state of visitation for current node and all of its
217 /// parents. Upstream Cursor holds information only from upstream CAS.
218 struct UpstreamCursor {
219 ObjectRef Ref;
220 ObjectHandle Node;
221 size_t RefsCount;
222 std::deque<ObjectRef> Refs;
223 };
224 SmallVector<UpstreamCursor, 16> CursorStack;
225 /// PrimaryNodeStack holds the ObjectRef of the current CAS, with nodes either
226 /// just stored in the CAS or nodes already exists in the current CAS.
227 SmallVector<ObjectRef, 128> PrimaryRefStack;
228 /// A map from upstream ObjectRef to current ObjectRef.
229 llvm::DenseMap<ObjectRef, ObjectRef> CreatedObjects;
230
231 auto enqueueNode = [&](ObjectRef Ref, ObjectHandle Node) {
232 unsigned NumRefs = Upstream.getNumRefs(Node);
233 std::deque<ObjectRef> Refs;
234 for (unsigned I = 0; I < NumRefs; ++I)
235 Refs.push_back(x: Upstream.readRef(Node, I));
236
237 CursorStack.push_back(Elt: {.Ref: Ref, .Node: Node, .RefsCount: NumRefs, .Refs: std::move(Refs)});
238 };
239
240 auto UpstreamHandle = Upstream.load(Ref: Other);
241 if (!UpstreamHandle)
242 return UpstreamHandle.takeError();
243 enqueueNode(Other, *UpstreamHandle);
244
245 while (!CursorStack.empty()) {
246 UpstreamCursor &Cur = CursorStack.back();
247 if (Cur.Refs.empty()) {
248 // Copy the node data into the primary store.
249 // The bottom of \p PrimaryRefStack contains the ObjectRef for the
250 // current node.
251 assert(PrimaryRefStack.size() >= Cur.RefsCount);
252 auto Refs = ArrayRef(PrimaryRefStack)
253 .slice(N: PrimaryRefStack.size() - Cur.RefsCount);
254 auto NewNode = store(Refs, Data: Upstream.getData(Node: Cur.Node));
255 if (!NewNode)
256 return NewNode.takeError();
257
258 // Remove the current node and its IDs from the stack.
259 PrimaryRefStack.truncate(N: PrimaryRefStack.size() - Cur.RefsCount);
260
261 // Push new node into created objects.
262 PrimaryRefStack.push_back(Elt: *NewNode);
263 CreatedObjects.try_emplace(Key: Cur.Ref, Args&: *NewNode);
264
265 // Pop the cursor in the end after all uses.
266 CursorStack.pop_back();
267 continue;
268 }
269
270 // Check if the node exists already.
271 auto CurrentID = Cur.Refs.front();
272 Cur.Refs.pop_front();
273 auto Ref = CreatedObjects.find(Val: CurrentID);
274 if (Ref != CreatedObjects.end()) {
275 // If exists already, just need to enqueue the primary node.
276 PrimaryRefStack.push_back(Elt: Ref->second);
277 continue;
278 }
279
280 // Load child.
281 auto PrimaryID = Upstream.load(Ref: CurrentID);
282 if (LLVM_UNLIKELY(!PrimaryID))
283 return PrimaryID.takeError();
284
285 enqueueNode(CurrentID, *PrimaryID);
286 }
287
288 assert(PrimaryRefStack.size() == 1);
289 return PrimaryRefStack.front();
290}
291
292std::unique_ptr<MemoryBuffer>
293ObjectProxy::getMemoryBuffer(StringRef Name,
294 bool RequiresNullTerminator) const {
295 return CAS->getMemoryBuffer(Node: H, Name, RequiresNullTerminator);
296}
297