1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "BuiltinCAS.h"
10#include "OnDiskCommon.h"
11#include "llvm/ADT/ScopeExit.h"
12#include "llvm/CAS/BuiltinCASContext.h"
13#include "llvm/CAS/BuiltinObjectHasher.h"
14#include "llvm/CAS/OnDiskCASLogger.h"
15#include "llvm/CAS/OnDiskGraphDB.h"
16#include "llvm/CAS/UnifiedOnDiskCache.h"
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/Error.h"
19#include "llvm/Support/IOSandbox.h"
20#include "llvm/Support/Path.h"
21
22using namespace llvm;
23using namespace llvm::cas;
24using namespace llvm::cas::builtin;
25
26namespace {
27
28class OnDiskCAS : public BuiltinCAS {
29public:
30 Expected<ObjectRef> storeImpl(ArrayRef<uint8_t> ComputedHash,
31 ArrayRef<ObjectRef> Refs,
32 ArrayRef<char> Data) final;
33
34 Expected<std::optional<ObjectHandle>> loadIfExists(ObjectRef Ref) final;
35
36 CASID getID(ObjectRef Ref) const final;
37
38 std::optional<ObjectRef> getReference(const CASID &ID) const final;
39
40 Expected<bool> isMaterialized(ObjectRef Ref) const final;
41
42 ArrayRef<char> getDataConst(ObjectHandle Node) const final;
43
44 Expected<ObjectRef> storeFromFile(StringRef Path) final;
45
46 Error exportDataToFile(ObjectHandle Node, StringRef Path) const final;
47
48 void print(raw_ostream &OS) const final;
49 Error validate(bool CheckHash) const final;
50
51 static Expected<std::unique_ptr<OnDiskCAS>> open(StringRef Path);
52
53 OnDiskCAS(std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB)
54 : UnifiedDB(std::move(UniDB)), DB(&UnifiedDB->getGraphDB()) {}
55
56private:
57 ObjectHandle convertHandle(ondisk::ObjectHandle Node) const {
58 return makeObjectHandle(InternalRef: Node.getOpaqueData());
59 }
60
61 ondisk::ObjectHandle convertHandle(ObjectHandle Node) const {
62 return ondisk::ObjectHandle(Node.getInternalRef(ExpectedCAS: *this));
63 }
64
65 ObjectRef convertRef(ondisk::ObjectID Ref) const {
66 return makeObjectRef(InternalRef: Ref.getOpaqueData());
67 }
68
69 ondisk::ObjectID convertRef(ObjectRef Ref) const {
70 return ondisk::ObjectID::fromOpaqueData(Opaque: Ref.getInternalRef(ExpectedCAS: *this));
71 }
72
73 size_t getNumRefs(ObjectHandle Node) const final {
74 auto RefsRange = DB->getObjectRefs(Node: convertHandle(Node));
75 return llvm::size(Range&: RefsRange);
76 }
77
78 ObjectRef readRef(ObjectHandle Node, size_t I) const final {
79 auto RefsRange = DB->getObjectRefs(Node: convertHandle(Node));
80 return convertRef(Ref: RefsRange.begin()[I]);
81 }
82
83 Error forEachRef(ObjectHandle Node,
84 function_ref<Error(ObjectRef)> Callback) const final;
85
86 Error setSizeLimit(std::optional<uint64_t> SizeLimit) final;
87 Expected<std::optional<uint64_t>> getStorageSize() const final;
88 Error pruneStorageData() final;
89
90 OnDiskCAS(std::unique_ptr<ondisk::OnDiskGraphDB> GraphDB)
91 : OwnedDB(std::move(GraphDB)), DB(OwnedDB.get()) {}
92
93 std::unique_ptr<ondisk::OnDiskGraphDB> OwnedDB;
94 std::shared_ptr<ondisk::UnifiedOnDiskCache> UnifiedDB;
95 ondisk::OnDiskGraphDB *DB;
96};
97
98} // end anonymous namespace
99
100void OnDiskCAS::print(raw_ostream &OS) const { DB->print(OS); }
101Error OnDiskCAS::validate(bool CheckHash) const {
102 if (auto E = DB->validate(Deep: CheckHash, Hasher: builtin::hashingFunc))
103 return E;
104
105 return Error::success();
106}
107
108CASID OnDiskCAS::getID(ObjectRef Ref) const {
109 ArrayRef<uint8_t> Hash = DB->getDigest(Ref: convertRef(Ref));
110 return CASID::create(Context: &getContext(), Hash: toStringRef(Input: Hash));
111}
112
113std::optional<ObjectRef> OnDiskCAS::getReference(const CASID &ID) const {
114 std::optional<ondisk::ObjectID> ObjID =
115 DB->getExistingReference(Digest: ID.getHash());
116 if (!ObjID)
117 return std::nullopt;
118 return convertRef(Ref: *ObjID);
119}
120
121Expected<bool> OnDiskCAS::isMaterialized(ObjectRef ExternalRef) const {
122 return DB->isMaterialized(Ref: convertRef(Ref: ExternalRef));
123}
124
125ArrayRef<char> OnDiskCAS::getDataConst(ObjectHandle Node) const {
126 return DB->getObjectData(Node: convertHandle(Node));
127}
128
129Expected<std::optional<ObjectHandle>>
130OnDiskCAS::loadIfExists(ObjectRef ExternalRef) {
131 Expected<std::optional<ondisk::ObjectHandle>> ObjHnd =
132 DB->load(Ref: convertRef(Ref: ExternalRef));
133 if (!ObjHnd)
134 return ObjHnd.takeError();
135 if (!*ObjHnd)
136 return std::nullopt;
137 return convertHandle(Node: **ObjHnd);
138}
139
140Expected<ObjectRef> OnDiskCAS::storeImpl(ArrayRef<uint8_t> ComputedHash,
141 ArrayRef<ObjectRef> Refs,
142 ArrayRef<char> Data) {
143 SmallVector<ondisk::ObjectID, 64> IDs;
144 IDs.reserve(N: Refs.size());
145 for (ObjectRef Ref : Refs) {
146 IDs.push_back(Elt: convertRef(Ref));
147 }
148
149 auto StoredID = DB->getReference(Hash: ComputedHash);
150 if (LLVM_UNLIKELY(!StoredID))
151 return StoredID.takeError();
152 if (Error E = DB->store(ID: *StoredID, Refs: IDs, Data))
153 return std::move(E);
154 return convertRef(Ref: *StoredID);
155}
156
157Expected<ObjectRef> OnDiskCAS::storeFromFile(StringRef Path) {
158 auto Hash = BuiltinObjectHasher<HasherT>::hashFile(FilePath: Path);
159 if (LLVM_UNLIKELY(!Hash))
160 return Hash.takeError();
161 auto StoredID = DB->getReference(Hash: *Hash);
162 if (LLVM_UNLIKELY(!StoredID))
163 return StoredID.takeError();
164 if (Error E = DB->storeFile(ID: *StoredID, FilePath: Path))
165 return E;
166 return convertRef(Ref: *StoredID);
167}
168
169Error OnDiskCAS::exportDataToFile(ObjectHandle Node, StringRef Path) const {
170 auto FBData = DB->getInternalFileBackedObjectData(Node: convertHandle(Node));
171 if (!FBData.FileInfo.has_value())
172 return BuiltinCAS::exportDataToFile(Node, Path);
173
174 // Optimized version using the underlying database file.
175 assert(FBData.FileInfo.has_value());
176
177 auto BypassSandbox = sys::sandbox::scopedDisable();
178
179 ondisk::UniqueTempFile UniqueTmp;
180 auto ExpectedPath = UniqueTmp.createAndCopyFrom(ParentPath: sys::path::parent_path(path: Path),
181 CopyFromPath: FBData.FileInfo->FilePath);
182 if (!ExpectedPath)
183 return ExpectedPath.takeError();
184 StringRef TmpPath = *ExpectedPath;
185
186 if (FBData.FileInfo->IsFileNulTerminated) {
187 // Remove the nul terminator.
188 int FD;
189 if (std::error_code EC =
190 sys::fs::openFileForWrite(Name: TmpPath, ResultFD&: FD, Disp: sys::fs::CD_OpenExisting))
191 return createFileError(F: TmpPath, EC);
192 auto CloseFile = scope_exit([&FD] {
193 sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
194 sys::fs::closeFile(F&: File);
195 });
196 if (std::error_code EC = sys::fs::resize_file(FD, Size: FBData.Data.size()))
197 return createFileError(F: TmpPath, EC);
198 }
199
200 if (Error E = UniqueTmp.renameTo(RenameToPath: Path))
201 return E;
202
203 return Error::success();
204}
205
206Error OnDiskCAS::forEachRef(ObjectHandle Node,
207 function_ref<Error(ObjectRef)> Callback) const {
208 auto RefsRange = DB->getObjectRefs(Node: convertHandle(Node));
209 for (ondisk::ObjectID Ref : RefsRange) {
210 if (Error E = Callback(convertRef(Ref)))
211 return E;
212 }
213 return Error::success();
214}
215
216Error OnDiskCAS::setSizeLimit(std::optional<uint64_t> SizeLimit) {
217 UnifiedDB->setSizeLimit(SizeLimit);
218 return Error::success();
219}
220
221Expected<std::optional<uint64_t>> OnDiskCAS::getStorageSize() const {
222 return UnifiedDB->getStorageSize();
223}
224
225Error OnDiskCAS::pruneStorageData() { return UnifiedDB->collectGarbage(); }
226
227Expected<std::unique_ptr<OnDiskCAS>> OnDiskCAS::open(StringRef AbsPath) {
228 std::shared_ptr<ondisk::OnDiskCASLogger> Logger;
229#ifndef _WIN32
230 if (Error E =
231 ondisk::OnDiskCASLogger::openIfEnabled(Path: AbsPath).moveInto(Value&: Logger))
232 return std::move(E);
233#endif
234
235 Expected<std::unique_ptr<ondisk::OnDiskGraphDB>> DB =
236 ondisk::OnDiskGraphDB::open(Path: AbsPath, HashName: BuiltinCASContext::getHashName(),
237 HashByteSize: sizeof(HashType), /*UpstreamDB=*/nullptr,
238 Logger: std::move(Logger));
239 if (!DB)
240 return DB.takeError();
241 return std::unique_ptr<OnDiskCAS>(new OnDiskCAS(std::move(*DB)));
242}
243
244bool cas::isOnDiskCASEnabled() {
245#if LLVM_ENABLE_ONDISK_CAS
246 return true;
247#else
248 return false;
249#endif
250}
251
252Expected<std::unique_ptr<ObjectStore>> cas::createOnDiskCAS(const Twine &Path) {
253#if LLVM_ENABLE_ONDISK_CAS
254 // FIXME: An absolute path isn't really good enough. Should open a directory
255 // and use openat() for files underneath.
256 SmallString<256> AbsPath;
257 Path.toVector(Out&: AbsPath);
258 sys::fs::make_absolute(path&: AbsPath);
259
260 return OnDiskCAS::open(AbsPath);
261#else
262 return createStringError(inconvertibleErrorCode(), "OnDiskCAS is disabled");
263#endif /* LLVM_ENABLE_ONDISK_CAS */
264}
265
266std::unique_ptr<ObjectStore>
267cas::builtin::createObjectStoreFromUnifiedOnDiskCache(
268 std::shared_ptr<ondisk::UnifiedOnDiskCache> UniDB) {
269 return std::make_unique<OnDiskCAS>(args: std::move(UniDB));
270}
271