1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one
11/// directory while also restricting storage growth with a scheme of chaining
12/// the two most recent directories (primary & upstream), where the primary
13/// "faults-in" data from the upstream one. When the primary (most recent)
14/// directory exceeds its intended limit a new empty directory becomes the
15/// primary one.
16///
17/// Within the top-level directory (the path that \p UnifiedOnDiskCache::open
18/// receives) there are directories named like this:
19///
20/// 'v<version>.<x>'
21/// 'v<version>.<x+1>'
22/// 'v<version>.<x+2>'
23/// ...
24///
25/// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and
26/// the part after the dot is an increasing integer. The primary directory is
27/// the one with the highest integer and the upstream one is the directory
28/// before it. For example, if the sub-directories contained are:
29///
30/// 'v1.5', 'v1.6', 'v1.7', 'v1.8'
31///
32/// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are
33/// unused directories that can be safely deleted at any time and by any
34/// process.
35///
36/// Contained within the top-level directory is a file named "lock" which is
37/// used for processes to take shared or exclusive locks for the contents of the
38/// top directory. While a \p UnifiedOnDiskCache is open it keeps a shared lock
39/// for the top-level directory; when it closes, if the primary sub-directory
40/// exceeded its limit, it attempts to get an exclusive lock in order to create
/// a new empty primary directory; if it can't get the exclusive lock it gives
/// up and lets the next \p UnifiedOnDiskCache instance that closes attempt
/// again.
44///
45/// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a
46/// directory, by any process, the storage size in that directory will keep
47/// growing unrestricted. But the major benefit is that garbage-collection can
48/// be triggered on a directory concurrently, at any time and by any process,
49/// without affecting any active readers/writers in the same process or other
50/// processes.
51///
52/// The \c UnifiedOnDiskCache also provides validation and recovery on top of
53/// the underlying on-disk storage. The low-level storage is designed to remain
54/// coherent across regular process crashes, but may be invalid after power loss
55/// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows
56/// validating the contents once per boot and can recover by marking invalid
57/// data for garbage collection.
58///
59/// The data recovery described above requires exclusive access to the CAS, and
60/// it is an error to attempt recovery if the CAS is open in any process/thread.
61/// In order to maximize backwards compatibility with tools that do not perform
62/// validation before opening the CAS, we do not attempt to get exclusive access
63/// until recovery is actually performed, meaning as long as the data is valid
64/// it will not conflict with concurrent use.
65//
66//===----------------------------------------------------------------------===//
67
68#include "llvm/CAS/UnifiedOnDiskCache.h"
69#include "OnDiskCommon.h"
70#include "llvm/ADT/STLExtras.h"
71#include "llvm/ADT/ScopeExit.h"
72#include "llvm/ADT/SmallString.h"
73#include "llvm/ADT/SmallVector.h"
74#include "llvm/ADT/StringExtras.h"
75#include "llvm/ADT/StringRef.h"
76#include "llvm/CAS/OnDiskCASLogger.h"
77#include "llvm/CAS/OnDiskGraphDB.h"
78#include "llvm/CAS/OnDiskKeyValueDB.h"
79#include "llvm/Support/Compiler.h"
80#include "llvm/Support/Errc.h"
81#include "llvm/Support/Error.h"
82#include "llvm/Support/FileSystem.h"
83#include "llvm/Support/FileUtilities.h"
84#include "llvm/Support/IOSandbox.h"
85#include "llvm/Support/MemoryBuffer.h"
86#include "llvm/Support/Path.h"
87#include "llvm/Support/Program.h"
88#include "llvm/Support/raw_ostream.h"
89#include <optional>
90
91using namespace llvm;
92using namespace llvm::cas;
93using namespace llvm::cas::ondisk;
94
95/// FIXME: When the version of \p DBDirPrefix is bumped up we need to figure out
96/// how to handle the leftover sub-directories of the previous version, within
97/// the \p UnifiedOnDiskCache::collectGarbage function.
98static constexpr StringLiteral DBDirPrefix = "v1.";
99
100static constexpr StringLiteral ValidationFilename = "v1.validation";
101static constexpr StringLiteral CorruptPrefix = "corrupt.";
102
103ObjectID UnifiedOnDiskCache::getObjectIDFromValue(ArrayRef<char> Value) {
104 // little endian encoded.
105 assert(Value.size() == sizeof(uint64_t));
106 return ObjectID::fromOpaqueData(Opaque: support::endian::read64le(P: Value.data()));
107}
108
109UnifiedOnDiskCache::ValueBytes
110UnifiedOnDiskCache::getValueFromObjectID(ObjectID ID) {
111 // little endian encoded.
112 UnifiedOnDiskCache::ValueBytes ValBytes;
113 static_assert(ValBytes.size() == sizeof(ID.getOpaqueData()));
114 support::endian::write64le(P: ValBytes.data(), V: ID.getOpaqueData());
115 return ValBytes;
116}
117
118Expected<std::optional<ArrayRef<char>>>
119UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef<uint8_t> Key) {
120 assert(UpstreamGraphDB);
121 assert(UpstreamKVDB);
122
123 std::optional<ArrayRef<char>> UpstreamValue;
124 if (Error E = UpstreamKVDB->get(Key).moveInto(Value&: UpstreamValue))
125 return std::move(E);
126 if (!UpstreamValue)
127 return std::nullopt;
128
129 // The value is the \p ObjectID in the context of the upstream
130 // \p OnDiskGraphDB instance. Translate it to the context of the primary
131 // \p OnDiskGraphDB instance.
132 ObjectID UpstreamID = getObjectIDFromValue(Value: *UpstreamValue);
133 auto PrimaryID =
134 PrimaryGraphDB->getReference(Hash: UpstreamGraphDB->getDigest(Ref: UpstreamID));
135 if (LLVM_UNLIKELY(!PrimaryID))
136 return PrimaryID.takeError();
137 return PrimaryKVDB->put(Key, Value: getValueFromObjectID(ID: *PrimaryID));
138}
139
140/// \returns all the 'v<version>.<x>' names of sub-directories, sorted with
141/// ascending order of the integer after the dot. Corrupt directories, if
142/// included, will come first.
143static Expected<SmallVector<std::string, 4>>
144getAllDBDirs(StringRef Path, bool IncludeCorrupt = false) {
145 struct DBDir {
146 uint64_t Order;
147 std::string Name;
148 };
149 SmallVector<DBDir> FoundDBDirs;
150
151 std::error_code EC;
152 for (sys::fs::directory_iterator DirI(Path, EC), DirE; !EC && DirI != DirE;
153 DirI.increment(ec&: EC)) {
154 if (DirI->type() != sys::fs::file_type::directory_file)
155 continue;
156 StringRef SubDir = sys::path::filename(path: DirI->path());
157 if (IncludeCorrupt && SubDir.starts_with(Prefix: CorruptPrefix)) {
158 FoundDBDirs.push_back(Elt: {.Order: 0, .Name: std::string(SubDir)});
159 continue;
160 }
161 if (!SubDir.starts_with(Prefix: DBDirPrefix))
162 continue;
163 uint64_t Order;
164 if (SubDir.substr(Start: DBDirPrefix.size()).getAsInteger(Radix: 10, Result&: Order))
165 return createStringError(EC: inconvertibleErrorCode(),
166 S: "unexpected directory " + DirI->path());
167 FoundDBDirs.push_back(Elt: {.Order: Order, .Name: std::string(SubDir)});
168 }
169 if (EC)
170 return createFileError(F: Path, EC);
171
172 llvm::sort(C&: FoundDBDirs, Comp: [](const DBDir &LHS, const DBDir &RHS) -> bool {
173 return LHS.Order < RHS.Order;
174 });
175
176 SmallVector<std::string, 4> DBDirs;
177 for (DBDir &Dir : FoundDBDirs)
178 DBDirs.push_back(Elt: std::move(Dir.Name));
179 return DBDirs;
180}
181
182static Expected<SmallVector<std::string, 4>> getAllGarbageDirs(StringRef Path) {
183 auto DBDirs = getAllDBDirs(Path, /*IncludeCorrupt=*/true);
184 if (!DBDirs)
185 return DBDirs.takeError();
186
187 // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure
188 // out how to handle the leftover sub-directories of the previous version.
189
190 for (unsigned Keep = 2; Keep > 0 && !DBDirs->empty(); --Keep) {
191 StringRef Back(DBDirs->back());
192 if (Back.starts_with(Prefix: CorruptPrefix))
193 break;
194 DBDirs->pop_back();
195 }
196 return *DBDirs;
197}
198
199/// \returns Given a sub-directory named 'v<version>.<x>', it outputs the
200/// 'v<version>.<x+1>' name.
201static void getNextDBDirName(StringRef DBDir, llvm::raw_ostream &OS) {
202 assert(DBDir.starts_with(DBDirPrefix));
203 uint64_t Count;
204 bool Failed = DBDir.substr(Start: DBDirPrefix.size()).getAsInteger(Radix: 10, Result&: Count);
205 assert(!Failed);
206 (void)Failed;
207 OS << DBDirPrefix << Count + 1;
208}
209
210static Error validateOutOfProcess(StringRef LLVMCasBinary, StringRef RootPath,
211 bool CheckHash) {
212 SmallVector<StringRef> Args{LLVMCasBinary, "-cas", RootPath, "-validate"};
213 if (CheckHash)
214 Args.push_back(Elt: "-check-hash");
215
216 llvm::SmallString<128> StdErrPath;
217 int StdErrFD = -1;
218 if (std::error_code EC = sys::fs::createTemporaryFile(
219 Prefix: "llvm-cas-validate-stderr", Suffix: "txt", ResultFD&: StdErrFD, ResultPath&: StdErrPath,
220 Flags: llvm::sys::fs::OF_Text))
221 return createStringError(EC, S: "failed to create temporary file");
222 FileRemover OutputRemover(StdErrPath.c_str());
223
224 std::optional<llvm::StringRef> Redirects[] = {
225 {""}, // stdin = /dev/null
226 {""}, // stdout = /dev/null
227 StdErrPath.str(),
228 };
229
230 std::string ErrMsg;
231 int Result =
232 sys::ExecuteAndWait(Program: LLVMCasBinary, Args, /*Env=*/std::nullopt, Redirects,
233 /*SecondsToWait=*/120, /*MemoryLimit=*/0, ErrMsg: &ErrMsg);
234
235 if (Result == -1)
236 return createStringError(S: "failed to exec " + join(R&: Args, Separator: " ") + ": " +
237 ErrMsg);
238 if (Result != 0) {
239 llvm::SmallString<64> Err("cas contents invalid");
240 if (!ErrMsg.empty()) {
241 Err += ": ";
242 Err += ErrMsg;
243 }
244 auto StdErrBuf = MemoryBuffer::getFile(Filename: StdErrPath.c_str());
245 if (StdErrBuf && !(*StdErrBuf)->getBuffer().empty()) {
246 Err += ": ";
247 Err += (*StdErrBuf)->getBuffer();
248 }
249 return createStringError(S: Err);
250 }
251 return Error::success();
252}
253
254Error UnifiedOnDiskCache::validateActionCache() const {
255 auto ValidateRef = [this](FileOffset Offset, ArrayRef<char> Value) -> Error {
256 auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Value);
257 auto formatError = [&](Twine Msg) {
258 return createStringError(
259 EC: llvm::errc::illegal_byte_sequence,
260 S: "bad record at 0x" +
261 utohexstr(X: (unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
262 Msg.str());
263 };
264 if (Error E = this->getGraphDB().validateObjectID(ID))
265 return formatError(llvm::toString(E: std::move(E)));
266 return Error::success();
267 };
268 return getKeyValueDB().validate(CheckValue: ValidateRef);
269}
270
271static Error validateInProcess(StringRef RootPath, StringRef HashName,
272 unsigned HashByteSize, bool CheckHash,
273 OnDiskGraphDB::HashingFuncT HashFn) {
274 std::shared_ptr<UnifiedOnDiskCache> UniDB;
275 if (Error E = UnifiedOnDiskCache::open(Path: RootPath, SizeLimit: std::nullopt, HashName,
276 HashByteSize)
277 .moveInto(Value&: UniDB))
278 return E;
279 if (Error E = UniDB->getGraphDB().validate(Deep: CheckHash, Hasher: HashFn))
280 return E;
281 if (Error E = UniDB->validateActionCache())
282 return E;
283 return Error::success();
284}
285
286Expected<ValidationResult> UnifiedOnDiskCache::validateIfNeeded(
287 StringRef RootPath, StringRef HashName, unsigned HashByteSize,
288 bool CheckHash, OnDiskGraphDB::HashingFuncT HashFn, bool AllowRecovery,
289 bool ForceValidation, std::optional<StringRef> LLVMCasBinaryPath) {
290 if (std::error_code EC = sys::fs::create_directories(path: RootPath))
291 return createFileError(F: RootPath, EC);
292
293 SmallString<256> PathBuf(RootPath);
294 sys::path::append(path&: PathBuf, a: ValidationFilename);
295 int FD = -1;
296 if (std::error_code EC = sys::fs::openFileForReadWrite(
297 Name: PathBuf, ResultFD&: FD, Disp: sys::fs::CD_OpenAlways, Flags: sys::fs::OF_None))
298 return createFileError(F: PathBuf, EC);
299 assert(FD != -1);
300
301 sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
302 llvm::scope_exit CloseFile([&]() { sys::fs::closeFile(F&: File); });
303
304 if (std::error_code EC = lockFileThreadSafe(FD, Kind: sys::fs::LockKind::Exclusive))
305 return createFileError(F: PathBuf, EC);
306 llvm::scope_exit UnlockFD([&]() { unlockFileThreadSafe(FD); });
307
308 std::shared_ptr<ondisk::OnDiskCASLogger> Logger;
309#ifndef _WIN32
310 if (Error E =
311 ondisk::OnDiskCASLogger::openIfEnabled(Path: RootPath).moveInto(Value&: Logger))
312 return std::move(E);
313#endif
314
315 SmallString<8> Bytes;
316 if (Error E = sys::fs::readNativeFileToEOF(FileHandle: File, Buffer&: Bytes))
317 return createFileError(F: PathBuf, E: std::move(E));
318
319 uint64_t ValidationBootTime = 0;
320 if (!Bytes.empty() &&
321 StringRef(Bytes).trim().getAsInteger(Radix: 10, Result&: ValidationBootTime))
322 return createFileError(F: PathBuf, EC: errc::illegal_byte_sequence,
323 Fmt: "expected integer");
324
325 static uint64_t BootTime = 0;
326 if (BootTime == 0)
327 if (Error E = getBootTime().moveInto(Value&: BootTime))
328 return std::move(E);
329
330 bool Recovered = false;
331 bool Skipped = false;
332 std::string LogValidationError;
333
334 llvm::scope_exit Log([&] {
335 if (!Logger)
336 return;
337 Logger->logUnifiedOnDiskCacheValidateIfNeeded(
338 Path: RootPath, BootTime, ValidationTime: ValidationBootTime, CheckHash, AllowRecovery,
339 Force: ForceValidation, LLVMCas: LLVMCasBinaryPath, ValidationError: LogValidationError, Skipped,
340 Recovered);
341 });
342
343 if (ValidationBootTime == BootTime && !ForceValidation) {
344 Skipped = true;
345 return ValidationResult::Skipped;
346 }
347
348 // Validate!
349 bool NeedsRecovery = false;
350 Error E = LLVMCasBinaryPath
351 ? validateOutOfProcess(LLVMCasBinary: *LLVMCasBinaryPath, RootPath, CheckHash)
352 : validateInProcess(RootPath, HashName, HashByteSize, CheckHash,
353 HashFn);
354 if (E) {
355 if (Logger)
356 LogValidationError = toStringWithoutConsuming(E);
357 if (AllowRecovery) {
358 consumeError(Err: std::move(E));
359 NeedsRecovery = true;
360 } else {
361 return std::move(E);
362 }
363 }
364
365 if (NeedsRecovery) {
366 sys::path::remove_filename(path&: PathBuf);
367 sys::path::append(path&: PathBuf, a: "lock");
368
369 int LockFD = -1;
370 if (std::error_code EC = sys::fs::openFileForReadWrite(
371 Name: PathBuf, ResultFD&: LockFD, Disp: sys::fs::CD_OpenAlways, Flags: sys::fs::OF_None))
372 return createFileError(F: PathBuf, EC);
373 sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(FD: LockFD);
374 llvm::scope_exit CloseLock([&]() { sys::fs::closeFile(F&: LockFile); });
375 if (std::error_code EC = tryLockFileThreadSafe(FD: LockFD)) {
376 if (EC == std::errc::no_lock_available)
377 return createFileError(
378 F: PathBuf, EC,
379 Fmt: "CAS validation requires exclusive access but CAS was in use");
380 return createFileError(F: PathBuf, EC);
381 }
382 llvm::scope_exit UnlockFD([&]() { unlockFileThreadSafe(FD: LockFD); });
383
384 auto DBDirs = getAllDBDirs(Path: RootPath);
385 if (!DBDirs)
386 return DBDirs.takeError();
387
388 for (StringRef DBDir : *DBDirs) {
389 sys::path::remove_filename(path&: PathBuf);
390 sys::path::append(path&: PathBuf, a: DBDir);
391 std::error_code EC;
392 int Attempt = 0, MaxAttempts = 100;
393 SmallString<128> GCPath;
394 for (; Attempt < MaxAttempts; ++Attempt) {
395 GCPath.assign(RHS: RootPath);
396 sys::path::append(path&: GCPath, a: CorruptPrefix + std::to_string(val: Attempt) +
397 "." + DBDir);
398 EC = sys::fs::rename(from: PathBuf, to: GCPath);
399 // Darwin uses ENOTEMPTY. Linux may return either ENOTEMPTY or EEXIST.
400 if (EC != errc::directory_not_empty && EC != errc::file_exists)
401 break;
402 }
403 if (Attempt == MaxAttempts)
404 return createStringError(
405 EC, S: "rename " + PathBuf +
406 " failed: too many CAS directories awaiting pruning");
407 if (EC)
408 return createStringError(EC, S: "rename " + PathBuf + " to " + GCPath +
409 " failed: " + EC.message());
410 }
411 Recovered = true;
412 }
413
414 if (ValidationBootTime != BootTime) {
415 // Fix filename in case we have error to report.
416 sys::path::remove_filename(path&: PathBuf);
417 sys::path::append(path&: PathBuf, a: ValidationFilename);
418 if (std::error_code EC = sys::fs::resize_file(FD, Size: 0))
419 return createFileError(F: PathBuf, EC);
420 raw_fd_ostream OS(FD, /*shouldClose=*/false);
421 OS.seek(off: 0); // resize does not reset position
422 OS << BootTime << '\n';
423 if (OS.has_error())
424 return createFileError(F: PathBuf, EC: OS.error());
425 }
426
427 return NeedsRecovery ? ValidationResult::Recovered : ValidationResult::Valid;
428}
429
430Expected<std::unique_ptr<UnifiedOnDiskCache>>
431UnifiedOnDiskCache::open(StringRef RootPath, std::optional<uint64_t> SizeLimit,
432 StringRef HashName, unsigned HashByteSize,
433 OnDiskGraphDB::FaultInPolicy FaultInPolicy) {
434 auto BypassSandbox = sys::sandbox::scopedDisable();
435
436 if (std::error_code EC = sys::fs::create_directories(path: RootPath))
437 return createFileError(F: RootPath, EC);
438
439 SmallString<256> PathBuf(RootPath);
440 sys::path::append(path&: PathBuf, a: "lock");
441 int LockFD = -1;
442 if (std::error_code EC = sys::fs::openFileForReadWrite(
443 Name: PathBuf, ResultFD&: LockFD, Disp: sys::fs::CD_OpenAlways, Flags: sys::fs::OF_None))
444 return createFileError(F: PathBuf, EC);
445 assert(LockFD != -1);
446 // Locking the directory using shared lock, which will prevent other processes
447 // from creating a new chain (essentially while a \p UnifiedOnDiskCache
448 // instance holds a shared lock the storage for the primary directory will
449 // grow unrestricted).
450 if (std::error_code EC =
451 lockFileThreadSafe(FD: LockFD, Kind: sys::fs::LockKind::Shared))
452 return createFileError(F: PathBuf, EC);
453
454 auto DBDirs = getAllDBDirs(Path: RootPath);
455 if (!DBDirs)
456 return DBDirs.takeError();
457 if (DBDirs->empty())
458 DBDirs->push_back(Elt: (Twine(DBDirPrefix) + "1").str());
459
460 std::shared_ptr<ondisk::OnDiskCASLogger> Logger;
461#ifndef _WIN32
462 if (Error E =
463 ondisk::OnDiskCASLogger::openIfEnabled(Path: RootPath).moveInto(Value&: Logger))
464 return std::move(E);
465#endif
466
467 /// If there is only one directory open databases on it. If there are 2 or
468 /// more directories, get the most recent directories and chain them, with the
469 /// most recent being the primary one. The remaining directories are unused
470 /// data than can be garbage-collected.
471 auto UniDB = std::unique_ptr<UnifiedOnDiskCache>(new UnifiedOnDiskCache());
472 std::unique_ptr<OnDiskGraphDB> UpstreamGraphDB;
473 std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;
474 if (DBDirs->size() > 1) {
475 StringRef UpstreamDir = *(DBDirs->end() - 2);
476 PathBuf = RootPath;
477 sys::path::append(path&: PathBuf, a: UpstreamDir);
478 if (Error E =
479 OnDiskGraphDB::open(Path: PathBuf, HashName, HashByteSize,
480 /*UpstreamDB=*/nullptr, Logger, Policy: FaultInPolicy)
481 .moveInto(Value&: UpstreamGraphDB))
482 return std::move(E);
483 if (Error E = OnDiskKeyValueDB::open(Path: PathBuf, HashName, KeySize: HashByteSize,
484 /*ValueName=*/"objectid",
485 /*ValueSize=*/sizeof(uint64_t),
486 /*UnifiedCache=*/nullptr, Logger)
487 .moveInto(Value&: UpstreamKVDB))
488 return std::move(E);
489 }
490
491 StringRef PrimaryDir = *(DBDirs->end() - 1);
492 PathBuf = RootPath;
493 sys::path::append(path&: PathBuf, a: PrimaryDir);
494 std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB;
495 if (Error E =
496 OnDiskGraphDB::open(Path: PathBuf, HashName, HashByteSize,
497 UpstreamDB: UpstreamGraphDB.get(), Logger, Policy: FaultInPolicy)
498 .moveInto(Value&: PrimaryGraphDB))
499 return std::move(E);
500 std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB;
501 // \p UnifiedOnDiskCache does manual chaining for key-value requests,
502 // including an extra translation step of the value during fault-in.
503 if (Error E = OnDiskKeyValueDB::open(Path: PathBuf, HashName, KeySize: HashByteSize,
504 /*ValueName=*/"objectid",
505 /*ValueSize=*/sizeof(uint64_t),
506 UnifiedCache: UniDB.get(), Logger)
507 .moveInto(Value&: PrimaryKVDB))
508 return std::move(E);
509
510 UniDB->RootPath = RootPath;
511 UniDB->SizeLimit = SizeLimit.value_or(u: 0);
512 UniDB->LockFD = LockFD;
513 UniDB->NeedsGarbageCollection = DBDirs->size() > 2;
514 UniDB->PrimaryDBDir = PrimaryDir;
515 UniDB->UpstreamGraphDB = std::move(UpstreamGraphDB);
516 UniDB->PrimaryGraphDB = std::move(PrimaryGraphDB);
517 UniDB->UpstreamKVDB = std::move(UpstreamKVDB);
518 UniDB->PrimaryKVDB = std::move(PrimaryKVDB);
519 UniDB->Logger = std::move(Logger);
520
521 return std::move(UniDB);
522}
523
524void UnifiedOnDiskCache::setSizeLimit(std::optional<uint64_t> SizeLimit) {
525 this->SizeLimit = SizeLimit.value_or(u: 0);
526}
527
528uint64_t UnifiedOnDiskCache::getStorageSize() const {
529 uint64_t TotalSize = getPrimaryStorageSize();
530 if (UpstreamGraphDB)
531 TotalSize += UpstreamGraphDB->getStorageSize();
532 if (UpstreamKVDB)
533 TotalSize += UpstreamKVDB->getStorageSize();
534 return TotalSize;
535}
536
537uint64_t UnifiedOnDiskCache::getPrimaryStorageSize() const {
538 return PrimaryGraphDB->getStorageSize() + PrimaryKVDB->getStorageSize();
539}
540
541bool UnifiedOnDiskCache::hasExceededSizeLimit() const {
542 uint64_t CurSizeLimit = SizeLimit;
543 if (!CurSizeLimit)
544 return false;
545
546 // If the hard limit is beyond 85%, declare above limit and request clean up.
547 unsigned CurrentPercent =
548 std::max(a: PrimaryGraphDB->getHardStorageLimitUtilization(),
549 b: PrimaryKVDB->getHardStorageLimitUtilization());
550 if (CurrentPercent > 85)
551 return true;
552
553 // We allow each of the directories in the chain to reach up to half the
554 // intended size limit. Check whether the primary directory has exceeded half
555 // the limit or not, in order to decide whether we need to start a new chain.
556 //
557 // We could check the size limit against the sum of sizes of both the primary
558 // and upstream directories but then if the upstream is significantly larger
559 // than the intended limit, it would trigger a new chain to be created before
560 // the primary has reached its own limit. Essentially in such situation we
561 // prefer reclaiming the storage later in order to have more consistent cache
562 // hits behavior.
563 return (CurSizeLimit / 2) < getPrimaryStorageSize();
564}
565
566Error UnifiedOnDiskCache::close(bool CheckSizeLimit) {
567 auto BypassSandbox = sys::sandbox::scopedDisable();
568
569 if (LockFD == -1)
570 return Error::success(); // already closed.
571 llvm::scope_exit CloseLock([&]() {
572 assert(LockFD >= 0);
573 sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(FD: LockFD);
574 sys::fs::closeFile(F&: LockFile);
575 LockFD = -1;
576 });
577
578 bool ExceededSizeLimit = CheckSizeLimit ? hasExceededSizeLimit() : false;
579 UpstreamKVDB.reset();
580 PrimaryKVDB.reset();
581 UpstreamGraphDB.reset();
582 PrimaryGraphDB.reset();
583 if (std::error_code EC = unlockFileThreadSafe(FD: LockFD))
584 return createFileError(F: RootPath, EC);
585
586 if (!ExceededSizeLimit)
587 return Error::success();
588
589 // The primary directory exceeded its intended size limit. Try to get an
590 // exclusive lock in order to create a new primary directory for next time
591 // this \p UnifiedOnDiskCache path is opened.
592
593 if (std::error_code EC = tryLockFileThreadSafe(
594 FD: LockFD, Timeout: std::chrono::milliseconds(0), Kind: sys::fs::LockKind::Exclusive)) {
595 if (EC == errc::no_lock_available)
596 return Error::success(); // couldn't get exclusive lock, give up.
597 return createFileError(F: RootPath, EC);
598 }
599 llvm::scope_exit UnlockFile([&]() { unlockFileThreadSafe(FD: LockFD); });
600
601 // Managed to get an exclusive lock which means there are no other open
602 // \p UnifiedOnDiskCache instances for the same path, so we can safely start a
603 // new primary directory. To start a new primary directory we just have to
604 // create a new empty directory with the next consecutive index; since this is
605 // an atomic operation we will leave the top-level directory in a consistent
606 // state even if the process dies during this code-path.
607
608 SmallString<256> PathBuf(RootPath);
609 raw_svector_ostream OS(PathBuf);
610 OS << sys::path::get_separator();
611 getNextDBDirName(DBDir: PrimaryDBDir, OS);
612 if (std::error_code EC = sys::fs::create_directory(path: PathBuf))
613 return createFileError(F: PathBuf, EC);
614
615 NeedsGarbageCollection = true;
616 return Error::success();
617}
618
619UnifiedOnDiskCache::UnifiedOnDiskCache() = default;
620
621UnifiedOnDiskCache::~UnifiedOnDiskCache() { consumeError(Err: close()); }
622
623Error UnifiedOnDiskCache::collectGarbage(StringRef Path,
624 ondisk::OnDiskCASLogger *Logger) {
625 auto DBDirs = getAllGarbageDirs(Path);
626 if (!DBDirs)
627 return DBDirs.takeError();
628
629 SmallString<256> PathBuf(Path);
630 for (StringRef UnusedSubDir : *DBDirs) {
631 sys::path::append(path&: PathBuf, a: UnusedSubDir);
632 if (Logger)
633 Logger->logUnifiedOnDiskCacheCollectGarbage(Path: PathBuf);
634 if (std::error_code EC = sys::fs::remove_directories(path: PathBuf))
635 return createFileError(F: PathBuf, EC);
636 sys::path::remove_filename(path&: PathBuf);
637 }
638 return Error::success();
639}
640
641Error UnifiedOnDiskCache::collectGarbage() {
642 return collectGarbage(Path: RootPath, Logger: Logger.get());
643}
644