1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one
11/// directory while also restricting storage growth with a scheme of chaining
12/// the two most recent directories (primary & upstream), where the primary
13/// "faults-in" data from the upstream one. When the primary (most recent)
14/// directory exceeds its intended limit a new empty directory becomes the
15/// primary one.
16///
17/// Within the top-level directory (the path that \p UnifiedOnDiskCache::open
18/// receives) there are directories named like this:
19///
20/// 'v<version>.<x>'
21/// 'v<version>.<x+1>'
22/// 'v<version>.<x+2>'
23/// ...
24///
25/// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and
26/// the part after the dot is an increasing integer. The primary directory is
27/// the one with the highest integer and the upstream one is the directory
28/// before it. For example, if the sub-directories contained are:
29///
30/// 'v1.5', 'v1.6', 'v1.7', 'v1.8'
31///
32/// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are
33/// unused directories that can be safely deleted at any time and by any
34/// process.
35///
36/// Contained within the top-level directory is a file named "lock" which is
37/// used for processes to take shared or exclusive locks for the contents of the
38/// top directory. While a \p UnifiedOnDiskCache is open it keeps a shared lock
39/// for the top-level directory; when it closes, if the primary sub-directory
40/// exceeded its limit, it attempts to get an exclusive lock in order to create
/// a new empty primary directory; if it can't get the exclusive lock it gives
/// up and lets the next \p UnifiedOnDiskCache instance that closes attempt
/// again.
44///
45/// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a
46/// directory, by any process, the storage size in that directory will keep
47/// growing unrestricted. But the major benefit is that garbage-collection can
48/// be triggered on a directory concurrently, at any time and by any process,
49/// without affecting any active readers/writers in the same process or other
50/// processes.
51///
52/// The \c UnifiedOnDiskCache also provides validation and recovery on top of
53/// the underlying on-disk storage. The low-level storage is designed to remain
54/// coherent across regular process crashes, but may be invalid after power loss
55/// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows
56/// validating the contents once per boot and can recover by marking invalid
57/// data for garbage collection.
58///
59/// The data recovery described above requires exclusive access to the CAS, and
60/// it is an error to attempt recovery if the CAS is open in any process/thread.
61/// In order to maximize backwards compatibility with tools that do not perform
62/// validation before opening the CAS, we do not attempt to get exclusive access
63/// until recovery is actually performed, meaning as long as the data is valid
64/// it will not conflict with concurrent use.
65//
66//===----------------------------------------------------------------------===//
67
68#include "llvm/CAS/UnifiedOnDiskCache.h"
69#include "OnDiskCommon.h"
70#include "llvm/ADT/STLExtras.h"
71#include "llvm/ADT/ScopeExit.h"
72#include "llvm/ADT/SmallString.h"
73#include "llvm/ADT/SmallVector.h"
74#include "llvm/ADT/StringExtras.h"
75#include "llvm/ADT/StringRef.h"
76#include "llvm/CAS/OnDiskCASLogger.h"
77#include "llvm/CAS/OnDiskGraphDB.h"
78#include "llvm/CAS/OnDiskKeyValueDB.h"
79#include "llvm/Support/Compiler.h"
80#include "llvm/Support/Errc.h"
81#include "llvm/Support/Error.h"
82#include "llvm/Support/FileSystem.h"
83#include "llvm/Support/FileUtilities.h"
84#include "llvm/Support/IOSandbox.h"
85#include "llvm/Support/MemoryBuffer.h"
86#include "llvm/Support/Path.h"
87#include "llvm/Support/Program.h"
88#include "llvm/Support/raw_ostream.h"
89#include <optional>
90
91using namespace llvm;
92using namespace llvm::cas;
93using namespace llvm::cas::ondisk;
94
/// Name prefix shared by all chained database sub-directories; the decimal
/// integer that follows it determines the directory's position in the chain.
///
/// FIXME: When the version of \p DBDirPrefix is bumped up we need to figure out
/// how to handle the leftover sub-directories of the previous version, within
/// the \p UnifiedOnDiskCache::collectGarbage function.
static constexpr StringLiteral DBDirPrefix = "v1.";

/// Name of the file, inside the top-level directory, that
/// \p UnifiedOnDiskCache::validateIfNeeded locks, reads the recorded boot time
/// from, and rewrites with the current boot time.
static constexpr StringLiteral ValidationFilename = "v1.validation";
/// Prefix of the names that database sub-directories are renamed to during
/// recovery; directories carrying it are reported as garbage by
/// \p getAllGarbageDirs.
static constexpr StringLiteral CorruptPrefix = "corrupt.";
102
103ObjectID UnifiedOnDiskCache::getObjectIDFromValue(ArrayRef<char> Value) {
104 // little endian encoded.
105 assert(Value.size() == sizeof(uint64_t));
106 return ObjectID::fromOpaqueData(Opaque: support::endian::read64le(P: Value.data()));
107}
108
109UnifiedOnDiskCache::ValueBytes
110UnifiedOnDiskCache::getValueFromObjectID(ObjectID ID) {
111 // little endian encoded.
112 UnifiedOnDiskCache::ValueBytes ValBytes;
113 static_assert(ValBytes.size() == sizeof(ID.getOpaqueData()));
114 support::endian::write64le(P: ValBytes.data(), V: ID.getOpaqueData());
115 return ValBytes;
116}
117
118Expected<std::optional<ArrayRef<char>>>
119UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef<uint8_t> Key) {
120 assert(UpstreamGraphDB);
121 assert(UpstreamKVDB);
122
123 std::optional<ArrayRef<char>> UpstreamValue;
124 if (Error E = UpstreamKVDB->get(Key).moveInto(Value&: UpstreamValue))
125 return std::move(E);
126 if (!UpstreamValue)
127 return std::nullopt;
128
129 // The value is the \p ObjectID in the context of the upstream
130 // \p OnDiskGraphDB instance. Translate it to the context of the primary
131 // \p OnDiskGraphDB instance.
132 ObjectID UpstreamID = getObjectIDFromValue(Value: *UpstreamValue);
133 auto PrimaryID =
134 PrimaryGraphDB->getReference(Hash: UpstreamGraphDB->getDigest(Ref: UpstreamID));
135 if (LLVM_UNLIKELY(!PrimaryID))
136 return PrimaryID.takeError();
137 return PrimaryKVDB->put(Key, Value: getValueFromObjectID(ID: *PrimaryID));
138}
139
140/// \returns all the 'v<version>.<x>' names of sub-directories, sorted with
141/// ascending order of the integer after the dot. Corrupt directories, if
142/// included, will come first.
143static Expected<SmallVector<std::string, 4>>
144getAllDBDirs(StringRef Path, bool IncludeCorrupt = false) {
145 struct DBDir {
146 uint64_t Order;
147 std::string Name;
148 };
149 SmallVector<DBDir> FoundDBDirs;
150
151 std::error_code EC;
152 for (sys::fs::directory_iterator DirI(Path, EC), DirE; !EC && DirI != DirE;
153 DirI.increment(ec&: EC)) {
154 if (DirI->type() != sys::fs::file_type::directory_file)
155 continue;
156 StringRef SubDir = sys::path::filename(path: DirI->path());
157 if (IncludeCorrupt && SubDir.starts_with(Prefix: CorruptPrefix)) {
158 FoundDBDirs.push_back(Elt: {.Order: 0, .Name: std::string(SubDir)});
159 continue;
160 }
161 if (!SubDir.starts_with(Prefix: DBDirPrefix))
162 continue;
163 uint64_t Order;
164 if (SubDir.substr(Start: DBDirPrefix.size()).getAsInteger(Radix: 10, Result&: Order))
165 return createStringError(EC: inconvertibleErrorCode(),
166 S: "unexpected directory " + DirI->path());
167 FoundDBDirs.push_back(Elt: {.Order: Order, .Name: std::string(SubDir)});
168 }
169 if (EC)
170 return createFileError(F: Path, EC);
171
172 llvm::sort(C&: FoundDBDirs, Comp: [](const DBDir &LHS, const DBDir &RHS) -> bool {
173 return LHS.Order < RHS.Order;
174 });
175
176 SmallVector<std::string, 4> DBDirs;
177 for (DBDir &Dir : FoundDBDirs)
178 DBDirs.push_back(Elt: std::move(Dir.Name));
179 return DBDirs;
180}
181
182static Expected<SmallVector<std::string, 4>> getAllGarbageDirs(StringRef Path) {
183 auto DBDirs = getAllDBDirs(Path, /*IncludeCorrupt=*/true);
184 if (!DBDirs)
185 return DBDirs.takeError();
186
187 // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure
188 // out how to handle the leftover sub-directories of the previous version.
189
190 for (unsigned Keep = 2; Keep > 0 && !DBDirs->empty(); --Keep) {
191 StringRef Back(DBDirs->back());
192 if (Back.starts_with(Prefix: CorruptPrefix))
193 break;
194 DBDirs->pop_back();
195 }
196 return *DBDirs;
197}
198
199/// \returns Given a sub-directory named 'v<version>.<x>', it outputs the
200/// 'v<version>.<x+1>' name.
201static void getNextDBDirName(StringRef DBDir, llvm::raw_ostream &OS) {
202 assert(DBDir.starts_with(DBDirPrefix));
203 uint64_t Count;
204 bool Failed = DBDir.substr(Start: DBDirPrefix.size()).getAsInteger(Radix: 10, Result&: Count);
205 assert(!Failed);
206 (void)Failed;
207 OS << DBDirPrefix << Count + 1;
208}
209
210static Error validateOutOfProcess(StringRef LLVMCasBinary, StringRef RootPath,
211 bool CheckHash) {
212 SmallVector<StringRef> Args{LLVMCasBinary, "-cas", RootPath, "-validate"};
213 if (CheckHash)
214 Args.push_back(Elt: "-check-hash");
215
216 llvm::SmallString<128> StdErrPath;
217 int StdErrFD = -1;
218 if (std::error_code EC = sys::fs::createTemporaryFile(
219 Prefix: "llvm-cas-validate-stderr", Suffix: "txt", ResultFD&: StdErrFD, ResultPath&: StdErrPath,
220 Flags: llvm::sys::fs::OF_Text))
221 return createStringError(EC, S: "failed to create temporary file");
222 FileRemover OutputRemover(StdErrPath.c_str());
223
224 std::optional<llvm::StringRef> Redirects[] = {
225 {""}, // stdin = /dev/null
226 {""}, // stdout = /dev/null
227 StdErrPath.str(),
228 };
229
230 std::string ErrMsg;
231 int Result =
232 sys::ExecuteAndWait(Program: LLVMCasBinary, Args, /*Env=*/std::nullopt, Redirects,
233 /*SecondsToWait=*/120, /*MemoryLimit=*/0, ErrMsg: &ErrMsg);
234
235 if (Result == -1)
236 return createStringError(S: "failed to exec " + join(R&: Args, Separator: " ") + ": " +
237 ErrMsg);
238 if (Result != 0) {
239 llvm::SmallString<64> Err("cas contents invalid");
240 if (!ErrMsg.empty()) {
241 Err += ": ";
242 Err += ErrMsg;
243 }
244 auto StdErrBuf = MemoryBuffer::getFile(Filename: StdErrPath.c_str());
245 if (StdErrBuf && !(*StdErrBuf)->getBuffer().empty()) {
246 Err += ": ";
247 Err += (*StdErrBuf)->getBuffer();
248 }
249 return createStringError(S: Err);
250 }
251 return Error::success();
252}
253
254Error UnifiedOnDiskCache::validateActionCache() const {
255 return getKeyValueDB().validate();
256}
257
258static Error validateInProcess(StringRef RootPath, StringRef HashName,
259 unsigned HashByteSize, bool CheckHash,
260 OnDiskGraphDB::HashingFuncT HashFn) {
261 std::shared_ptr<UnifiedOnDiskCache> UniDB;
262 if (Error E = UnifiedOnDiskCache::open(Path: RootPath, SizeLimit: std::nullopt, HashName,
263 HashByteSize)
264 .moveInto(Value&: UniDB))
265 return E;
266 if (Error E = UniDB->getGraphDB().validate(Deep: CheckHash, Hasher: HashFn))
267 return E;
268 if (Error E = UniDB->validateActionCache())
269 return E;
270 return Error::success();
271}
272
273Expected<ValidationResult> UnifiedOnDiskCache::validateIfNeeded(
274 StringRef RootPath, StringRef HashName, unsigned HashByteSize,
275 bool CheckHash, OnDiskGraphDB::HashingFuncT HashFn, bool AllowRecovery,
276 bool ForceValidation, std::optional<StringRef> LLVMCasBinaryPath) {
277 if (std::error_code EC = sys::fs::create_directories(path: RootPath))
278 return createFileError(F: RootPath, EC);
279
280 SmallString<256> PathBuf(RootPath);
281 sys::path::append(path&: PathBuf, a: ValidationFilename);
282 int FD = -1;
283 if (std::error_code EC = sys::fs::openFileForReadWrite(
284 Name: PathBuf, ResultFD&: FD, Disp: sys::fs::CD_OpenAlways, Flags: sys::fs::OF_None))
285 return createFileError(F: PathBuf, EC);
286 assert(FD != -1);
287
288 sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD);
289 llvm::scope_exit CloseFile([&]() { sys::fs::closeFile(F&: File); });
290
291 if (std::error_code EC = lockFileThreadSafe(FD, Kind: sys::fs::LockKind::Exclusive))
292 return createFileError(F: PathBuf, EC);
293 llvm::scope_exit UnlockFD([&]() { unlockFileThreadSafe(FD); });
294
295 std::shared_ptr<ondisk::OnDiskCASLogger> Logger;
296#ifndef _WIN32
297 if (Error E =
298 ondisk::OnDiskCASLogger::openIfEnabled(Path: RootPath).moveInto(Value&: Logger))
299 return std::move(E);
300#endif
301
302 SmallString<8> Bytes;
303 if (Error E = sys::fs::readNativeFileToEOF(FileHandle: File, Buffer&: Bytes))
304 return createFileError(F: PathBuf, E: std::move(E));
305
306 uint64_t ValidationBootTime = 0;
307 if (!Bytes.empty() &&
308 StringRef(Bytes).trim().getAsInteger(Radix: 10, Result&: ValidationBootTime))
309 return createFileError(F: PathBuf, EC: errc::illegal_byte_sequence,
310 Fmt: "expected integer");
311
312 static uint64_t BootTime = 0;
313 if (BootTime == 0)
314 if (Error E = getBootTime().moveInto(Value&: BootTime))
315 return std::move(E);
316
317 bool Recovered = false;
318 bool Skipped = false;
319 std::string LogValidationError;
320
321 llvm::scope_exit Log([&] {
322 if (!Logger)
323 return;
324 Logger->logUnifiedOnDiskCacheValidateIfNeeded(
325 Path: RootPath, BootTime, ValidationTime: ValidationBootTime, CheckHash, AllowRecovery,
326 Force: ForceValidation, LLVMCas: LLVMCasBinaryPath, ValidationError: LogValidationError, Skipped,
327 Recovered);
328 });
329
330 if (ValidationBootTime == BootTime && !ForceValidation) {
331 Skipped = true;
332 return ValidationResult::Skipped;
333 }
334
335 // Validate!
336 bool NeedsRecovery = false;
337 Error E = LLVMCasBinaryPath
338 ? validateOutOfProcess(LLVMCasBinary: *LLVMCasBinaryPath, RootPath, CheckHash)
339 : validateInProcess(RootPath, HashName, HashByteSize, CheckHash,
340 HashFn);
341 if (E) {
342 if (Logger)
343 LogValidationError = toStringWithoutConsuming(E);
344 if (AllowRecovery) {
345 consumeError(Err: std::move(E));
346 NeedsRecovery = true;
347 } else {
348 return std::move(E);
349 }
350 }
351
352 if (NeedsRecovery) {
353 sys::path::remove_filename(path&: PathBuf);
354 sys::path::append(path&: PathBuf, a: "lock");
355
356 int LockFD = -1;
357 if (std::error_code EC = sys::fs::openFileForReadWrite(
358 Name: PathBuf, ResultFD&: LockFD, Disp: sys::fs::CD_OpenAlways, Flags: sys::fs::OF_None))
359 return createFileError(F: PathBuf, EC);
360 sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(FD: LockFD);
361 llvm::scope_exit CloseLock([&]() { sys::fs::closeFile(F&: LockFile); });
362 if (std::error_code EC = tryLockFileThreadSafe(FD: LockFD)) {
363 if (EC == std::errc::no_lock_available)
364 return createFileError(
365 F: PathBuf, EC,
366 Fmt: "CAS validation requires exclusive access but CAS was in use");
367 return createFileError(F: PathBuf, EC);
368 }
369 llvm::scope_exit UnlockFD([&]() { unlockFileThreadSafe(FD: LockFD); });
370
371 auto DBDirs = getAllDBDirs(Path: RootPath);
372 if (!DBDirs)
373 return DBDirs.takeError();
374
375 for (StringRef DBDir : *DBDirs) {
376 sys::path::remove_filename(path&: PathBuf);
377 sys::path::append(path&: PathBuf, a: DBDir);
378 std::error_code EC;
379 int Attempt = 0, MaxAttempts = 100;
380 SmallString<128> GCPath;
381 for (; Attempt < MaxAttempts; ++Attempt) {
382 GCPath.assign(RHS: RootPath);
383 sys::path::append(path&: GCPath, a: CorruptPrefix + std::to_string(val: Attempt) +
384 "." + DBDir);
385 EC = sys::fs::rename(from: PathBuf, to: GCPath);
386 // Darwin uses ENOTEMPTY. Linux may return either ENOTEMPTY or EEXIST.
387 if (EC != errc::directory_not_empty && EC != errc::file_exists)
388 break;
389 }
390 if (Attempt == MaxAttempts)
391 return createStringError(
392 EC, S: "rename " + PathBuf +
393 " failed: too many CAS directories awaiting pruning");
394 if (EC)
395 return createStringError(EC, S: "rename " + PathBuf + " to " + GCPath +
396 " failed: " + EC.message());
397 }
398 Recovered = true;
399 }
400
401 if (ValidationBootTime != BootTime) {
402 // Fix filename in case we have error to report.
403 sys::path::remove_filename(path&: PathBuf);
404 sys::path::append(path&: PathBuf, a: ValidationFilename);
405 if (std::error_code EC = sys::fs::resize_file(FD, Size: 0))
406 return createFileError(F: PathBuf, EC);
407 raw_fd_ostream OS(FD, /*shouldClose=*/false);
408 OS.seek(off: 0); // resize does not reset position
409 OS << BootTime << '\n';
410 if (OS.has_error())
411 return createFileError(F: PathBuf, EC: OS.error());
412 }
413
414 return NeedsRecovery ? ValidationResult::Recovered : ValidationResult::Valid;
415}
416
417Expected<std::unique_ptr<UnifiedOnDiskCache>>
418UnifiedOnDiskCache::open(StringRef RootPath, std::optional<uint64_t> SizeLimit,
419 StringRef HashName, unsigned HashByteSize,
420 OnDiskGraphDB::FaultInPolicy FaultInPolicy) {
421 auto BypassSandbox = sys::sandbox::scopedDisable();
422
423 if (std::error_code EC = sys::fs::create_directories(path: RootPath))
424 return createFileError(F: RootPath, EC);
425
426 SmallString<256> PathBuf(RootPath);
427 sys::path::append(path&: PathBuf, a: "lock");
428 int LockFD = -1;
429 if (std::error_code EC = sys::fs::openFileForReadWrite(
430 Name: PathBuf, ResultFD&: LockFD, Disp: sys::fs::CD_OpenAlways, Flags: sys::fs::OF_None))
431 return createFileError(F: PathBuf, EC);
432 assert(LockFD != -1);
433 // Locking the directory using shared lock, which will prevent other processes
434 // from creating a new chain (essentially while a \p UnifiedOnDiskCache
435 // instance holds a shared lock the storage for the primary directory will
436 // grow unrestricted).
437 if (std::error_code EC =
438 lockFileThreadSafe(FD: LockFD, Kind: sys::fs::LockKind::Shared))
439 return createFileError(F: PathBuf, EC);
440
441 auto DBDirs = getAllDBDirs(Path: RootPath);
442 if (!DBDirs)
443 return DBDirs.takeError();
444 if (DBDirs->empty())
445 DBDirs->push_back(Elt: (Twine(DBDirPrefix) + "1").str());
446
447 std::shared_ptr<ondisk::OnDiskCASLogger> Logger;
448#ifndef _WIN32
449 if (Error E =
450 ondisk::OnDiskCASLogger::openIfEnabled(Path: RootPath).moveInto(Value&: Logger))
451 return std::move(E);
452#endif
453
454 /// If there is only one directory open databases on it. If there are 2 or
455 /// more directories, get the most recent directories and chain them, with the
456 /// most recent being the primary one. The remaining directories are unused
457 /// data than can be garbage-collected.
458 auto UniDB = std::unique_ptr<UnifiedOnDiskCache>(new UnifiedOnDiskCache());
459 std::unique_ptr<OnDiskGraphDB> UpstreamGraphDB;
460 std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;
461 if (DBDirs->size() > 1) {
462 StringRef UpstreamDir = *(DBDirs->end() - 2);
463 PathBuf = RootPath;
464 sys::path::append(path&: PathBuf, a: UpstreamDir);
465 if (Error E =
466 OnDiskGraphDB::open(Path: PathBuf, HashName, HashByteSize,
467 /*UpstreamDB=*/nullptr, Logger, Policy: FaultInPolicy)
468 .moveInto(Value&: UpstreamGraphDB))
469 return std::move(E);
470 if (Error E = OnDiskKeyValueDB::open(Path: PathBuf, HashName, KeySize: HashByteSize,
471 /*ValueName=*/"objectid",
472 /*ValueSize=*/sizeof(uint64_t),
473 /*UnifiedCache=*/nullptr, Logger)
474 .moveInto(Value&: UpstreamKVDB))
475 return std::move(E);
476 }
477
478 StringRef PrimaryDir = *(DBDirs->end() - 1);
479 PathBuf = RootPath;
480 sys::path::append(path&: PathBuf, a: PrimaryDir);
481 std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB;
482 if (Error E =
483 OnDiskGraphDB::open(Path: PathBuf, HashName, HashByteSize,
484 UpstreamDB: UpstreamGraphDB.get(), Logger, Policy: FaultInPolicy)
485 .moveInto(Value&: PrimaryGraphDB))
486 return std::move(E);
487 std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB;
488 // \p UnifiedOnDiskCache does manual chaining for key-value requests,
489 // including an extra translation step of the value during fault-in.
490 if (Error E = OnDiskKeyValueDB::open(Path: PathBuf, HashName, KeySize: HashByteSize,
491 /*ValueName=*/"objectid",
492 /*ValueSize=*/sizeof(uint64_t),
493 UnifiedCache: UniDB.get(), Logger)
494 .moveInto(Value&: PrimaryKVDB))
495 return std::move(E);
496
497 UniDB->RootPath = RootPath;
498 UniDB->SizeLimit = SizeLimit.value_or(u: 0);
499 UniDB->LockFD = LockFD;
500 UniDB->NeedsGarbageCollection = DBDirs->size() > 2;
501 UniDB->PrimaryDBDir = PrimaryDir;
502 UniDB->UpstreamGraphDB = std::move(UpstreamGraphDB);
503 UniDB->PrimaryGraphDB = std::move(PrimaryGraphDB);
504 UniDB->UpstreamKVDB = std::move(UpstreamKVDB);
505 UniDB->PrimaryKVDB = std::move(PrimaryKVDB);
506 UniDB->Logger = std::move(Logger);
507
508 return std::move(UniDB);
509}
510
511void UnifiedOnDiskCache::setSizeLimit(std::optional<uint64_t> SizeLimit) {
512 this->SizeLimit = SizeLimit.value_or(u: 0);
513}
514
515uint64_t UnifiedOnDiskCache::getStorageSize() const {
516 uint64_t TotalSize = getPrimaryStorageSize();
517 if (UpstreamGraphDB)
518 TotalSize += UpstreamGraphDB->getStorageSize();
519 if (UpstreamKVDB)
520 TotalSize += UpstreamKVDB->getStorageSize();
521 return TotalSize;
522}
523
524uint64_t UnifiedOnDiskCache::getPrimaryStorageSize() const {
525 return PrimaryGraphDB->getStorageSize() + PrimaryKVDB->getStorageSize();
526}
527
528bool UnifiedOnDiskCache::hasExceededSizeLimit() const {
529 uint64_t CurSizeLimit = SizeLimit;
530 if (!CurSizeLimit)
531 return false;
532
533 // If the hard limit is beyond 85%, declare above limit and request clean up.
534 unsigned CurrentPercent =
535 std::max(a: PrimaryGraphDB->getHardStorageLimitUtilization(),
536 b: PrimaryKVDB->getHardStorageLimitUtilization());
537 if (CurrentPercent > 85)
538 return true;
539
540 // We allow each of the directories in the chain to reach up to half the
541 // intended size limit. Check whether the primary directory has exceeded half
542 // the limit or not, in order to decide whether we need to start a new chain.
543 //
544 // We could check the size limit against the sum of sizes of both the primary
545 // and upstream directories but then if the upstream is significantly larger
546 // than the intended limit, it would trigger a new chain to be created before
547 // the primary has reached its own limit. Essentially in such situation we
548 // prefer reclaiming the storage later in order to have more consistent cache
549 // hits behavior.
550 return (CurSizeLimit / 2) < getPrimaryStorageSize();
551}
552
553Error UnifiedOnDiskCache::close(bool CheckSizeLimit) {
554 auto BypassSandbox = sys::sandbox::scopedDisable();
555
556 if (LockFD == -1)
557 return Error::success(); // already closed.
558 llvm::scope_exit CloseLock([&]() {
559 assert(LockFD >= 0);
560 sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(FD: LockFD);
561 sys::fs::closeFile(F&: LockFile);
562 LockFD = -1;
563 });
564
565 bool ExceededSizeLimit = CheckSizeLimit ? hasExceededSizeLimit() : false;
566 UpstreamKVDB.reset();
567 PrimaryKVDB.reset();
568 UpstreamGraphDB.reset();
569 PrimaryGraphDB.reset();
570 if (std::error_code EC = unlockFileThreadSafe(FD: LockFD))
571 return createFileError(F: RootPath, EC);
572
573 if (!ExceededSizeLimit)
574 return Error::success();
575
576 // The primary directory exceeded its intended size limit. Try to get an
577 // exclusive lock in order to create a new primary directory for next time
578 // this \p UnifiedOnDiskCache path is opened.
579
580 if (std::error_code EC = tryLockFileThreadSafe(
581 FD: LockFD, Timeout: std::chrono::milliseconds(0), Kind: sys::fs::LockKind::Exclusive)) {
582 if (EC == errc::no_lock_available)
583 return Error::success(); // couldn't get exclusive lock, give up.
584 return createFileError(F: RootPath, EC);
585 }
586 llvm::scope_exit UnlockFile([&]() { unlockFileThreadSafe(FD: LockFD); });
587
588 // Managed to get an exclusive lock which means there are no other open
589 // \p UnifiedOnDiskCache instances for the same path, so we can safely start a
590 // new primary directory. To start a new primary directory we just have to
591 // create a new empty directory with the next consecutive index; since this is
592 // an atomic operation we will leave the top-level directory in a consistent
593 // state even if the process dies during this code-path.
594
595 SmallString<256> PathBuf(RootPath);
596 raw_svector_ostream OS(PathBuf);
597 OS << sys::path::get_separator();
598 getNextDBDirName(DBDir: PrimaryDBDir, OS);
599 if (std::error_code EC = sys::fs::create_directory(path: PathBuf))
600 return createFileError(F: PathBuf, EC);
601
602 NeedsGarbageCollection = true;
603 return Error::success();
604}
605
606UnifiedOnDiskCache::UnifiedOnDiskCache() = default;
607
608UnifiedOnDiskCache::~UnifiedOnDiskCache() { consumeError(Err: close()); }
609
610Error UnifiedOnDiskCache::collectGarbage(StringRef Path,
611 ondisk::OnDiskCASLogger *Logger) {
612 auto DBDirs = getAllGarbageDirs(Path);
613 if (!DBDirs)
614 return DBDirs.takeError();
615
616 SmallString<256> PathBuf(Path);
617 for (StringRef UnusedSubDir : *DBDirs) {
618 sys::path::append(path&: PathBuf, a: UnusedSubDir);
619 if (Logger)
620 Logger->logUnifiedOnDiskCacheCollectGarbage(Path: PathBuf);
621 if (std::error_code EC = sys::fs::remove_directories(path: PathBuf))
622 return createFileError(F: PathBuf, EC);
623 sys::path::remove_filename(path&: PathBuf);
624 }
625 return Error::success();
626}
627
628Error UnifiedOnDiskCache::collectGarbage() {
629 return collectGarbage(Path: RootPath, Logger: Logger.get());
630}
631