1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file Implements MappedFileRegionArena.
9///
10/// A bump pointer allocator, backed by a memory-mapped file.
11///
12/// The effect we want is:
13///
14/// Step 1. If it doesn't exist, create the file with an initial size.
15/// Step 2. Reserve virtual memory large enough for the max file size.
16/// Step 3. Map the file into memory in the reserved region.
17/// Step 4. Increase the file size and update the mapping when necessary.
18///
/// However, updating the mapping is challenging when it needs to work portably,
/// and across multiple processes without locking for every read. Our current
/// implementation handles the steps above in the following ways:
22///
/// Step 1. Use \ref sys::fs::resize_file_sparse to grow the file to its max
///         size (typically several GB). If the file system doesn't support
///         sparse files, this may return a fully allocated file.
/// Step 2. Call \ref sys::fs::mapped_file_region to map the entire file.
/// Step 3. [Automatic as part of step 2.]
/// Step 4. If supported, use \c fallocate or similar APIs to reserve file
///         system storage for the sparse file, so we won't end up with a
///         partial file if the disk runs out of space.
31///
32/// Additionally, we attempt to resize the file to its actual data size when
33/// closing the mapping, if this is the only concurrent instance. This is done
34/// using file locks. Shrinking the file mitigates problems with having large
35/// files: on filesystems without sparse files it avoids unnecessary space use;
36/// it also avoids allocating the full size if another process copies the file,
37/// which typically loses sparseness. These mitigations only work while the file
38/// is not in use.
39///
/// The capacity and the header offset are determined by the first user of the
/// MappedFileRegionArena instance. Reopening the file with a different header
/// offset is an error on creation; a different capacity is ignored in favor of
/// the capacity already recorded in the file.
43///
44/// To support resizing, we use two separate file locks:
45/// 1. We use a shared reader lock on a ".shared" file until destruction.
46/// 2. We use a lock on the main file during initialization - shared to check
47/// the status, upgraded to exclusive to resize/initialize the file.
48///
49/// Then during destruction we attempt to get exclusive access on (1), which
50/// requires no concurrent readers. If so, we shrink the file. Using two
51/// separate locks simplifies the implementation and enables it to work on
52/// platforms (e.g. Windows) where a shared/reader lock prevents writing.
53//===----------------------------------------------------------------------===//
54
55#include "llvm/CAS/MappedFileRegionArena.h"
56#include "OnDiskCommon.h"
57#include "llvm/ADT/StringExtras.h"
58#include "llvm/CAS/OnDiskCASLogger.h"
59#include "llvm/Support/Errno.h"
60
61#if LLVM_ON_UNIX
62#include <sys/stat.h>
63#if __has_include(<sys/param.h>)
64#include <sys/param.h>
65#endif
66#ifdef DEV_BSIZE
67#define MAPPED_FILE_BSIZE DEV_BSIZE
68#elif __linux__
69#define MAPPED_FILE_BSIZE 512
70#endif
71#endif
72
73using namespace llvm;
74using namespace llvm::cas;
75using namespace llvm::cas::ondisk;
76
77namespace {
78struct FileWithLock {
79 std::string Path;
80 int FD = -1;
81 std::optional<sys::fs::LockKind> Locked;
82
83private:
84 FileWithLock(std::string PathStr, Error &E) : Path(std::move(PathStr)) {
85 ErrorAsOutParameter EOP(&E);
86 if (std::error_code EC = sys::fs::openFileForReadWrite(
87 Name: Path, ResultFD&: FD, Disp: sys::fs::CD_OpenAlways, Flags: sys::fs::OF_None))
88 E = createFileError(F: Path, EC);
89 }
90
91public:
92 FileWithLock(FileWithLock &) = delete;
93 FileWithLock(FileWithLock &&Other) {
94 Path = std::move(Other.Path);
95 FD = Other.FD;
96 Other.FD = -1;
97 Locked = Other.Locked;
98 Other.Locked = std::nullopt;
99 }
100
101 ~FileWithLock() { consumeError(Err: unlock()); }
102
103 static Expected<FileWithLock> open(StringRef Path) {
104 Error E = Error::success();
105 FileWithLock Result(Path.str(), E);
106 if (E)
107 return std::move(E);
108 return std::move(Result);
109 }
110
111 Error lock(sys::fs::LockKind LK) {
112 assert(!Locked && "already locked");
113 if (std::error_code EC = lockFileThreadSafe(FD, Kind: LK))
114 return createFileError(F: Path, EC);
115 Locked = LK;
116 return Error::success();
117 }
118
119 Error switchLock(sys::fs::LockKind LK) {
120 assert(Locked && "not locked");
121 if (auto E = unlock())
122 return E;
123
124 return lock(LK);
125 }
126
127 Error unlock() {
128 if (Locked) {
129 Locked = std::nullopt;
130 if (std::error_code EC = unlockFileThreadSafe(FD))
131 return createFileError(F: Path, EC);
132 }
133 return Error::success();
134 }
135
136 // Return true if succeed to lock the file exclusively.
137 bool tryLockExclusive() {
138 assert(!Locked && "can only try to lock if not locked");
139 if (tryLockFileThreadSafe(FD) == std::error_code()) {
140 Locked = sys::fs::LockKind::Exclusive;
141 return true;
142 }
143
144 return false;
145 }
146
147 // Release the lock so it will not be unlocked on destruction.
148 void release() {
149 Locked = std::nullopt;
150 FD = -1;
151 }
152};
153
154struct FileSizeInfo {
155 uint64_t Size;
156 uint64_t AllocatedSize;
157
158 static ErrorOr<FileSizeInfo> get(sys::fs::file_t File);
159};
160} // end anonymous namespace
161
162Expected<MappedFileRegionArena> MappedFileRegionArena::create(
163 const Twine &Path, uint64_t Capacity, uint64_t HeaderOffset,
164 std::shared_ptr<ondisk::OnDiskCASLogger> Logger,
165 function_ref<Error(MappedFileRegionArena &)> NewFileConstructor) {
166 uint64_t MinCapacity = HeaderOffset + sizeof(Header);
167 if (Capacity < MinCapacity)
168 return createStringError(
169 EC: std::make_error_code(e: std::errc::invalid_argument),
170 S: "capacity is too small to hold MappedFileRegionArena");
171
172 MappedFileRegionArena Result;
173 Result.Path = Path.str();
174 Result.Logger = std::move(Logger);
175
176 // Open the support file. See file comment for details of locking scheme.
177 SmallString<128> SharedFilePath(Result.Path);
178 SharedFilePath.append(RHS: ".shared");
179
180 auto SharedFileLock = FileWithLock::open(Path: SharedFilePath);
181 if (!SharedFileLock)
182 return SharedFileLock.takeError();
183 Result.SharedLockFD = SharedFileLock->FD;
184
185 // Take shared/reader lock that will be held until destroyImpl if construction
186 // is successful.
187 if (auto E = SharedFileLock->lock(LK: sys::fs::LockKind::Shared))
188 return std::move(E);
189
190 // Take shared/reader lock for initialization.
191 auto MainFile = FileWithLock::open(Path: Result.Path);
192 if (!MainFile)
193 return MainFile.takeError();
194 if (Error E = MainFile->lock(LK: sys::fs::LockKind::Shared))
195 return std::move(E);
196 Result.FD = MainFile->FD;
197
198 sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD: MainFile->FD);
199 auto FileSize = FileSizeInfo::get(File);
200 if (!FileSize)
201 return createFileError(F: Result.Path, EC: FileSize.getError());
202
203 // If the size is smaller than the capacity, we need to initialize the file.
204 // It maybe empty, or may have been shrunk during a previous close.
205 if (FileSize->Size < Capacity) {
206 // Lock the file exclusively so only one process will do the initialization.
207 if (Error E = MainFile->switchLock(LK: sys::fs::LockKind::Exclusive))
208 return std::move(E);
209 // Retrieve the current size now that we have exclusive access.
210 FileSize = FileSizeInfo::get(File);
211 if (!FileSize)
212 return createFileError(F: Result.Path, EC: FileSize.getError());
213 }
214
215 if (FileSize->Size >= MinCapacity) {
216 // File is initialized. Read out the header to check for capacity and
217 // offset.
218 SmallVector<char, sizeof(Header)> HeaderContent(sizeof(Header));
219 auto Size = sys::fs::readNativeFileSlice(FileHandle: File, Buf: HeaderContent, Offset: HeaderOffset);
220 if (!Size)
221 return Size.takeError();
222
223 Header H;
224 memcpy(dest: &H, src: HeaderContent.data(), n: sizeof(H));
225 if (H.HeaderOffset != HeaderOffset)
226 return createStringError(
227 EC: std::make_error_code(e: std::errc::invalid_argument),
228 S: "specified header offset (" + utostr(X: HeaderOffset) +
229 ") does not match existing config (" + utostr(X: H.HeaderOffset) +
230 ")");
231
232 if (H.Capacity < MinCapacity)
233 return createStringError(
234 EC: std::make_error_code(e: std::errc::bad_file_descriptor),
235 S: "capacity inside the MappedFileRegionArena is too small");
236
237 // If the capacity doesn't match, use the existing capacity instead.
238 if (H.Capacity != Capacity)
239 Capacity = H.Capacity;
240 }
241
242 // If the size is smaller than capacity, we need to resize the file.
243 if (FileSize->Size < Capacity) {
244 // Acquire the exclusive lock before resizing the file. In the rare case
245 // when opening a large CAS using a small requested size, a shared lock
246 // needs to switch to an exclusive lock here.
247 if (MainFile->Locked != sys::fs::LockKind::Exclusive) {
248 if (Error E = MainFile->switchLock(LK: sys::fs::LockKind::Exclusive))
249 return std::move(E);
250 }
251 if (std::error_code EC =
252 sys::fs::resize_file_sparse(FD: MainFile->FD, Size: Capacity))
253 return createFileError(F: Result.Path, EC);
254 if (Result.Logger)
255 Result.Logger->logMappedFileRegionArenaResizeFile(
256 Path: Result.Path, Before: FileSize->Size, After: Capacity);
257 }
258
259 // Create the mapped region.
260 {
261 std::error_code EC;
262 const char *Name = nullptr;
263#ifdef _WIN32
264 // Give the file mapping a name to ensure the same mappings are
265 // shared across processes.
266 std::string MapName = Result.Path;
267 std::replace(MapName.begin(), MapName.end(), '\\', '/');
268 MapName = "Local\\" + MapName;
269 Name = MapName.c_str();
270#endif
271 sys::fs::mapped_file_region Map(
272 File, sys::fs::mapped_file_region::readwrite, Capacity, 0, EC, Name);
273 if (EC)
274 return createFileError(F: Result.Path, EC);
275 Result.Region = std::move(Map);
276 }
277
278 // Initialize the header.
279 if (Error E = Result.initializeHeader(HeaderOffset))
280 return std::move(E);
281
282 if (FileSize->Size < MinCapacity) {
283 assert(MainFile->Locked == sys::fs::LockKind::Exclusive);
284 // If we need to fully initialize the file, call NewFileConstructor.
285 if (Error E = NewFileConstructor(Result))
286 return std::move(E);
287
288 Result.H->HeaderOffset.exchange(i: HeaderOffset);
289 Result.H->Capacity.exchange(i: Capacity);
290 }
291
292 if (MainFile->Locked == sys::fs::LockKind::Exclusive) {
293 // If holding an exclusive lock, we might have resized the file and
294 // performed some read/write to the file. Query the file size again to make
295 // sure everything is up-to-date. Otherwise, FileSize info is already
296 // up-to-date.
297 FileSize = FileSizeInfo::get(File);
298 if (!FileSize)
299 return createFileError(F: Result.Path, EC: FileSize.getError());
300 Result.H->AllocatedSize.exchange(i: FileSize->AllocatedSize);
301 }
302
303 // Release the shared lock so it can be closed in destoryImpl().
304 SharedFileLock->release();
305 return std::move(Result);
306}
307
308void MappedFileRegionArena::destroyImpl() {
309 if (!FD)
310 return;
311
312 // Drop the shared lock indicating we are no longer accessing the file.
313 if (SharedLockFD)
314 (void)unlockFileThreadSafe(FD: *SharedLockFD);
315
316 // Attempt to truncate the file if we can get exclusive access. Ignore any
317 // errors.
318 if (H) {
319 assert(SharedLockFD && "Must have shared lock file open");
320 if (tryLockFileThreadSafe(FD: *SharedLockFD) == std::error_code()) {
321 size_t Size = size();
322 size_t Capacity = capacity();
323 // sync to file system to make sure all contents are up-to-date.
324 (void)Region.sync();
325 // unmap the file before resizing since that is the requirement for
326 // some platforms.
327 Region.unmap();
328 (void)sys::fs::resize_file(FD: *FD, Size);
329 (void)unlockFileThreadSafe(FD: *SharedLockFD);
330 if (Logger)
331 Logger->logMappedFileRegionArenaResizeFile(Path, Before: Capacity, After: Size);
332 }
333 }
334
335 auto Close = [](std::optional<int> &FD) {
336 if (FD) {
337 sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD: *FD);
338 sys::fs::closeFile(F&: File);
339 FD = std::nullopt;
340 }
341 };
342
343 // Close the file and shared lock.
344 Close(FD);
345 Close(SharedLockFD);
346
347 if (Logger)
348 Logger->logMappedFileRegionArenaClose(Path);
349}
350
351Error MappedFileRegionArena::initializeHeader(uint64_t HeaderOffset) {
352 if (capacity() >= static_cast<uint64_t>(INT64_MAX))
353 return createStringError(EC: make_error_code(e: std::errc::protocol_error),
354 S: "arena capacity does not fit in int64_t");
355 uint64_t HeaderEndOffset = HeaderOffset + sizeof(decltype(*H));
356 if (HeaderEndOffset > capacity())
357 return createStringError(EC: make_error_code(e: std::errc::protocol_error),
358 S: "arena header extends past capacity");
359 if (!isAligned(Lhs: Align::Of<decltype(*H)>(), SizeInBytes: HeaderOffset))
360 return createStringError(EC: make_error_code(e: std::errc::protocol_error),
361 S: "arena header offset is not aligned");
362 H = reinterpret_cast<decltype(H)>(data() + HeaderOffset);
363
364 uint64_t ExistingValue = 0;
365 if (!H->BumpPtr.compare_exchange_strong(i1&: ExistingValue, i2: HeaderEndOffset))
366 if (ExistingValue < HeaderEndOffset)
367 return createStringError(
368 EC: make_error_code(e: std::errc::protocol_error),
369 S: "arena bump pointer is corrupt: 0x" +
370 utohexstr(X: ExistingValue, /*LowerCase=*/true));
371 if (Logger)
372 Logger->logMappedFileRegionArenaCreate(Path, FD: *FD, Region: data(), Capacity: capacity(),
373 Size: size());
374 return Error::success();
375}
376
377static Error createAllocatorOutOfSpaceError() {
378 return createStringError(EC: std::make_error_code(e: std::errc::not_enough_memory),
379 S: "memory mapped file allocator is out of space");
380}
381
382Expected<int64_t> MappedFileRegionArena::allocateOffset(uint64_t AllocSize) {
383 AllocSize = alignTo(Size: AllocSize, A: getAlign());
384 uint64_t OldEnd = H->BumpPtr.fetch_add(i: AllocSize);
385 uint64_t NewEnd = OldEnd + AllocSize;
386 if (LLVM_UNLIKELY(NewEnd > capacity())) {
387 // Return the allocation. If the start already passed the end, that means
388 // some other concurrent allocations already consumed all the capacity.
389 // There is no need to return the original value. If the start was not
390 // passed the end, current allocation certainly bumped it passed the end.
391 // All other allocation afterwards must have failed and current allocation
392 // is in charge of return the allocation back to a valid value.
393 if (OldEnd <= capacity())
394 (void)H->BumpPtr.exchange(i: OldEnd);
395
396 if (Logger)
397 Logger->logMappedFileRegionArenaOom(Path, Capacity: capacity(), Size: OldEnd, AllocSize);
398
399 return createAllocatorOutOfSpaceError();
400 }
401
402 uint64_t DiskSize = H->AllocatedSize;
403 if (LLVM_UNLIKELY(NewEnd > DiskSize)) {
404 uint64_t NewSize;
405 // The minimum increment is a page, but allocate more to amortize the cost.
406 constexpr uint64_t Increment = 1 * 1024 * 1024; // 1 MB
407 if (Error E = preallocateFileTail(FD: *FD, CurrentSize: DiskSize, NewSize: DiskSize + Increment)
408 .moveInto(Value&: NewSize))
409 return std::move(E);
410 assert(NewSize >= DiskSize + Increment);
411 // FIXME: on Darwin this can under-count the size if there is a race to
412 // preallocate disk, because the semantics of F_PREALLOCATE are to add bytes
413 // to the end of the file, not to allocate up to a fixed size.
414 // Any discrepancy will be resolved the next time the file is truncated and
415 // then reopend.
416 while (DiskSize < NewSize)
417 H->AllocatedSize.compare_exchange_strong(i1&: DiskSize, i2: NewSize);
418 }
419
420 if (Logger)
421 Logger->logMappedFileRegionArenaAllocate(Region: data(), Off: OldEnd, Size: AllocSize);
422
423 return OldEnd;
424}
425
426ErrorOr<FileSizeInfo> FileSizeInfo::get(sys::fs::file_t File) {
427#if LLVM_ON_UNIX && defined(MAPPED_FILE_BSIZE)
428 struct stat Status;
429 int StatRet = sys::RetryAfterSignal(Fail: -1, F&: ::fstat, As: File, As: &Status);
430 if (StatRet)
431 return errnoAsErrorCode();
432 uint64_t AllocatedSize = uint64_t(Status.st_blksize) * MAPPED_FILE_BSIZE;
433 return FileSizeInfo{.Size: uint64_t(Status.st_size), .AllocatedSize: AllocatedSize};
434#else
435 // Fallback: assume the file is fully allocated. Note: this may result in
436 // data loss on out-of-space.
437 sys::fs::file_status Status;
438 if (std::error_code EC = sys::fs::status(File, Status))
439 return EC;
440 return FileSizeInfo{Status.getSize(), Status.getSize()};
441#endif
442}
443