//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file Implements MappedFileRegionArena.
///
/// A bump pointer allocator, backed by a memory-mapped file.
///
/// The effect we want is:
///
/// Step 1. If it doesn't exist, create the file with an initial size.
/// Step 2. Reserve virtual memory large enough for the max file size.
/// Step 3. Map the file into memory in the reserved region.
/// Step 4. Increase the file size and update the mapping when necessary.
///
/// However, updating the mapping is challenging when it needs to work portably,
/// and across multiple processes without locking for every read. Our current
/// implementation handles the steps above in the following ways:
///
/// Step 1. Use \ref sys::fs::resize_file_sparse to grow the file to its max
///         size (typically several GB). If the file system doesn't support
///         sparse files, this may return a fully allocated file.
/// Step 2. Call \ref sys::fs::mapped_file_region to map the entire file.
/// Step 3. [Automatic as part of step 2.]
/// Step 4. If supported, use \c fallocate or similar APIs to ensure the file
///         system storage for the sparse file so we won't end up with a
///         partial file if the disk is out of space.
///
/// Additionally, we attempt to resize the file to its actual data size when
/// closing the mapping, if this is the only concurrent instance. This is done
/// using file locks. Shrinking the file mitigates problems with having large
/// files: on filesystems without sparse files it avoids unnecessary space use;
/// it also avoids allocating the full size if another process copies the file,
/// which typically loses sparseness. These mitigations only work while the file
/// is not in use.
///
/// The capacity and the header offset are determined by the first user of the
/// MappedFileRegionArena instance and any future mismatched value from the
/// original will result in an error on creation.
///
/// To support resizing, we use two separate file locks:
/// 1. We use a shared reader lock on a ".shared" file until destruction.
/// 2. We use a lock on the main file during initialization - shared to check
///    the status, upgraded to exclusive to resize/initialize the file.
///
/// Then during destruction we attempt to get exclusive access on (1), which
/// requires no concurrent readers. If so, we shrink the file. Using two
/// separate locks simplifies the implementation and enables it to work on
/// platforms (e.g. Windows) where a shared/reader lock prevents writing.
//===----------------------------------------------------------------------===//
| 54 | |
| 55 | #include "llvm/CAS/MappedFileRegionArena.h" |
| 56 | #include "OnDiskCommon.h" |
| 57 | #include "llvm/ADT/StringExtras.h" |
| 58 | #include "llvm/CAS/OnDiskCASLogger.h" |
| 59 | |
| 60 | #if LLVM_ON_UNIX |
| 61 | #include <sys/stat.h> |
| 62 | #if __has_include(<sys/param.h>) |
| 63 | #include <sys/param.h> |
| 64 | #endif |
| 65 | #ifdef DEV_BSIZE |
| 66 | #define MAPPED_FILE_BSIZE DEV_BSIZE |
| 67 | #elif __linux__ |
| 68 | #define MAPPED_FILE_BSIZE 512 |
| 69 | #endif |
| 70 | #endif |
| 71 | |
| 72 | using namespace llvm; |
| 73 | using namespace llvm::cas; |
| 74 | using namespace llvm::cas::ondisk; |
| 75 | |
| 76 | namespace { |
| 77 | struct FileWithLock { |
| 78 | std::string Path; |
| 79 | int FD = -1; |
| 80 | std::optional<sys::fs::LockKind> Locked; |
| 81 | |
| 82 | private: |
| 83 | FileWithLock(std::string PathStr, Error &E) : Path(std::move(PathStr)) { |
| 84 | ErrorAsOutParameter EOP(&E); |
| 85 | if (std::error_code EC = sys::fs::openFileForReadWrite( |
| 86 | Name: Path, ResultFD&: FD, Disp: sys::fs::CD_OpenAlways, Flags: sys::fs::OF_None)) |
| 87 | E = createFileError(F: Path, EC); |
| 88 | } |
| 89 | |
| 90 | public: |
| 91 | FileWithLock(FileWithLock &) = delete; |
| 92 | FileWithLock(FileWithLock &&Other) { |
| 93 | Path = std::move(Other.Path); |
| 94 | FD = Other.FD; |
| 95 | Other.FD = -1; |
| 96 | Locked = Other.Locked; |
| 97 | Other.Locked = std::nullopt; |
| 98 | } |
| 99 | |
| 100 | ~FileWithLock() { consumeError(Err: unlock()); } |
| 101 | |
| 102 | static Expected<FileWithLock> open(StringRef Path) { |
| 103 | Error E = Error::success(); |
| 104 | FileWithLock Result(Path.str(), E); |
| 105 | if (E) |
| 106 | return std::move(E); |
| 107 | return std::move(Result); |
| 108 | } |
| 109 | |
| 110 | Error lock(sys::fs::LockKind LK) { |
| 111 | assert(!Locked && "already locked" ); |
| 112 | if (std::error_code EC = lockFileThreadSafe(FD, Kind: LK)) |
| 113 | return createFileError(F: Path, EC); |
| 114 | Locked = LK; |
| 115 | return Error::success(); |
| 116 | } |
| 117 | |
| 118 | Error switchLock(sys::fs::LockKind LK) { |
| 119 | assert(Locked && "not locked" ); |
| 120 | if (auto E = unlock()) |
| 121 | return E; |
| 122 | |
| 123 | return lock(LK); |
| 124 | } |
| 125 | |
| 126 | Error unlock() { |
| 127 | if (Locked) { |
| 128 | Locked = std::nullopt; |
| 129 | if (std::error_code EC = unlockFileThreadSafe(FD)) |
| 130 | return createFileError(F: Path, EC); |
| 131 | } |
| 132 | return Error::success(); |
| 133 | } |
| 134 | |
| 135 | // Return true if succeed to lock the file exclusively. |
| 136 | bool tryLockExclusive() { |
| 137 | assert(!Locked && "can only try to lock if not locked" ); |
| 138 | if (tryLockFileThreadSafe(FD) == std::error_code()) { |
| 139 | Locked = sys::fs::LockKind::Exclusive; |
| 140 | return true; |
| 141 | } |
| 142 | |
| 143 | return false; |
| 144 | } |
| 145 | |
| 146 | // Release the lock so it will not be unlocked on destruction. |
| 147 | void release() { |
| 148 | Locked = std::nullopt; |
| 149 | FD = -1; |
| 150 | } |
| 151 | }; |
| 152 | |
/// Snapshot of a file's logical size and its on-disk allocated size.
struct FileSizeInfo {
  uint64_t Size;          // Logical file size in bytes.
  uint64_t AllocatedSize; // Bytes actually allocated on disk; may be smaller
                          // than Size for sparse files.

  static ErrorOr<FileSizeInfo> get(sys::fs::file_t File);
};
| 159 | } // end anonymous namespace |
| 160 | |
| 161 | Expected<MappedFileRegionArena> MappedFileRegionArena::create( |
| 162 | const Twine &Path, uint64_t Capacity, uint64_t , |
| 163 | std::shared_ptr<ondisk::OnDiskCASLogger> Logger, |
| 164 | function_ref<Error(MappedFileRegionArena &)> NewFileConstructor) { |
| 165 | uint64_t MinCapacity = HeaderOffset + sizeof(Header); |
| 166 | if (Capacity < MinCapacity) |
| 167 | return createStringError( |
| 168 | EC: std::make_error_code(e: std::errc::invalid_argument), |
| 169 | S: "capacity is too small to hold MappedFileRegionArena" ); |
| 170 | |
| 171 | MappedFileRegionArena Result; |
| 172 | Result.Path = Path.str(); |
| 173 | Result.Logger = std::move(Logger); |
| 174 | |
| 175 | // Open the support file. See file comment for details of locking scheme. |
| 176 | SmallString<128> SharedFilePath(Result.Path); |
| 177 | SharedFilePath.append(RHS: ".shared" ); |
| 178 | |
| 179 | auto SharedFileLock = FileWithLock::open(Path: SharedFilePath); |
| 180 | if (!SharedFileLock) |
| 181 | return SharedFileLock.takeError(); |
| 182 | Result.SharedLockFD = SharedFileLock->FD; |
| 183 | |
| 184 | // Take shared/reader lock that will be held until destroyImpl if construction |
| 185 | // is successful. |
| 186 | if (auto E = SharedFileLock->lock(LK: sys::fs::LockKind::Shared)) |
| 187 | return std::move(E); |
| 188 | |
| 189 | // Take shared/reader lock for initialization. |
| 190 | auto MainFile = FileWithLock::open(Path: Result.Path); |
| 191 | if (!MainFile) |
| 192 | return MainFile.takeError(); |
| 193 | if (Error E = MainFile->lock(LK: sys::fs::LockKind::Shared)) |
| 194 | return std::move(E); |
| 195 | Result.FD = MainFile->FD; |
| 196 | |
| 197 | sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD: MainFile->FD); |
| 198 | auto FileSize = FileSizeInfo::get(File); |
| 199 | if (!FileSize) |
| 200 | return createFileError(F: Result.Path, EC: FileSize.getError()); |
| 201 | |
| 202 | // If the size is smaller than the capacity, we need to initialize the file. |
| 203 | // It maybe empty, or may have been shrunk during a previous close. |
| 204 | if (FileSize->Size < Capacity) { |
| 205 | // Lock the file exclusively so only one process will do the initialization. |
| 206 | if (Error E = MainFile->switchLock(LK: sys::fs::LockKind::Exclusive)) |
| 207 | return std::move(E); |
| 208 | // Retrieve the current size now that we have exclusive access. |
| 209 | FileSize = FileSizeInfo::get(File); |
| 210 | if (!FileSize) |
| 211 | return createFileError(F: Result.Path, EC: FileSize.getError()); |
| 212 | } |
| 213 | |
| 214 | if (FileSize->Size >= MinCapacity) { |
| 215 | // File is initialized. Read out the header to check for capacity and |
| 216 | // offset. |
| 217 | SmallVector<char, sizeof(Header)> (sizeof(Header)); |
| 218 | auto Size = sys::fs::readNativeFileSlice(FileHandle: File, Buf: HeaderContent, Offset: HeaderOffset); |
| 219 | if (!Size) |
| 220 | return Size.takeError(); |
| 221 | |
| 222 | Header H; |
| 223 | memcpy(dest: &H, src: HeaderContent.data(), n: sizeof(H)); |
| 224 | if (H.HeaderOffset != HeaderOffset) |
| 225 | return createStringError( |
| 226 | EC: std::make_error_code(e: std::errc::invalid_argument), |
| 227 | S: "specified header offset (" + utostr(X: HeaderOffset) + |
| 228 | ") does not match existing config (" + utostr(X: H.HeaderOffset) + |
| 229 | ")" ); |
| 230 | |
| 231 | // If the capacity doesn't match, use the existing capacity instead. |
| 232 | if (H.Capacity != Capacity) |
| 233 | Capacity = H.Capacity; |
| 234 | } |
| 235 | |
| 236 | // If the size is smaller than capacity, we need to resize the file. |
| 237 | if (FileSize->Size < Capacity) { |
| 238 | assert(MainFile->Locked == sys::fs::LockKind::Exclusive); |
| 239 | if (std::error_code EC = |
| 240 | sys::fs::resize_file_sparse(FD: MainFile->FD, Size: Capacity)) |
| 241 | return createFileError(F: Result.Path, EC); |
| 242 | if (Result.Logger) |
| 243 | Result.Logger->logMappedFileRegionArenaResizeFile( |
| 244 | Path: Result.Path, Before: FileSize->Size, After: Capacity); |
| 245 | } |
| 246 | |
| 247 | // Create the mapped region. |
| 248 | { |
| 249 | std::error_code EC; |
| 250 | sys::fs::mapped_file_region Map( |
| 251 | File, sys::fs::mapped_file_region::readwrite, Capacity, 0, EC); |
| 252 | if (EC) |
| 253 | return createFileError(F: Result.Path, EC); |
| 254 | Result.Region = std::move(Map); |
| 255 | } |
| 256 | |
| 257 | // Initialize the header. |
| 258 | Result.initializeHeader(HeaderOffset); |
| 259 | |
| 260 | if (FileSize->Size < MinCapacity) { |
| 261 | assert(MainFile->Locked == sys::fs::LockKind::Exclusive); |
| 262 | // If we need to fully initialize the file, call NewFileConstructor. |
| 263 | if (Error E = NewFileConstructor(Result)) |
| 264 | return std::move(E); |
| 265 | |
| 266 | Result.H->HeaderOffset.exchange(i: HeaderOffset); |
| 267 | Result.H->Capacity.exchange(i: Capacity); |
| 268 | } |
| 269 | |
| 270 | if (MainFile->Locked == sys::fs::LockKind::Exclusive) { |
| 271 | // If holding an exclusive lock, we might have resized the file and |
| 272 | // performed some read/write to the file. Query the file size again to make |
| 273 | // sure everything is up-to-date. Otherwise, FileSize info is already |
| 274 | // up-to-date. |
| 275 | FileSize = FileSizeInfo::get(File); |
| 276 | if (!FileSize) |
| 277 | return createFileError(F: Result.Path, EC: FileSize.getError()); |
| 278 | Result.H->AllocatedSize.exchange(i: FileSize->AllocatedSize); |
| 279 | } |
| 280 | |
| 281 | // Release the shared lock so it can be closed in destoryImpl(). |
| 282 | SharedFileLock->release(); |
| 283 | return std::move(Result); |
| 284 | } |
| 285 | |
| 286 | void MappedFileRegionArena::destroyImpl() { |
| 287 | if (!FD) |
| 288 | return; |
| 289 | |
| 290 | // Drop the shared lock indicating we are no longer accessing the file. |
| 291 | if (SharedLockFD) |
| 292 | (void)unlockFileThreadSafe(FD: *SharedLockFD); |
| 293 | |
| 294 | // Attempt to truncate the file if we can get exclusive access. Ignore any |
| 295 | // errors. |
| 296 | if (H) { |
| 297 | assert(SharedLockFD && "Must have shared lock file open" ); |
| 298 | if (tryLockFileThreadSafe(FD: *SharedLockFD) == std::error_code()) { |
| 299 | size_t Size = size(); |
| 300 | size_t Capacity = capacity(); |
| 301 | // sync to file system to make sure all contents are up-to-date. |
| 302 | (void)Region.sync(); |
| 303 | // unmap the file before resizing since that is the requirement for |
| 304 | // some platforms. |
| 305 | Region.unmap(); |
| 306 | (void)sys::fs::resize_file(FD: *FD, Size); |
| 307 | (void)unlockFileThreadSafe(FD: *SharedLockFD); |
| 308 | if (Logger) |
| 309 | Logger->logMappedFileRegionArenaResizeFile(Path, Before: Capacity, After: Size); |
| 310 | } |
| 311 | } |
| 312 | |
| 313 | auto Close = [](std::optional<int> &FD) { |
| 314 | if (FD) { |
| 315 | sys::fs::file_t File = sys::fs::convertFDToNativeFile(FD: *FD); |
| 316 | sys::fs::closeFile(F&: File); |
| 317 | FD = std::nullopt; |
| 318 | } |
| 319 | }; |
| 320 | |
| 321 | // Close the file and shared lock. |
| 322 | Close(FD); |
| 323 | Close(SharedLockFD); |
| 324 | |
| 325 | if (Logger) |
| 326 | Logger->logMappedFileRegionArenaClose(Path); |
| 327 | } |
| 328 | |
| 329 | void MappedFileRegionArena::(uint64_t ) { |
| 330 | assert(capacity() < (uint64_t)INT64_MAX && "capacity must fit in int64_t" ); |
| 331 | uint64_t = HeaderOffset + sizeof(decltype(*H)); |
| 332 | assert(HeaderEndOffset <= capacity() && |
| 333 | "Expected end offset to be pre-allocated" ); |
| 334 | assert(isAligned(Align::Of<decltype(*H)>(), HeaderOffset) && |
| 335 | "Expected end offset to be aligned" ); |
| 336 | H = reinterpret_cast<decltype(H)>(data() + HeaderOffset); |
| 337 | |
| 338 | uint64_t ExistingValue = 0; |
| 339 | if (!H->BumpPtr.compare_exchange_strong(i1&: ExistingValue, i2: HeaderEndOffset)) |
| 340 | assert(ExistingValue >= HeaderEndOffset && |
| 341 | "Expected 0, or past the end of the header itself" ); |
| 342 | if (Logger) |
| 343 | Logger->logMappedFileRegionArenaCreate(Path, FD: *FD, Region: data(), Capacity: capacity(), |
| 344 | Size: size()); |
| 345 | } |
| 346 | |
| 347 | static Error createAllocatorOutOfSpaceError() { |
| 348 | return createStringError(EC: std::make_error_code(e: std::errc::not_enough_memory), |
| 349 | S: "memory mapped file allocator is out of space" ); |
| 350 | } |
| 351 | |
| 352 | Expected<int64_t> MappedFileRegionArena::allocateOffset(uint64_t AllocSize) { |
| 353 | AllocSize = alignTo(Size: AllocSize, A: getAlign()); |
| 354 | uint64_t OldEnd = H->BumpPtr.fetch_add(i: AllocSize); |
| 355 | uint64_t NewEnd = OldEnd + AllocSize; |
| 356 | if (LLVM_UNLIKELY(NewEnd > capacity())) { |
| 357 | // Return the allocation. If the start already passed the end, that means |
| 358 | // some other concurrent allocations already consumed all the capacity. |
| 359 | // There is no need to return the original value. If the start was not |
| 360 | // passed the end, current allocation certainly bumped it passed the end. |
| 361 | // All other allocation afterwards must have failed and current allocation |
| 362 | // is in charge of return the allocation back to a valid value. |
| 363 | if (OldEnd <= capacity()) |
| 364 | (void)H->BumpPtr.exchange(i: OldEnd); |
| 365 | |
| 366 | if (Logger) |
| 367 | Logger->logMappedFileRegionArenaOom(Path, Capacity: capacity(), Size: OldEnd, AllocSize); |
| 368 | |
| 369 | return createAllocatorOutOfSpaceError(); |
| 370 | } |
| 371 | |
| 372 | uint64_t DiskSize = H->AllocatedSize; |
| 373 | if (LLVM_UNLIKELY(NewEnd > DiskSize)) { |
| 374 | uint64_t NewSize; |
| 375 | // The minimum increment is a page, but allocate more to amortize the cost. |
| 376 | constexpr uint64_t Increment = 1 * 1024 * 1024; // 1 MB |
| 377 | if (Error E = preallocateFileTail(FD: *FD, CurrentSize: DiskSize, NewSize: DiskSize + Increment) |
| 378 | .moveInto(Value&: NewSize)) |
| 379 | return std::move(E); |
| 380 | assert(NewSize >= DiskSize + Increment); |
| 381 | // FIXME: on Darwin this can under-count the size if there is a race to |
| 382 | // preallocate disk, because the semantics of F_PREALLOCATE are to add bytes |
| 383 | // to the end of the file, not to allocate up to a fixed size. |
| 384 | // Any discrepancy will be resolved the next time the file is truncated and |
| 385 | // then reopend. |
| 386 | while (DiskSize < NewSize) |
| 387 | H->AllocatedSize.compare_exchange_strong(i1&: DiskSize, i2: NewSize); |
| 388 | } |
| 389 | |
| 390 | if (Logger) |
| 391 | Logger->logMappedFileRegionArenaAllocate(Region: data(), Off: OldEnd, Size: AllocSize); |
| 392 | |
| 393 | return OldEnd; |
| 394 | } |
| 395 | |
| 396 | ErrorOr<FileSizeInfo> FileSizeInfo::get(sys::fs::file_t File) { |
| 397 | #if LLVM_ON_UNIX && defined(MAPPED_FILE_BSIZE) |
| 398 | struct stat Status; |
| 399 | int StatRet = ::fstat(fd: File, buf: &Status); |
| 400 | if (StatRet) |
| 401 | return errnoAsErrorCode(); |
| 402 | uint64_t AllocatedSize = uint64_t(Status.st_blksize) * MAPPED_FILE_BSIZE; |
| 403 | return FileSizeInfo{.Size: uint64_t(Status.st_size), .AllocatedSize: AllocatedSize}; |
| 404 | #else |
| 405 | // Fallback: assume the file is fully allocated. Note: this may result in |
| 406 | // data loss on out-of-space. |
| 407 | sys::fs::file_status Status; |
| 408 | if (std::error_code EC = sys::fs::status(File, Status)) |
| 409 | return EC; |
| 410 | return FileSizeInfo{Status.getSize(), Status.getSize()}; |
| 411 | #endif |
| 412 | } |
| 413 | |