1 | //===-CachePruning.cpp - LLVM Cache Directory Pruning ---------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the pruning of a directory based on least recently used. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/Support/CachePruning.h" |
14 | #include "llvm/ADT/StringRef.h" |
15 | #include "llvm/Support/Debug.h" |
16 | #include "llvm/Support/Errc.h" |
17 | #include "llvm/Support/Error.h" |
18 | #include "llvm/Support/FileSystem.h" |
19 | #include "llvm/Support/Path.h" |
20 | #include "llvm/Support/WithColor.h" |
21 | #include "llvm/Support/raw_ostream.h" |
22 | |
23 | #define DEBUG_TYPE "cache-pruning" |
24 | |
25 | #include <set> |
26 | #include <system_error> |
27 | |
28 | using namespace llvm; |
29 | |
30 | namespace { |
31 | struct FileInfo { |
32 | sys::TimePoint<> Time; |
33 | uint64_t Size; |
34 | std::string Path; |
35 | |
36 | /// Used to determine which files to prune first. Also used to determine |
37 | /// set membership, so must take into account all fields. |
38 | bool operator<(const FileInfo &Other) const { |
39 | return std::tie(args: Time, args: Other.Size, args: Path) < |
40 | std::tie(args: Other.Time, args: Size, args: Other.Path); |
41 | } |
42 | }; |
43 | } // anonymous namespace |
44 | |
45 | /// Write a new timestamp file with the given path. This is used for the pruning |
46 | /// interval option. |
47 | static void writeTimestampFile(StringRef TimestampFile) { |
48 | std::error_code EC; |
49 | raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::OF_None); |
50 | } |
51 | |
52 | static Expected<std::chrono::seconds> parseDuration(StringRef Duration) { |
53 | if (Duration.empty()) |
54 | return make_error<StringError>(Args: "Duration must not be empty" , |
55 | Args: inconvertibleErrorCode()); |
56 | |
57 | StringRef NumStr = Duration.slice(Start: 0, End: Duration.size()-1); |
58 | uint64_t Num; |
59 | if (NumStr.getAsInteger(Radix: 0, Result&: Num)) |
60 | return make_error<StringError>(Args: "'" + NumStr + "' not an integer" , |
61 | Args: inconvertibleErrorCode()); |
62 | |
63 | switch (Duration.back()) { |
64 | case 's': |
65 | return std::chrono::seconds(Num); |
66 | case 'm': |
67 | return std::chrono::minutes(Num); |
68 | case 'h': |
69 | return std::chrono::hours(Num); |
70 | default: |
71 | return make_error<StringError>(Args: "'" + Duration + |
72 | "' must end with one of 's', 'm' or 'h'" , |
73 | Args: inconvertibleErrorCode()); |
74 | } |
75 | } |
76 | |
77 | Expected<CachePruningPolicy> |
78 | llvm::parseCachePruningPolicy(StringRef PolicyStr) { |
79 | CachePruningPolicy Policy; |
80 | std::pair<StringRef, StringRef> P = {"" , PolicyStr}; |
81 | while (!P.second.empty()) { |
82 | P = P.second.split(Separator: ':'); |
83 | |
84 | StringRef Key, Value; |
85 | std::tie(args&: Key, args&: Value) = P.first.split(Separator: '='); |
86 | if (Key == "prune_interval" ) { |
87 | auto DurationOrErr = parseDuration(Duration: Value); |
88 | if (!DurationOrErr) |
89 | return DurationOrErr.takeError(); |
90 | Policy.Interval = *DurationOrErr; |
91 | } else if (Key == "prune_after" ) { |
92 | auto DurationOrErr = parseDuration(Duration: Value); |
93 | if (!DurationOrErr) |
94 | return DurationOrErr.takeError(); |
95 | Policy.Expiration = *DurationOrErr; |
96 | } else if (Key == "cache_size" ) { |
97 | if (Value.back() != '%') |
98 | return make_error<StringError>(Args: "'" + Value + "' must be a percentage" , |
99 | Args: inconvertibleErrorCode()); |
100 | StringRef SizeStr = Value.drop_back(); |
101 | uint64_t Size; |
102 | if (SizeStr.getAsInteger(Radix: 0, Result&: Size)) |
103 | return make_error<StringError>(Args: "'" + SizeStr + "' not an integer" , |
104 | Args: inconvertibleErrorCode()); |
105 | if (Size > 100) |
106 | return make_error<StringError>(Args: "'" + SizeStr + |
107 | "' must be between 0 and 100" , |
108 | Args: inconvertibleErrorCode()); |
109 | Policy.MaxSizePercentageOfAvailableSpace = Size; |
110 | } else if (Key == "cache_size_bytes" ) { |
111 | uint64_t Mult = 1; |
112 | switch (tolower(c: Value.back())) { |
113 | case 'k': |
114 | Mult = 1024; |
115 | Value = Value.drop_back(); |
116 | break; |
117 | case 'm': |
118 | Mult = 1024 * 1024; |
119 | Value = Value.drop_back(); |
120 | break; |
121 | case 'g': |
122 | Mult = 1024 * 1024 * 1024; |
123 | Value = Value.drop_back(); |
124 | break; |
125 | } |
126 | uint64_t Size; |
127 | if (Value.getAsInteger(Radix: 0, Result&: Size)) |
128 | return make_error<StringError>(Args: "'" + Value + "' not an integer" , |
129 | Args: inconvertibleErrorCode()); |
130 | Policy.MaxSizeBytes = Size * Mult; |
131 | } else if (Key == "cache_size_files" ) { |
132 | if (Value.getAsInteger(Radix: 0, Result&: Policy.MaxSizeFiles)) |
133 | return make_error<StringError>(Args: "'" + Value + "' not an integer" , |
134 | Args: inconvertibleErrorCode()); |
135 | } else { |
136 | return make_error<StringError>(Args: "Unknown key: '" + Key + "'" , |
137 | Args: inconvertibleErrorCode()); |
138 | } |
139 | } |
140 | |
141 | return Policy; |
142 | } |
143 | |
144 | /// Prune the cache of files that haven't been accessed in a long time. |
145 | bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy, |
146 | const std::vector<std::unique_ptr<MemoryBuffer>> &Files) { |
147 | using namespace std::chrono; |
148 | |
149 | if (Path.empty()) |
150 | return false; |
151 | |
152 | bool isPathDir; |
153 | if (sys::fs::is_directory(path: Path, result&: isPathDir)) |
154 | return false; |
155 | |
156 | if (!isPathDir) |
157 | return false; |
158 | |
159 | Policy.MaxSizePercentageOfAvailableSpace = |
160 | std::min(a: Policy.MaxSizePercentageOfAvailableSpace, b: 100u); |
161 | |
162 | if (Policy.Expiration == seconds(0) && |
163 | Policy.MaxSizePercentageOfAvailableSpace == 0 && |
164 | Policy.MaxSizeBytes == 0 && Policy.MaxSizeFiles == 0) { |
165 | LLVM_DEBUG(dbgs() << "No pruning settings set, exit early\n" ); |
166 | // Nothing will be pruned, early exit |
167 | return false; |
168 | } |
169 | |
170 | // Try to stat() the timestamp file. |
171 | SmallString<128> TimestampFile(Path); |
172 | sys::path::append(path&: TimestampFile, a: "llvmcache.timestamp" ); |
173 | sys::fs::file_status FileStatus; |
174 | const auto CurrentTime = system_clock::now(); |
175 | if (auto EC = sys::fs::status(path: TimestampFile, result&: FileStatus)) { |
176 | if (EC == errc::no_such_file_or_directory) { |
177 | // If the timestamp file wasn't there, create one now. |
178 | writeTimestampFile(TimestampFile); |
179 | } else { |
180 | // Unknown error? |
181 | return false; |
182 | } |
183 | } else { |
184 | if (!Policy.Interval) |
185 | return false; |
186 | if (Policy.Interval != seconds(0)) { |
187 | // Check whether the time stamp is older than our pruning interval. |
188 | // If not, do nothing. |
189 | const auto TimeStampModTime = FileStatus.getLastModificationTime(); |
190 | auto TimeStampAge = CurrentTime - TimeStampModTime; |
191 | if (TimeStampAge <= *Policy.Interval) { |
192 | LLVM_DEBUG(dbgs() << "Timestamp file too recent (" |
193 | << duration_cast<seconds>(TimeStampAge).count() |
194 | << "s old), do not prune.\n" ); |
195 | return false; |
196 | } |
197 | } |
198 | // Write a new timestamp file so that nobody else attempts to prune. |
199 | // There is a benign race condition here, if two processes happen to |
200 | // notice at the same time that the timestamp is out-of-date. |
201 | writeTimestampFile(TimestampFile); |
202 | } |
203 | |
204 | // Keep track of files to delete to get below the size limit. |
205 | // Order by time of last use so that recently used files are preserved. |
206 | std::set<FileInfo> FileInfos; |
207 | uint64_t TotalSize = 0; |
208 | |
209 | // Walk the entire directory cache, looking for unused files. |
210 | std::error_code EC; |
211 | SmallString<128> CachePathNative; |
212 | sys::path::native(path: Path, result&: CachePathNative); |
213 | // Walk all of the files within this directory. |
214 | for (sys::fs::directory_iterator File(CachePathNative, EC), FileEnd; |
215 | File != FileEnd && !EC; File.increment(ec&: EC)) { |
216 | // Ignore filenames not beginning with "llvmcache-" or "Thin-". This |
217 | // includes the timestamp file as well as any files created by the user. |
218 | // This acts as a safeguard against data loss if the user specifies the |
219 | // wrong directory as their cache directory. |
220 | StringRef filename = sys::path::filename(path: File->path()); |
221 | if (!filename.starts_with(Prefix: "llvmcache-" ) && !filename.starts_with(Prefix: "Thin-" )) |
222 | continue; |
223 | |
224 | // Look at this file. If we can't stat it, there's nothing interesting |
225 | // there. |
226 | ErrorOr<sys::fs::basic_file_status> StatusOrErr = File->status(); |
227 | if (!StatusOrErr) { |
228 | LLVM_DEBUG(dbgs() << "Ignore " << File->path() << " (can't stat)\n" ); |
229 | continue; |
230 | } |
231 | |
232 | // If the file hasn't been used recently enough, delete it |
233 | const auto FileAccessTime = StatusOrErr->getLastAccessedTime(); |
234 | auto FileAge = CurrentTime - FileAccessTime; |
235 | if (Policy.Expiration != seconds(0) && FileAge > Policy.Expiration) { |
236 | LLVM_DEBUG(dbgs() << "Remove " << File->path() << " (" |
237 | << duration_cast<seconds>(FileAge).count() |
238 | << "s old)\n" ); |
239 | sys::fs::remove(path: File->path()); |
240 | continue; |
241 | } |
242 | |
243 | // Leave it here for now, but add it to the list of size-based pruning. |
244 | TotalSize += StatusOrErr->getSize(); |
245 | FileInfos.insert(x: {.Time: FileAccessTime, .Size: StatusOrErr->getSize(), .Path: File->path()}); |
246 | } |
247 | |
248 | auto FileInfo = FileInfos.begin(); |
249 | size_t NumFiles = FileInfos.size(); |
250 | |
251 | auto RemoveCacheFile = [&]() { |
252 | // Remove the file. |
253 | sys::fs::remove(path: FileInfo->Path); |
254 | // Update size |
255 | TotalSize -= FileInfo->Size; |
256 | NumFiles--; |
257 | LLVM_DEBUG(dbgs() << " - Remove " << FileInfo->Path << " (size " |
258 | << FileInfo->Size << "), new occupancy is " << TotalSize |
259 | << "%\n" ); |
260 | ++FileInfo; |
261 | }; |
262 | |
263 | // files.size() is greater the number of inputs by one. However, a timestamp |
264 | // file is created and stored in the cache directory if --thinlto-cache-policy |
265 | // option is used. Therefore, files.size() is used as ActualNums. |
266 | const size_t ActualNums = Files.size(); |
267 | if (Policy.MaxSizeFiles && ActualNums > Policy.MaxSizeFiles) |
268 | WithColor::warning() |
269 | << "ThinLTO cache pruning happens since the number of created files (" |
270 | << ActualNums << ") exceeds the maximum number of files (" |
271 | << Policy.MaxSizeFiles |
272 | << "); consider adjusting --thinlto-cache-policy\n" ; |
273 | |
274 | // Prune for number of files. |
275 | if (Policy.MaxSizeFiles) |
276 | while (NumFiles > Policy.MaxSizeFiles) |
277 | RemoveCacheFile(); |
278 | |
279 | // Prune for size now if needed |
280 | if (Policy.MaxSizePercentageOfAvailableSpace > 0 || Policy.MaxSizeBytes > 0) { |
281 | auto ErrOrSpaceInfo = sys::fs::disk_space(Path); |
282 | if (!ErrOrSpaceInfo) { |
283 | report_fatal_error(reason: "Can't get available size" ); |
284 | } |
285 | sys::fs::space_info SpaceInfo = ErrOrSpaceInfo.get(); |
286 | auto AvailableSpace = TotalSize + SpaceInfo.free; |
287 | |
288 | if (Policy.MaxSizePercentageOfAvailableSpace == 0) |
289 | Policy.MaxSizePercentageOfAvailableSpace = 100; |
290 | if (Policy.MaxSizeBytes == 0) |
291 | Policy.MaxSizeBytes = AvailableSpace; |
292 | auto TotalSizeTarget = std::min<uint64_t>( |
293 | a: AvailableSpace * Policy.MaxSizePercentageOfAvailableSpace / 100ull, |
294 | b: Policy.MaxSizeBytes); |
295 | |
296 | LLVM_DEBUG(dbgs() << "Occupancy: " << ((100 * TotalSize) / AvailableSpace) |
297 | << "% target is: " |
298 | << Policy.MaxSizePercentageOfAvailableSpace << "%, " |
299 | << Policy.MaxSizeBytes << " bytes\n" ); |
300 | |
301 | size_t ActualSizes = 0; |
302 | for (const auto &File : Files) |
303 | if (File) |
304 | ActualSizes += File->getBufferSize(); |
305 | |
306 | if (ActualSizes > TotalSizeTarget) |
307 | WithColor::warning() |
308 | << "ThinLTO cache pruning happens since the total size of the cache " |
309 | "files consumed by the current link job (" |
310 | << ActualSizes << " bytes) exceeds maximum cache size (" |
311 | << TotalSizeTarget |
312 | << " bytes); consider adjusting --thinlto-cache-policy\n" ; |
313 | |
314 | // Remove the oldest accessed files first, till we get below the threshold. |
315 | while (TotalSize > TotalSizeTarget && FileInfo != FileInfos.end()) |
316 | RemoveCacheFile(); |
317 | } |
318 | return true; |
319 | } |
320 | |