1//===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10///
/// This file contains several definitions for the debuginfod client and server.
/// For the client, this file defines the getCachedOrDownloadArtifact function
/// and its per-artifact wrappers. For the server, this file defines the
/// DebuginfodLogEntry and DebuginfodServer structs, as well as the
/// DebuginfodLog, DebuginfodCollection classes. The getCachedOrDownload*
/// functions retrieve any of the three supported artifact types (executable,
/// debuginfo, source file) associated with a build-id from debuginfod servers.
/// If a source file is to be fetched, its absolute path must be specified in
/// the SourceFilePath argument to getCachedOrDownloadSource. The
/// DebuginfodLogEntry, DebuginfodLog, and DebuginfodCollection are used by the
/// DebuginfodServer to scan the local filesystem for binaries and serve the
/// debuginfod protocol.
21///
22//===----------------------------------------------------------------------===//
23
24#include "llvm/Debuginfod/Debuginfod.h"
25#include "llvm/ADT/StringExtras.h"
26#include "llvm/ADT/StringRef.h"
27#include "llvm/BinaryFormat/Magic.h"
28#include "llvm/DebugInfo/DWARF/DWARFContext.h"
29#include "llvm/DebugInfo/Symbolize/Symbolize.h"
30#include "llvm/HTTP/HTTPClient.h"
31#include "llvm/HTTP/StreamedHTTPResponseHandler.h"
32#include "llvm/Object/BuildID.h"
33#include "llvm/Object/ELFObjectFile.h"
34#include "llvm/Support/CachePruning.h"
35#include "llvm/Support/Caching.h"
36#include "llvm/Support/Errc.h"
37#include "llvm/Support/Error.h"
38#include "llvm/Support/FileUtilities.h"
39#include "llvm/Support/MemoryBuffer.h"
40#include "llvm/Support/Path.h"
41#include "llvm/Support/ThreadPool.h"
42#include "llvm/Support/xxhash.h"
43
44#include <atomic>
45#include <optional>
46#include <thread>
47
48namespace llvm {
49
50using llvm::object::BuildIDRef;
51
52namespace {
53std::optional<SmallVector<StringRef>> DebuginfodUrls;
54// Many Readers/Single Writer lock protecting the global debuginfod URL list.
55llvm::sys::RWMutex UrlsMutex;
56} // namespace
57
58std::string getDebuginfodCacheKey(llvm::StringRef S) {
59 return utostr(X: xxh3_64bits(data: S));
60}
61
62// Returns a binary BuildID as a normalized hex string.
63// Uses lowercase for compatibility with common debuginfod servers.
64static std::string buildIDToString(BuildIDRef ID) {
65 return llvm::toHex(Input: ID, /*LowerCase=*/true);
66}
67
68bool canUseDebuginfod() {
69 return HTTPClient::isAvailable() && !getDefaultDebuginfodUrls().empty();
70}
71
72SmallVector<StringRef> getDefaultDebuginfodUrls() {
73 std::shared_lock<llvm::sys::RWMutex> ReadGuard(UrlsMutex);
74 if (!DebuginfodUrls) {
75 // Only read from the environment variable if the user hasn't already
76 // set the value.
77 ReadGuard.unlock();
78 std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex);
79 DebuginfodUrls = SmallVector<StringRef>();
80 if (const char *DebuginfodUrlsEnv = std::getenv(name: "DEBUGINFOD_URLS")) {
81 StringRef(DebuginfodUrlsEnv)
82 .split(A&: DebuginfodUrls.value(), Separator: " ", MaxSplit: -1, KeepEmpty: false);
83 }
84 WriteGuard.unlock();
85 ReadGuard.lock();
86 }
87 return DebuginfodUrls.value();
88}
89
90// Set the default debuginfod URL list, override the environment variable.
91void setDefaultDebuginfodUrls(const SmallVector<StringRef> &URLs) {
92 std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex);
93 DebuginfodUrls = URLs;
94}
95
96/// Finds a default local file caching directory for the debuginfod client,
97/// first checking DEBUGINFOD_CACHE_PATH.
98Expected<std::string> getDefaultDebuginfodCacheDirectory() {
99 if (const char *CacheDirectoryEnv = std::getenv(name: "DEBUGINFOD_CACHE_PATH"))
100 return CacheDirectoryEnv;
101
102 SmallString<64> CacheDirectory;
103 if (!sys::path::cache_directory(result&: CacheDirectory))
104 return createStringError(
105 EC: errc::io_error, S: "Unable to determine appropriate cache directory.");
106 sys::path::append(path&: CacheDirectory, a: "llvm-debuginfod", b: "client");
107 return std::string(CacheDirectory);
108}
109
110std::chrono::milliseconds getDefaultDebuginfodTimeout() {
111 long Timeout;
112 const char *DebuginfodTimeoutEnv = std::getenv(name: "DEBUGINFOD_TIMEOUT");
113 if (DebuginfodTimeoutEnv &&
114 to_integer(S: StringRef(DebuginfodTimeoutEnv).trim(), Num&: Timeout, Base: 10))
115 return std::chrono::milliseconds(Timeout * 1000);
116
117 return std::chrono::milliseconds(90 * 1000);
118}
119
120/// The following functions fetch a debuginfod artifact to a file in a local
121/// cache and return the cached file path. They first search the local cache,
122/// followed by the debuginfod servers.
123
124std::string getDebuginfodSourceUrlPath(BuildIDRef ID,
125 StringRef SourceFilePath) {
126 SmallString<64> UrlPath;
127 sys::path::append(path&: UrlPath, style: sys::path::Style::posix, a: "buildid",
128 b: buildIDToString(ID), c: "source",
129 d: sys::path::convert_to_slash(path: SourceFilePath));
130 return std::string(UrlPath);
131}
132
133Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID,
134 StringRef SourceFilePath) {
135 std::string UrlPath = getDebuginfodSourceUrlPath(ID, SourceFilePath);
136 return getCachedOrDownloadArtifact(UniqueKey: getDebuginfodCacheKey(S: UrlPath), UrlPath);
137}
138
139std::string getDebuginfodExecutableUrlPath(BuildIDRef ID) {
140 SmallString<64> UrlPath;
141 sys::path::append(path&: UrlPath, style: sys::path::Style::posix, a: "buildid",
142 b: buildIDToString(ID), c: "executable");
143 return std::string(UrlPath);
144}
145
146Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) {
147 std::string UrlPath = getDebuginfodExecutableUrlPath(ID);
148 return getCachedOrDownloadArtifact(UniqueKey: getDebuginfodCacheKey(S: UrlPath), UrlPath);
149}
150
151std::string getDebuginfodDebuginfoUrlPath(BuildIDRef ID) {
152 SmallString<64> UrlPath;
153 sys::path::append(path&: UrlPath, style: sys::path::Style::posix, a: "buildid",
154 b: buildIDToString(ID), c: "debuginfo");
155 return std::string(UrlPath);
156}
157
158Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) {
159 std::string UrlPath = getDebuginfodDebuginfoUrlPath(ID);
160 return getCachedOrDownloadArtifact(UniqueKey: getDebuginfodCacheKey(S: UrlPath), UrlPath);
161}
162
163// General fetching function.
164Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,
165 StringRef UrlPath) {
166 SmallString<10> CacheDir;
167
168 Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory();
169 if (!CacheDirOrErr)
170 return CacheDirOrErr.takeError();
171 CacheDir = *CacheDirOrErr;
172
173 return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDirectoryPath: CacheDir,
174 DebuginfodUrls: getDefaultDebuginfodUrls(),
175 Timeout: getDefaultDebuginfodTimeout());
176}
177
178// An over-accepting simplification of the HTTP RFC 7230 spec.
179static bool isHeader(StringRef S) {
180 StringRef Name;
181 StringRef Value;
182 std::tie(args&: Name, args&: Value) = S.split(Separator: ':');
183 if (Name.empty() || Value.empty())
184 return false;
185 return all_of(Range&: Name, P: [](char C) { return llvm::isPrint(C) && C != ' '; }) &&
186 all_of(Range&: Value, P: [](char C) { return llvm::isPrint(C) || C == '\t'; });
187}
188
189static SmallVector<std::string, 0> getHeaders() {
190 const char *Filename = getenv(name: "DEBUGINFOD_HEADERS_FILE");
191 if (!Filename)
192 return {};
193 ErrorOr<std::unique_ptr<MemoryBuffer>> HeadersFile =
194 MemoryBuffer::getFile(Filename, /*IsText=*/true);
195 if (!HeadersFile)
196 return {};
197
198 SmallVector<std::string, 0> Headers;
199 uint64_t LineNumber = 0;
200 for (StringRef Line : llvm::split(Str: (*HeadersFile)->getBuffer(), Separator: '\n')) {
201 LineNumber++;
202 Line.consume_back(Suffix: "\r");
203 if (!isHeader(S: Line)) {
204 if (!all_of(Range&: Line, P: llvm::isSpace))
205 WithColor::warning()
206 << "could not parse debuginfod header: " << Filename << ':'
207 << LineNumber << '\n';
208 continue;
209 }
210 Headers.emplace_back(Args&: Line);
211 }
212 return Headers;
213}
214
215Expected<std::string> getCachedOrDownloadArtifact(
216 StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
217 ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) {
218 SmallString<64> AbsCachedArtifactPath;
219 sys::path::append(path&: AbsCachedArtifactPath, a: CacheDirectoryPath,
220 b: "llvmcache-" + UniqueKey);
221
222 Expected<FileCache> CacheOrErr =
223 localCache(CacheNameRef: "Debuginfod-client", TempFilePrefixRef: ".debuginfod-client", CacheDirectoryPathRef: CacheDirectoryPath);
224 if (!CacheOrErr)
225 return CacheOrErr.takeError();
226
227 FileCache Cache = *CacheOrErr;
228 // We choose an arbitrary Task parameter as we do not make use of it.
229 unsigned Task = 0;
230 Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey, "");
231 if (!CacheAddStreamOrErr)
232 return CacheAddStreamOrErr.takeError();
233 AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
234 if (!CacheAddStream)
235 return std::string(AbsCachedArtifactPath);
236 // The artifact was not found in the local cache, query the debuginfod
237 // servers.
238 if (!HTTPClient::isAvailable())
239 return createStringError(EC: errc::io_error,
240 S: "No working HTTP client is available.");
241
242 if (!HTTPClient::IsInitialized)
243 return createStringError(
244 EC: errc::io_error,
245 S: "A working HTTP client is available, but it is not initialized. To "
246 "allow Debuginfod to make HTTP requests, call HTTPClient::initialize() "
247 "at the beginning of main.");
248
249 HTTPClient Client;
250 Client.setTimeout(Timeout);
251 for (StringRef ServerUrl : DebuginfodUrls) {
252 SmallString<64> ArtifactUrl;
253 sys::path::append(path&: ArtifactUrl, style: sys::path::Style::posix, a: ServerUrl, b: UrlPath);
254
255 // Perform the HTTP request and if successful, write the response body to
256 // the cache.
257 {
258 StreamedHTTPResponseHandler Handler(
259 [&]() { return CacheAddStream(Task, ""); }, Client);
260 HTTPRequest Request(ArtifactUrl);
261 Request.Headers = getHeaders();
262 Error Err = Client.perform(Request, Handler);
263 if (Err)
264 return std::move(Err);
265 if ((Err = Handler.commit()))
266 return std::move(Err);
267
268 unsigned Code = Client.responseCode();
269 if (Code && Code != 200)
270 continue;
271 }
272
273 Expected<CachePruningPolicy> PruningPolicyOrErr =
274 parseCachePruningPolicy(PolicyStr: std::getenv(name: "DEBUGINFOD_CACHE_POLICY"));
275 if (!PruningPolicyOrErr)
276 return PruningPolicyOrErr.takeError();
277
278 Expected<bool> PrunedOrErr =
279 pruneCache(Path: CacheDirectoryPath, Policy: *PruningPolicyOrErr);
280 // Log the error but continue execution: failure to prune the cache is not
281 // fatal.
282 if (!PrunedOrErr)
283 logAllUnhandledErrors(E: PrunedOrErr.takeError(), OS&: WithColor::warning());
284
285 // Return the path to the artifact on disk.
286 return std::string(AbsCachedArtifactPath);
287 }
288
289 return createStringError(EC: errc::argument_out_of_domain, S: "build id not found");
290}
291
292DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message)
293 : Message(Message.str()) {}
294
295void DebuginfodLog::push(const Twine &Message) {
296 push(Entry: DebuginfodLogEntry(Message));
297}
298
299void DebuginfodLog::push(DebuginfodLogEntry Entry) {
300 {
301 std::lock_guard<std::mutex> Guard(QueueMutex);
302 LogEntryQueue.push(x: Entry);
303 }
304 QueueCondition.notify_one();
305}
306
307DebuginfodLogEntry DebuginfodLog::pop() {
308 {
309 std::unique_lock<std::mutex> Guard(QueueMutex);
310 // Wait for messages to be pushed into the queue.
311 QueueCondition.wait(lock&: Guard, p: [&] { return !LogEntryQueue.empty(); });
312 }
313 std::lock_guard<std::mutex> Guard(QueueMutex);
314 if (!LogEntryQueue.size())
315 llvm_unreachable("Expected message in the queue.");
316
317 DebuginfodLogEntry Entry = LogEntryQueue.front();
318 LogEntryQueue.pop();
319 return Entry;
320}
321
322DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,
323 DebuginfodLog &Log,
324 ThreadPoolInterface &Pool,
325 double MinInterval)
326 : Log(Log), Pool(Pool), MinInterval(MinInterval) {
327 for (StringRef Path : PathsRef)
328 Paths.push_back(Elt: Path.str());
329}
330
331Error DebuginfodCollection::update() {
332 std::lock_guard<sys::Mutex> Guard(UpdateMutex);
333 if (UpdateTimer.isRunning())
334 UpdateTimer.stopTimer();
335 UpdateTimer.clear();
336 for (const std::string &Path : Paths) {
337 Log.push(Message: "Updating binaries at path " + Path);
338 if (Error Err = findBinaries(Path))
339 return Err;
340 }
341 Log.push(Message: "Updated collection");
342 UpdateTimer.startTimer();
343 return Error::success();
344}
345
346Expected<bool> DebuginfodCollection::updateIfStale() {
347 if (!UpdateTimer.isRunning())
348 return false;
349 UpdateTimer.stopTimer();
350 double Time = UpdateTimer.getTotalTime().getWallTime();
351 UpdateTimer.startTimer();
352 if (Time < MinInterval)
353 return false;
354 if (Error Err = update())
355 return std::move(Err);
356 return true;
357}
358
359Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) {
360 while (true) {
361 if (Error Err = update())
362 return Err;
363 std::this_thread::sleep_for(rtime: Interval);
364 }
365 llvm_unreachable("updateForever loop should never end");
366}
367
368static bool hasELFMagic(StringRef FilePath) {
369 file_magic Type;
370 std::error_code EC = identify_magic(path: FilePath, result&: Type);
371 if (EC)
372 return false;
373 switch (Type) {
374 case file_magic::elf:
375 case file_magic::elf_relocatable:
376 case file_magic::elf_executable:
377 case file_magic::elf_shared_object:
378 case file_magic::elf_core:
379 return true;
380 default:
381 return false;
382 }
383}
384
385Error DebuginfodCollection::findBinaries(StringRef Path) {
386 std::error_code EC;
387 sys::fs::recursive_directory_iterator I(Twine(Path), EC), E;
388 std::mutex IteratorMutex;
389 ThreadPoolTaskGroup IteratorGroup(Pool);
390 for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getMaxConcurrency();
391 WorkerIndex++) {
392 IteratorGroup.async(F: [&, this]() -> void {
393 std::string FilePath;
394 while (true) {
395 {
396 // Check if iteration is over or there is an error during iteration
397 std::lock_guard<std::mutex> Guard(IteratorMutex);
398 if (I == E || EC)
399 return;
400 // Grab a file path from the directory iterator and advance the
401 // iterator.
402 FilePath = I->path();
403 I.increment(ec&: EC);
404 }
405
406 // Inspect the file at this path to determine if it is debuginfo.
407 if (!hasELFMagic(FilePath))
408 continue;
409
410 Expected<object::OwningBinary<object::Binary>> BinOrErr =
411 object::createBinary(Path: FilePath);
412
413 if (!BinOrErr) {
414 consumeError(Err: BinOrErr.takeError());
415 continue;
416 }
417 object::Binary *Bin = std::move(BinOrErr.get().getBinary());
418 if (!Bin->isObject())
419 continue;
420
421 // TODO: Support non-ELF binaries
422 object::ELFObjectFileBase *Object =
423 dyn_cast<object::ELFObjectFileBase>(Val: Bin);
424 if (!Object)
425 continue;
426
427 BuildIDRef ID = getBuildID(Obj: Object);
428 if (ID.empty())
429 continue;
430
431 std::string IDString = buildIDToString(ID);
432 if (Object->hasDebugInfo()) {
433 std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
434 (void)DebugBinaries.try_emplace(Key: IDString, Args: std::move(FilePath));
435 } else {
436 std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex);
437 (void)Binaries.try_emplace(Key: IDString, Args: std::move(FilePath));
438 }
439 }
440 });
441 }
442 IteratorGroup.wait();
443 std::unique_lock<std::mutex> Guard(IteratorMutex);
444 if (EC)
445 return errorCodeToError(EC);
446 return Error::success();
447}
448
449Expected<std::optional<std::string>>
450DebuginfodCollection::getBinaryPath(BuildIDRef ID) {
451 Log.push(Message: "getting binary path of ID " + buildIDToString(ID));
452 std::shared_lock<sys::RWMutex> Guard(BinariesMutex);
453 auto Loc = Binaries.find(Key: buildIDToString(ID));
454 if (Loc != Binaries.end()) {
455 std::string Path = Loc->getValue();
456 return Path;
457 }
458 return std::nullopt;
459}
460
461Expected<std::optional<std::string>>
462DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {
463 Log.push(Message: "getting debug binary path of ID " + buildIDToString(ID));
464 std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex);
465 auto Loc = DebugBinaries.find(Key: buildIDToString(ID));
466 if (Loc != DebugBinaries.end()) {
467 std::string Path = Loc->getValue();
468 return Path;
469 }
470 return std::nullopt;
471}
472
473Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) {
474 {
475 // Check collection; perform on-demand update if stale.
476 Expected<std::optional<std::string>> PathOrErr = getBinaryPath(ID);
477 if (!PathOrErr)
478 return PathOrErr.takeError();
479 std::optional<std::string> Path = *PathOrErr;
480 if (!Path) {
481 Expected<bool> UpdatedOrErr = updateIfStale();
482 if (!UpdatedOrErr)
483 return UpdatedOrErr.takeError();
484 if (*UpdatedOrErr) {
485 // Try once more.
486 PathOrErr = getBinaryPath(ID);
487 if (!PathOrErr)
488 return PathOrErr.takeError();
489 Path = *PathOrErr;
490 }
491 }
492 if (Path)
493 return *Path;
494 }
495
496 // Try federation.
497 Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID);
498 if (!PathOrErr)
499 consumeError(Err: PathOrErr.takeError());
500
501 // Fall back to debug binary.
502 return findDebugBinaryPath(ID);
503}
504
505Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) {
506 // Check collection; perform on-demand update if stale.
507 Expected<std::optional<std::string>> PathOrErr = getDebugBinaryPath(ID);
508 if (!PathOrErr)
509 return PathOrErr.takeError();
510 std::optional<std::string> Path = *PathOrErr;
511 if (!Path) {
512 Expected<bool> UpdatedOrErr = updateIfStale();
513 if (!UpdatedOrErr)
514 return UpdatedOrErr.takeError();
515 if (*UpdatedOrErr) {
516 // Try once more.
517 PathOrErr = getBinaryPath(ID);
518 if (!PathOrErr)
519 return PathOrErr.takeError();
520 Path = *PathOrErr;
521 }
522 }
523 if (Path)
524 return *Path;
525
526 // Try federation.
527 return getCachedOrDownloadDebuginfo(ID);
528}
529
530DebuginfodServer::DebuginfodServer(DebuginfodLog &Log,
531 DebuginfodCollection &Collection)
532 : Log(Log), Collection(Collection) {
533 cantFail(
534 Err: Server.get(UrlPathPattern: R"(/buildid/(.*)/debuginfo)", Handler: [&](HTTPServerRequest Request) {
535 Log.push(Message: "GET " + Request.UrlPath);
536 std::string IDString;
537 if (!tryGetFromHex(Input: Request.UrlPathMatches[0], Output&: IDString)) {
538 Request.setResponse(
539 {.Code: 404, .ContentType: "text/plain", .Body: "Build ID is not a hex string\n"});
540 return;
541 }
542 object::BuildID ID(IDString.begin(), IDString.end());
543 Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID);
544 if (Error Err = PathOrErr.takeError()) {
545 consumeError(Err: std::move(Err));
546 Request.setResponse({.Code: 404, .ContentType: "text/plain", .Body: "Build ID not found\n"});
547 return;
548 }
549 streamFile(Request, FilePath: *PathOrErr);
550 }));
551 cantFail(
552 Err: Server.get(UrlPathPattern: R"(/buildid/(.*)/executable)", Handler: [&](HTTPServerRequest Request) {
553 Log.push(Message: "GET " + Request.UrlPath);
554 std::string IDString;
555 if (!tryGetFromHex(Input: Request.UrlPathMatches[0], Output&: IDString)) {
556 Request.setResponse(
557 {.Code: 404, .ContentType: "text/plain", .Body: "Build ID is not a hex string\n"});
558 return;
559 }
560 object::BuildID ID(IDString.begin(), IDString.end());
561 Expected<std::string> PathOrErr = Collection.findBinaryPath(ID);
562 if (Error Err = PathOrErr.takeError()) {
563 consumeError(Err: std::move(Err));
564 Request.setResponse({.Code: 404, .ContentType: "text/plain", .Body: "Build ID not found\n"});
565 return;
566 }
567 streamFile(Request, FilePath: *PathOrErr);
568 }));
569}
570
571} // namespace llvm
572