1//===- ClangScanDeps.cpp - Implementation of clang-scan-deps --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Driver/Compilation.h"
10#include "clang/Driver/Driver.h"
11#include "clang/Frontend/CompilerInstance.h"
12#include "clang/Frontend/TextDiagnosticPrinter.h"
13#include "clang/Tooling/CommonOptionsParser.h"
14#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
15#include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
16#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
17#include "clang/Tooling/JSONCompilationDatabase.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/Twine.h"
20#include "llvm/Support/CommandLine.h"
21#include "llvm/Support/FileUtilities.h"
22#include "llvm/Support/Format.h"
23#include "llvm/Support/JSON.h"
24#include "llvm/Support/LLVMDriver.h"
25#include "llvm/Support/Program.h"
26#include "llvm/Support/Signals.h"
27#include "llvm/Support/ThreadPool.h"
28#include "llvm/Support/Threading.h"
29#include "llvm/Support/Timer.h"
30#include "llvm/TargetParser/Host.h"
31#include <mutex>
32#include <optional>
33#include <thread>
34
35#include "Opts.inc"
36
37using namespace clang;
38using namespace tooling::dependencies;
39
40namespace {
41
42using namespace llvm::opt;
43enum ID {
44 OPT_INVALID = 0, // This is not an option ID.
45#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
46#include "Opts.inc"
47#undef OPTION
48};
49
50#define PREFIX(NAME, VALUE) \
51 constexpr llvm::StringLiteral NAME##_init[] = VALUE; \
52 constexpr llvm::ArrayRef<llvm::StringLiteral> NAME( \
53 NAME##_init, std::size(NAME##_init) - 1);
54#include "Opts.inc"
55#undef PREFIX
56
57const llvm::opt::OptTable::Info InfoTable[] = {
58#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
59#include "Opts.inc"
60#undef OPTION
61};
62
63class ScanDepsOptTable : public llvm::opt::GenericOptTable {
64public:
65 ScanDepsOptTable() : GenericOptTable(InfoTable) {
66 setGroupedShortOptions(true);
67 }
68};
69
/// The possible values of the --resource-dir-recipe option.
enum ResourceDirRecipeKind {
  RDRK_ModifyCompilerPath, // spelled "modify-compiler-path"
  RDRK_InvokeCompiler,     // spelled "invoke-compiler"
};
74
75static std::string OutputFileName = "-";
76static ScanningMode ScanMode = ScanningMode::DependencyDirectivesScan;
77static ScanningOutputFormat Format = ScanningOutputFormat::Make;
78static ScanningOptimizations OptimizeArgs;
79static std::string ModuleFilesDir;
80static bool EagerLoadModules;
81static unsigned NumThreads = 0;
82static std::string CompilationDB;
83static std::string ModuleName;
84static std::vector<std::string> ModuleDepTargets;
85static bool DeprecatedDriverCommand;
86static ResourceDirRecipeKind ResourceDirRecipe;
87static bool Verbose;
88static bool PrintTiming;
89static llvm::BumpPtrAllocator Alloc;
90static llvm::StringSaver Saver{Alloc};
91static std::vector<const char *> CommandLine;
92
93#ifndef NDEBUG
94static constexpr bool DoRoundTripDefault = true;
95#else
96static constexpr bool DoRoundTripDefault = false;
97#endif
98
99static bool RoundTripArgs = DoRoundTripDefault;
100
101static void ParseArgs(int argc, char **argv) {
102 ScanDepsOptTable Tbl;
103 llvm::StringRef ToolName = argv[0];
104 llvm::opt::InputArgList Args =
105 Tbl.parseArgs(Argc: argc, Argv: argv, Unknown: OPT_UNKNOWN, Saver, ErrorFn: [&](StringRef Msg) {
106 llvm::errs() << Msg << '\n';
107 std::exit(status: 1);
108 });
109
110 if (Args.hasArg(Ids: OPT_help)) {
111 Tbl.printHelp(OS&: llvm::outs(), Usage: "clang-scan-deps [options]", Title: "clang-scan-deps");
112 std::exit(status: 0);
113 }
114 if (Args.hasArg(Ids: OPT_version)) {
115 llvm::outs() << ToolName << '\n';
116 llvm::cl::PrintVersionMessage();
117 std::exit(status: 0);
118 }
119 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_mode_EQ)) {
120 auto ModeType =
121 llvm::StringSwitch<std::optional<ScanningMode>>(A->getValue())
122 .Case(S: "preprocess-dependency-directives",
123 Value: ScanningMode::DependencyDirectivesScan)
124 .Case(S: "preprocess", Value: ScanningMode::CanonicalPreprocessing)
125 .Default(Value: std::nullopt);
126 if (!ModeType) {
127 llvm::errs() << ToolName
128 << ": for the --mode option: Cannot find option named '"
129 << A->getValue() << "'\n";
130 std::exit(status: 1);
131 }
132 ScanMode = *ModeType;
133 }
134
135 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_format_EQ)) {
136 auto FormatType =
137 llvm::StringSwitch<std::optional<ScanningOutputFormat>>(A->getValue())
138 .Case(S: "make", Value: ScanningOutputFormat::Make)
139 .Case(S: "p1689", Value: ScanningOutputFormat::P1689)
140 .Case(S: "experimental-full", Value: ScanningOutputFormat::Full)
141 .Default(Value: std::nullopt);
142 if (!FormatType) {
143 llvm::errs() << ToolName
144 << ": for the --format option: Cannot find option named '"
145 << A->getValue() << "'\n";
146 std::exit(status: 1);
147 }
148 Format = *FormatType;
149 }
150
151 std::vector<std::string> OptimizationFlags =
152 Args.getAllArgValues(Id: OPT_optimize_args_EQ);
153 OptimizeArgs = ScanningOptimizations::None;
154 for (const auto &Arg : OptimizationFlags) {
155 auto Optimization =
156 llvm::StringSwitch<std::optional<ScanningOptimizations>>(Arg)
157 .Case(S: "none", Value: ScanningOptimizations::None)
158 .Case(S: "header-search", Value: ScanningOptimizations::HeaderSearch)
159 .Case(S: "system-warnings", Value: ScanningOptimizations::SystemWarnings)
160 .Case(S: "vfs", Value: ScanningOptimizations::VFS)
161 .Case(S: "canonicalize-macros", Value: ScanningOptimizations::Macros)
162 .Case(S: "all", Value: ScanningOptimizations::All)
163 .Default(Value: std::nullopt);
164 if (!Optimization) {
165 llvm::errs()
166 << ToolName
167 << ": for the --optimize-args option: Cannot find option named '"
168 << Arg << "'\n";
169 std::exit(status: 1);
170 }
171 OptimizeArgs |= *Optimization;
172 }
173 if (OptimizationFlags.empty())
174 OptimizeArgs = ScanningOptimizations::Default;
175
176 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_module_files_dir_EQ))
177 ModuleFilesDir = A->getValue();
178
179 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_o))
180 OutputFileName = A->getValue();
181
182 EagerLoadModules = Args.hasArg(Ids: OPT_eager_load_pcm);
183
184 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_j)) {
185 StringRef S{A->getValue()};
186 if (!llvm::to_integer(S, Num&: NumThreads, Base: 0)) {
187 llvm::errs() << ToolName << ": for the -j option: '" << S
188 << "' value invalid for uint argument!\n";
189 std::exit(status: 1);
190 }
191 }
192
193 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_compilation_database_EQ))
194 CompilationDB = A->getValue();
195
196 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_module_name_EQ))
197 ModuleName = A->getValue();
198
199 for (const llvm::opt::Arg *A : Args.filtered(Ids: OPT_dependency_target_EQ))
200 ModuleDepTargets.emplace_back(args: A->getValue());
201
202 DeprecatedDriverCommand = Args.hasArg(Ids: OPT_deprecated_driver_command);
203
204 if (const llvm::opt::Arg *A = Args.getLastArg(Ids: OPT_resource_dir_recipe_EQ)) {
205 auto Kind =
206 llvm::StringSwitch<std::optional<ResourceDirRecipeKind>>(A->getValue())
207 .Case(S: "modify-compiler-path", Value: RDRK_ModifyCompilerPath)
208 .Case(S: "invoke-compiler", Value: RDRK_InvokeCompiler)
209 .Default(Value: std::nullopt);
210 if (!Kind) {
211 llvm::errs() << ToolName
212 << ": for the --resource-dir-recipe option: Cannot find "
213 "option named '"
214 << A->getValue() << "'\n";
215 std::exit(status: 1);
216 }
217 ResourceDirRecipe = *Kind;
218 }
219
220 PrintTiming = Args.hasArg(Ids: OPT_print_timing);
221
222 Verbose = Args.hasArg(Ids: OPT_verbose);
223
224 RoundTripArgs = Args.hasArg(Ids: OPT_round_trip_args);
225
226 if (const llvm::opt::Arg *A = Args.getLastArgNoClaim(Ids: OPT_DASH_DASH))
227 CommandLine.assign(first: A->getValues().begin(), last: A->getValues().end());
228}
229
230class SharedStream {
231public:
232 SharedStream(raw_ostream &OS) : OS(OS) {}
233 void applyLocked(llvm::function_ref<void(raw_ostream &OS)> Fn) {
234 std::unique_lock<std::mutex> LockGuard(Lock);
235 Fn(OS);
236 OS.flush();
237 }
238
239private:
240 std::mutex Lock;
241 raw_ostream &OS;
242};
243
244class ResourceDirectoryCache {
245public:
246 /// findResourceDir finds the resource directory relative to the clang
247 /// compiler being used in Args, by running it with "-print-resource-dir"
248 /// option and cache the results for reuse. \returns resource directory path
249 /// associated with the given invocation command or empty string if the
250 /// compiler path is NOT an absolute path.
251 StringRef findResourceDir(const tooling::CommandLineArguments &Args,
252 bool ClangCLMode) {
253 if (Args.size() < 1)
254 return "";
255
256 const std::string &ClangBinaryPath = Args[0];
257 if (!llvm::sys::path::is_absolute(path: ClangBinaryPath))
258 return "";
259
260 const std::string &ClangBinaryName =
261 std::string(llvm::sys::path::filename(path: ClangBinaryPath));
262
263 std::unique_lock<std::mutex> LockGuard(CacheLock);
264 const auto &CachedResourceDir = Cache.find(x: ClangBinaryPath);
265 if (CachedResourceDir != Cache.end())
266 return CachedResourceDir->second;
267
268 std::vector<StringRef> PrintResourceDirArgs{ClangBinaryName};
269 if (ClangCLMode)
270 PrintResourceDirArgs.push_back(x: "/clang:-print-resource-dir");
271 else
272 PrintResourceDirArgs.push_back(x: "-print-resource-dir");
273
274 llvm::SmallString<64> OutputFile, ErrorFile;
275 llvm::sys::fs::createTemporaryFile(Prefix: "print-resource-dir-output",
276 Suffix: "" /*no-suffix*/, ResultPath&: OutputFile);
277 llvm::sys::fs::createTemporaryFile(Prefix: "print-resource-dir-error",
278 Suffix: "" /*no-suffix*/, ResultPath&: ErrorFile);
279 llvm::FileRemover OutputRemover(OutputFile.c_str());
280 llvm::FileRemover ErrorRemover(ErrorFile.c_str());
281 std::optional<StringRef> Redirects[] = {
282 {""}, // Stdin
283 OutputFile.str(),
284 ErrorFile.str(),
285 };
286 if (llvm::sys::ExecuteAndWait(Program: ClangBinaryPath, Args: PrintResourceDirArgs, Env: {},
287 Redirects)) {
288 auto ErrorBuf = llvm::MemoryBuffer::getFile(Filename: ErrorFile.c_str());
289 llvm::errs() << ErrorBuf.get()->getBuffer();
290 return "";
291 }
292
293 auto OutputBuf = llvm::MemoryBuffer::getFile(Filename: OutputFile.c_str());
294 if (!OutputBuf)
295 return "";
296 StringRef Output = OutputBuf.get()->getBuffer().rtrim(Char: '\n');
297
298 Cache[ClangBinaryPath] = Output.str();
299 return Cache[ClangBinaryPath];
300 }
301
302private:
303 std::map<std::string, std::string> Cache;
304 std::mutex CacheLock;
305};
306
307} // end anonymous namespace
308
309/// Takes the result of a dependency scan and prints error / dependency files
310/// based on the result.
311///
312/// \returns True on error.
313static bool
314handleMakeDependencyToolResult(const std::string &Input,
315 llvm::Expected<std::string> &MaybeFile,
316 SharedStream &OS, SharedStream &Errs) {
317 if (!MaybeFile) {
318 llvm::handleAllErrors(
319 E: MaybeFile.takeError(), Handlers: [&Input, &Errs](llvm::StringError &Err) {
320 Errs.applyLocked(Fn: [&](raw_ostream &OS) {
321 OS << "Error while scanning dependencies for " << Input << ":\n";
322 OS << Err.getMessage();
323 });
324 });
325 return true;
326 }
327 OS.applyLocked(Fn: [&](raw_ostream &OS) { OS << *MaybeFile; });
328 return false;
329}
330
331static llvm::json::Array toJSONSorted(const llvm::StringSet<> &Set) {
332 std::vector<llvm::StringRef> Strings;
333 for (auto &&I : Set)
334 Strings.push_back(x: I.getKey());
335 llvm::sort(C&: Strings);
336 return llvm::json::Array(Strings);
337}
338
339// Technically, we don't need to sort the dependency list to get determinism.
340// Leaving these be will simply preserve the import order.
341static llvm::json::Array toJSONSorted(std::vector<ModuleID> V) {
342 llvm::sort(C&: V);
343
344 llvm::json::Array Ret;
345 for (const ModuleID &MID : V)
346 Ret.push_back(E: llvm::json::Object(
347 {{.K: "module-name", .V: MID.ModuleName}, {.K: "context-hash", .V: MID.ContextHash}}));
348 return Ret;
349}
350
351static llvm::json::Array
352toJSONSorted(llvm::SmallVector<Module::LinkLibrary, 2> &LinkLibs) {
353 llvm::sort(C&: LinkLibs, Comp: [](const Module::LinkLibrary &lhs,
354 const Module::LinkLibrary &rhs) {
355 return lhs.Library < rhs.Library;
356 });
357
358 llvm::json::Array Ret;
359 for (const Module::LinkLibrary &LL : LinkLibs)
360 Ret.push_back(E: llvm::json::Object(
361 {{.K: "link-name", .V: LL.Library}, {.K: "isFramework", .V: LL.IsFramework}}));
362 return Ret;
363}
364
365// Thread safe.
366class FullDeps {
367public:
368 FullDeps(size_t NumInputs) : Inputs(NumInputs) {}
369
370 void mergeDeps(StringRef Input, TranslationUnitDeps TUDeps,
371 size_t InputIndex) {
372 mergeDeps(Graph: std::move(TUDeps.ModuleGraph), InputIndex);
373
374 InputDeps ID;
375 ID.FileName = std::string(Input);
376 ID.ContextHash = std::move(TUDeps.ID.ContextHash);
377 ID.FileDeps = std::move(TUDeps.FileDeps);
378 ID.ModuleDeps = std::move(TUDeps.ClangModuleDeps);
379 ID.DriverCommandLine = std::move(TUDeps.DriverCommandLine);
380 ID.Commands = std::move(TUDeps.Commands);
381
382 assert(InputIndex < Inputs.size() && "Input index out of bounds");
383 assert(Inputs[InputIndex].FileName.empty() && "Result already populated");
384 Inputs[InputIndex] = std::move(ID);
385 }
386
387 void mergeDeps(ModuleDepsGraph Graph, size_t InputIndex) {
388 std::vector<ModuleDeps *> NewMDs;
389 {
390 std::unique_lock<std::mutex> ul(Lock);
391 for (const ModuleDeps &MD : Graph) {
392 auto I = Modules.find(x: {.ID: MD.ID, .InputIndex: 0});
393 if (I != Modules.end()) {
394 I->first.InputIndex = std::min(a: I->first.InputIndex, b: InputIndex);
395 continue;
396 }
397 auto Res = Modules.insert(hint: I, x: {{.ID: MD.ID, .InputIndex: InputIndex}, std::move(MD)});
398 NewMDs.push_back(x: &Res->second);
399 }
400 // First call to \c getBuildArguments is somewhat expensive. Let's call it
401 // on the current thread (instead of the main one), and outside the
402 // critical section.
403 for (ModuleDeps *MD : NewMDs)
404 (void)MD->getBuildArguments();
405 }
406 }
407
408 bool roundTripCommand(ArrayRef<std::string> ArgStrs,
409 DiagnosticsEngine &Diags) {
410 if (ArgStrs.empty() || ArgStrs[0] != "-cc1")
411 return false;
412 SmallVector<const char *> Args;
413 for (const std::string &Arg : ArgStrs)
414 Args.push_back(Elt: Arg.c_str());
415 return !CompilerInvocation::checkCC1RoundTrip(Args, Diags);
416 }
417
418 // Returns \c true if any command lines fail to round-trip. We expect
419 // commands already be canonical when output by the scanner.
420 bool roundTripCommands(raw_ostream &ErrOS) {
421 IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions{};
422 TextDiagnosticPrinter DiagConsumer(ErrOS, &*DiagOpts);
423 IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
424 CompilerInstance::createDiagnostics(Opts: &*DiagOpts, Client: &DiagConsumer,
425 /*ShouldOwnClient=*/false);
426
427 for (auto &&M : Modules)
428 if (roundTripCommand(ArgStrs: M.second.getBuildArguments(), Diags&: *Diags))
429 return true;
430
431 for (auto &&I : Inputs)
432 for (const auto &Cmd : I.Commands)
433 if (roundTripCommand(ArgStrs: Cmd.Arguments, Diags&: *Diags))
434 return true;
435
436 return false;
437 }
438
439 void printFullOutput(raw_ostream &OS) {
440 // Skip sorting modules and constructing the JSON object if the output
441 // cannot be observed anyway. This makes timings less noisy.
442 if (&OS == &llvm::nulls())
443 return;
444
445 // Sort the modules by name to get a deterministic order.
446 std::vector<IndexedModuleID> ModuleIDs;
447 for (auto &&M : Modules)
448 ModuleIDs.push_back(x: M.first);
449 llvm::sort(C&: ModuleIDs);
450
451 using namespace llvm::json;
452
453 Array OutModules;
454 for (auto &&ModID : ModuleIDs) {
455 auto &MD = Modules[ModID];
456 Object O{{.K: "name", .V: MD.ID.ModuleName},
457 {.K: "context-hash", .V: MD.ID.ContextHash},
458 {.K: "file-deps", .V: toJSONSorted(Set: MD.FileDeps)},
459 {.K: "clang-module-deps", .V: toJSONSorted(V: MD.ClangModuleDeps)},
460 {.K: "clang-modulemap-file", .V: MD.ClangModuleMapFile},
461 {.K: "command-line", .V: MD.getBuildArguments()},
462 {.K: "link-libraries", .V: toJSONSorted(LinkLibs&: MD.LinkLibraries)}};
463 OutModules.push_back(E: std::move(O));
464 }
465
466 Array TUs;
467 for (auto &&I : Inputs) {
468 Array Commands;
469 if (I.DriverCommandLine.empty()) {
470 for (const auto &Cmd : I.Commands) {
471 Object O{
472 {.K: "input-file", .V: I.FileName},
473 {.K: "clang-context-hash", .V: I.ContextHash},
474 {.K: "file-deps", .V: I.FileDeps},
475 {.K: "clang-module-deps", .V: toJSONSorted(V: I.ModuleDeps)},
476 {.K: "executable", .V: Cmd.Executable},
477 {.K: "command-line", .V: Cmd.Arguments},
478 };
479 Commands.push_back(E: std::move(O));
480 }
481 } else {
482 Object O{
483 {.K: "input-file", .V: I.FileName},
484 {.K: "clang-context-hash", .V: I.ContextHash},
485 {.K: "file-deps", .V: I.FileDeps},
486 {.K: "clang-module-deps", .V: toJSONSorted(V: I.ModuleDeps)},
487 {.K: "executable", .V: "clang"},
488 {.K: "command-line", .V: I.DriverCommandLine},
489 };
490 Commands.push_back(E: std::move(O));
491 }
492 TUs.push_back(E: Object{
493 {.K: "commands", .V: std::move(Commands)},
494 });
495 }
496
497 Object Output{
498 {.K: "modules", .V: std::move(OutModules)},
499 {.K: "translation-units", .V: std::move(TUs)},
500 };
501
502 OS << llvm::formatv(Fmt: "{0:2}\n", Vals: Value(std::move(Output)));
503 }
504
505private:
506 struct IndexedModuleID {
507 ModuleID ID;
508
509 // FIXME: This is mutable so that it can still be updated after insertion
510 // into an unordered associative container. This is "fine", since this
511 // field doesn't contribute to the hash, but it's a brittle hack.
512 mutable size_t InputIndex;
513
514 bool operator==(const IndexedModuleID &Other) const {
515 return ID == Other.ID;
516 }
517
518 bool operator<(const IndexedModuleID &Other) const {
519 /// We need the output of clang-scan-deps to be deterministic. However,
520 /// the dependency graph may contain two modules with the same name. How
521 /// do we decide which one to print first? If we made that decision based
522 /// on the context hash, the ordering would be deterministic, but
523 /// different across machines. This can happen for example when the inputs
524 /// or the SDKs (which both contribute to the "context" hash) live in
525 /// different absolute locations. We solve that by tracking the index of
526 /// the first input TU that (transitively) imports the dependency, which
527 /// is always the same for the same input, resulting in deterministic
528 /// sorting that's also reproducible across machines.
529 return std::tie(args: ID.ModuleName, args&: InputIndex) <
530 std::tie(args: Other.ID.ModuleName, args&: Other.InputIndex);
531 }
532
533 struct Hasher {
534 std::size_t operator()(const IndexedModuleID &IMID) const {
535 return llvm::hash_value(ID: IMID.ID);
536 }
537 };
538 };
539
540 struct InputDeps {
541 std::string FileName;
542 std::string ContextHash;
543 std::vector<std::string> FileDeps;
544 std::vector<ModuleID> ModuleDeps;
545 std::vector<std::string> DriverCommandLine;
546 std::vector<Command> Commands;
547 };
548
549 std::mutex Lock;
550 std::unordered_map<IndexedModuleID, ModuleDeps, IndexedModuleID::Hasher>
551 Modules;
552 std::vector<InputDeps> Inputs;
553};
554
555static bool handleTranslationUnitResult(
556 StringRef Input, llvm::Expected<TranslationUnitDeps> &MaybeTUDeps,
557 FullDeps &FD, size_t InputIndex, SharedStream &OS, SharedStream &Errs) {
558 if (!MaybeTUDeps) {
559 llvm::handleAllErrors(
560 E: MaybeTUDeps.takeError(), Handlers: [&Input, &Errs](llvm::StringError &Err) {
561 Errs.applyLocked(Fn: [&](raw_ostream &OS) {
562 OS << "Error while scanning dependencies for " << Input << ":\n";
563 OS << Err.getMessage();
564 });
565 });
566 return true;
567 }
568 FD.mergeDeps(Input, TUDeps: std::move(*MaybeTUDeps), InputIndex);
569 return false;
570}
571
572static bool handleModuleResult(
573 StringRef ModuleName, llvm::Expected<ModuleDepsGraph> &MaybeModuleGraph,
574 FullDeps &FD, size_t InputIndex, SharedStream &OS, SharedStream &Errs) {
575 if (!MaybeModuleGraph) {
576 llvm::handleAllErrors(E: MaybeModuleGraph.takeError(),
577 Handlers: [&ModuleName, &Errs](llvm::StringError &Err) {
578 Errs.applyLocked(Fn: [&](raw_ostream &OS) {
579 OS << "Error while scanning dependencies for "
580 << ModuleName << ":\n";
581 OS << Err.getMessage();
582 });
583 });
584 return true;
585 }
586 FD.mergeDeps(Graph: std::move(*MaybeModuleGraph), InputIndex);
587 return false;
588}
589
590class P1689Deps {
591public:
592 void printDependencies(raw_ostream &OS) {
593 addSourcePathsToRequires();
594 // Sort the modules by name to get a deterministic order.
595 llvm::sort(C&: Rules, Comp: [](const P1689Rule &A, const P1689Rule &B) {
596 return A.PrimaryOutput < B.PrimaryOutput;
597 });
598
599 using namespace llvm::json;
600 Array OutputRules;
601 for (const P1689Rule &R : Rules) {
602 Object O{{.K: "primary-output", .V: R.PrimaryOutput}};
603
604 if (R.Provides) {
605 Array Provides;
606 Object Provided{{.K: "logical-name", .V: R.Provides->ModuleName},
607 {.K: "source-path", .V: R.Provides->SourcePath},
608 {.K: "is-interface", .V: R.Provides->IsStdCXXModuleInterface}};
609 Provides.push_back(E: std::move(Provided));
610 O.insert(E: {.K: "provides", .V: std::move(Provides)});
611 }
612
613 Array Requires;
614 for (const P1689ModuleInfo &Info : R.Requires) {
615 Object RequiredInfo{{.K: "logical-name", .V: Info.ModuleName}};
616 if (!Info.SourcePath.empty())
617 RequiredInfo.insert(E: {.K: "source-path", .V: Info.SourcePath});
618 Requires.push_back(E: std::move(RequiredInfo));
619 }
620
621 if (!Requires.empty())
622 O.insert(E: {.K: "requires", .V: std::move(Requires)});
623
624 OutputRules.push_back(E: std::move(O));
625 }
626
627 Object Output{
628 {.K: "version", .V: 1}, {.K: "revision", .V: 0}, {.K: "rules", .V: std::move(OutputRules)}};
629
630 OS << llvm::formatv(Fmt: "{0:2}\n", Vals: Value(std::move(Output)));
631 }
632
633 void addRules(P1689Rule &Rule) {
634 std::unique_lock<std::mutex> LockGuard(Lock);
635 Rules.push_back(x: Rule);
636 }
637
638private:
639 void addSourcePathsToRequires() {
640 llvm::DenseMap<StringRef, StringRef> ModuleSourceMapper;
641 for (const P1689Rule &R : Rules)
642 if (R.Provides && !R.Provides->SourcePath.empty())
643 ModuleSourceMapper[R.Provides->ModuleName] = R.Provides->SourcePath;
644
645 for (P1689Rule &R : Rules) {
646 for (P1689ModuleInfo &Info : R.Requires) {
647 auto Iter = ModuleSourceMapper.find(Val: Info.ModuleName);
648 if (Iter != ModuleSourceMapper.end())
649 Info.SourcePath = Iter->second;
650 }
651 }
652 }
653
654 std::mutex Lock;
655 std::vector<P1689Rule> Rules;
656};
657
658static bool
659handleP1689DependencyToolResult(const std::string &Input,
660 llvm::Expected<P1689Rule> &MaybeRule,
661 P1689Deps &PD, SharedStream &Errs) {
662 if (!MaybeRule) {
663 llvm::handleAllErrors(
664 E: MaybeRule.takeError(), Handlers: [&Input, &Errs](llvm::StringError &Err) {
665 Errs.applyLocked(Fn: [&](raw_ostream &OS) {
666 OS << "Error while scanning dependencies for " << Input << ":\n";
667 OS << Err.getMessage();
668 });
669 });
670 return true;
671 }
672 PD.addRules(Rule&: *MaybeRule);
673 return false;
674}
675
676/// Construct a path for the explicitly built PCM.
677static std::string constructPCMPath(ModuleID MID, StringRef OutputDir) {
678 SmallString<256> ExplicitPCMPath(OutputDir);
679 llvm::sys::path::append(path&: ExplicitPCMPath, a: MID.ContextHash,
680 b: MID.ModuleName + "-" + MID.ContextHash + ".pcm");
681 return std::string(ExplicitPCMPath);
682}
683
684static std::string lookupModuleOutput(const ModuleID &MID, ModuleOutputKind MOK,
685 StringRef OutputDir) {
686 std::string PCMPath = constructPCMPath(MID, OutputDir);
687 switch (MOK) {
688 case ModuleOutputKind::ModuleFile:
689 return PCMPath;
690 case ModuleOutputKind::DependencyFile:
691 return PCMPath + ".d";
692 case ModuleOutputKind::DependencyTargets:
693 // Null-separate the list of targets.
694 return join(R&: ModuleDepTargets, Separator: StringRef("\0", 1));
695 case ModuleOutputKind::DiagnosticSerializationFile:
696 return PCMPath + ".diag";
697 }
698 llvm_unreachable("Fully covered switch above!");
699}
700
701static std::string getModuleCachePath(ArrayRef<std::string> Args) {
702 for (StringRef Arg : llvm::reverse(C&: Args)) {
703 Arg.consume_front(Prefix: "/clang:");
704 if (Arg.consume_front(Prefix: "-fmodules-cache-path="))
705 return std::string(Arg);
706 }
707 SmallString<128> Path;
708 driver::Driver::getDefaultModuleCachePath(Result&: Path);
709 return std::string(Path);
710}
711
712/// Attempts to construct the compilation database from '-compilation-database'
713/// or from the arguments following the positional '--'.
714static std::unique_ptr<tooling::CompilationDatabase>
715getCompilationDatabase(int argc, char **argv, std::string &ErrorMessage) {
716 ParseArgs(argc, argv);
717
718 if (!(CommandLine.empty() ^ CompilationDB.empty())) {
719 llvm::errs() << "The compilation command line must be provided either via "
720 "'-compilation-database' or after '--'.";
721 return nullptr;
722 }
723
724 if (!CompilationDB.empty())
725 return tooling::JSONCompilationDatabase::loadFromFile(
726 FilePath: CompilationDB, ErrorMessage,
727 Syntax: tooling::JSONCommandLineSyntax::AutoDetect);
728
729 llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
730 CompilerInstance::createDiagnostics(Opts: new DiagnosticOptions);
731 driver::Driver TheDriver(CommandLine[0], llvm::sys::getDefaultTargetTriple(),
732 *Diags);
733 TheDriver.setCheckInputsExist(false);
734 std::unique_ptr<driver::Compilation> C(
735 TheDriver.BuildCompilation(Args: CommandLine));
736 if (!C || C->getJobs().empty())
737 return nullptr;
738
739 auto Cmd = C->getJobs().begin();
740 auto CI = std::make_unique<CompilerInvocation>();
741 CompilerInvocation::CreateFromArgs(Res&: *CI, CommandLineArgs: Cmd->getArguments(), Diags&: *Diags,
742 Argv0: CommandLine[0]);
743 if (!CI)
744 return nullptr;
745
746 FrontendOptions &FEOpts = CI->getFrontendOpts();
747 if (FEOpts.Inputs.size() != 1) {
748 llvm::errs()
749 << "Exactly one input file is required in the per-file mode ('--').\n";
750 return nullptr;
751 }
752
753 // There might be multiple jobs for a compilation. Extract the specified
754 // output filename from the last job.
755 auto LastCmd = C->getJobs().end();
756 LastCmd--;
757 if (LastCmd->getOutputFilenames().size() != 1) {
758 llvm::errs()
759 << "Exactly one output file is required in the per-file mode ('--').\n";
760 return nullptr;
761 }
762 StringRef OutputFile = LastCmd->getOutputFilenames().front();
763
764 class InplaceCompilationDatabase : public tooling::CompilationDatabase {
765 public:
766 InplaceCompilationDatabase(StringRef InputFile, StringRef OutputFile,
767 ArrayRef<const char *> CommandLine)
768 : Command(".", InputFile, {}, OutputFile) {
769 for (auto *C : CommandLine)
770 Command.CommandLine.push_back(x: C);
771 }
772
773 std::vector<tooling::CompileCommand>
774 getCompileCommands(StringRef FilePath) const override {
775 if (FilePath != Command.Filename)
776 return {};
777 return {Command};
778 }
779
780 std::vector<std::string> getAllFiles() const override {
781 return {Command.Filename};
782 }
783
784 std::vector<tooling::CompileCommand>
785 getAllCompileCommands() const override {
786 return {Command};
787 }
788
789 private:
790 tooling::CompileCommand Command;
791 };
792
793 return std::make_unique<InplaceCompilationDatabase>(
794 args: FEOpts.Inputs[0].getFile(), args&: OutputFile, args&: CommandLine);
795}
796
797int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
798 std::string ErrorMessage;
799 std::unique_ptr<tooling::CompilationDatabase> Compilations =
800 getCompilationDatabase(argc, argv, ErrorMessage);
801 if (!Compilations) {
802 llvm::errs() << ErrorMessage << "\n";
803 return 1;
804 }
805
806 llvm::cl::PrintOptionValues();
807
808 // Expand response files in advance, so that we can "see" all the arguments
809 // when adjusting below.
810 Compilations = expandResponseFiles(Base: std::move(Compilations),
811 FS: llvm::vfs::getRealFileSystem());
812
813 // The command options are rewritten to run Clang in preprocessor only mode.
814 auto AdjustingCompilations =
815 std::make_unique<tooling::ArgumentsAdjustingCompilations>(
816 args: std::move(Compilations));
817 ResourceDirectoryCache ResourceDirCache;
818
819 AdjustingCompilations->appendArgumentsAdjuster(
820 Adjuster: [&ResourceDirCache](const tooling::CommandLineArguments &Args,
821 StringRef FileName) {
822 std::string LastO;
823 bool HasResourceDir = false;
824 bool ClangCLMode = false;
825 auto FlagsEnd = llvm::find(Range: Args, Val: "--");
826 if (FlagsEnd != Args.begin()) {
827 ClangCLMode =
828 llvm::sys::path::stem(path: Args[0]).contains_insensitive(Other: "clang-cl") ||
829 llvm::is_contained(Range: Args, Element: "--driver-mode=cl");
830
831 // Reverse scan, starting at the end or at the element before "--".
832 auto R = std::make_reverse_iterator(i: FlagsEnd);
833 for (auto I = R, E = Args.rend(); I != E; ++I) {
834 StringRef Arg = *I;
835 if (ClangCLMode) {
836 // Ignore arguments that are preceded by "-Xclang".
837 if ((I + 1) != E && I[1] == "-Xclang")
838 continue;
839 if (LastO.empty()) {
840 // With clang-cl, the output obj file can be specified with
841 // "/opath", "/o path", "/Fopath", and the dash counterparts.
842 // Also, clang-cl adds ".obj" extension if none is found.
843 if ((Arg == "-o" || Arg == "/o") && I != R)
844 LastO = I[-1]; // Next argument (reverse iterator)
845 else if (Arg.starts_with(Prefix: "/Fo") || Arg.starts_with(Prefix: "-Fo"))
846 LastO = Arg.drop_front(N: 3).str();
847 else if (Arg.starts_with(Prefix: "/o") || Arg.starts_with(Prefix: "-o"))
848 LastO = Arg.drop_front(N: 2).str();
849
850 if (!LastO.empty() && !llvm::sys::path::has_extension(path: LastO))
851 LastO.append(s: ".obj");
852 }
853 }
854 if (Arg == "-resource-dir")
855 HasResourceDir = true;
856 }
857 }
858 tooling::CommandLineArguments AdjustedArgs(Args.begin(), FlagsEnd);
859 // The clang-cl driver passes "-o -" to the frontend. Inject the real
860 // file here to ensure "-MT" can be deduced if need be.
861 if (ClangCLMode && !LastO.empty()) {
862 AdjustedArgs.push_back(x: "/clang:-o");
863 AdjustedArgs.push_back(x: "/clang:" + LastO);
864 }
865
866 if (!HasResourceDir && ResourceDirRecipe == RDRK_InvokeCompiler) {
867 StringRef ResourceDir =
868 ResourceDirCache.findResourceDir(Args, ClangCLMode);
869 if (!ResourceDir.empty()) {
870 AdjustedArgs.push_back(x: "-resource-dir");
871 AdjustedArgs.push_back(x: std::string(ResourceDir));
872 }
873 }
874 AdjustedArgs.insert(position: AdjustedArgs.end(), first: FlagsEnd, last: Args.end());
875 return AdjustedArgs;
876 });
877
878 SharedStream Errs(llvm::errs());
879
880 std::optional<llvm::raw_fd_ostream> FileOS;
881 llvm::raw_ostream &ThreadUnsafeDependencyOS = [&]() -> llvm::raw_ostream & {
882 if (OutputFileName == "-")
883 return llvm::outs();
884
885 if (OutputFileName == "/dev/null")
886 return llvm::nulls();
887
888 std::error_code EC;
889 FileOS.emplace(args&: OutputFileName, args&: EC);
890 if (EC) {
891 llvm::errs() << "Failed to open output file '" << OutputFileName
892 << "': " << llvm::errorCodeToError(EC) << '\n';
893 std::exit(status: 1);
894 }
895 return *FileOS;
896 }();
897 SharedStream DependencyOS(ThreadUnsafeDependencyOS);
898
899 std::vector<tooling::CompileCommand> Inputs =
900 AdjustingCompilations->getAllCompileCommands();
901
902 std::atomic<bool> HadErrors(false);
903 std::optional<FullDeps> FD;
904 P1689Deps PD;
905
906 std::mutex Lock;
907 size_t Index = 0;
908 auto GetNextInputIndex = [&]() -> std::optional<size_t> {
909 std::unique_lock<std::mutex> LockGuard(Lock);
910 if (Index < Inputs.size())
911 return Index++;
912 return {};
913 };
914
915 if (Format == ScanningOutputFormat::Full)
916 FD.emplace(args: ModuleName.empty() ? Inputs.size() : 0);
917
918 auto ScanningTask = [&](DependencyScanningService &Service) {
919 DependencyScanningTool WorkerTool(Service);
920
921 llvm::DenseSet<ModuleID> AlreadySeenModules;
922 while (auto MaybeInputIndex = GetNextInputIndex()) {
923 size_t LocalIndex = *MaybeInputIndex;
924 const tooling::CompileCommand *Input = &Inputs[LocalIndex];
925 std::string Filename = std::move(Input->Filename);
926 std::string CWD = std::move(Input->Directory);
927
928 std::optional<StringRef> MaybeModuleName;
929 if (!ModuleName.empty())
930 MaybeModuleName = ModuleName;
931
932 std::string OutputDir(ModuleFilesDir);
933 if (OutputDir.empty())
934 OutputDir = getModuleCachePath(Args: Input->CommandLine);
935 auto LookupOutput = [&](const ModuleID &MID, ModuleOutputKind MOK) {
936 return ::lookupModuleOutput(MID, MOK, OutputDir);
937 };
938
939 // Run the tool on it.
940 if (Format == ScanningOutputFormat::Make) {
941 auto MaybeFile = WorkerTool.getDependencyFile(CommandLine: Input->CommandLine, CWD);
942 if (handleMakeDependencyToolResult(Input: Filename, MaybeFile, OS&: DependencyOS,
943 Errs))
944 HadErrors = true;
945 } else if (Format == ScanningOutputFormat::P1689) {
946 // It is useful to generate the make-format dependency output during
947 // the scanning for P1689. Otherwise the users need to scan again for
948 // it. We will generate the make-format dependency output if we find
949 // `-MF` in the command lines.
950 std::string MakeformatOutputPath;
951 std::string MakeformatOutput;
952
953 auto MaybeRule = WorkerTool.getP1689ModuleDependencyFile(
954 Command: *Input, CWD, MakeformatOutput, MakeformatOutputPath);
955
956 if (handleP1689DependencyToolResult(Input: Filename, MaybeRule, PD, Errs))
957 HadErrors = true;
958
959 if (!MakeformatOutputPath.empty() && !MakeformatOutput.empty() &&
960 !HadErrors) {
961 static std::mutex Lock;
962 // With compilation database, we may open different files
963 // concurrently or we may write the same file concurrently. So we
964 // use a map here to allow multiple compile commands to write to the
965 // same file. Also we need a lock here to avoid data race.
966 static llvm::StringMap<llvm::raw_fd_ostream> OSs;
967 std::unique_lock<std::mutex> LockGuard(Lock);
968
969 auto OSIter = OSs.find(Key: MakeformatOutputPath);
970 if (OSIter == OSs.end()) {
971 std::error_code EC;
972 OSIter =
973 OSs.try_emplace(Key: MakeformatOutputPath, Args&: MakeformatOutputPath, Args&: EC)
974 .first;
975 if (EC)
976 llvm::errs() << "Failed to open P1689 make format output file \""
977 << MakeformatOutputPath << "\" for " << EC.message()
978 << "\n";
979 }
980
981 SharedStream MakeformatOS(OSIter->second);
982 llvm::Expected<std::string> MaybeOutput(MakeformatOutput);
983 if (handleMakeDependencyToolResult(Input: Filename, MaybeFile&: MaybeOutput,
984 OS&: MakeformatOS, Errs))
985 HadErrors = true;
986 }
987 } else if (MaybeModuleName) {
988 auto MaybeModuleDepsGraph = WorkerTool.getModuleDependencies(
989 ModuleName: *MaybeModuleName, CommandLine: Input->CommandLine, CWD, AlreadySeen: AlreadySeenModules,
990 LookupModuleOutput: LookupOutput);
991 if (handleModuleResult(ModuleName: *MaybeModuleName, MaybeModuleGraph&: MaybeModuleDepsGraph, FD&: *FD,
992 InputIndex: LocalIndex, OS&: DependencyOS, Errs))
993 HadErrors = true;
994 } else {
995 auto MaybeTUDeps = WorkerTool.getTranslationUnitDependencies(
996 CommandLine: Input->CommandLine, CWD, AlreadySeen: AlreadySeenModules, LookupModuleOutput: LookupOutput);
997 if (handleTranslationUnitResult(Input: Filename, MaybeTUDeps, FD&: *FD, InputIndex: LocalIndex,
998 OS&: DependencyOS, Errs))
999 HadErrors = true;
1000 }
1001 }
1002 };
1003
1004 DependencyScanningService Service(ScanMode, Format, OptimizeArgs,
1005 EagerLoadModules);
1006
1007 llvm::Timer T;
1008 T.startTimer();
1009
1010 if (Inputs.size() == 1) {
1011 ScanningTask(Service);
1012 } else {
1013 llvm::DefaultThreadPool Pool(llvm::hardware_concurrency(ThreadCount: NumThreads));
1014
1015 if (Verbose) {
1016 llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
1017 << " files using " << Pool.getMaxConcurrency()
1018 << " workers\n";
1019 }
1020
1021 for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I)
1022 Pool.async(F: [ScanningTask, &Service]() { ScanningTask(Service); });
1023
1024 Pool.wait();
1025 }
1026
1027 T.stopTimer();
1028 if (PrintTiming)
1029 llvm::errs() << llvm::format(
1030 Fmt: "clang-scan-deps timing: %0.2fs wall, %0.2fs process\n",
1031 Vals: T.getTotalTime().getWallTime(), Vals: T.getTotalTime().getProcessTime());
1032
1033 if (RoundTripArgs)
1034 if (FD && FD->roundTripCommands(ErrOS&: llvm::errs()))
1035 HadErrors = true;
1036
1037 if (Format == ScanningOutputFormat::Full)
1038 FD->printFullOutput(OS&: ThreadUnsafeDependencyOS);
1039 else if (Format == ScanningOutputFormat::P1689)
1040 PD.printDependencies(OS&: ThreadUnsafeDependencyOS);
1041
1042 return HadErrors;
1043}
1044