1//===- CompilationDatabase.cpp --------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains implementations of the CompilationDatabase base class
10// and the FixedCompilationDatabase.
11//
12// FIXME: Various functions that take a string &ErrorMessage should be upgraded
13// to Expected.
14//
15//===----------------------------------------------------------------------===//
16
17#include "clang/Tooling/CompilationDatabase.h"
18#include "clang/Basic/Diagnostic.h"
19#include "clang/Basic/DiagnosticIDs.h"
20#include "clang/Basic/DiagnosticOptions.h"
21#include "clang/Basic/LLVM.h"
22#include "clang/Driver/Action.h"
23#include "clang/Driver/Compilation.h"
24#include "clang/Driver/Driver.h"
25#include "clang/Driver/Job.h"
26#include "clang/Frontend/TextDiagnosticPrinter.h"
27#include "clang/Tooling/CompilationDatabasePluginRegistry.h"
28#include "clang/Tooling/Tooling.h"
29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringRef.h"
31#include "llvm/Option/Arg.h"
32#include "llvm/Support/Compiler.h"
33#include "llvm/Support/ErrorOr.h"
34#include "llvm/Support/LineIterator.h"
35#include "llvm/Support/MemoryBuffer.h"
36#include "llvm/Support/Path.h"
37#include "llvm/Support/raw_ostream.h"
38#include "llvm/TargetParser/Host.h"
39#include <algorithm>
40#include <cassert>
41#include <cstring>
42#include <iterator>
43#include <memory>
44#include <sstream>
45#include <string>
46#include <system_error>
47#include <utility>
48#include <vector>
49
50using namespace clang;
51using namespace tooling;
52
// Instantiates the plugin registry for CompilationDatabasePluginRegistry in
// this translation unit.
// NOTE(review): the macro is defined outside this file (presumably in
// llvm/Support/Registry.h) - confirm its exact effect there.
LLVM_INSTANTIATE_REGISTRY(CompilationDatabasePluginRegistry)
54
// Out-of-line defaulted destructor of the CompilationDatabase base class.
// NOTE(review): presumably defined here rather than in the header so the
// class has a home translation unit - confirm against the header.
CompilationDatabase::~CompilationDatabase() = default;
56
57std::unique_ptr<CompilationDatabase>
58CompilationDatabase::loadFromDirectory(StringRef BuildDirectory,
59 std::string &ErrorMessage) {
60 llvm::raw_string_ostream ErrorStream(ErrorMessage);
61 for (const CompilationDatabasePluginRegistry::entry &Database :
62 CompilationDatabasePluginRegistry::entries()) {
63 std::string DatabaseErrorMessage;
64 std::unique_ptr<CompilationDatabasePlugin> Plugin(Database.instantiate());
65 if (std::unique_ptr<CompilationDatabase> DB =
66 Plugin->loadFromDirectory(Directory: BuildDirectory, ErrorMessage&: DatabaseErrorMessage))
67 return DB;
68 ErrorStream << Database.getName() << ": " << DatabaseErrorMessage << "\n";
69 }
70 return nullptr;
71}
72
73static std::unique_ptr<CompilationDatabase>
74findCompilationDatabaseFromDirectory(StringRef Directory,
75 std::string &ErrorMessage) {
76 std::stringstream ErrorStream;
77 bool HasErrorMessage = false;
78 while (!Directory.empty()) {
79 std::string LoadErrorMessage;
80
81 if (std::unique_ptr<CompilationDatabase> DB =
82 CompilationDatabase::loadFromDirectory(BuildDirectory: Directory, ErrorMessage&: LoadErrorMessage))
83 return DB;
84
85 if (!HasErrorMessage) {
86 ErrorStream << "No compilation database found in " << Directory.str()
87 << " or any parent directory\n" << LoadErrorMessage;
88 HasErrorMessage = true;
89 }
90
91 Directory = llvm::sys::path::parent_path(path: Directory);
92 }
93 ErrorMessage = ErrorStream.str();
94 return nullptr;
95}
96
97std::unique_ptr<CompilationDatabase>
98CompilationDatabase::autoDetectFromSource(StringRef SourceFile,
99 std::string &ErrorMessage) {
100 SmallString<1024> AbsolutePath(getAbsolutePath(File: SourceFile));
101 StringRef Directory = llvm::sys::path::parent_path(path: AbsolutePath);
102
103 std::unique_ptr<CompilationDatabase> DB =
104 findCompilationDatabaseFromDirectory(Directory, ErrorMessage);
105
106 if (!DB)
107 ErrorMessage = ("Could not auto-detect compilation database for file \"" +
108 SourceFile + "\"\n" + ErrorMessage).str();
109 return DB;
110}
111
112std::unique_ptr<CompilationDatabase>
113CompilationDatabase::autoDetectFromDirectory(StringRef SourceDir,
114 std::string &ErrorMessage) {
115 SmallString<1024> AbsolutePath(getAbsolutePath(File: SourceDir));
116
117 std::unique_ptr<CompilationDatabase> DB =
118 findCompilationDatabaseFromDirectory(Directory: AbsolutePath, ErrorMessage);
119
120 if (!DB)
121 ErrorMessage = ("Could not auto-detect compilation database from directory \"" +
122 SourceDir + "\"\n" + ErrorMessage).str();
123 return DB;
124}
125
126std::vector<CompileCommand> CompilationDatabase::getAllCompileCommands() const {
127 std::vector<CompileCommand> Result;
128 for (const auto &File : getAllFiles()) {
129 auto C = getCompileCommands(FilePath: File);
130 std::move(first: C.begin(), last: C.end(), result: std::back_inserter(x&: Result));
131 }
132 return Result;
133}
134
// Out-of-line defaulted destructor of the plugin interface.
// NOTE(review): presumably defined here rather than in the header so the
// class has a home translation unit - confirm against the header.
CompilationDatabasePlugin::~CompilationDatabasePlugin() = default;
136
137namespace {
138
139// Helper for recursively searching through a chain of actions and collecting
140// all inputs, direct and indirect, of compile jobs.
141struct CompileJobAnalyzer {
142 SmallVector<std::string, 2> Inputs;
143
144 void run(const driver::Action *A) {
145 runImpl(A, Collect: false);
146 }
147
148private:
149 void runImpl(const driver::Action *A, bool Collect) {
150 bool CollectChildren = Collect;
151 switch (A->getKind()) {
152 case driver::Action::CompileJobClass:
153 case driver::Action::PrecompileJobClass:
154 CollectChildren = true;
155 break;
156
157 case driver::Action::InputClass:
158 if (Collect) {
159 const auto *IA = cast<driver::InputAction>(Val: A);
160 Inputs.push_back(Elt: std::string(IA->getInputArg().getSpelling()));
161 }
162 break;
163
164 default:
165 // Don't care about others
166 break;
167 }
168
169 for (const driver::Action *AI : A->inputs())
170 runImpl(A: AI, Collect: CollectChildren);
171 }
172};
173
174// Special DiagnosticConsumer that looks for warn_drv_input_file_unused
175// diagnostics from the driver and collects the option strings for those unused
176// options.
177class UnusedInputDiagConsumer : public DiagnosticConsumer {
178public:
179 UnusedInputDiagConsumer(DiagnosticConsumer &Other) : Other(Other) {}
180
181 void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
182 const Diagnostic &Info) override {
183 if (Info.getID() == diag::warn_drv_input_file_unused) {
184 // Arg 1 for this diagnostic is the option that didn't get used.
185 UnusedInputs.push_back(Elt: Info.getArgStdStr(Idx: 0));
186 } else if (DiagLevel >= DiagnosticsEngine::Error) {
187 // If driver failed to create compilation object, show the diagnostics
188 // to user.
189 Other.HandleDiagnostic(DiagLevel, Info);
190 }
191 }
192
193 DiagnosticConsumer &Other;
194 SmallVector<std::string, 2> UnusedInputs;
195};
196
197// Filter of tools unused flags such as -no-integrated-as and -Wa,*.
198// They are not used for syntax checking, and could confuse targets
199// which don't support these options.
200struct FilterUnusedFlags {
201 bool operator() (StringRef S) {
202 return (S == "-no-integrated-as") || S.starts_with(Prefix: "-Wa,");
203 }
204};
205
206std::string GetClangToolCommand() {
207 static int Dummy;
208 std::string ClangExecutable =
209 llvm::sys::fs::getMainExecutable(argv0: "clang", MainExecAddr: (void *)&Dummy);
210 SmallString<128> ClangToolPath;
211 ClangToolPath = llvm::sys::path::parent_path(path: ClangExecutable);
212 llvm::sys::path::append(path&: ClangToolPath, a: "clang-tool");
213 return std::string(ClangToolPath);
214}
215
216} // namespace
217
218/// Strips any positional args and possible argv[0] from a command-line
219/// provided by the user to construct a FixedCompilationDatabase.
220///
221/// FixedCompilationDatabase requires a command line to be in this format as it
222/// constructs the command line for each file by appending the name of the file
223/// to be compiled. FixedCompilationDatabase also adds its own argv[0] to the
224/// start of the command line although its value is not important as it's just
225/// ignored by the Driver invoked by the ClangTool using the
226/// FixedCompilationDatabase.
227///
228/// FIXME: This functionality should probably be made available by
229/// clang::driver::Driver although what the interface should look like is not
230/// clear.
231///
232/// \param[in] Args Args as provided by the user.
233/// \return Resulting stripped command line.
234/// \li true if successful.
235/// \li false if \c Args cannot be used for compilation jobs (e.g.
236/// contains an option like -E or -version).
237static bool stripPositionalArgs(std::vector<const char *> Args,
238 std::vector<std::string> &Result,
239 std::string &ErrorMsg) {
240 DiagnosticOptions DiagOpts;
241 llvm::raw_string_ostream Output(ErrorMsg);
242 TextDiagnosticPrinter DiagnosticPrinter(Output, DiagOpts);
243 UnusedInputDiagConsumer DiagClient(DiagnosticPrinter);
244 DiagnosticsEngine Diagnostics(DiagnosticIDs::create(), DiagOpts, &DiagClient,
245 false);
246
247 // The clang executable path isn't required since the jobs the driver builds
248 // will not be executed.
249 std::unique_ptr<driver::Driver> NewDriver(new driver::Driver(
250 /* ClangExecutable= */ "", llvm::sys::getDefaultTargetTriple(),
251 Diagnostics));
252 NewDriver->setCheckInputsExist(false);
253
254 // This becomes the new argv[0]. The value is used to detect libc++ include
255 // dirs on Mac, it isn't used for other platforms.
256 std::string Argv0 = GetClangToolCommand();
257 Args.insert(position: Args.begin(), x: Argv0.c_str());
258
259 // By adding -c, we force the driver to treat compilation as the last phase.
260 // It will then issue warnings via Diagnostics about un-used options that
261 // would have been used for linking. If the user provided a compiler name as
262 // the original argv[0], this will be treated as a linker input thanks to
263 // insertng a new argv[0] above. All un-used options get collected by
264 // UnusedInputdiagConsumer and get stripped out later.
265 Args.push_back(x: "-c");
266
267 // Put a dummy C++ file on to ensure there's at least one compile job for the
268 // driver to construct. If the user specified some other argument that
269 // prevents compilation, e.g. -E or something like -version, we may still end
270 // up with no jobs but then this is the user's fault.
271 Args.push_back(x: "placeholder.cpp");
272
273 llvm::erase_if(C&: Args, P: FilterUnusedFlags());
274
275 const std::unique_ptr<driver::Compilation> Compilation(
276 NewDriver->BuildCompilation(Args));
277 if (!Compilation)
278 return false;
279
280 const driver::JobList &Jobs = Compilation->getJobs();
281
282 CompileJobAnalyzer CompileAnalyzer;
283
284 for (const auto &Cmd : Jobs) {
285 // Collect only for Assemble, Backend, and Compile jobs. If we do all jobs
286 // we get duplicates since Link jobs point to Assemble jobs as inputs.
287 // -flto* flags make the BackendJobClass, which still needs analyzer.
288 if (Cmd.getSource().getKind() == driver::Action::AssembleJobClass ||
289 Cmd.getSource().getKind() == driver::Action::BackendJobClass ||
290 Cmd.getSource().getKind() == driver::Action::CompileJobClass ||
291 Cmd.getSource().getKind() == driver::Action::PrecompileJobClass) {
292 CompileAnalyzer.run(A: &Cmd.getSource());
293 }
294 }
295
296 if (CompileAnalyzer.Inputs.empty()) {
297 ErrorMsg = "warning: no compile jobs found\n";
298 return false;
299 }
300
301 // Remove all compilation input files from the command line and inputs deemed
302 // unused for compilation. This is necessary so that getCompileCommands() can
303 // construct a command line for each file.
304 std::vector<const char *>::iterator End =
305 llvm::remove_if(Range&: Args, P: [&](StringRef S) {
306 return llvm::is_contained(Range&: CompileAnalyzer.Inputs, Element: S) ||
307 llvm::is_contained(Range&: DiagClient.UnusedInputs, Element: S);
308 });
309 // Remove the -c add above as well. It will be at the end right now.
310 assert(strcmp(*(End - 1), "-c") == 0);
311 --End;
312
313 Result = std::vector<std::string>(Args.begin() + 1, End);
314 return true;
315}
316
317std::unique_ptr<FixedCompilationDatabase>
318FixedCompilationDatabase::loadFromCommandLine(int &Argc,
319 const char *const *Argv,
320 std::string &ErrorMsg,
321 const Twine &Directory) {
322 ErrorMsg.clear();
323 if (Argc == 0)
324 return nullptr;
325 const char *const *DoubleDash = std::find(first: Argv, last: Argv + Argc, val: StringRef("--"));
326 if (DoubleDash == Argv + Argc)
327 return nullptr;
328 std::vector<const char *> CommandLine(DoubleDash + 1, Argv + Argc);
329 Argc = DoubleDash - Argv;
330
331 std::vector<std::string> StrippedArgs;
332 if (!stripPositionalArgs(Args: CommandLine, Result&: StrippedArgs, ErrorMsg))
333 return nullptr;
334 return std::make_unique<FixedCompilationDatabase>(args: Directory, args&: StrippedArgs);
335}
336
337std::unique_ptr<FixedCompilationDatabase>
338FixedCompilationDatabase::loadFromFile(StringRef Path, std::string &ErrorMsg) {
339 ErrorMsg.clear();
340 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> File =
341 llvm::MemoryBuffer::getFile(Filename: Path);
342 if (std::error_code Result = File.getError()) {
343 ErrorMsg = "Error while opening fixed database: " + Result.message();
344 return nullptr;
345 }
346 return loadFromBuffer(Directory: llvm::sys::path::parent_path(path: Path),
347 Data: (*File)->getBuffer(), ErrorMsg);
348}
349
350std::unique_ptr<FixedCompilationDatabase>
351FixedCompilationDatabase::loadFromBuffer(StringRef Directory, StringRef Data,
352 std::string &ErrorMsg) {
353 ErrorMsg.clear();
354 std::vector<std::string> Args;
355 StringRef Line;
356 while (!Data.empty()) {
357 std::tie(args&: Line, args&: Data) = Data.split(Separator: '\n');
358 // Stray whitespace is almost certainly unintended.
359 Line = Line.trim();
360 if (!Line.empty())
361 Args.push_back(x: Line.str());
362 }
363 return std::make_unique<FixedCompilationDatabase>(args&: Directory, args: std::move(Args));
364}
365
366FixedCompilationDatabase::FixedCompilationDatabase(
367 const Twine &Directory, ArrayRef<std::string> CommandLine) {
368 std::vector<std::string> ToolCommandLine(1, GetClangToolCommand());
369 ToolCommandLine.insert(position: ToolCommandLine.end(),
370 first: CommandLine.begin(), last: CommandLine.end());
371 CompileCommands.emplace_back(args: Directory, args: StringRef(),
372 args: std::move(ToolCommandLine),
373 args: StringRef());
374}
375
376std::vector<CompileCommand>
377FixedCompilationDatabase::getCompileCommands(StringRef FilePath) const {
378 std::vector<CompileCommand> Result(CompileCommands);
379 Result[0].CommandLine.push_back(x: std::string(FilePath));
380 Result[0].Filename = std::string(FilePath);
381 return Result;
382}
383
384namespace {
385
386class FixedCompilationDatabasePlugin : public CompilationDatabasePlugin {
387 std::unique_ptr<CompilationDatabase>
388 loadFromDirectory(StringRef Directory, std::string &ErrorMessage) override {
389 SmallString<1024> DatabasePath(Directory);
390 llvm::sys::path::append(path&: DatabasePath, a: "compile_flags.txt");
391 return FixedCompilationDatabase::loadFromFile(Path: DatabasePath, ErrorMsg&: ErrorMessage);
392 }
393};
394
395} // namespace
396
// File-local registration object: adds FixedCompilationDatabasePlugin to
// CompilationDatabasePluginRegistry under the name
// "fixed-compilation-database" with the description given below.
static CompilationDatabasePluginRegistry::Add<FixedCompilationDatabasePlugin>
X("fixed-compilation-database", "Reads plain-text flags file");
399
400namespace clang {
401namespace tooling {
402
// This anchor is used to force the linker to link in the generated object file
// and thus register the JSONCompilationDatabasePlugin.
// JSONAnchorSource is only declared here; its definition lives in another
// translation unit. Initializing JSONAnchorDest from it creates a reference
// to that symbol from this file. JSONAnchorDest itself is never read, hence
// [[maybe_unused]].
extern volatile int JSONAnchorSource;
[[maybe_unused]] static int JSONAnchorDest = JSONAnchorSource;
407
408} // namespace tooling
409} // namespace clang
410