1//===- CompilationDatabase.cpp --------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains implementations of the CompilationDatabase base class
10// and the FixedCompilationDatabase.
11//
12// FIXME: Various functions that take a string &ErrorMessage should be upgraded
13// to Expected.
14//
15//===----------------------------------------------------------------------===//
16
17#include "clang/Tooling/CompilationDatabase.h"
18#include "clang/Basic/Diagnostic.h"
19#include "clang/Basic/DiagnosticIDs.h"
20#include "clang/Basic/DiagnosticOptions.h"
21#include "clang/Basic/LLVM.h"
22#include "clang/Driver/Action.h"
23#include "clang/Driver/Compilation.h"
24#include "clang/Driver/Driver.h"
25#include "clang/Driver/DriverDiagnostic.h"
26#include "clang/Driver/Job.h"
27#include "clang/Frontend/TextDiagnosticPrinter.h"
28#include "clang/Tooling/CompilationDatabasePluginRegistry.h"
29#include "clang/Tooling/Tooling.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/IntrusiveRefCntPtr.h"
32#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/SmallString.h"
34#include "llvm/ADT/SmallVector.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/Option/Arg.h"
37#include "llvm/Support/Casting.h"
38#include "llvm/Support/Compiler.h"
39#include "llvm/Support/ErrorOr.h"
40#include "llvm/Support/LineIterator.h"
41#include "llvm/Support/MemoryBuffer.h"
42#include "llvm/Support/Path.h"
43#include "llvm/Support/raw_ostream.h"
44#include "llvm/TargetParser/Host.h"
45#include <algorithm>
46#include <cassert>
47#include <cstring>
48#include <iterator>
49#include <memory>
50#include <sstream>
51#include <string>
52#include <system_error>
53#include <utility>
54#include <vector>
55
56using namespace clang;
57using namespace tooling;
58
// Instantiates the CompilationDatabasePluginRegistry for this translation
// unit (see LLVM_INSTANTIATE_REGISTRY in llvm/Support/Registry.h).
59LLVM_INSTANTIATE_REGISTRY(CompilationDatabasePluginRegistry)
60
// Defaulted destructor of the CompilationDatabase base class, defined out of
// line in this file.
61CompilationDatabase::~CompilationDatabase() = default;
62
63std::unique_ptr<CompilationDatabase>
64CompilationDatabase::loadFromDirectory(StringRef BuildDirectory,
65 std::string &ErrorMessage) {
66 llvm::raw_string_ostream ErrorStream(ErrorMessage);
67 for (const CompilationDatabasePluginRegistry::entry &Database :
68 CompilationDatabasePluginRegistry::entries()) {
69 std::string DatabaseErrorMessage;
70 std::unique_ptr<CompilationDatabasePlugin> Plugin(Database.instantiate());
71 if (std::unique_ptr<CompilationDatabase> DB =
72 Plugin->loadFromDirectory(Directory: BuildDirectory, ErrorMessage&: DatabaseErrorMessage))
73 return DB;
74 ErrorStream << Database.getName() << ": " << DatabaseErrorMessage << "\n";
75 }
76 return nullptr;
77}
78
79static std::unique_ptr<CompilationDatabase>
80findCompilationDatabaseFromDirectory(StringRef Directory,
81 std::string &ErrorMessage) {
82 std::stringstream ErrorStream;
83 bool HasErrorMessage = false;
84 while (!Directory.empty()) {
85 std::string LoadErrorMessage;
86
87 if (std::unique_ptr<CompilationDatabase> DB =
88 CompilationDatabase::loadFromDirectory(BuildDirectory: Directory, ErrorMessage&: LoadErrorMessage))
89 return DB;
90
91 if (!HasErrorMessage) {
92 ErrorStream << "No compilation database found in " << Directory.str()
93 << " or any parent directory\n" << LoadErrorMessage;
94 HasErrorMessage = true;
95 }
96
97 Directory = llvm::sys::path::parent_path(path: Directory);
98 }
99 ErrorMessage = ErrorStream.str();
100 return nullptr;
101}
102
103std::unique_ptr<CompilationDatabase>
104CompilationDatabase::autoDetectFromSource(StringRef SourceFile,
105 std::string &ErrorMessage) {
106 SmallString<1024> AbsolutePath(getAbsolutePath(File: SourceFile));
107 StringRef Directory = llvm::sys::path::parent_path(path: AbsolutePath);
108
109 std::unique_ptr<CompilationDatabase> DB =
110 findCompilationDatabaseFromDirectory(Directory, ErrorMessage);
111
112 if (!DB)
113 ErrorMessage = ("Could not auto-detect compilation database for file \"" +
114 SourceFile + "\"\n" + ErrorMessage).str();
115 return DB;
116}
117
118std::unique_ptr<CompilationDatabase>
119CompilationDatabase::autoDetectFromDirectory(StringRef SourceDir,
120 std::string &ErrorMessage) {
121 SmallString<1024> AbsolutePath(getAbsolutePath(File: SourceDir));
122
123 std::unique_ptr<CompilationDatabase> DB =
124 findCompilationDatabaseFromDirectory(Directory: AbsolutePath, ErrorMessage);
125
126 if (!DB)
127 ErrorMessage = ("Could not auto-detect compilation database from directory \"" +
128 SourceDir + "\"\n" + ErrorMessage).str();
129 return DB;
130}
131
132std::vector<CompileCommand> CompilationDatabase::getAllCompileCommands() const {
133 std::vector<CompileCommand> Result;
134 for (const auto &File : getAllFiles()) {
135 auto C = getCompileCommands(FilePath: File);
136 std::move(first: C.begin(), last: C.end(), result: std::back_inserter(x&: Result));
137 }
138 return Result;
139}
140
// Defaulted destructor of the plugin interface, defined out of line in this
// file.
141CompilationDatabasePlugin::~CompilationDatabasePlugin() = default;
142
143namespace {
144
145// Helper for recursively searching through a chain of actions and collecting
146// all inputs, direct and indirect, of compile jobs.
147struct CompileJobAnalyzer {
148 SmallVector<std::string, 2> Inputs;
149
150 void run(const driver::Action *A) {
151 runImpl(A, Collect: false);
152 }
153
154private:
155 void runImpl(const driver::Action *A, bool Collect) {
156 bool CollectChildren = Collect;
157 switch (A->getKind()) {
158 case driver::Action::CompileJobClass:
159 case driver::Action::PrecompileJobClass:
160 CollectChildren = true;
161 break;
162
163 case driver::Action::InputClass:
164 if (Collect) {
165 const auto *IA = cast<driver::InputAction>(Val: A);
166 Inputs.push_back(Elt: std::string(IA->getInputArg().getSpelling()));
167 }
168 break;
169
170 default:
171 // Don't care about others
172 break;
173 }
174
175 for (const driver::Action *AI : A->inputs())
176 runImpl(A: AI, Collect: CollectChildren);
177 }
178};
179
180// Special DiagnosticConsumer that looks for warn_drv_input_file_unused
181// diagnostics from the driver and collects the option strings for those unused
182// options.
183class UnusedInputDiagConsumer : public DiagnosticConsumer {
184public:
185 UnusedInputDiagConsumer(DiagnosticConsumer &Other) : Other(Other) {}
186
187 void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
188 const Diagnostic &Info) override {
189 if (Info.getID() == diag::warn_drv_input_file_unused) {
190 // Arg 1 for this diagnostic is the option that didn't get used.
191 UnusedInputs.push_back(Elt: Info.getArgStdStr(Idx: 0));
192 } else if (DiagLevel >= DiagnosticsEngine::Error) {
193 // If driver failed to create compilation object, show the diagnostics
194 // to user.
195 Other.HandleDiagnostic(DiagLevel, Info);
196 }
197 }
198
199 DiagnosticConsumer &Other;
200 SmallVector<std::string, 2> UnusedInputs;
201};
202
203// Filter of tools unused flags such as -no-integrated-as and -Wa,*.
204// They are not used for syntax checking, and could confuse targets
205// which don't support these options.
206struct FilterUnusedFlags {
207 bool operator() (StringRef S) {
208 return (S == "-no-integrated-as") || S.starts_with(Prefix: "-Wa,");
209 }
210};
211
212std::string GetClangToolCommand() {
213 static int Dummy;
214 std::string ClangExecutable =
215 llvm::sys::fs::getMainExecutable(argv0: "clang", MainExecAddr: (void *)&Dummy);
216 SmallString<128> ClangToolPath;
217 ClangToolPath = llvm::sys::path::parent_path(path: ClangExecutable);
218 llvm::sys::path::append(path&: ClangToolPath, a: "clang-tool");
219 return std::string(ClangToolPath);
220}
221
222} // namespace
223
224/// Strips any positional args and possible argv[0] from a command-line
225/// provided by the user to construct a FixedCompilationDatabase.
226///
227/// FixedCompilationDatabase requires a command line to be in this format as it
228/// constructs the command line for each file by appending the name of the file
229/// to be compiled. FixedCompilationDatabase also adds its own argv[0] to the
230/// start of the command line although its value is not important as it's just
231/// ignored by the Driver invoked by the ClangTool using the
232/// FixedCompilationDatabase.
233///
234/// FIXME: This functionality should probably be made available by
235/// clang::driver::Driver although what the interface should look like is not
236/// clear.
237///
238/// \param[in] Args Args as provided by the user.
239/// \return Resulting stripped command line.
240/// \li true if successful.
241/// \li false if \c Args cannot be used for compilation jobs (e.g.
242/// contains an option like -E or -version).
243static bool stripPositionalArgs(std::vector<const char *> Args,
244 std::vector<std::string> &Result,
245 std::string &ErrorMsg) {
246 IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
247 llvm::raw_string_ostream Output(ErrorMsg);
248 TextDiagnosticPrinter DiagnosticPrinter(Output, &*DiagOpts);
249 UnusedInputDiagConsumer DiagClient(DiagnosticPrinter);
250 DiagnosticsEngine Diagnostics(
251 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()),
252 &*DiagOpts, &DiagClient, false);
253
254 // The clang executable path isn't required since the jobs the driver builds
255 // will not be executed.
256 std::unique_ptr<driver::Driver> NewDriver(new driver::Driver(
257 /* ClangExecutable= */ "", llvm::sys::getDefaultTargetTriple(),
258 Diagnostics));
259 NewDriver->setCheckInputsExist(false);
260
261 // This becomes the new argv[0]. The value is used to detect libc++ include
262 // dirs on Mac, it isn't used for other platforms.
263 std::string Argv0 = GetClangToolCommand();
264 Args.insert(position: Args.begin(), x: Argv0.c_str());
265
266 // By adding -c, we force the driver to treat compilation as the last phase.
267 // It will then issue warnings via Diagnostics about un-used options that
268 // would have been used for linking. If the user provided a compiler name as
269 // the original argv[0], this will be treated as a linker input thanks to
270 // insertng a new argv[0] above. All un-used options get collected by
271 // UnusedInputdiagConsumer and get stripped out later.
272 Args.push_back(x: "-c");
273
274 // Put a dummy C++ file on to ensure there's at least one compile job for the
275 // driver to construct. If the user specified some other argument that
276 // prevents compilation, e.g. -E or something like -version, we may still end
277 // up with no jobs but then this is the user's fault.
278 Args.push_back(x: "placeholder.cpp");
279
280 llvm::erase_if(C&: Args, P: FilterUnusedFlags());
281
282 const std::unique_ptr<driver::Compilation> Compilation(
283 NewDriver->BuildCompilation(Args));
284 if (!Compilation)
285 return false;
286
287 const driver::JobList &Jobs = Compilation->getJobs();
288
289 CompileJobAnalyzer CompileAnalyzer;
290
291 for (const auto &Cmd : Jobs) {
292 // Collect only for Assemble, Backend, and Compile jobs. If we do all jobs
293 // we get duplicates since Link jobs point to Assemble jobs as inputs.
294 // -flto* flags make the BackendJobClass, which still needs analyzer.
295 if (Cmd.getSource().getKind() == driver::Action::AssembleJobClass ||
296 Cmd.getSource().getKind() == driver::Action::BackendJobClass ||
297 Cmd.getSource().getKind() == driver::Action::CompileJobClass ||
298 Cmd.getSource().getKind() == driver::Action::PrecompileJobClass) {
299 CompileAnalyzer.run(A: &Cmd.getSource());
300 }
301 }
302
303 if (CompileAnalyzer.Inputs.empty()) {
304 ErrorMsg = "warning: no compile jobs found\n";
305 return false;
306 }
307
308 // Remove all compilation input files from the command line and inputs deemed
309 // unused for compilation. This is necessary so that getCompileCommands() can
310 // construct a command line for each file.
311 std::vector<const char *>::iterator End =
312 llvm::remove_if(Range&: Args, P: [&](StringRef S) {
313 return llvm::is_contained(Range&: CompileAnalyzer.Inputs, Element: S) ||
314 llvm::is_contained(Range&: DiagClient.UnusedInputs, Element: S);
315 });
316 // Remove the -c add above as well. It will be at the end right now.
317 assert(strcmp(*(End - 1), "-c") == 0);
318 --End;
319
320 Result = std::vector<std::string>(Args.begin() + 1, End);
321 return true;
322}
323
324std::unique_ptr<FixedCompilationDatabase>
325FixedCompilationDatabase::loadFromCommandLine(int &Argc,
326 const char *const *Argv,
327 std::string &ErrorMsg,
328 const Twine &Directory) {
329 ErrorMsg.clear();
330 if (Argc == 0)
331 return nullptr;
332 const char *const *DoubleDash = std::find(first: Argv, last: Argv + Argc, val: StringRef("--"));
333 if (DoubleDash == Argv + Argc)
334 return nullptr;
335 std::vector<const char *> CommandLine(DoubleDash + 1, Argv + Argc);
336 Argc = DoubleDash - Argv;
337
338 std::vector<std::string> StrippedArgs;
339 if (!stripPositionalArgs(Args: CommandLine, Result&: StrippedArgs, ErrorMsg))
340 return nullptr;
341 return std::make_unique<FixedCompilationDatabase>(args: Directory, args&: StrippedArgs);
342}
343
344std::unique_ptr<FixedCompilationDatabase>
345FixedCompilationDatabase::loadFromFile(StringRef Path, std::string &ErrorMsg) {
346 ErrorMsg.clear();
347 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> File =
348 llvm::MemoryBuffer::getFile(Filename: Path);
349 if (std::error_code Result = File.getError()) {
350 ErrorMsg = "Error while opening fixed database: " + Result.message();
351 return nullptr;
352 }
353 return loadFromBuffer(Directory: llvm::sys::path::parent_path(path: Path),
354 Data: (*File)->getBuffer(), ErrorMsg);
355}
356
357std::unique_ptr<FixedCompilationDatabase>
358FixedCompilationDatabase::loadFromBuffer(StringRef Directory, StringRef Data,
359 std::string &ErrorMsg) {
360 ErrorMsg.clear();
361 std::vector<std::string> Args;
362 StringRef Line;
363 while (!Data.empty()) {
364 std::tie(args&: Line, args&: Data) = Data.split(Separator: '\n');
365 // Stray whitespace is almost certainly unintended.
366 Line = Line.trim();
367 if (!Line.empty())
368 Args.push_back(x: Line.str());
369 }
370 return std::make_unique<FixedCompilationDatabase>(args&: Directory, args: std::move(Args));
371}
372
373FixedCompilationDatabase::FixedCompilationDatabase(
374 const Twine &Directory, ArrayRef<std::string> CommandLine) {
375 std::vector<std::string> ToolCommandLine(1, GetClangToolCommand());
376 ToolCommandLine.insert(position: ToolCommandLine.end(),
377 first: CommandLine.begin(), last: CommandLine.end());
378 CompileCommands.emplace_back(args: Directory, args: StringRef(),
379 args: std::move(ToolCommandLine),
380 args: StringRef());
381}
382
383std::vector<CompileCommand>
384FixedCompilationDatabase::getCompileCommands(StringRef FilePath) const {
385 std::vector<CompileCommand> Result(CompileCommands);
386 Result[0].CommandLine.push_back(x: std::string(FilePath));
387 Result[0].Filename = std::string(FilePath);
388 return Result;
389}
390
391namespace {
392
393class FixedCompilationDatabasePlugin : public CompilationDatabasePlugin {
394 std::unique_ptr<CompilationDatabase>
395 loadFromDirectory(StringRef Directory, std::string &ErrorMessage) override {
396 SmallString<1024> DatabasePath(Directory);
397 llvm::sys::path::append(path&: DatabasePath, a: "compile_flags.txt");
398 return FixedCompilationDatabase::loadFromFile(Path: DatabasePath, ErrorMsg&: ErrorMessage);
399 }
400};
401
402} // namespace
403
// File-scope registration object: adds FixedCompilationDatabasePlugin to the
// CompilationDatabasePluginRegistry under the name
// "fixed-compilation-database", with the description string given below.
404static CompilationDatabasePluginRegistry::Add<FixedCompilationDatabasePlugin>
405X("fixed-compilation-database", "Reads plain-text flags file");
406
407namespace clang {
408namespace tooling {
409
410// This anchor is used to force the linker to link in the generated object file
411// and thus register the JSONCompilationDatabasePlugin.
412extern volatile int JSONAnchorSource;
413static int LLVM_ATTRIBUTE_UNUSED JSONAnchorDest = JSONAnchorSource;
414
415} // namespace tooling
416} // namespace clang
417