1//===-- llvm-cgdata.cpp - LLVM CodeGen Data Tool --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// llvm-cgdata parses raw codegen data embedded in compiled binary files, and
// merges them into a single .cgdata file. It can also inspect and manipulate
11// a .cgdata file. This .cgdata can contain various codegen data like outlining
12// information, and it can be used to optimize the code in the subsequent build.
13//
14//===----------------------------------------------------------------------===//
15#include "llvm/ADT/StringRef.h"
16#include "llvm/CGData/CodeGenDataReader.h"
17#include "llvm/CGData/CodeGenDataWriter.h"
18#include "llvm/IR/LLVMContext.h"
19#include "llvm/Object/Archive.h"
20#include "llvm/Object/Binary.h"
21#include "llvm/Option/ArgList.h"
22#include "llvm/Option/Option.h"
23#include "llvm/Support/CommandLine.h"
24#include "llvm/Support/LLVMDriver.h"
25#include "llvm/Support/Path.h"
26#include "llvm/Support/VirtualFileSystem.h"
27#include "llvm/Support/WithColor.h"
28#include "llvm/Support/raw_ostream.h"
29
30using namespace llvm;
31using namespace llvm::object;
32
/// On-disk representation requested for the emitted codegen data.
enum CGDataFormat {
  /// Placeholder used when a format name given on the command line is not
  /// recognized.
  Invalid = 0,
  /// Human-readable textual form.
  Text = 1,
  /// Binary form.
  Binary = 2,
};
38
/// Top-level operation the tool performs; exactly one is selected per run.
enum CGDataAction {
  /// Re-emit a single .cgdata file in the requested format.
  Convert = 0,
  /// Combine codegen data from the given inputs into one output.
  Merge = 1,
  /// Print a summary of a single .cgdata file.
  Show = 2,
};
44
45// Command-line option boilerplate.
46namespace {
47enum ID {
48 OPT_INVALID = 0, // This is not an option ID.
49#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
50#include "Opts.inc"
51#undef OPTION
52};
53
54#define OPTTABLE_STR_TABLE_CODE
55#include "Opts.inc"
56#undef OPTTABLE_STR_TABLE_CODE
57
58#define OPTTABLE_PREFIXES_TABLE_CODE
59#include "Opts.inc"
60#undef OPTTABLE_PREFIXES_TABLE_CODE
61
62using namespace llvm::opt;
63static constexpr opt::OptTable::Info InfoTable[] = {
64#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
65#include "Opts.inc"
66#undef OPTION
67};
68
69class CGDataOptTable : public opt::GenericOptTable {
70public:
71 CGDataOptTable()
72 : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {}
73};
74} // end anonymous namespace
75
76// Options
77static StringRef ToolName;
78static std::string OutputFilename = "-";
79static std::string Filename;
80static bool ShowCGDataVersion;
81static bool SkipTrim;
82static CGDataAction Action;
83static std::optional<CGDataFormat> OutputFormat;
84static std::vector<std::string> InputFilenames;
85
86static void exitWithError(Twine Message, StringRef Whence = "",
87 StringRef Hint = "") {
88 WithColor::error();
89 if (!Whence.empty())
90 errs() << Whence << ": ";
91 errs() << Message << "\n";
92 if (!Hint.empty())
93 WithColor::note() << Hint << "\n";
94 ::exit(status: 1);
95}
96
97static void exitWithError(Error E, StringRef Whence = "") {
98 if (E.isA<CGDataError>()) {
99 handleAllErrors(E: std::move(E), Handlers: [&](const CGDataError &IPE) {
100 exitWithError(Message: IPE.message(), Whence);
101 });
102 return;
103 }
104
105 exitWithError(Message: toString(E: std::move(E)), Whence);
106}
107
108static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
109 exitWithError(Message: EC.message(), Whence);
110}
111
112static int convert_main(int argc, const char *argv[]) {
113 std::error_code EC;
114 raw_fd_ostream OS(OutputFilename, EC,
115 OutputFormat == CGDataFormat::Text
116 ? sys::fs::OF_TextWithCRLF
117 : sys::fs::OF_None);
118 if (EC)
119 exitWithErrorCode(EC, Whence: OutputFilename);
120
121 auto FS = vfs::getRealFileSystem();
122 auto ReaderOrErr = CodeGenDataReader::create(Path: Filename, FS&: *FS);
123 if (Error E = ReaderOrErr.takeError())
124 exitWithError(E: std::move(E), Whence: Filename);
125
126 CodeGenDataWriter Writer;
127 auto Reader = ReaderOrErr->get();
128 if (Reader->hasOutlinedHashTree()) {
129 OutlinedHashTreeRecord Record(Reader->releaseOutlinedHashTree());
130 Writer.addRecord(Record);
131 }
132 if (Reader->hasStableFunctionMap()) {
133 StableFunctionMapRecord Record(Reader->releaseStableFunctionMap());
134 Writer.addRecord(Record);
135 }
136
137 if (OutputFormat == CGDataFormat::Text) {
138 if (Error E = Writer.writeText(OS))
139 exitWithError(E: std::move(E));
140 } else {
141 if (Error E = Writer.write(OS))
142 exitWithError(E: std::move(E));
143 }
144
145 return 0;
146}
147
148static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
149 OutlinedHashTreeRecord &GlobalOutlineRecord,
150 StableFunctionMapRecord &GlobalFunctionMapRecord);
151
152static bool handleArchive(StringRef Filename, Archive &Arch,
153 OutlinedHashTreeRecord &GlobalOutlineRecord,
154 StableFunctionMapRecord &GlobalFunctionMapRecord) {
155 bool Result = true;
156 Error Err = Error::success();
157 for (const auto &Child : Arch.children(Err)) {
158 auto BuffOrErr = Child.getMemoryBufferRef();
159 if (Error E = BuffOrErr.takeError())
160 exitWithError(E: std::move(E), Whence: Filename);
161 auto NameOrErr = Child.getName();
162 if (Error E = NameOrErr.takeError())
163 exitWithError(E: std::move(E), Whence: Filename);
164 std::string Name = (Filename + "(" + NameOrErr.get() + ")").str();
165 Result &= handleBuffer(Filename: Name, Buffer: BuffOrErr.get(), GlobalOutlineRecord,
166 GlobalFunctionMapRecord);
167 }
168 if (Err)
169 exitWithError(E: std::move(Err), Whence: Filename);
170 return Result;
171}
172
173static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
174 OutlinedHashTreeRecord &GlobalOutlineRecord,
175 StableFunctionMapRecord &GlobalFunctionMapRecord) {
176 Expected<std::unique_ptr<object::Binary>> BinOrErr =
177 object::createBinary(Source: Buffer);
178 if (Error E = BinOrErr.takeError())
179 exitWithError(E: std::move(E), Whence: Filename);
180
181 bool Result = true;
182 if (auto *Obj = dyn_cast<ObjectFile>(Val: BinOrErr->get())) {
183 if (Error E = CodeGenDataReader::mergeFromObjectFile(
184 Obj, GlobalOutlineRecord, GlobalFunctionMapRecord))
185 exitWithError(E: std::move(E), Whence: Filename);
186 } else if (auto *Arch = dyn_cast<Archive>(Val: BinOrErr->get())) {
187 Result &= handleArchive(Filename, Arch&: *Arch, GlobalOutlineRecord,
188 GlobalFunctionMapRecord);
189 } else {
190 // TODO: Support for the MachO universal binary format.
191 errs() << "Error: unsupported binary file: " << Filename << "\n";
192 Result = false;
193 }
194
195 return Result;
196}
197
198static bool handleFile(StringRef Filename,
199 OutlinedHashTreeRecord &GlobalOutlineRecord,
200 StableFunctionMapRecord &GlobalFunctionMapRecord) {
201 ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
202 MemoryBuffer::getFileOrSTDIN(Filename);
203 if (std::error_code EC = BuffOrErr.getError())
204 exitWithErrorCode(EC, Whence: Filename);
205 return handleBuffer(Filename, Buffer: *BuffOrErr.get(), GlobalOutlineRecord,
206 GlobalFunctionMapRecord);
207}
208
209static int merge_main(int argc, const char *argv[]) {
210 bool Result = true;
211 OutlinedHashTreeRecord GlobalOutlineRecord;
212 StableFunctionMapRecord GlobalFunctionMapRecord;
213 for (auto &Filename : InputFilenames)
214 Result &=
215 handleFile(Filename, GlobalOutlineRecord, GlobalFunctionMapRecord);
216
217 if (!Result)
218 exitWithError(Message: "failed to merge codegen data files.");
219
220 GlobalFunctionMapRecord.finalize(SkipTrim);
221
222 CodeGenDataWriter Writer;
223 if (!GlobalOutlineRecord.empty())
224 Writer.addRecord(Record&: GlobalOutlineRecord);
225 if (!GlobalFunctionMapRecord.empty())
226 Writer.addRecord(Record&: GlobalFunctionMapRecord);
227
228 std::error_code EC;
229 raw_fd_ostream OS(OutputFilename, EC,
230 OutputFormat == CGDataFormat::Text
231 ? sys::fs::OF_TextWithCRLF
232 : sys::fs::OF_None);
233 if (EC)
234 exitWithErrorCode(EC, Whence: OutputFilename);
235
236 if (OutputFormat == CGDataFormat::Text) {
237 if (Error E = Writer.writeText(OS))
238 exitWithError(E: std::move(E));
239 } else {
240 if (Error E = Writer.write(OS))
241 exitWithError(E: std::move(E));
242 }
243
244 return 0;
245}
246
247static int show_main(int argc, const char *argv[]) {
248 std::error_code EC;
249 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
250 if (EC)
251 exitWithErrorCode(EC, Whence: OutputFilename);
252
253 auto FS = vfs::getRealFileSystem();
254 auto ReaderOrErr = CodeGenDataReader::create(Path: Filename, FS&: *FS);
255 if (Error E = ReaderOrErr.takeError())
256 exitWithError(E: std::move(E), Whence: Filename);
257
258 auto Reader = ReaderOrErr->get();
259 if (ShowCGDataVersion)
260 OS << "Version: " << Reader->getVersion() << "\n";
261
262 if (Reader->hasOutlinedHashTree()) {
263 auto Tree = Reader->releaseOutlinedHashTree();
264 OS << "Outlined hash tree:\n";
265 OS << " Total Node Count: " << Tree->size() << "\n";
266 OS << " Terminal Node Count: " << Tree->size(/*GetTerminalCountOnly=*/true)
267 << "\n";
268 OS << " Depth: " << Tree->depth() << "\n";
269 }
270 if (Reader->hasStableFunctionMap()) {
271 auto Map = Reader->releaseStableFunctionMap();
272 OS << "Stable function map:\n";
273 OS << " Unique hash Count: " << Map->size() << "\n";
274 OS << " Total function Count: "
275 << Map->size(Type: StableFunctionMap::TotalFunctionCount) << "\n";
276 OS << " Mergeable function Count: "
277 << Map->size(Type: StableFunctionMap::MergeableFunctionCount) << "\n";
278 }
279
280 return 0;
281}
282
283static void parseArgs(int argc, char **argv) {
284 CGDataOptTable Tbl;
285 ToolName = argv[0];
286 llvm::BumpPtrAllocator A;
287 llvm::StringSaver Saver{A};
288 llvm::opt::InputArgList Args =
289 Tbl.parseArgs(Argc: argc, Argv: argv, Unknown: OPT_UNKNOWN, Saver, ErrorFn: [&](StringRef Msg) {
290 llvm::errs() << Msg << '\n';
291 std::exit(status: 1);
292 });
293
294 if (Args.hasArg(Ids: OPT_help)) {
295 Tbl.printHelp(
296 OS&: llvm::outs(),
297 Usage: "llvm-cgdata <action> [options] (<binary files>|<.cgdata file>)",
298 Title: ToolName.str().c_str());
299 std::exit(status: 0);
300 }
301 if (Args.hasArg(Ids: OPT_version)) {
302 cl::PrintVersionMessage();
303 std::exit(status: 0);
304 }
305
306 ShowCGDataVersion = Args.hasArg(Ids: OPT_cgdata_version);
307 SkipTrim = Args.hasArg(Ids: OPT_skip_trim);
308
309 if (opt::Arg *A = Args.getLastArg(Ids: OPT_format)) {
310 StringRef OF = A->getValue();
311 OutputFormat = StringSwitch<CGDataFormat>(OF)
312 .Case(S: "text", Value: CGDataFormat::Text)
313 .Case(S: "binary", Value: CGDataFormat::Binary)
314 .Default(Value: CGDataFormat::Invalid);
315 if (OutputFormat == CGDataFormat::Invalid)
316 exitWithError(Message: "unsupported format '" + OF + "'");
317 }
318
319 InputFilenames = Args.getAllArgValues(Id: OPT_INPUT);
320 if (InputFilenames.empty())
321 exitWithError(Message: "No input file is specified.");
322 Filename = InputFilenames[0];
323
324 if (Args.hasArg(Ids: OPT_output)) {
325 OutputFilename = Args.getLastArgValue(Id: OPT_output);
326 for (auto &Filename : InputFilenames)
327 if (Filename == OutputFilename)
328 exitWithError(
329 Message: "Input file name cannot be the same as the output file name!\n");
330 }
331
332 opt::Arg *ActionArg = nullptr;
333 for (opt::Arg *Arg : Args.filtered(Ids: OPT_action_group)) {
334 if (ActionArg)
335 exitWithError(Message: "Only one action is allowed.");
336 ActionArg = Arg;
337 }
338 if (!ActionArg)
339 exitWithError(Message: "One action is required.");
340
341 switch (ActionArg->getOption().getID()) {
342 case OPT_show:
343 if (InputFilenames.size() != 1)
344 exitWithError(Message: "only one input file is allowed.");
345 Action = CGDataAction::Show;
346 break;
347 case OPT_convert:
348 // The default output format is text for convert.
349 if (!OutputFormat)
350 OutputFormat = CGDataFormat::Text;
351 if (InputFilenames.size() != 1)
352 exitWithError(Message: "only one input file is allowed.");
353 Action = CGDataAction::Convert;
354 break;
355 case OPT_merge:
356 // The default output format is binary for merge.
357 if (!OutputFormat)
358 OutputFormat = CGDataFormat::Binary;
359 Action = CGDataAction::Merge;
360 break;
361 default:
362 llvm_unreachable("unrecognized action");
363 }
364}
365
366int llvm_cgdata_main(int argc, char **argvNonConst, const llvm::ToolContext &) {
367 const char **argv = const_cast<const char **>(argvNonConst);
368 parseArgs(argc, argv: argvNonConst);
369
370 switch (Action) {
371 case CGDataAction::Convert:
372 return convert_main(argc, argv);
373 case CGDataAction::Merge:
374 return merge_main(argc, argv);
375 case CGDataAction::Show:
376 return show_main(argc, argv);
377 }
378
379 llvm_unreachable("unrecognized action");
380}
381