1//===-- CodeGenData.cpp ---------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This file contains support for codegen data, which has a stable summary
// that can be used to optimize the code in subsequent codegen.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Bitcode/BitcodeWriter.h"
15#include "llvm/CGData/CodeGenDataReader.h"
16#include "llvm/CGData/OutlinedHashTreeRecord.h"
17#include "llvm/CGData/StableFunctionMapRecord.h"
18#include "llvm/Object/ObjectFile.h"
19#include "llvm/Support/Caching.h"
20#include "llvm/Support/CommandLine.h"
21#include "llvm/Support/WithColor.h"
22
23#define DEBUG_TYPE "cg-data"
24
25using namespace llvm;
26using namespace cgdata;
27
28static cl::opt<bool>
29 CodeGenDataGenerate("codegen-data-generate", cl::init(Val: false), cl::Hidden,
30 cl::desc("Emit CodeGen Data into custom sections"));
31static cl::opt<std::string>
32 CodeGenDataUsePath("codegen-data-use-path", cl::init(Val: ""), cl::Hidden,
33 cl::desc("File path to where .cgdata file is read"));
34
35namespace llvm {
36cl::opt<bool> CodeGenDataThinLTOTwoRounds(
37 "codegen-data-thinlto-two-rounds", cl::init(Val: false), cl::Hidden,
38 cl::desc("Enable two-round ThinLTO code generation. The first round "
39 "emits codegen data, while the second round uses the emitted "
40 "codegen data for further optimizations."));
41} // end namespace llvm
42
43static std::string getCGDataErrString(cgdata_error Err,
44 const std::string &ErrMsg = "") {
45 std::string Msg;
46 raw_string_ostream OS(Msg);
47
48 switch (Err) {
49 case cgdata_error::success:
50 OS << "success";
51 break;
52 case cgdata_error::eof:
53 OS << "end of File";
54 break;
55 case cgdata_error::bad_magic:
56 OS << "invalid codegen data (bad magic)";
57 break;
58 case cgdata_error::bad_header:
59 OS << "invalid codegen data (file header is corrupt)";
60 break;
61 case cgdata_error::empty_cgdata:
62 OS << "empty codegen data";
63 break;
64 case cgdata_error::malformed:
65 OS << "malformed codegen data";
66 break;
67 case cgdata_error::unsupported_version:
68 OS << "unsupported codegen data version";
69 break;
70 }
71
72 // If optional error message is not empty, append it to the message.
73 if (!ErrMsg.empty())
74 OS << ": " << ErrMsg;
75
76 return OS.str();
77}
78
79namespace {
80
81// FIXME: This class is only here to support the transition to llvm::Error. It
82// will be removed once this transition is complete. Clients should prefer to
83// deal with the Error value directly, rather than converting to error_code.
84class CGDataErrorCategoryType : public std::error_category {
85 const char *name() const noexcept override { return "llvm.cgdata"; }
86
87 std::string message(int IE) const override {
88 return getCGDataErrString(Err: static_cast<cgdata_error>(IE));
89 }
90};
91
92} // end anonymous namespace
93
94const std::error_category &llvm::cgdata_category() {
95 static CGDataErrorCategoryType ErrorCategory;
96 return ErrorCategory;
97}
98
99std::string CGDataError::message() const {
100 return getCGDataErrString(Err, ErrMsg: Msg);
101}
102
103char CGDataError::ID = 0;
104
105namespace {
106
107const char *CodeGenDataSectNameCommon[] = {
108#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
109 SectNameCommon,
110#include "llvm/CGData/CodeGenData.inc"
111};
112
113const char *CodeGenDataSectNameCoff[] = {
114#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
115 SectNameCoff,
116#include "llvm/CGData/CodeGenData.inc"
117};
118
119const char *CodeGenDataSectNamePrefix[] = {
120#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Prefix,
121#include "llvm/CGData/CodeGenData.inc"
122};
123
124} // namespace
125
126namespace llvm {
127
128std::string getCodeGenDataSectionName(CGDataSectKind CGSK,
129 Triple::ObjectFormatType OF,
130 bool AddSegmentInfo) {
131 std::string SectName;
132
133 if (OF == Triple::MachO && AddSegmentInfo)
134 SectName = CodeGenDataSectNamePrefix[CGSK];
135
136 if (OF == Triple::COFF)
137 SectName += CodeGenDataSectNameCoff[CGSK];
138 else
139 SectName += CodeGenDataSectNameCommon[CGSK];
140
141 return SectName;
142}
143
144std::unique_ptr<CodeGenData> CodeGenData::Instance = nullptr;
145std::once_flag CodeGenData::OnceFlag;
146
147CodeGenData &CodeGenData::getInstance() {
148 std::call_once(once&: CodeGenData::OnceFlag, f: []() {
149 Instance = std::unique_ptr<CodeGenData>(new CodeGenData());
150
151 if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds)
152 Instance->EmitCGData = true;
153 else if (!CodeGenDataUsePath.empty()) {
154 // Initialize the global CGData if the input file name is given.
155 // We do not error-out when failing to parse the input file.
156 // Instead, just emit an warning message and fall back as if no CGData
157 // were available.
158 auto FS = vfs::getRealFileSystem();
159 auto ReaderOrErr = CodeGenDataReader::create(Path: CodeGenDataUsePath, FS&: *FS);
160 if (Error E = ReaderOrErr.takeError()) {
161 warn(E: std::move(E), Whence: CodeGenDataUsePath);
162 return;
163 }
164 // Publish each CGData based on the data type in the header.
165 auto Reader = ReaderOrErr->get();
166 if (Reader->hasOutlinedHashTree())
167 Instance->publishOutlinedHashTree(HashTree: Reader->releaseOutlinedHashTree());
168 if (Reader->hasStableFunctionMap())
169 Instance->publishStableFunctionMap(FunctionMap: Reader->releaseStableFunctionMap());
170 }
171 });
172 return *Instance;
173}
174
175namespace IndexedCGData {
176
177Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
178 using namespace support;
179
180 static_assert(std::is_standard_layout_v<llvm::IndexedCGData::Header>,
181 "The header should be standard layout type since we use offset "
182 "of fields to read.");
183 Header H;
184 H.Magic = endian::readNext<uint64_t, endianness::little, unaligned>(memory&: Curr);
185 if (H.Magic != IndexedCGData::Magic)
186 return make_error<CGDataError>(Args: cgdata_error::bad_magic);
187 H.Version = endian::readNext<uint32_t, endianness::little, unaligned>(memory&: Curr);
188 if (H.Version > IndexedCGData::CGDataVersion::CurrentVersion)
189 return make_error<CGDataError>(Args: cgdata_error::unsupported_version);
190 H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(memory&: Curr);
191
192 static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version4,
193 "Please update the offset computation below if a new field has "
194 "been added to the header.");
195 H.OutlinedHashTreeOffset =
196 endian::readNext<uint64_t, endianness::little, unaligned>(memory&: Curr);
197 if (H.Version >= 2)
198 H.StableFunctionMapOffset =
199 endian::readNext<uint64_t, endianness::little, unaligned>(memory&: Curr);
200
201 return H;
202}
203
204} // end namespace IndexedCGData
205
206namespace cgdata {
207
208void warn(Twine Message, StringRef Whence, StringRef Hint) {
209 WithColor::warning();
210 if (!Whence.empty())
211 errs() << Whence << ": ";
212 errs() << Message << "\n";
213 if (!Hint.empty())
214 WithColor::note() << Hint << "\n";
215}
216
217void warn(Error E, StringRef Whence) {
218 if (E.isA<CGDataError>()) {
219 handleAllErrors(E: std::move(E), Handlers: [&](const CGDataError &IPE) {
220 warn(Message: IPE.message(), Whence, Hint: "");
221 });
222 }
223}
224
225void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
226 AddStreamFn AddStream) {
227 LLVM_DEBUG(dbgs() << "Saving module: " << TheModule.getModuleIdentifier()
228 << " in Task " << Task << "\n");
229 Expected<std::unique_ptr<CachedFileStream>> StreamOrErr =
230 AddStream(Task, TheModule.getModuleIdentifier());
231 if (Error Err = StreamOrErr.takeError())
232 report_fatal_error(Err: std::move(Err));
233 std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr;
234
235 WriteBitcodeToFile(M: TheModule, Out&: *Stream->OS,
236 /*ShouldPreserveUseListOrder=*/true);
237
238 if (Error Err = Stream->commit())
239 report_fatal_error(Err: std::move(Err));
240}
241
242std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
243 unsigned Task,
244 LLVMContext &Context,
245 ArrayRef<StringRef> IRFiles) {
246 LLVM_DEBUG(dbgs() << "Loading module: " << OrigModule.getModuleIdentifier()
247 << " in Task " << Task << "\n");
248 auto FileBuffer = MemoryBuffer::getMemBuffer(
249 InputData: IRFiles[Task], BufferName: "in-memory IR file", /*RequiresNullTerminator=*/false);
250 auto RestoredModule = parseBitcodeFile(Buffer: *FileBuffer, Context);
251 if (!RestoredModule)
252 report_fatal_error(
253 reason: Twine("Failed to parse optimized bitcode loaded for Task: ") +
254 Twine(Task) + "\n");
255
256 // Restore the original module identifier.
257 (*RestoredModule)->setModuleIdentifier(OrigModule.getModuleIdentifier());
258 return std::move(*RestoredModule);
259}
260
261Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
262 OutlinedHashTreeRecord GlobalOutlineRecord;
263 StableFunctionMapRecord GlobalStableFunctionMapRecord;
264 stable_hash CombinedHash = 0;
265 for (auto File : ObjFiles) {
266 if (File.empty())
267 continue;
268 std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
269 InputData: File, BufferName: "in-memory object file", /*RequiresNullTerminator=*/false);
270 Expected<std::unique_ptr<object::ObjectFile>> BinOrErr =
271 object::ObjectFile::createObjectFile(Object: Buffer->getMemBufferRef());
272 if (!BinOrErr)
273 return BinOrErr.takeError();
274
275 std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get();
276 if (auto E = CodeGenDataReader::mergeFromObjectFile(
277 Obj: Obj.get(), GlobalOutlineRecord, GlobalFunctionMapRecord&: GlobalStableFunctionMapRecord,
278 CombinedHash: &CombinedHash))
279 return E;
280 }
281
282 GlobalStableFunctionMapRecord.finalize();
283
284 if (!GlobalOutlineRecord.empty())
285 cgdata::publishOutlinedHashTree(HashTree: std::move(GlobalOutlineRecord.HashTree));
286 if (!GlobalStableFunctionMapRecord.empty())
287 cgdata::publishStableFunctionMap(
288 FunctionMap: std::move(GlobalStableFunctionMapRecord.FunctionMap));
289
290 return CombinedHash;
291}
292
293} // end namespace cgdata
294
295} // end namespace llvm
296