| 1 | //===-- CodeGenData.cpp ---------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
// This file contains support for codegen data that has a stable summary,
// which can be used to optimize the code in subsequent codegen.
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "llvm/Bitcode/BitcodeWriter.h" |
| 15 | #include "llvm/CGData/CodeGenDataReader.h" |
| 16 | #include "llvm/CGData/OutlinedHashTreeRecord.h" |
| 17 | #include "llvm/CGData/StableFunctionMapRecord.h" |
| 18 | #include "llvm/Object/ObjectFile.h" |
| 19 | #include "llvm/Support/Caching.h" |
| 20 | #include "llvm/Support/CommandLine.h" |
| 21 | #include "llvm/Support/WithColor.h" |
| 22 | |
| 23 | #define DEBUG_TYPE "cg-data" |
| 24 | |
| 25 | using namespace llvm; |
| 26 | using namespace cgdata; |
| 27 | |
| 28 | static cl::opt<bool> |
| 29 | CodeGenDataGenerate("codegen-data-generate" , cl::init(Val: false), cl::Hidden, |
| 30 | cl::desc("Emit CodeGen Data into custom sections" )); |
| 31 | static cl::opt<std::string> |
| 32 | CodeGenDataUsePath("codegen-data-use-path" , cl::init(Val: "" ), cl::Hidden, |
| 33 | cl::desc("File path to where .cgdata file is read" )); |
| 34 | cl::opt<bool> CodeGenDataThinLTOTwoRounds( |
| 35 | "codegen-data-thinlto-two-rounds" , cl::init(Val: false), cl::Hidden, |
| 36 | cl::desc("Enable two-round ThinLTO code generation. The first round " |
| 37 | "emits codegen data, while the second round uses the emitted " |
| 38 | "codegen data for further optimizations." )); |
| 39 | |
| 40 | static std::string getCGDataErrString(cgdata_error Err, |
| 41 | const std::string &ErrMsg = "" ) { |
| 42 | std::string Msg; |
| 43 | raw_string_ostream OS(Msg); |
| 44 | |
| 45 | switch (Err) { |
| 46 | case cgdata_error::success: |
| 47 | OS << "success" ; |
| 48 | break; |
| 49 | case cgdata_error::eof: |
| 50 | OS << "end of File" ; |
| 51 | break; |
| 52 | case cgdata_error::bad_magic: |
| 53 | OS << "invalid codegen data (bad magic)" ; |
| 54 | break; |
| 55 | case cgdata_error::bad_header: |
| 56 | OS << "invalid codegen data (file header is corrupt)" ; |
| 57 | break; |
| 58 | case cgdata_error::empty_cgdata: |
| 59 | OS << "empty codegen data" ; |
| 60 | break; |
| 61 | case cgdata_error::malformed: |
| 62 | OS << "malformed codegen data" ; |
| 63 | break; |
| 64 | case cgdata_error::unsupported_version: |
| 65 | OS << "unsupported codegen data version" ; |
| 66 | break; |
| 67 | } |
| 68 | |
| 69 | // If optional error message is not empty, append it to the message. |
| 70 | if (!ErrMsg.empty()) |
| 71 | OS << ": " << ErrMsg; |
| 72 | |
| 73 | return OS.str(); |
| 74 | } |
| 75 | |
| 76 | namespace { |
| 77 | |
| 78 | // FIXME: This class is only here to support the transition to llvm::Error. It |
| 79 | // will be removed once this transition is complete. Clients should prefer to |
| 80 | // deal with the Error value directly, rather than converting to error_code. |
| 81 | class CGDataErrorCategoryType : public std::error_category { |
| 82 | const char *name() const noexcept override { return "llvm.cgdata" ; } |
| 83 | |
| 84 | std::string message(int IE) const override { |
| 85 | return getCGDataErrString(Err: static_cast<cgdata_error>(IE)); |
| 86 | } |
| 87 | }; |
| 88 | |
| 89 | } // end anonymous namespace |
| 90 | |
| 91 | const std::error_category &llvm::cgdata_category() { |
| 92 | static CGDataErrorCategoryType ErrorCategory; |
| 93 | return ErrorCategory; |
| 94 | } |
| 95 | |
| 96 | std::string CGDataError::message() const { |
| 97 | return getCGDataErrString(Err, ErrMsg: Msg); |
| 98 | } |
| 99 | |
| 100 | char CGDataError::ID = 0; |
| 101 | |
| 102 | namespace { |
| 103 | |
| 104 | const char *CodeGenDataSectNameCommon[] = { |
| 105 | #define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ |
| 106 | SectNameCommon, |
| 107 | #include "llvm/CGData/CodeGenData.inc" |
| 108 | }; |
| 109 | |
| 110 | const char *CodeGenDataSectNameCoff[] = { |
| 111 | #define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ |
| 112 | SectNameCoff, |
| 113 | #include "llvm/CGData/CodeGenData.inc" |
| 114 | }; |
| 115 | |
| 116 | const char *CodeGenDataSectNamePrefix[] = { |
| 117 | #define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Prefix, |
| 118 | #include "llvm/CGData/CodeGenData.inc" |
| 119 | }; |
| 120 | |
| 121 | } // namespace |
| 122 | |
| 123 | namespace llvm { |
| 124 | |
| 125 | std::string getCodeGenDataSectionName(CGDataSectKind CGSK, |
| 126 | Triple::ObjectFormatType OF, |
| 127 | bool AddSegmentInfo) { |
| 128 | std::string SectName; |
| 129 | |
| 130 | if (OF == Triple::MachO && AddSegmentInfo) |
| 131 | SectName = CodeGenDataSectNamePrefix[CGSK]; |
| 132 | |
| 133 | if (OF == Triple::COFF) |
| 134 | SectName += CodeGenDataSectNameCoff[CGSK]; |
| 135 | else |
| 136 | SectName += CodeGenDataSectNameCommon[CGSK]; |
| 137 | |
| 138 | return SectName; |
| 139 | } |
| 140 | |
| 141 | std::unique_ptr<CodeGenData> CodeGenData::Instance = nullptr; |
| 142 | std::once_flag CodeGenData::OnceFlag; |
| 143 | |
| 144 | CodeGenData &CodeGenData::getInstance() { |
| 145 | std::call_once(once&: CodeGenData::OnceFlag, f: []() { |
| 146 | Instance = std::unique_ptr<CodeGenData>(new CodeGenData()); |
| 147 | |
| 148 | if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds) |
| 149 | Instance->EmitCGData = true; |
| 150 | else if (!CodeGenDataUsePath.empty()) { |
| 151 | // Initialize the global CGData if the input file name is given. |
| 152 | // We do not error-out when failing to parse the input file. |
| 153 | // Instead, just emit an warning message and fall back as if no CGData |
| 154 | // were available. |
| 155 | auto FS = vfs::getRealFileSystem(); |
| 156 | auto ReaderOrErr = CodeGenDataReader::create(Path: CodeGenDataUsePath, FS&: *FS); |
| 157 | if (Error E = ReaderOrErr.takeError()) { |
| 158 | warn(E: std::move(E), Whence: CodeGenDataUsePath); |
| 159 | return; |
| 160 | } |
| 161 | // Publish each CGData based on the data type in the header. |
| 162 | auto Reader = ReaderOrErr->get(); |
| 163 | if (Reader->hasOutlinedHashTree()) |
| 164 | Instance->publishOutlinedHashTree(HashTree: Reader->releaseOutlinedHashTree()); |
| 165 | if (Reader->hasStableFunctionMap()) |
| 166 | Instance->publishStableFunctionMap(FunctionMap: Reader->releaseStableFunctionMap()); |
| 167 | } |
| 168 | }); |
| 169 | return *Instance; |
| 170 | } |
| 171 | |
| 172 | namespace IndexedCGData { |
| 173 | |
| 174 | Expected<Header> Header::(const unsigned char *Curr) { |
| 175 | using namespace support; |
| 176 | |
| 177 | static_assert(std::is_standard_layout_v<llvm::IndexedCGData::Header>, |
| 178 | "The header should be standard layout type since we use offset " |
| 179 | "of fields to read." ); |
| 180 | Header H; |
| 181 | H.Magic = endian::readNext<uint64_t, endianness::little, unaligned>(memory&: Curr); |
| 182 | if (H.Magic != IndexedCGData::Magic) |
| 183 | return make_error<CGDataError>(Args: cgdata_error::bad_magic); |
| 184 | H.Version = endian::readNext<uint32_t, endianness::little, unaligned>(memory&: Curr); |
| 185 | if (H.Version > IndexedCGData::CGDataVersion::CurrentVersion) |
| 186 | return make_error<CGDataError>(Args: cgdata_error::unsupported_version); |
| 187 | H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(memory&: Curr); |
| 188 | |
| 189 | static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version3, |
| 190 | "Please update the offset computation below if a new field has " |
| 191 | "been added to the header." ); |
| 192 | H.OutlinedHashTreeOffset = |
| 193 | endian::readNext<uint64_t, endianness::little, unaligned>(memory&: Curr); |
| 194 | if (H.Version >= 2) |
| 195 | H.StableFunctionMapOffset = |
| 196 | endian::readNext<uint64_t, endianness::little, unaligned>(memory&: Curr); |
| 197 | |
| 198 | return H; |
| 199 | } |
| 200 | |
| 201 | } // end namespace IndexedCGData |
| 202 | |
| 203 | namespace cgdata { |
| 204 | |
| 205 | void warn(Twine Message, StringRef Whence, StringRef Hint) { |
| 206 | WithColor::warning(); |
| 207 | if (!Whence.empty()) |
| 208 | errs() << Whence << ": " ; |
| 209 | errs() << Message << "\n" ; |
| 210 | if (!Hint.empty()) |
| 211 | WithColor::note() << Hint << "\n" ; |
| 212 | } |
| 213 | |
| 214 | void warn(Error E, StringRef Whence) { |
| 215 | if (E.isA<CGDataError>()) { |
| 216 | handleAllErrors(E: std::move(E), Handlers: [&](const CGDataError &IPE) { |
| 217 | warn(Message: IPE.message(), Whence, Hint: "" ); |
| 218 | }); |
| 219 | } |
| 220 | } |
| 221 | |
| 222 | void saveModuleForTwoRounds(const Module &TheModule, unsigned Task, |
| 223 | AddStreamFn AddStream) { |
| 224 | LLVM_DEBUG(dbgs() << "Saving module: " << TheModule.getModuleIdentifier() |
| 225 | << " in Task " << Task << "\n" ); |
| 226 | Expected<std::unique_ptr<CachedFileStream>> StreamOrErr = |
| 227 | AddStream(Task, TheModule.getModuleIdentifier()); |
| 228 | if (Error Err = StreamOrErr.takeError()) |
| 229 | report_fatal_error(Err: std::move(Err)); |
| 230 | std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr; |
| 231 | |
| 232 | WriteBitcodeToFile(M: TheModule, Out&: *Stream->OS, |
| 233 | /*ShouldPreserveUseListOrder=*/true); |
| 234 | |
| 235 | if (Error Err = Stream->commit()) |
| 236 | report_fatal_error(Err: std::move(Err)); |
| 237 | } |
| 238 | |
| 239 | std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule, |
| 240 | unsigned Task, |
| 241 | LLVMContext &Context, |
| 242 | ArrayRef<StringRef> IRFiles) { |
| 243 | LLVM_DEBUG(dbgs() << "Loading module: " << OrigModule.getModuleIdentifier() |
| 244 | << " in Task " << Task << "\n" ); |
| 245 | auto FileBuffer = MemoryBuffer::getMemBuffer( |
| 246 | InputData: IRFiles[Task], BufferName: "in-memory IR file" , /*RequiresNullTerminator=*/false); |
| 247 | auto RestoredModule = parseBitcodeFile(Buffer: *FileBuffer, Context); |
| 248 | if (!RestoredModule) |
| 249 | report_fatal_error( |
| 250 | reason: Twine("Failed to parse optimized bitcode loaded for Task: " ) + |
| 251 | Twine(Task) + "\n" ); |
| 252 | |
| 253 | // Restore the original module identifier. |
| 254 | (*RestoredModule)->setModuleIdentifier(OrigModule.getModuleIdentifier()); |
| 255 | return std::move(*RestoredModule); |
| 256 | } |
| 257 | |
| 258 | Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) { |
| 259 | OutlinedHashTreeRecord GlobalOutlineRecord; |
| 260 | StableFunctionMapRecord GlobalStableFunctionMapRecord; |
| 261 | stable_hash CombinedHash = 0; |
| 262 | for (auto File : ObjFiles) { |
| 263 | if (File.empty()) |
| 264 | continue; |
| 265 | std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer( |
| 266 | InputData: File, BufferName: "in-memory object file" , /*RequiresNullTerminator=*/false); |
| 267 | Expected<std::unique_ptr<object::ObjectFile>> BinOrErr = |
| 268 | object::ObjectFile::createObjectFile(Object: Buffer->getMemBufferRef()); |
| 269 | if (!BinOrErr) |
| 270 | return BinOrErr.takeError(); |
| 271 | |
| 272 | std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get(); |
| 273 | if (auto E = CodeGenDataReader::mergeFromObjectFile( |
| 274 | Obj: Obj.get(), GlobalOutlineRecord, GlobalFunctionMapRecord&: GlobalStableFunctionMapRecord, |
| 275 | CombinedHash: &CombinedHash)) |
| 276 | return E; |
| 277 | } |
| 278 | |
| 279 | GlobalStableFunctionMapRecord.finalize(); |
| 280 | |
| 281 | if (!GlobalOutlineRecord.empty()) |
| 282 | cgdata::publishOutlinedHashTree(HashTree: std::move(GlobalOutlineRecord.HashTree)); |
| 283 | if (!GlobalStableFunctionMapRecord.empty()) |
| 284 | cgdata::publishStableFunctionMap( |
| 285 | FunctionMap: std::move(GlobalStableFunctionMapRecord.FunctionMap)); |
| 286 | |
| 287 | return CombinedHash; |
| 288 | } |
| 289 | |
| 290 | } // end namespace cgdata |
| 291 | |
| 292 | } // end namespace llvm |
| 293 | |