//===-- CodeGenData.cpp ---------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for codegen data, which has a stable summary that
// can be used to optimize the code in the subsequent codegen.
//
//===----------------------------------------------------------------------===//
| 13 | |
| 14 | #include "llvm/Bitcode/BitcodeWriter.h" |
| 15 | #include "llvm/CGData/CodeGenDataReader.h" |
| 16 | #include "llvm/CGData/OutlinedHashTreeRecord.h" |
| 17 | #include "llvm/CGData/StableFunctionMapRecord.h" |
| 18 | #include "llvm/Object/ObjectFile.h" |
| 19 | #include "llvm/Support/Caching.h" |
| 20 | #include "llvm/Support/CommandLine.h" |
| 21 | #include "llvm/Support/WithColor.h" |
| 22 | |
| 23 | #define DEBUG_TYPE "cg-data" |
| 24 | |
| 25 | using namespace llvm; |
| 26 | using namespace cgdata; |
| 27 | |
| 28 | static cl::opt<bool> |
| 29 | CodeGenDataGenerate("codegen-data-generate" , cl::init(Val: false), cl::Hidden, |
| 30 | cl::desc("Emit CodeGen Data into custom sections" )); |
| 31 | static cl::opt<std::string> |
| 32 | CodeGenDataUsePath("codegen-data-use-path" , cl::init(Val: "" ), cl::Hidden, |
| 33 | cl::desc("File path to where .cgdata file is read" )); |
| 34 | |
| 35 | namespace llvm { |
| 36 | cl::opt<bool> CodeGenDataThinLTOTwoRounds( |
| 37 | "codegen-data-thinlto-two-rounds" , cl::init(Val: false), cl::Hidden, |
| 38 | cl::desc("Enable two-round ThinLTO code generation. The first round " |
| 39 | "emits codegen data, while the second round uses the emitted " |
| 40 | "codegen data for further optimizations." )); |
| 41 | } // end namespace llvm |
| 42 | |
| 43 | static std::string getCGDataErrString(cgdata_error Err, |
| 44 | const std::string &ErrMsg = "" ) { |
| 45 | std::string Msg; |
| 46 | raw_string_ostream OS(Msg); |
| 47 | |
| 48 | switch (Err) { |
| 49 | case cgdata_error::success: |
| 50 | OS << "success" ; |
| 51 | break; |
| 52 | case cgdata_error::eof: |
| 53 | OS << "end of File" ; |
| 54 | break; |
| 55 | case cgdata_error::bad_magic: |
| 56 | OS << "invalid codegen data (bad magic)" ; |
| 57 | break; |
| 58 | case cgdata_error::bad_header: |
| 59 | OS << "invalid codegen data (file header is corrupt)" ; |
| 60 | break; |
| 61 | case cgdata_error::empty_cgdata: |
| 62 | OS << "empty codegen data" ; |
| 63 | break; |
| 64 | case cgdata_error::malformed: |
| 65 | OS << "malformed codegen data" ; |
| 66 | break; |
| 67 | case cgdata_error::unsupported_version: |
| 68 | OS << "unsupported codegen data version" ; |
| 69 | break; |
| 70 | } |
| 71 | |
| 72 | // If optional error message is not empty, append it to the message. |
| 73 | if (!ErrMsg.empty()) |
| 74 | OS << ": " << ErrMsg; |
| 75 | |
| 76 | return OS.str(); |
| 77 | } |
| 78 | |
| 79 | namespace { |
| 80 | |
| 81 | // FIXME: This class is only here to support the transition to llvm::Error. It |
| 82 | // will be removed once this transition is complete. Clients should prefer to |
| 83 | // deal with the Error value directly, rather than converting to error_code. |
| 84 | class CGDataErrorCategoryType : public std::error_category { |
| 85 | const char *name() const noexcept override { return "llvm.cgdata" ; } |
| 86 | |
| 87 | std::string message(int IE) const override { |
| 88 | return getCGDataErrString(Err: static_cast<cgdata_error>(IE)); |
| 89 | } |
| 90 | }; |
| 91 | |
| 92 | } // end anonymous namespace |
| 93 | |
| 94 | const std::error_category &llvm::cgdata_category() { |
| 95 | static CGDataErrorCategoryType ErrorCategory; |
| 96 | return ErrorCategory; |
| 97 | } |
| 98 | |
| 99 | std::string CGDataError::message() const { |
| 100 | return getCGDataErrString(Err, ErrMsg: Msg); |
| 101 | } |
| 102 | |
| 103 | char CGDataError::ID = 0; |
| 104 | |
| 105 | namespace { |
| 106 | |
| 107 | const char *CodeGenDataSectNameCommon[] = { |
| 108 | #define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ |
| 109 | SectNameCommon, |
| 110 | #include "llvm/CGData/CodeGenData.inc" |
| 111 | }; |
| 112 | |
| 113 | const char *CodeGenDataSectNameCoff[] = { |
| 114 | #define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ |
| 115 | SectNameCoff, |
| 116 | #include "llvm/CGData/CodeGenData.inc" |
| 117 | }; |
| 118 | |
| 119 | const char *CodeGenDataSectNamePrefix[] = { |
| 120 | #define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Prefix, |
| 121 | #include "llvm/CGData/CodeGenData.inc" |
| 122 | }; |
| 123 | |
| 124 | } // namespace |
| 125 | |
| 126 | namespace llvm { |
| 127 | |
| 128 | std::string getCodeGenDataSectionName(CGDataSectKind CGSK, |
| 129 | Triple::ObjectFormatType OF, |
| 130 | bool AddSegmentInfo) { |
| 131 | std::string SectName; |
| 132 | |
| 133 | if (OF == Triple::MachO && AddSegmentInfo) |
| 134 | SectName = CodeGenDataSectNamePrefix[CGSK]; |
| 135 | |
| 136 | if (OF == Triple::COFF) |
| 137 | SectName += CodeGenDataSectNameCoff[CGSK]; |
| 138 | else |
| 139 | SectName += CodeGenDataSectNameCommon[CGSK]; |
| 140 | |
| 141 | return SectName; |
| 142 | } |
| 143 | |
| 144 | std::unique_ptr<CodeGenData> CodeGenData::Instance = nullptr; |
| 145 | std::once_flag CodeGenData::OnceFlag; |
| 146 | |
| 147 | CodeGenData &CodeGenData::getInstance() { |
| 148 | std::call_once(once&: CodeGenData::OnceFlag, f: []() { |
| 149 | Instance = std::unique_ptr<CodeGenData>(new CodeGenData()); |
| 150 | |
| 151 | if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds) |
| 152 | Instance->EmitCGData = true; |
| 153 | else if (!CodeGenDataUsePath.empty()) { |
| 154 | // Initialize the global CGData if the input file name is given. |
| 155 | // We do not error-out when failing to parse the input file. |
| 156 | // Instead, just emit an warning message and fall back as if no CGData |
| 157 | // were available. |
| 158 | auto FS = vfs::getRealFileSystem(); |
| 159 | auto ReaderOrErr = CodeGenDataReader::create(Path: CodeGenDataUsePath, FS&: *FS); |
| 160 | if (Error E = ReaderOrErr.takeError()) { |
| 161 | warn(E: std::move(E), Whence: CodeGenDataUsePath); |
| 162 | return; |
| 163 | } |
| 164 | // Publish each CGData based on the data type in the header. |
| 165 | auto Reader = ReaderOrErr->get(); |
| 166 | if (Reader->hasOutlinedHashTree()) |
| 167 | Instance->publishOutlinedHashTree(HashTree: Reader->releaseOutlinedHashTree()); |
| 168 | if (Reader->hasStableFunctionMap()) |
| 169 | Instance->publishStableFunctionMap(FunctionMap: Reader->releaseStableFunctionMap()); |
| 170 | } |
| 171 | }); |
| 172 | return *Instance; |
| 173 | } |
| 174 | |
| 175 | namespace IndexedCGData { |
| 176 | |
| 177 | Expected<Header> Header::(const unsigned char *Curr) { |
| 178 | using namespace support; |
| 179 | |
| 180 | static_assert(std::is_standard_layout_v<llvm::IndexedCGData::Header>, |
| 181 | "The header should be standard layout type since we use offset " |
| 182 | "of fields to read." ); |
| 183 | Header H; |
| 184 | H.Magic = endian::readNext<uint64_t, endianness::little, unaligned>(memory&: Curr); |
| 185 | if (H.Magic != IndexedCGData::Magic) |
| 186 | return make_error<CGDataError>(Args: cgdata_error::bad_magic); |
| 187 | H.Version = endian::readNext<uint32_t, endianness::little, unaligned>(memory&: Curr); |
| 188 | if (H.Version > IndexedCGData::CGDataVersion::CurrentVersion) |
| 189 | return make_error<CGDataError>(Args: cgdata_error::unsupported_version); |
| 190 | H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(memory&: Curr); |
| 191 | |
| 192 | static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version4, |
| 193 | "Please update the offset computation below if a new field has " |
| 194 | "been added to the header." ); |
| 195 | H.OutlinedHashTreeOffset = |
| 196 | endian::readNext<uint64_t, endianness::little, unaligned>(memory&: Curr); |
| 197 | if (H.Version >= 2) |
| 198 | H.StableFunctionMapOffset = |
| 199 | endian::readNext<uint64_t, endianness::little, unaligned>(memory&: Curr); |
| 200 | |
| 201 | return H; |
| 202 | } |
| 203 | |
| 204 | } // end namespace IndexedCGData |
| 205 | |
| 206 | namespace cgdata { |
| 207 | |
| 208 | void warn(Twine Message, StringRef Whence, StringRef Hint) { |
| 209 | WithColor::warning(); |
| 210 | if (!Whence.empty()) |
| 211 | errs() << Whence << ": " ; |
| 212 | errs() << Message << "\n" ; |
| 213 | if (!Hint.empty()) |
| 214 | WithColor::note() << Hint << "\n" ; |
| 215 | } |
| 216 | |
| 217 | void warn(Error E, StringRef Whence) { |
| 218 | if (E.isA<CGDataError>()) { |
| 219 | handleAllErrors(E: std::move(E), Handlers: [&](const CGDataError &IPE) { |
| 220 | warn(Message: IPE.message(), Whence, Hint: "" ); |
| 221 | }); |
| 222 | } |
| 223 | } |
| 224 | |
| 225 | void saveModuleForTwoRounds(const Module &TheModule, unsigned Task, |
| 226 | AddStreamFn AddStream) { |
| 227 | LLVM_DEBUG(dbgs() << "Saving module: " << TheModule.getModuleIdentifier() |
| 228 | << " in Task " << Task << "\n" ); |
| 229 | Expected<std::unique_ptr<CachedFileStream>> StreamOrErr = |
| 230 | AddStream(Task, TheModule.getModuleIdentifier()); |
| 231 | if (Error Err = StreamOrErr.takeError()) |
| 232 | report_fatal_error(Err: std::move(Err)); |
| 233 | std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr; |
| 234 | |
| 235 | WriteBitcodeToFile(M: TheModule, Out&: *Stream->OS, |
| 236 | /*ShouldPreserveUseListOrder=*/true); |
| 237 | |
| 238 | if (Error Err = Stream->commit()) |
| 239 | report_fatal_error(Err: std::move(Err)); |
| 240 | } |
| 241 | |
| 242 | std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule, |
| 243 | unsigned Task, |
| 244 | LLVMContext &Context, |
| 245 | ArrayRef<StringRef> IRFiles) { |
| 246 | LLVM_DEBUG(dbgs() << "Loading module: " << OrigModule.getModuleIdentifier() |
| 247 | << " in Task " << Task << "\n" ); |
| 248 | auto FileBuffer = MemoryBuffer::getMemBuffer( |
| 249 | InputData: IRFiles[Task], BufferName: "in-memory IR file" , /*RequiresNullTerminator=*/false); |
| 250 | auto RestoredModule = parseBitcodeFile(Buffer: *FileBuffer, Context); |
| 251 | if (!RestoredModule) |
| 252 | report_fatal_error( |
| 253 | reason: Twine("Failed to parse optimized bitcode loaded for Task: " ) + |
| 254 | Twine(Task) + "\n" ); |
| 255 | |
| 256 | // Restore the original module identifier. |
| 257 | (*RestoredModule)->setModuleIdentifier(OrigModule.getModuleIdentifier()); |
| 258 | return std::move(*RestoredModule); |
| 259 | } |
| 260 | |
| 261 | Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) { |
| 262 | OutlinedHashTreeRecord GlobalOutlineRecord; |
| 263 | StableFunctionMapRecord GlobalStableFunctionMapRecord; |
| 264 | stable_hash CombinedHash = 0; |
| 265 | for (auto File : ObjFiles) { |
| 266 | if (File.empty()) |
| 267 | continue; |
| 268 | std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer( |
| 269 | InputData: File, BufferName: "in-memory object file" , /*RequiresNullTerminator=*/false); |
| 270 | Expected<std::unique_ptr<object::ObjectFile>> BinOrErr = |
| 271 | object::ObjectFile::createObjectFile(Object: Buffer->getMemBufferRef()); |
| 272 | if (!BinOrErr) |
| 273 | return BinOrErr.takeError(); |
| 274 | |
| 275 | std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get(); |
| 276 | if (auto E = CodeGenDataReader::mergeFromObjectFile( |
| 277 | Obj: Obj.get(), GlobalOutlineRecord, GlobalFunctionMapRecord&: GlobalStableFunctionMapRecord, |
| 278 | CombinedHash: &CombinedHash)) |
| 279 | return E; |
| 280 | } |
| 281 | |
| 282 | GlobalStableFunctionMapRecord.finalize(); |
| 283 | |
| 284 | if (!GlobalOutlineRecord.empty()) |
| 285 | cgdata::publishOutlinedHashTree(HashTree: std::move(GlobalOutlineRecord.HashTree)); |
| 286 | if (!GlobalStableFunctionMapRecord.empty()) |
| 287 | cgdata::publishStableFunctionMap( |
| 288 | FunctionMap: std::move(GlobalStableFunctionMapRecord.FunctionMap)); |
| 289 | |
| 290 | return CombinedHash; |
| 291 | } |
| 292 | |
| 293 | } // end namespace cgdata |
| 294 | |
| 295 | } // end namespace llvm |
| 296 | |