1//===- CodeGenDataReader.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains support for reading codegen data.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/CGData/CodeGenDataReader.h"
14#include "llvm/CGData/OutlinedHashTreeRecord.h"
15#include "llvm/Object/ObjectFile.h"
16#include "llvm/Support/CommandLine.h"
17#include "llvm/Support/MemoryBuffer.h"
18
19#define DEBUG_TYPE "cg-data-reader"
20
21using namespace llvm;
22
23static cl::opt<bool> IndexedCodeGenDataReadFunctionMapNames(
24 "indexed-codegen-data-read-function-map-names", cl::init(Val: true), cl::Hidden,
25 cl::desc("Read function map names in indexed CodeGenData. Can be "
26 "disabled to save memory and time for final consumption of the "
27 "indexed CodeGenData in production."));
28
29namespace llvm {
30
31static Expected<std::unique_ptr<MemoryBuffer>>
32setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
33 auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
34 : FS.getBufferForFile(Name: Filename);
35 if (std::error_code EC = BufferOrErr.getError())
36 return errorCodeToError(EC);
37 return std::move(BufferOrErr.get());
38}
39
40Error CodeGenDataReader::mergeFromObjectFile(
41 const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
42 StableFunctionMapRecord &GlobalFunctionMapRecord,
43 stable_hash *CombinedHash) {
44 Triple TT = Obj->makeTriple();
45 auto CGOutlineName =
46 getCodeGenDataSectionName(CGSK: CG_outline, OF: TT.getObjectFormat(), AddSegmentInfo: false);
47 auto CGMergeName =
48 getCodeGenDataSectionName(CGSK: CG_merge, OF: TT.getObjectFormat(), AddSegmentInfo: false);
49
50 auto processSectionContents = [&](const StringRef &Name,
51 const StringRef &Contents) {
52 if (Name != CGOutlineName && Name != CGMergeName)
53 return;
54 if (CombinedHash)
55 *CombinedHash = stable_hash_combine(A: *CombinedHash, B: xxh3_64bits(data: Contents));
56 auto *Data = reinterpret_cast<const unsigned char *>(Contents.data());
57 auto *EndData = Data + Contents.size();
58 // In case dealing with an executable that has concatenated cgdata,
59 // we want to merge them into a single cgdata.
60 // Although it's not a typical workflow, we support this scenario
61 // by looping over all data in the sections.
62 if (Name == CGOutlineName) {
63 while (Data != EndData) {
64 OutlinedHashTreeRecord LocalOutlineRecord;
65 LocalOutlineRecord.deserialize(Ptr&: Data);
66 GlobalOutlineRecord.merge(Other: LocalOutlineRecord);
67 }
68 } else if (Name == CGMergeName) {
69 while (Data != EndData) {
70 StableFunctionMapRecord LocalFunctionMapRecord;
71 LocalFunctionMapRecord.deserialize(Ptr&: Data);
72 GlobalFunctionMapRecord.merge(Other: LocalFunctionMapRecord);
73 }
74 }
75 };
76
77 for (auto &Section : Obj->sections()) {
78 Expected<StringRef> NameOrErr = Section.getName();
79 if (!NameOrErr)
80 return NameOrErr.takeError();
81 Expected<StringRef> ContentsOrErr = Section.getContents();
82 if (!ContentsOrErr)
83 return ContentsOrErr.takeError();
84 processSectionContents(*NameOrErr, *ContentsOrErr);
85 }
86
87 return Error::success();
88}
89
90Error IndexedCodeGenDataReader::read() {
91 using namespace support;
92
93 // The smallest header with the version 1 is 24 bytes.
94 // Do not update this value even with the new version of the header.
95 const unsigned MinHeaderSize = 24;
96 if (DataBuffer->getBufferSize() < MinHeaderSize)
97 return error(Err: cgdata_error::bad_header);
98
99 auto *Start =
100 reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
101 auto *End =
102 reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd());
103 if (auto E = IndexedCGData::Header::readFromBuffer(Curr: Start).moveInto(Value&: Header))
104 return E;
105
106 if (hasOutlinedHashTree()) {
107 const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset;
108 if (Ptr >= End)
109 return error(Err: cgdata_error::eof);
110 HashTreeRecord.deserialize(Ptr);
111 }
112 if (hasStableFunctionMap()) {
113 const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
114 if (Ptr >= End)
115 return error(Err: cgdata_error::eof);
116 FunctionMapRecord.deserialize(Ptr, ReadStableFunctionMapNames: IndexedCodeGenDataReadFunctionMapNames);
117 }
118
119 return success();
120}
121
122Expected<std::unique_ptr<CodeGenDataReader>>
123CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) {
124 // Set up the buffer to read.
125 auto BufferOrError = setupMemoryBuffer(Filename: Path, FS);
126 if (Error E = BufferOrError.takeError())
127 return std::move(E);
128 return CodeGenDataReader::create(Buffer: std::move(BufferOrError.get()));
129}
130
131Expected<std::unique_ptr<CodeGenDataReader>>
132CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
133 if (Buffer->getBufferSize() == 0)
134 return make_error<CGDataError>(Args: cgdata_error::empty_cgdata);
135
136 std::unique_ptr<CodeGenDataReader> Reader;
137 // Create the reader.
138 if (IndexedCodeGenDataReader::hasFormat(Buffer: *Buffer))
139 Reader = std::make_unique<IndexedCodeGenDataReader>(args: std::move(Buffer));
140 else if (TextCodeGenDataReader::hasFormat(Buffer: *Buffer))
141 Reader = std::make_unique<TextCodeGenDataReader>(args: std::move(Buffer));
142 else
143 return make_error<CGDataError>(Args: cgdata_error::malformed);
144
145 // Initialize the reader and return the result.
146 if (Error E = Reader->read())
147 return std::move(E);
148
149 return std::move(Reader);
150}
151
152bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) {
153 using namespace support;
154 if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic))
155 return false;
156
157 uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
158 memory: DataBuffer.getBufferStart());
159 // Verify that it's magical.
160 return Magic == IndexedCGData::Magic;
161}
162
163bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) {
164 // Verify that this really looks like plain ASCII text by checking a
165 // 'reasonable' number of characters (up to the magic size).
166 StringRef Prefix = Buffer.getBuffer().take_front(N: sizeof(uint64_t));
167 return llvm::all_of(Range&: Prefix, P: [](char c) { return isPrint(C: c) || isSpace(C: c); });
168}
169Error TextCodeGenDataReader::read() {
170 using namespace support;
171
172 // Parse the custom header line by line.
173 for (; !Line.is_at_eof(); ++Line) {
174 // Skip empty or whitespace-only lines
175 if (Line->trim().empty())
176 continue;
177
178 if (!Line->starts_with(Prefix: ":"))
179 break;
180 StringRef Str = Line->drop_front().rtrim();
181 if (Str.equals_insensitive(RHS: "outlined_hash_tree"))
182 DataKind |= CGDataKind::FunctionOutlinedHashTree;
183 else if (Str.equals_insensitive(RHS: "stable_function_map"))
184 DataKind |= CGDataKind::StableFunctionMergingMap;
185 else
186 return error(Err: cgdata_error::bad_header);
187 }
188
189 // We treat an empty header (that is a comment # only) as a valid header.
190 if (Line.is_at_eof()) {
191 if (DataKind == CGDataKind::Unknown)
192 return Error::success();
193 return error(Err: cgdata_error::bad_header);
194 }
195
196 // The YAML docs follow after the header.
197 const char *Pos = Line->data();
198 size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
199 reinterpret_cast<size_t>(Pos);
200 yaml::Input YOS(StringRef(Pos, Size));
201 if (hasOutlinedHashTree())
202 HashTreeRecord.deserializeYAML(YIS&: YOS);
203 if (hasStableFunctionMap())
204 FunctionMapRecord.deserializeYAML(YIS&: YOS);
205
206 return Error::success();
207}
208} // end namespace llvm
209