1//===- CodeGenDataReader.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains support for reading codegen data.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/CGData/CodeGenDataReader.h"
14#include "llvm/CGData/OutlinedHashTreeRecord.h"
15#include "llvm/Object/ObjectFile.h"
16#include "llvm/Support/CommandLine.h"
17#include "llvm/Support/MemoryBuffer.h"
18
19#define DEBUG_TYPE "cg-data-reader"
20
21using namespace llvm;
22
23static cl::opt<bool> IndexedCodeGenDataReadFunctionMapNames(
24 "indexed-codegen-data-read-function-map-names", cl::init(Val: true), cl::Hidden,
25 cl::desc("Read function map names in indexed CodeGenData. Can be "
26 "disabled to save memory and time for final consumption of the "
27 "indexed CodeGenData in production."));
28
29namespace llvm {
30
31cl::opt<bool> IndexedCodeGenDataLazyLoading(
32 "indexed-codegen-data-lazy-loading", cl::init(Val: false), cl::Hidden,
33 cl::desc(
34 "Lazily load indexed CodeGenData. Enable to save memory and time "
35 "for final consumption of the indexed CodeGenData in production."));
36
37static Expected<std::unique_ptr<MemoryBuffer>>
38setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
39 auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
40 : FS.getBufferForFile(Name: Filename);
41 if (std::error_code EC = BufferOrErr.getError())
42 return errorCodeToError(EC);
43 return std::move(BufferOrErr.get());
44}
45
46Error CodeGenDataReader::mergeFromObjectFile(
47 const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
48 StableFunctionMapRecord &GlobalFunctionMapRecord,
49 stable_hash *CombinedHash) {
50 Triple TT = Obj->makeTriple();
51 auto CGOutlineName =
52 getCodeGenDataSectionName(CGSK: CG_outline, OF: TT.getObjectFormat(), AddSegmentInfo: false);
53 auto CGMergeName =
54 getCodeGenDataSectionName(CGSK: CG_merge, OF: TT.getObjectFormat(), AddSegmentInfo: false);
55
56 auto processSectionContents = [&](const StringRef &Name,
57 const StringRef &Contents) {
58 if (Name != CGOutlineName && Name != CGMergeName)
59 return;
60 if (CombinedHash)
61 *CombinedHash = stable_hash_combine(A: *CombinedHash, B: xxh3_64bits(data: Contents));
62 auto *Data = reinterpret_cast<const unsigned char *>(Contents.data());
63 auto *EndData = Data + Contents.size();
64 // In case dealing with an executable that has concatenated cgdata,
65 // we want to merge them into a single cgdata.
66 // Although it's not a typical workflow, we support this scenario
67 // by looping over all data in the sections.
68 if (Name == CGOutlineName) {
69 while (Data != EndData) {
70 OutlinedHashTreeRecord LocalOutlineRecord;
71 LocalOutlineRecord.deserialize(Ptr&: Data);
72 GlobalOutlineRecord.merge(Other: LocalOutlineRecord);
73 }
74 } else if (Name == CGMergeName) {
75 while (Data != EndData) {
76 StableFunctionMapRecord LocalFunctionMapRecord;
77 LocalFunctionMapRecord.deserialize(Ptr&: Data);
78 GlobalFunctionMapRecord.merge(Other: LocalFunctionMapRecord);
79 }
80 }
81 };
82
83 for (auto &Section : Obj->sections()) {
84 Expected<StringRef> NameOrErr = Section.getName();
85 if (!NameOrErr)
86 return NameOrErr.takeError();
87 Expected<StringRef> ContentsOrErr = Section.getContents();
88 if (!ContentsOrErr)
89 return ContentsOrErr.takeError();
90 processSectionContents(*NameOrErr, *ContentsOrErr);
91 }
92
93 return Error::success();
94}
95
96Error IndexedCodeGenDataReader::read() {
97 using namespace support;
98
99 // The smallest header with the version 1 is 24 bytes.
100 // Do not update this value even with the new version of the header.
101 const unsigned MinHeaderSize = 24;
102 if (DataBuffer->getBufferSize() < MinHeaderSize)
103 return error(Err: cgdata_error::bad_header);
104
105 auto *Start =
106 reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
107 auto *End =
108 reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd());
109 if (auto E = IndexedCGData::Header::readFromBuffer(Curr: Start).moveInto(Value&: Header))
110 return E;
111
112 if (hasOutlinedHashTree()) {
113 const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset;
114 if (Ptr >= End)
115 return error(Err: cgdata_error::eof);
116 HashTreeRecord.deserialize(Ptr);
117 }
118
119 // TODO: lazy loading support for outlined hash tree.
120 std::shared_ptr<MemoryBuffer> SharedDataBuffer = std::move(DataBuffer);
121 if (hasStableFunctionMap()) {
122 const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
123 if (Ptr >= End)
124 return error(Err: cgdata_error::eof);
125 FunctionMapRecord.setReadStableFunctionMapNames(
126 IndexedCodeGenDataReadFunctionMapNames);
127 if (IndexedCodeGenDataLazyLoading)
128 FunctionMapRecord.lazyDeserialize(Buffer: std::move(SharedDataBuffer),
129 Offset: Header.StableFunctionMapOffset);
130 else
131 FunctionMapRecord.deserialize(Ptr);
132 }
133
134 return success();
135}
136
137Expected<std::unique_ptr<CodeGenDataReader>>
138CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) {
139 // Set up the buffer to read.
140 auto BufferOrError = setupMemoryBuffer(Filename: Path, FS);
141 if (Error E = BufferOrError.takeError())
142 return std::move(E);
143 return CodeGenDataReader::create(Buffer: std::move(BufferOrError.get()));
144}
145
146Expected<std::unique_ptr<CodeGenDataReader>>
147CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
148 if (Buffer->getBufferSize() == 0)
149 return make_error<CGDataError>(Args: cgdata_error::empty_cgdata);
150
151 std::unique_ptr<CodeGenDataReader> Reader;
152 // Create the reader.
153 if (IndexedCodeGenDataReader::hasFormat(Buffer: *Buffer))
154 Reader = std::make_unique<IndexedCodeGenDataReader>(args: std::move(Buffer));
155 else if (TextCodeGenDataReader::hasFormat(Buffer: *Buffer))
156 Reader = std::make_unique<TextCodeGenDataReader>(args: std::move(Buffer));
157 else
158 return make_error<CGDataError>(Args: cgdata_error::malformed);
159
160 // Initialize the reader and return the result.
161 if (Error E = Reader->read())
162 return std::move(E);
163
164 return std::move(Reader);
165}
166
167bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) {
168 using namespace support;
169 if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic))
170 return false;
171
172 uint64_t Magic = endian::read<uint64_t, aligned>(memory: DataBuffer.getBufferStart(),
173 endian: llvm::endianness::little);
174 // Verify that it's magical.
175 return Magic == IndexedCGData::Magic;
176}
177
178bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) {
179 // Verify that this really looks like plain ASCII text by checking a
180 // 'reasonable' number of characters (up to the magic size).
181 StringRef Prefix = Buffer.getBuffer().take_front(N: sizeof(uint64_t));
182 return llvm::all_of(Range&: Prefix, P: [](char c) { return isPrint(C: c) || isSpace(C: c); });
183}
184Error TextCodeGenDataReader::read() {
185 using namespace support;
186
187 // Parse the custom header line by line.
188 for (; !Line.is_at_eof(); ++Line) {
189 // Skip empty or whitespace-only lines
190 if (Line->trim().empty())
191 continue;
192
193 if (!Line->starts_with(Prefix: ":"))
194 break;
195 StringRef Str = Line->drop_front().rtrim();
196 if (Str.equals_insensitive(RHS: "outlined_hash_tree"))
197 DataKind |= CGDataKind::FunctionOutlinedHashTree;
198 else if (Str.equals_insensitive(RHS: "stable_function_map"))
199 DataKind |= CGDataKind::StableFunctionMergingMap;
200 else
201 return error(Err: cgdata_error::bad_header);
202 }
203
204 // We treat an empty header (that is a comment # only) as a valid header.
205 if (Line.is_at_eof()) {
206 if (DataKind == CGDataKind::Unknown)
207 return Error::success();
208 return error(Err: cgdata_error::bad_header);
209 }
210
211 // The YAML docs follow after the header.
212 const char *Pos = Line->data();
213 size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
214 reinterpret_cast<size_t>(Pos);
215 yaml::Input YOS(StringRef(Pos, Size));
216 if (hasOutlinedHashTree())
217 HashTreeRecord.deserializeYAML(YIS&: YOS);
218 if (hasStableFunctionMap())
219 FunctionMapRecord.deserializeYAML(YIS&: YOS);
220
221 return Error::success();
222}
223} // end namespace llvm
224