1 | //===- CodeGenDataReader.cpp ----------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains support for reading codegen data. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/CGData/CodeGenDataReader.h" |
14 | #include "llvm/CGData/OutlinedHashTreeRecord.h" |
15 | #include "llvm/Object/ObjectFile.h" |
16 | #include "llvm/Support/CommandLine.h" |
17 | #include "llvm/Support/MemoryBuffer.h" |
18 | |
19 | #define DEBUG_TYPE "cg-data-reader" |
20 | |
21 | using namespace llvm; |
22 | |
23 | static cl::opt<bool> IndexedCodeGenDataReadFunctionMapNames( |
24 | "indexed-codegen-data-read-function-map-names" , cl::init(Val: true), cl::Hidden, |
25 | cl::desc("Read function map names in indexed CodeGenData. Can be " |
26 | "disabled to save memory and time for final consumption of the " |
27 | "indexed CodeGenData in production." )); |
28 | |
29 | namespace llvm { |
30 | |
31 | static Expected<std::unique_ptr<MemoryBuffer>> |
32 | setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { |
33 | auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN() |
34 | : FS.getBufferForFile(Name: Filename); |
35 | if (std::error_code EC = BufferOrErr.getError()) |
36 | return errorCodeToError(EC); |
37 | return std::move(BufferOrErr.get()); |
38 | } |
39 | |
40 | Error CodeGenDataReader::mergeFromObjectFile( |
41 | const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord, |
42 | StableFunctionMapRecord &GlobalFunctionMapRecord, |
43 | stable_hash *CombinedHash) { |
44 | Triple TT = Obj->makeTriple(); |
45 | auto CGOutlineName = |
46 | getCodeGenDataSectionName(CGSK: CG_outline, OF: TT.getObjectFormat(), AddSegmentInfo: false); |
47 | auto CGMergeName = |
48 | getCodeGenDataSectionName(CGSK: CG_merge, OF: TT.getObjectFormat(), AddSegmentInfo: false); |
49 | |
50 | auto processSectionContents = [&](const StringRef &Name, |
51 | const StringRef &Contents) { |
52 | if (Name != CGOutlineName && Name != CGMergeName) |
53 | return; |
54 | if (CombinedHash) |
55 | *CombinedHash = stable_hash_combine(A: *CombinedHash, B: xxh3_64bits(data: Contents)); |
56 | auto *Data = reinterpret_cast<const unsigned char *>(Contents.data()); |
57 | auto *EndData = Data + Contents.size(); |
58 | // In case dealing with an executable that has concatenated cgdata, |
59 | // we want to merge them into a single cgdata. |
60 | // Although it's not a typical workflow, we support this scenario |
61 | // by looping over all data in the sections. |
62 | if (Name == CGOutlineName) { |
63 | while (Data != EndData) { |
64 | OutlinedHashTreeRecord LocalOutlineRecord; |
65 | LocalOutlineRecord.deserialize(Ptr&: Data); |
66 | GlobalOutlineRecord.merge(Other: LocalOutlineRecord); |
67 | } |
68 | } else if (Name == CGMergeName) { |
69 | while (Data != EndData) { |
70 | StableFunctionMapRecord LocalFunctionMapRecord; |
71 | LocalFunctionMapRecord.deserialize(Ptr&: Data); |
72 | GlobalFunctionMapRecord.merge(Other: LocalFunctionMapRecord); |
73 | } |
74 | } |
75 | }; |
76 | |
77 | for (auto &Section : Obj->sections()) { |
78 | Expected<StringRef> NameOrErr = Section.getName(); |
79 | if (!NameOrErr) |
80 | return NameOrErr.takeError(); |
81 | Expected<StringRef> ContentsOrErr = Section.getContents(); |
82 | if (!ContentsOrErr) |
83 | return ContentsOrErr.takeError(); |
84 | processSectionContents(*NameOrErr, *ContentsOrErr); |
85 | } |
86 | |
87 | return Error::success(); |
88 | } |
89 | |
90 | Error IndexedCodeGenDataReader::read() { |
91 | using namespace support; |
92 | |
93 | // The smallest header with the version 1 is 24 bytes. |
94 | // Do not update this value even with the new version of the header. |
95 | const unsigned = 24; |
96 | if (DataBuffer->getBufferSize() < MinHeaderSize) |
97 | return error(Err: cgdata_error::bad_header); |
98 | |
99 | auto *Start = |
100 | reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart()); |
101 | auto *End = |
102 | reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd()); |
103 | if (auto E = IndexedCGData::Header::readFromBuffer(Curr: Start).moveInto(Value&: Header)) |
104 | return E; |
105 | |
106 | if (hasOutlinedHashTree()) { |
107 | const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset; |
108 | if (Ptr >= End) |
109 | return error(Err: cgdata_error::eof); |
110 | HashTreeRecord.deserialize(Ptr); |
111 | } |
112 | if (hasStableFunctionMap()) { |
113 | const unsigned char *Ptr = Start + Header.StableFunctionMapOffset; |
114 | if (Ptr >= End) |
115 | return error(Err: cgdata_error::eof); |
116 | FunctionMapRecord.deserialize(Ptr, ReadStableFunctionMapNames: IndexedCodeGenDataReadFunctionMapNames); |
117 | } |
118 | |
119 | return success(); |
120 | } |
121 | |
122 | Expected<std::unique_ptr<CodeGenDataReader>> |
123 | CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) { |
124 | // Set up the buffer to read. |
125 | auto BufferOrError = setupMemoryBuffer(Filename: Path, FS); |
126 | if (Error E = BufferOrError.takeError()) |
127 | return std::move(E); |
128 | return CodeGenDataReader::create(Buffer: std::move(BufferOrError.get())); |
129 | } |
130 | |
131 | Expected<std::unique_ptr<CodeGenDataReader>> |
132 | CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) { |
133 | if (Buffer->getBufferSize() == 0) |
134 | return make_error<CGDataError>(Args: cgdata_error::empty_cgdata); |
135 | |
136 | std::unique_ptr<CodeGenDataReader> Reader; |
137 | // Create the reader. |
138 | if (IndexedCodeGenDataReader::hasFormat(Buffer: *Buffer)) |
139 | Reader = std::make_unique<IndexedCodeGenDataReader>(args: std::move(Buffer)); |
140 | else if (TextCodeGenDataReader::hasFormat(Buffer: *Buffer)) |
141 | Reader = std::make_unique<TextCodeGenDataReader>(args: std::move(Buffer)); |
142 | else |
143 | return make_error<CGDataError>(Args: cgdata_error::malformed); |
144 | |
145 | // Initialize the reader and return the result. |
146 | if (Error E = Reader->read()) |
147 | return std::move(E); |
148 | |
149 | return std::move(Reader); |
150 | } |
151 | |
152 | bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) { |
153 | using namespace support; |
154 | if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic)) |
155 | return false; |
156 | |
157 | uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>( |
158 | memory: DataBuffer.getBufferStart()); |
159 | // Verify that it's magical. |
160 | return Magic == IndexedCGData::Magic; |
161 | } |
162 | |
163 | bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) { |
164 | // Verify that this really looks like plain ASCII text by checking a |
165 | // 'reasonable' number of characters (up to the magic size). |
166 | StringRef Prefix = Buffer.getBuffer().take_front(N: sizeof(uint64_t)); |
167 | return llvm::all_of(Range&: Prefix, P: [](char c) { return isPrint(C: c) || isSpace(C: c); }); |
168 | } |
169 | Error TextCodeGenDataReader::read() { |
170 | using namespace support; |
171 | |
172 | // Parse the custom header line by line. |
173 | for (; !Line.is_at_eof(); ++Line) { |
174 | // Skip empty or whitespace-only lines |
175 | if (Line->trim().empty()) |
176 | continue; |
177 | |
178 | if (!Line->starts_with(Prefix: ":" )) |
179 | break; |
180 | StringRef Str = Line->drop_front().rtrim(); |
181 | if (Str.equals_insensitive(RHS: "outlined_hash_tree" )) |
182 | DataKind |= CGDataKind::FunctionOutlinedHashTree; |
183 | else if (Str.equals_insensitive(RHS: "stable_function_map" )) |
184 | DataKind |= CGDataKind::StableFunctionMergingMap; |
185 | else |
186 | return error(Err: cgdata_error::bad_header); |
187 | } |
188 | |
189 | // We treat an empty header (that is a comment # only) as a valid header. |
190 | if (Line.is_at_eof()) { |
191 | if (DataKind == CGDataKind::Unknown) |
192 | return Error::success(); |
193 | return error(Err: cgdata_error::bad_header); |
194 | } |
195 | |
196 | // The YAML docs follow after the header. |
197 | const char *Pos = Line->data(); |
198 | size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) - |
199 | reinterpret_cast<size_t>(Pos); |
200 | yaml::Input YOS(StringRef(Pos, Size)); |
201 | if (hasOutlinedHashTree()) |
202 | HashTreeRecord.deserializeYAML(YIS&: YOS); |
203 | if (hasStableFunctionMap()) |
204 | FunctionMapRecord.deserializeYAML(YIS&: YOS); |
205 | |
206 | return Error::success(); |
207 | } |
208 | } // end namespace llvm |
209 | |