TypeStreamMerger.cpp source code [llvm_projects/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp]

1	//===-- TypeStreamMerger.cpp ------------------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
10	#include "llvm/ADT/SmallVector.h"
11	#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
12	#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
13	#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
14	#include "llvm/DebugInfo/CodeView/TypeIndex.h"
15	#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
16	#include "llvm/DebugInfo/CodeView/TypeRecord.h"
17	#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
18	#include "llvm/Support/Error.h"
19	#include <optional>
20
21	using namespace llvm;
22	using namespace llvm::codeview;
23
24	static inline size_t slotForIndex(TypeIndex Idx) {
25	assert(!Idx.isSimple() && "simple type indices have no slots");
26	return Idx.getIndex() - TypeIndex::FirstNonSimpleIndex;
27	}
28
29	namespace {
30
31	/// Implementation of CodeView type stream merging.
32	///
33	/// A CodeView type stream is a series of records that reference each other
34	/// through type indices. A type index is either "simple", meaning it is less
35	/// than 0x1000 and refers to a builtin type, or it is complex, meaning it
36	/// refers to a prior type record in the current stream. The type index of a
37	/// record is equal to the number of records before it in the stream plus
38	/// 0x1000.
39	///
40	/// Type records are only allowed to use type indices smaller than their own, so
41	/// a type stream is effectively a topologically sorted DAG. Cycles occurring in
42	/// the type graph of the source program are resolved with forward declarations
43	/// of composite types. This class implements the following type stream merging
44	/// algorithm, which relies on this DAG structure:
45	///
46	/// - Begin with a new empty stream, and a new empty hash table that maps from
47	/// type record contents to new type index.
48	/// - For each new type stream, maintain a map from source type index to
49	/// destination type index.
50	/// - For each record, copy it and rewrite its type indices to be valid in the
51	/// destination type stream.
52	/// - If the new type record is not already present in the destination stream
53	/// hash table, append it to the destination type stream, assign it the next
54	/// type index, and update the two hash tables.
55	/// - If the type record already exists in the destination stream, discard it
56	/// and update the type index map to forward the source type index to the
57	/// existing destination type index.
58	///
59	/// As an additional complication, type stream merging actually produces two
60	/// streams: an item (or IPI) stream and a type stream, as this is what is
61	/// actually stored in the final PDB. We choose which records go where by
62	/// looking at the record kind.
63	class TypeStreamMerger {
64	public:
65	explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest)
66	: IndexMap(SourceToDest) {
67	// When dealing with precompiled headers objects, all data in SourceToDest
68	// belongs to the precompiled headers object, and is assumed to be already
69	// remapped to the target PDB. Any forthcoming type that will be merged in
70	// might potentially back-reference this data. We also don't want to resolve
71	// twice the types in the precompiled object.
72	CurIndex += SourceToDest.size();
73	}
74
75	static const TypeIndex Untranslated;
76
77	// Local hashing entry points
78	Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
79	MergingTypeTableBuilder &DestTypes,
80	const CVTypeArray &IdsAndTypes,
81	std::optional<PCHMergerInfo> &PCHInfo);
82	Error mergeIdRecords(MergingTypeTableBuilder &Dest,
83	ArrayRef<TypeIndex> TypeSourceToDest,
84	const CVTypeArray &Ids);
85	Error mergeTypeRecords(MergingTypeTableBuilder &Dest,
86	const CVTypeArray &Types);
87
88	// Global hashing entry points
89	Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
90	GlobalTypeTableBuilder &DestTypes,
91	const CVTypeArray &IdsAndTypes,
92	ArrayRef<GloballyHashedType> Hashes,
93	std::optional<PCHMergerInfo> &PCHInfo);
94	Error mergeIdRecords(GlobalTypeTableBuilder &Dest,
95	ArrayRef<TypeIndex> TypeSourceToDest,
96	const CVTypeArray &Ids,
97	ArrayRef<GloballyHashedType> Hashes);
98	Error mergeTypeRecords(GlobalTypeTableBuilder &Dest, const CVTypeArray &Types,
99	ArrayRef<GloballyHashedType> Hashes,
100	std::optional<PCHMergerInfo> &PCHInfo);
101
102	private:
103	Error doit(const CVTypeArray &Types);
104
105	Error remapAllTypes(const CVTypeArray &Types);
106
107	Error remapType(const CVType &Type);
108
109	void addMapping(TypeIndex Idx);
110
111	inline bool remapTypeIndex(TypeIndex &Idx) {
112	// If we're mapping a pure index stream, then IndexMap only contains
113	// mappings from OldIdStream -> NewIdStream, in which case we will need to
114	// use the special mapping from OldTypeStream -> NewTypeStream which was
115	// computed externally. Regardless, we use this special map if and only if
116	// we are doing an id-only mapping.
117	if (!hasTypeStream())
118	return remapIndex(Idx, Map: TypeLookup);
119
120	assert(TypeLookup.empty());
121	return remapIndex(Idx, Map: IndexMap);
122	}
123	inline bool remapItemIndex(TypeIndex &Idx) {
124	assert(hasIdStream());
125	return remapIndex(Idx, Map: IndexMap);
126	}
127
128	bool hasTypeStream() const {
129	return (UseGlobalHashes) ? (!!DestGlobalTypeStream) : (!!DestTypeStream);
130	}
131
132	bool hasIdStream() const {
133	return (UseGlobalHashes) ? (!!DestGlobalIdStream) : (!!DestIdStream);
134	}
135
136	ArrayRef<uint8_t> remapIndices(const CVType &OriginalType,
137	MutableArrayRef<uint8_t> Storage);
138
139	inline bool remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map) {
140	if (LLVM_LIKELY(remapIndexSimple(Idx, Map)))
141	return true;
142
143	return remapIndexFallback(Idx, Map);
144	}
145
146	inline bool remapIndexSimple(TypeIndex &Idx, ArrayRef<TypeIndex> Map) const {
147	// Simple types are unchanged.
148	if (Idx.isSimple())
149	return true;
150
151	// Check if this type index refers to a record we've already translated
152	// successfully. If it refers to a type later in the stream or a record we
153	// had to defer, defer it until later pass.
154	unsigned MapPos = slotForIndex(Idx);
155	if (LLVM_UNLIKELY(MapPos >= Map.size() \|\| Map[MapPos] == Untranslated))
156	return false;
157
158	Idx = Map [MapPos];
159	return true;
160	}
161
162	bool remapIndexFallback(TypeIndex &Idx, ArrayRef<TypeIndex> Map);
163
164	Error errorCorruptRecord() const {
165	return llvm::make_error<CodeViewError>(Args: cv_error_code::corrupt_record);
166	}
167
168	Expected<bool> shouldRemapType(const CVType &Type);
169
170	std::optional<Error> LastError;
171
172	bool UseGlobalHashes = false;
173
174	bool IsSecondPass = false;
175
176	unsigned NumBadIndices = `0`;
177
178	TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex};
179
180	MergingTypeTableBuilder DestIdStream = nullptr*;
181	MergingTypeTableBuilder DestTypeStream = nullptr*;
182
183	GlobalTypeTableBuilder DestGlobalIdStream = nullptr*;
184	GlobalTypeTableBuilder DestGlobalTypeStream = nullptr*;
185
186	ArrayRef<GloballyHashedType> GlobalHashes;
187
188	// If we're only mapping id records, this array contains the mapping for
189	// type records.
190	ArrayRef<TypeIndex> TypeLookup;
191
192	/// Map from source type index to destination type index. Indexed by source
193	/// type index minus 0x1000.
194	SmallVectorImpl<TypeIndex> &IndexMap;
195
196	/// Temporary storage that we use to copy a record's data while re-writing
197	/// its type indices.
198	SmallVector<uint8_t, `256`> RemapStorage;
199
200	std::optional<PCHMergerInfo> PCHInfo;
201	};
202
203	} // end anonymous namespace
204
205	const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated);
206
207	void TypeStreamMerger::addMapping(TypeIndex Idx) {
208	if (!IsSecondPass) {
209	assert(IndexMap.size() == slotForIndex(CurIndex) &&
210	"visitKnownRecord should add one index map entry");
211	IndexMap.push_back(Elt: Idx);
212	} else {
213	assert(slotForIndex(CurIndex) < IndexMap.size());
214	IndexMap [slotForIndex(Idx: CurIndex)] = Idx;
215	}
216	}
217
218	bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx,
219	ArrayRef<TypeIndex> Map) {
220	size_t MapPos = slotForIndex(Idx);
221
222	// If this is the second pass and this index isn't in the map, then it points
223	// outside the current type stream, and this is a corrupt record.
224	if (IsSecondPass && MapPos >= Map.size()) {
225	// FIXME: Print a more useful error. We can give the current record and the
226	// index that we think its pointing to.
227	if (LastError)
228	LastError = joinErrors(E1: std::move(*LastError), E2: errorCorruptRecord());
229	else
230	LastError = errorCorruptRecord();
231	}
232
233	++NumBadIndices;
234
235	// This type index is invalid. Remap this to "not translated by cvpack",
236	// and return failure.
237	Idx = Untranslated;
238	return false;
239	}
240
241	// Local hashing entry points
242	Error TypeStreamMerger::mergeTypeRecords(MergingTypeTableBuilder &Dest,
243	const CVTypeArray &Types) {
244	DestTypeStream = &Dest;
245	UseGlobalHashes = false;
246
247	return doit(Types);
248	}
249
250	Error TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder &Dest,
251	ArrayRef<TypeIndex> TypeSourceToDest,
252	const CVTypeArray &Ids) {
253	DestIdStream = &Dest;
254	TypeLookup = TypeSourceToDest;
255	UseGlobalHashes = false;
256
257	return doit(Types: Ids);
258	}
259
260	Error TypeStreamMerger::mergeTypesAndIds(
261	MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes,
262	const CVTypeArray &IdsAndTypes, std::optional<PCHMergerInfo> &PCHInfo) {
263	DestIdStream = &DestIds;
264	DestTypeStream = &DestTypes;
265	UseGlobalHashes = false;
266	auto Err = doit(Types: IdsAndTypes);
267	PCHInfo = this->PCHInfo;
268	return Err;
269	}
270
271	// Global hashing entry points
272	Error TypeStreamMerger::mergeTypeRecords(
273	GlobalTypeTableBuilder &Dest, const CVTypeArray &Types,
274	ArrayRef<GloballyHashedType> Hashes,
275	std::optional<PCHMergerInfo> &PCHInfo) {
276	DestGlobalTypeStream = &Dest;
277	UseGlobalHashes = true;
278	GlobalHashes = Hashes;
279	auto Err = doit(Types);
280	PCHInfo = this->PCHInfo;
281	return Err;
282	}
283
284	Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest,
285	ArrayRef<TypeIndex> TypeSourceToDest,
286	const CVTypeArray &Ids,
287	ArrayRef<GloballyHashedType> Hashes) {
288	DestGlobalIdStream = &Dest;
289	TypeLookup = TypeSourceToDest;
290	UseGlobalHashes = true;
291	GlobalHashes = Hashes;
292
293	return doit(Types: Ids);
294	}
295
296	Error TypeStreamMerger::mergeTypesAndIds(
297	GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes,
298	const CVTypeArray &IdsAndTypes, ArrayRef<GloballyHashedType> Hashes,
299	std::optional<PCHMergerInfo> &PCHInfo) {
300	DestGlobalIdStream = &DestIds;
301	DestGlobalTypeStream = &DestTypes;
302	UseGlobalHashes = true;
303	GlobalHashes = Hashes;
304	auto Err = doit(Types: IdsAndTypes);
305	PCHInfo = this->PCHInfo;
306	return Err;
307	}
308
309	Error TypeStreamMerger::doit(const CVTypeArray &Types) {
310	if (auto EC = remapAllTypes(Types))
311	return EC;
312
313	// If we found bad indices but no other errors, try doing another pass and see
314	// if we can resolve the indices that weren't in the map on the first pass.
315	// This may require multiple passes, but we should always make progress. MASM
316	// is the only known CodeView producer that makes type streams that aren't
317	// topologically sorted. The standard library contains MASM-produced objects,
318	// so this is important to handle correctly, but we don't have to be too
319	// efficient. MASM type streams are usually very small.
320	while (!LastError && NumBadIndices > `0`) {
321	unsigned BadIndicesRemaining = NumBadIndices;
322	IsSecondPass = true;
323	NumBadIndices = `0`;
324	CurIndex = TypeIndex (TypeIndex::FirstNonSimpleIndex);
325
326	if (auto EC = remapAllTypes(Types))
327	return EC;
328
329	assert(NumBadIndices <= BadIndicesRemaining &&
330	"second pass found more bad indices");
331	if (!LastError && NumBadIndices == BadIndicesRemaining) {
332	return llvm::make_error<CodeViewError>(
333	Args: cv_error_code::corrupt_record, Args: "Input type graph contains cycles");
334	}
335	}
336
337	if (LastError)
338	return std::move(*LastError);
339	return Error::success();
340	}
341
342	Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) {
343	BinaryStreamRef Stream = Types.getUnderlyingStream();
344	ArrayRef<uint8_t> Buffer;
345	cantFail(Err: Stream.readBytes(Offset: `0`, Size: Stream.getLength(), Buffer));
346
347	return forEachCodeViewRecord<CVType>(
348	StreamBuffer: Buffer, F: [this](const CVType &T) { return remapType(Type: T); });
349	}
350
351	Error TypeStreamMerger::remapType(const CVType &Type) {
352	auto R = shouldRemapType(Type);
353	if (!R)
354	return R.takeError();
355
356	TypeIndex DestIdx = Untranslated;
357	if (*R) {
358	auto DoSerialize =
359	[this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> {
360	return remapIndices(OriginalType: Type, Storage);
361	};
362	unsigned AlignedSize = alignTo(Value: Type.RecordData.size(), Align: `4`);
363
364	if (LLVM_LIKELY(UseGlobalHashes)) {
365	GlobalTypeTableBuilder &Dest =
366	isIdRecord(K: Type.kind()) ? DestGlobalIdStream : DestGlobalTypeStream;
367	GloballyHashedType H = GlobalHashes [CurIndex.toArrayIndex()];
368	DestIdx = Dest.insertRecordAs(Hash: H, RecordSize: AlignedSize, Create: DoSerialize);
369	} else {
370	MergingTypeTableBuilder &Dest =
371	isIdRecord(K: Type.kind()) ? DestIdStream : DestTypeStream;
372
373	RemapStorage.resize(N: AlignedSize);
374	ArrayRef<uint8_t> Result = DoSerialize (RemapStorage);
375	if (!Result.empty())
376	DestIdx = Dest.insertRecordBytes(Record&: Result);
377	}
378	}
379	addMapping(Idx: DestIdx);
380
381	++CurIndex;
382	assert((IsSecondPass \|\| IndexMap.size() == slotForIndex(CurIndex)) &&
383	"visitKnownRecord should add one index map entry");
384	return Error::success();
385	}
386
387	ArrayRef<uint8_t>
388	TypeStreamMerger::remapIndices(const CVType &OriginalType,
389	MutableArrayRef<uint8_t> Storage) {
390	unsigned Align = OriginalType.RecordData.size() & `3`;
391	assert(Storage.size() == alignTo(OriginalType.RecordData.size(), `4`) &&
392	"The storage buffer size is not a multiple of 4 bytes which will "
393	"cause misalignment in the output TPI stream!");
394
395	SmallVector<TiReference, `4`> Refs;
396	discoverTypeIndices(RecordData: OriginalType.RecordData, Refs);
397	if (Refs.empty() && Align == `0`)
398	return OriginalType.RecordData;
399
400	::memcpy(dest: Storage.data(), src: OriginalType.RecordData.data(),
401	n: OriginalType.RecordData.size());
402
403	uint8_t DestContent = Storage.data() + sizeof*(RecordPrefix);
404
405	for (auto &Ref : Refs) {
406	TypeIndex *DestTIs =
407	reinterpret_cast<TypeIndex *>(DestContent + Ref.Offset);
408
409	for (size_t I = `0`; I < Ref.Count; ++I) {
410	TypeIndex &TI = DestTIs[I];
411	bool Success = (Ref.Kind == TiRefKind::IndexRef) ? remapItemIndex(Idx&: TI)
412	: remapTypeIndex(Idx&: TI);
413	if (LLVM_UNLIKELY(!Success))
414	return {};
415	}
416	}
417
418	if (Align > `0`) {
419	RecordPrefix *StorageHeader =
420	reinterpret_cast<RecordPrefix *>(Storage.data());
421	StorageHeader->RecordLen += `4` - Align;
422
423	DestContent = Storage.data() + OriginalType.RecordData.size();
424	for (; Align < `4`; ++Align)
425	*DestContent++ = LF_PAD4 - Align;
426	}
427	return Storage;
428	}
429
430	Error llvm::codeview::mergeTypeRecords(MergingTypeTableBuilder &Dest,
431	SmallVectorImpl<TypeIndex> &SourceToDest,
432	const CVTypeArray &Types) {
433	TypeStreamMerger M(SourceToDest);
434	return M.mergeTypeRecords(Dest, Types);
435	}
436
437	Error llvm::codeview::mergeIdRecords(MergingTypeTableBuilder &Dest,
438	ArrayRef<TypeIndex> TypeSourceToDest,
439	SmallVectorImpl<TypeIndex> &SourceToDest,
440	const CVTypeArray &Ids) {
441	TypeStreamMerger M(SourceToDest);
442	return M.mergeIdRecords(Dest, TypeSourceToDest, Ids);
443	}
444
445	Error llvm::codeview::mergeTypeAndIdRecords(
446	MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes,
447	SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
448	std::optional<PCHMergerInfo> &PCHInfo) {
449	TypeStreamMerger M(SourceToDest);
450	return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, PCHInfo);
451	}
452
453	Error llvm::codeview::mergeTypeAndIdRecords(
454	GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes,
455	SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
456	ArrayRef<GloballyHashedType> Hashes,
457	std::optional<PCHMergerInfo> &PCHInfo) {
458	TypeStreamMerger M(SourceToDest);
459	return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes, PCHInfo);
460	}
461
462	Error llvm::codeview::mergeTypeRecords(GlobalTypeTableBuilder &Dest,
463	SmallVectorImpl<TypeIndex> &SourceToDest,
464	const CVTypeArray &Types,
465	ArrayRef<GloballyHashedType> Hashes,
466	std::optional<PCHMergerInfo> &PCHInfo) {
467	TypeStreamMerger M(SourceToDest);
468	return M.mergeTypeRecords(Dest, Types, Hashes, PCHInfo);
469	}
470
471	Error llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder &Dest,
472	ArrayRef<TypeIndex> Types,
473	SmallVectorImpl<TypeIndex> &SourceToDest,
474	const CVTypeArray &Ids,
475	ArrayRef<GloballyHashedType> Hashes) {
476	TypeStreamMerger M(SourceToDest);
477	return M.mergeIdRecords(Dest, TypeSourceToDest: Types, Ids, Hashes);
478	}
479
480	Expected<bool> TypeStreamMerger::shouldRemapType(const CVType &Type) {
481	// For object files containing precompiled types, we need to extract the
482	// signature, through EndPrecompRecord. This is done here for performance
483	// reasons, to avoid re-parsing the Types stream.
484	if (Type.kind() == LF_ENDPRECOMP) {
485	EndPrecompRecord EP;
486	if (auto EC = TypeDeserializer::deserializeAs(CVT&: const_cast<CVType &>(Type),
487	Record&: EP))
488	return joinErrors(E1: std::move(EC), E2: errorCorruptRecord());
489	// Only one record of this kind can appear in a OBJ.
490	if (PCHInfo)
491	return errorCorruptRecord();
492	PCHInfo.emplace(args: PCHMergerInfo{.PCHSignature: EP.getSignature(), .EndPrecompIndex: CurIndex.toArrayIndex()});
493	return false;
494	}
495	return true;
496	}
497

Browse the source code of llvm_projects/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp