1//===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
10#include "llvm/ADT/SmallVector.h"
11#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
12#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
13#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
14#include "llvm/DebugInfo/CodeView/TypeIndex.h"
15#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
16#include "llvm/DebugInfo/CodeView/TypeRecord.h"
17#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
18#include "llvm/Support/Error.h"
19#include <optional>
20
21using namespace llvm;
22using namespace llvm::codeview;
23
24static inline size_t slotForIndex(TypeIndex Idx) {
25 assert(!Idx.isSimple() && "simple type indices have no slots");
26 return Idx.getIndex() - TypeIndex::FirstNonSimpleIndex;
27}
28
29namespace {
30
31/// Implementation of CodeView type stream merging.
32///
33/// A CodeView type stream is a series of records that reference each other
34/// through type indices. A type index is either "simple", meaning it is less
35/// than 0x1000 and refers to a builtin type, or it is complex, meaning it
36/// refers to a prior type record in the current stream. The type index of a
37/// record is equal to the number of records before it in the stream plus
38/// 0x1000.
39///
40/// Type records are only allowed to use type indices smaller than their own, so
41/// a type stream is effectively a topologically sorted DAG. Cycles occurring in
42/// the type graph of the source program are resolved with forward declarations
43/// of composite types. This class implements the following type stream merging
44/// algorithm, which relies on this DAG structure:
45///
46/// - Begin with a new empty stream, and a new empty hash table that maps from
47/// type record contents to new type index.
48/// - For each new type stream, maintain a map from source type index to
49/// destination type index.
50/// - For each record, copy it and rewrite its type indices to be valid in the
51/// destination type stream.
52/// - If the new type record is not already present in the destination stream
53/// hash table, append it to the destination type stream, assign it the next
54/// type index, and update the two hash tables.
55/// - If the type record already exists in the destination stream, discard it
56/// and update the type index map to forward the source type index to the
57/// existing destination type index.
58///
59/// As an additional complication, type stream merging actually produces two
60/// streams: an item (or IPI) stream and a type stream, as this is what is
61/// actually stored in the final PDB. We choose which records go where by
62/// looking at the record kind.
63class TypeStreamMerger {
64public:
65 explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest)
66 : IndexMap(SourceToDest) {
67 // When dealing with precompiled headers objects, all data in SourceToDest
68 // belongs to the precompiled headers object, and is assumed to be already
69 // remapped to the target PDB. Any forthcoming type that will be merged in
70 // might potentially back-reference this data. We also don't want to resolve
71 // twice the types in the precompiled object.
72 CurIndex += SourceToDest.size();
73 }
74
75 static const TypeIndex Untranslated;
76
77 // Local hashing entry points
78 Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
79 MergingTypeTableBuilder &DestTypes,
80 const CVTypeArray &IdsAndTypes,
81 std::optional<PCHMergerInfo> &PCHInfo);
82 Error mergeIdRecords(MergingTypeTableBuilder &Dest,
83 ArrayRef<TypeIndex> TypeSourceToDest,
84 const CVTypeArray &Ids);
85 Error mergeTypeRecords(MergingTypeTableBuilder &Dest,
86 const CVTypeArray &Types);
87
88 // Global hashing entry points
89 Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
90 GlobalTypeTableBuilder &DestTypes,
91 const CVTypeArray &IdsAndTypes,
92 ArrayRef<GloballyHashedType> Hashes,
93 std::optional<PCHMergerInfo> &PCHInfo);
94 Error mergeIdRecords(GlobalTypeTableBuilder &Dest,
95 ArrayRef<TypeIndex> TypeSourceToDest,
96 const CVTypeArray &Ids,
97 ArrayRef<GloballyHashedType> Hashes);
98 Error mergeTypeRecords(GlobalTypeTableBuilder &Dest, const CVTypeArray &Types,
99 ArrayRef<GloballyHashedType> Hashes,
100 std::optional<PCHMergerInfo> &PCHInfo);
101
102private:
103 Error doit(const CVTypeArray &Types);
104
105 Error remapAllTypes(const CVTypeArray &Types);
106
107 Error remapType(const CVType &Type);
108
109 void addMapping(TypeIndex Idx);
110
111 inline bool remapTypeIndex(TypeIndex &Idx) {
112 // If we're mapping a pure index stream, then IndexMap only contains
113 // mappings from OldIdStream -> NewIdStream, in which case we will need to
114 // use the special mapping from OldTypeStream -> NewTypeStream which was
115 // computed externally. Regardless, we use this special map if and only if
116 // we are doing an id-only mapping.
117 if (!hasTypeStream())
118 return remapIndex(Idx, Map: TypeLookup);
119
120 assert(TypeLookup.empty());
121 return remapIndex(Idx, Map: IndexMap);
122 }
123 inline bool remapItemIndex(TypeIndex &Idx) {
124 assert(hasIdStream());
125 return remapIndex(Idx, Map: IndexMap);
126 }
127
128 bool hasTypeStream() const {
129 return (UseGlobalHashes) ? (!!DestGlobalTypeStream) : (!!DestTypeStream);
130 }
131
132 bool hasIdStream() const {
133 return (UseGlobalHashes) ? (!!DestGlobalIdStream) : (!!DestIdStream);
134 }
135
136 ArrayRef<uint8_t> remapIndices(const CVType &OriginalType,
137 MutableArrayRef<uint8_t> Storage);
138
139 inline bool remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map) {
140 if (LLVM_LIKELY(remapIndexSimple(Idx, Map)))
141 return true;
142
143 return remapIndexFallback(Idx, Map);
144 }
145
146 inline bool remapIndexSimple(TypeIndex &Idx, ArrayRef<TypeIndex> Map) const {
147 // Simple types are unchanged.
148 if (Idx.isSimple())
149 return true;
150
151 // Check if this type index refers to a record we've already translated
152 // successfully. If it refers to a type later in the stream or a record we
153 // had to defer, defer it until later pass.
154 unsigned MapPos = slotForIndex(Idx);
155 if (LLVM_UNLIKELY(MapPos >= Map.size() || Map[MapPos] == Untranslated))
156 return false;
157
158 Idx = Map[MapPos];
159 return true;
160 }
161
162 bool remapIndexFallback(TypeIndex &Idx, ArrayRef<TypeIndex> Map);
163
164 Error errorCorruptRecord() const {
165 return llvm::make_error<CodeViewError>(Args: cv_error_code::corrupt_record);
166 }
167
168 Expected<bool> shouldRemapType(const CVType &Type);
169
170 std::optional<Error> LastError;
171
172 bool UseGlobalHashes = false;
173
174 bool IsSecondPass = false;
175
176 unsigned NumBadIndices = 0;
177
178 TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex};
179
180 MergingTypeTableBuilder *DestIdStream = nullptr;
181 MergingTypeTableBuilder *DestTypeStream = nullptr;
182
183 GlobalTypeTableBuilder *DestGlobalIdStream = nullptr;
184 GlobalTypeTableBuilder *DestGlobalTypeStream = nullptr;
185
186 ArrayRef<GloballyHashedType> GlobalHashes;
187
188 // If we're only mapping id records, this array contains the mapping for
189 // type records.
190 ArrayRef<TypeIndex> TypeLookup;
191
192 /// Map from source type index to destination type index. Indexed by source
193 /// type index minus 0x1000.
194 SmallVectorImpl<TypeIndex> &IndexMap;
195
196 /// Temporary storage that we use to copy a record's data while re-writing
197 /// its type indices.
198 SmallVector<uint8_t, 256> RemapStorage;
199
200 std::optional<PCHMergerInfo> PCHInfo;
201};
202
203} // end anonymous namespace
204
205const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated);
206
207void TypeStreamMerger::addMapping(TypeIndex Idx) {
208 if (!IsSecondPass) {
209 assert(IndexMap.size() == slotForIndex(CurIndex) &&
210 "visitKnownRecord should add one index map entry");
211 IndexMap.push_back(Elt: Idx);
212 } else {
213 assert(slotForIndex(CurIndex) < IndexMap.size());
214 IndexMap[slotForIndex(Idx: CurIndex)] = Idx;
215 }
216}
217
218bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx,
219 ArrayRef<TypeIndex> Map) {
220 size_t MapPos = slotForIndex(Idx);
221
222 // If this is the second pass and this index isn't in the map, then it points
223 // outside the current type stream, and this is a corrupt record.
224 if (IsSecondPass && MapPos >= Map.size()) {
225 // FIXME: Print a more useful error. We can give the current record and the
226 // index that we think its pointing to.
227 if (LastError)
228 LastError = joinErrors(E1: std::move(*LastError), E2: errorCorruptRecord());
229 else
230 LastError = errorCorruptRecord();
231 }
232
233 ++NumBadIndices;
234
235 // This type index is invalid. Remap this to "not translated by cvpack",
236 // and return failure.
237 Idx = Untranslated;
238 return false;
239}
240
241// Local hashing entry points
242Error TypeStreamMerger::mergeTypeRecords(MergingTypeTableBuilder &Dest,
243 const CVTypeArray &Types) {
244 DestTypeStream = &Dest;
245 UseGlobalHashes = false;
246
247 return doit(Types);
248}
249
250Error TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder &Dest,
251 ArrayRef<TypeIndex> TypeSourceToDest,
252 const CVTypeArray &Ids) {
253 DestIdStream = &Dest;
254 TypeLookup = TypeSourceToDest;
255 UseGlobalHashes = false;
256
257 return doit(Types: Ids);
258}
259
260Error TypeStreamMerger::mergeTypesAndIds(
261 MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes,
262 const CVTypeArray &IdsAndTypes, std::optional<PCHMergerInfo> &PCHInfo) {
263 DestIdStream = &DestIds;
264 DestTypeStream = &DestTypes;
265 UseGlobalHashes = false;
266 auto Err = doit(Types: IdsAndTypes);
267 PCHInfo = this->PCHInfo;
268 return Err;
269}
270
271// Global hashing entry points
272Error TypeStreamMerger::mergeTypeRecords(
273 GlobalTypeTableBuilder &Dest, const CVTypeArray &Types,
274 ArrayRef<GloballyHashedType> Hashes,
275 std::optional<PCHMergerInfo> &PCHInfo) {
276 DestGlobalTypeStream = &Dest;
277 UseGlobalHashes = true;
278 GlobalHashes = Hashes;
279 auto Err = doit(Types);
280 PCHInfo = this->PCHInfo;
281 return Err;
282}
283
284Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest,
285 ArrayRef<TypeIndex> TypeSourceToDest,
286 const CVTypeArray &Ids,
287 ArrayRef<GloballyHashedType> Hashes) {
288 DestGlobalIdStream = &Dest;
289 TypeLookup = TypeSourceToDest;
290 UseGlobalHashes = true;
291 GlobalHashes = Hashes;
292
293 return doit(Types: Ids);
294}
295
296Error TypeStreamMerger::mergeTypesAndIds(
297 GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes,
298 const CVTypeArray &IdsAndTypes, ArrayRef<GloballyHashedType> Hashes,
299 std::optional<PCHMergerInfo> &PCHInfo) {
300 DestGlobalIdStream = &DestIds;
301 DestGlobalTypeStream = &DestTypes;
302 UseGlobalHashes = true;
303 GlobalHashes = Hashes;
304 auto Err = doit(Types: IdsAndTypes);
305 PCHInfo = this->PCHInfo;
306 return Err;
307}
308
309Error TypeStreamMerger::doit(const CVTypeArray &Types) {
310 if (auto EC = remapAllTypes(Types))
311 return EC;
312
313 // If we found bad indices but no other errors, try doing another pass and see
314 // if we can resolve the indices that weren't in the map on the first pass.
315 // This may require multiple passes, but we should always make progress. MASM
316 // is the only known CodeView producer that makes type streams that aren't
317 // topologically sorted. The standard library contains MASM-produced objects,
318 // so this is important to handle correctly, but we don't have to be too
319 // efficient. MASM type streams are usually very small.
320 while (!LastError && NumBadIndices > 0) {
321 unsigned BadIndicesRemaining = NumBadIndices;
322 IsSecondPass = true;
323 NumBadIndices = 0;
324 CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex);
325
326 if (auto EC = remapAllTypes(Types))
327 return EC;
328
329 assert(NumBadIndices <= BadIndicesRemaining &&
330 "second pass found more bad indices");
331 if (!LastError && NumBadIndices == BadIndicesRemaining) {
332 return llvm::make_error<CodeViewError>(
333 Args: cv_error_code::corrupt_record, Args: "Input type graph contains cycles");
334 }
335 }
336
337 if (LastError)
338 return std::move(*LastError);
339 return Error::success();
340}
341
342Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) {
343 BinaryStreamRef Stream = Types.getUnderlyingStream();
344 ArrayRef<uint8_t> Buffer;
345 cantFail(Err: Stream.readBytes(Offset: 0, Size: Stream.getLength(), Buffer));
346
347 return forEachCodeViewRecord<CVType>(
348 StreamBuffer: Buffer, F: [this](const CVType &T) { return remapType(Type: T); });
349}
350
351Error TypeStreamMerger::remapType(const CVType &Type) {
352 auto R = shouldRemapType(Type);
353 if (!R)
354 return R.takeError();
355
356 TypeIndex DestIdx = Untranslated;
357 if (*R) {
358 auto DoSerialize =
359 [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> {
360 return remapIndices(OriginalType: Type, Storage);
361 };
362 unsigned AlignedSize = alignTo(Value: Type.RecordData.size(), Align: 4);
363
364 if (LLVM_LIKELY(UseGlobalHashes)) {
365 GlobalTypeTableBuilder &Dest =
366 isIdRecord(K: Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream;
367 GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()];
368 DestIdx = Dest.insertRecordAs(Hash: H, RecordSize: AlignedSize, Create: DoSerialize);
369 } else {
370 MergingTypeTableBuilder &Dest =
371 isIdRecord(K: Type.kind()) ? *DestIdStream : *DestTypeStream;
372
373 RemapStorage.resize(N: AlignedSize);
374 ArrayRef<uint8_t> Result = DoSerialize(RemapStorage);
375 if (!Result.empty())
376 DestIdx = Dest.insertRecordBytes(Record&: Result);
377 }
378 }
379 addMapping(Idx: DestIdx);
380
381 ++CurIndex;
382 assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) &&
383 "visitKnownRecord should add one index map entry");
384 return Error::success();
385}
386
387ArrayRef<uint8_t>
388TypeStreamMerger::remapIndices(const CVType &OriginalType,
389 MutableArrayRef<uint8_t> Storage) {
390 unsigned Align = OriginalType.RecordData.size() & 3;
391 assert(Storage.size() == alignTo(OriginalType.RecordData.size(), 4) &&
392 "The storage buffer size is not a multiple of 4 bytes which will "
393 "cause misalignment in the output TPI stream!");
394
395 SmallVector<TiReference, 4> Refs;
396 discoverTypeIndices(RecordData: OriginalType.RecordData, Refs);
397 if (Refs.empty() && Align == 0)
398 return OriginalType.RecordData;
399
400 ::memcpy(dest: Storage.data(), src: OriginalType.RecordData.data(),
401 n: OriginalType.RecordData.size());
402
403 uint8_t *DestContent = Storage.data() + sizeof(RecordPrefix);
404
405 for (auto &Ref : Refs) {
406 TypeIndex *DestTIs =
407 reinterpret_cast<TypeIndex *>(DestContent + Ref.Offset);
408
409 for (size_t I = 0; I < Ref.Count; ++I) {
410 TypeIndex &TI = DestTIs[I];
411 bool Success = (Ref.Kind == TiRefKind::IndexRef) ? remapItemIndex(Idx&: TI)
412 : remapTypeIndex(Idx&: TI);
413 if (LLVM_UNLIKELY(!Success))
414 return {};
415 }
416 }
417
418 if (Align > 0) {
419 RecordPrefix *StorageHeader =
420 reinterpret_cast<RecordPrefix *>(Storage.data());
421 StorageHeader->RecordLen += 4 - Align;
422
423 DestContent = Storage.data() + OriginalType.RecordData.size();
424 for (; Align < 4; ++Align)
425 *DestContent++ = LF_PAD4 - Align;
426 }
427 return Storage;
428}
429
430Error llvm::codeview::mergeTypeRecords(MergingTypeTableBuilder &Dest,
431 SmallVectorImpl<TypeIndex> &SourceToDest,
432 const CVTypeArray &Types) {
433 TypeStreamMerger M(SourceToDest);
434 return M.mergeTypeRecords(Dest, Types);
435}
436
437Error llvm::codeview::mergeIdRecords(MergingTypeTableBuilder &Dest,
438 ArrayRef<TypeIndex> TypeSourceToDest,
439 SmallVectorImpl<TypeIndex> &SourceToDest,
440 const CVTypeArray &Ids) {
441 TypeStreamMerger M(SourceToDest);
442 return M.mergeIdRecords(Dest, TypeSourceToDest, Ids);
443}
444
445Error llvm::codeview::mergeTypeAndIdRecords(
446 MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes,
447 SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
448 std::optional<PCHMergerInfo> &PCHInfo) {
449 TypeStreamMerger M(SourceToDest);
450 return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, PCHInfo);
451}
452
453Error llvm::codeview::mergeTypeAndIdRecords(
454 GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes,
455 SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
456 ArrayRef<GloballyHashedType> Hashes,
457 std::optional<PCHMergerInfo> &PCHInfo) {
458 TypeStreamMerger M(SourceToDest);
459 return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes, PCHInfo);
460}
461
462Error llvm::codeview::mergeTypeRecords(GlobalTypeTableBuilder &Dest,
463 SmallVectorImpl<TypeIndex> &SourceToDest,
464 const CVTypeArray &Types,
465 ArrayRef<GloballyHashedType> Hashes,
466 std::optional<PCHMergerInfo> &PCHInfo) {
467 TypeStreamMerger M(SourceToDest);
468 return M.mergeTypeRecords(Dest, Types, Hashes, PCHInfo);
469}
470
471Error llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder &Dest,
472 ArrayRef<TypeIndex> Types,
473 SmallVectorImpl<TypeIndex> &SourceToDest,
474 const CVTypeArray &Ids,
475 ArrayRef<GloballyHashedType> Hashes) {
476 TypeStreamMerger M(SourceToDest);
477 return M.mergeIdRecords(Dest, TypeSourceToDest: Types, Ids, Hashes);
478}
479
480Expected<bool> TypeStreamMerger::shouldRemapType(const CVType &Type) {
481 // For object files containing precompiled types, we need to extract the
482 // signature, through EndPrecompRecord. This is done here for performance
483 // reasons, to avoid re-parsing the Types stream.
484 if (Type.kind() == LF_ENDPRECOMP) {
485 EndPrecompRecord EP;
486 if (auto EC = TypeDeserializer::deserializeAs(CVT&: const_cast<CVType &>(Type),
487 Record&: EP))
488 return joinErrors(E1: std::move(EC), E2: errorCorruptRecord());
489 // Only one record of this kind can appear in a OBJ.
490 if (PCHInfo)
491 return errorCorruptRecord();
492 PCHInfo.emplace(args: PCHMergerInfo{.PCHSignature: EP.getSignature(), .EndPrecompIndex: CurIndex.toArrayIndex()});
493 return false;
494 }
495 return true;
496}
497