1 | //===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" |
10 | #include "llvm/ADT/SmallVector.h" |
11 | #include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" |
12 | #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" |
13 | #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" |
14 | #include "llvm/DebugInfo/CodeView/TypeIndex.h" |
15 | #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" |
16 | #include "llvm/DebugInfo/CodeView/TypeRecord.h" |
17 | #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" |
18 | #include "llvm/Support/Error.h" |
19 | #include <optional> |
20 | |
21 | using namespace llvm; |
22 | using namespace llvm::codeview; |
23 | |
24 | static inline size_t slotForIndex(TypeIndex Idx) { |
25 | assert(!Idx.isSimple() && "simple type indices have no slots" ); |
26 | return Idx.getIndex() - TypeIndex::FirstNonSimpleIndex; |
27 | } |
28 | |
29 | namespace { |
30 | |
31 | /// Implementation of CodeView type stream merging. |
32 | /// |
33 | /// A CodeView type stream is a series of records that reference each other |
34 | /// through type indices. A type index is either "simple", meaning it is less |
35 | /// than 0x1000 and refers to a builtin type, or it is complex, meaning it |
36 | /// refers to a prior type record in the current stream. The type index of a |
37 | /// record is equal to the number of records before it in the stream plus |
38 | /// 0x1000. |
39 | /// |
40 | /// Type records are only allowed to use type indices smaller than their own, so |
41 | /// a type stream is effectively a topologically sorted DAG. Cycles occurring in |
42 | /// the type graph of the source program are resolved with forward declarations |
43 | /// of composite types. This class implements the following type stream merging |
44 | /// algorithm, which relies on this DAG structure: |
45 | /// |
46 | /// - Begin with a new empty stream, and a new empty hash table that maps from |
47 | /// type record contents to new type index. |
48 | /// - For each new type stream, maintain a map from source type index to |
49 | /// destination type index. |
50 | /// - For each record, copy it and rewrite its type indices to be valid in the |
51 | /// destination type stream. |
52 | /// - If the new type record is not already present in the destination stream |
53 | /// hash table, append it to the destination type stream, assign it the next |
54 | /// type index, and update the two hash tables. |
55 | /// - If the type record already exists in the destination stream, discard it |
56 | /// and update the type index map to forward the source type index to the |
57 | /// existing destination type index. |
58 | /// |
59 | /// As an additional complication, type stream merging actually produces two |
60 | /// streams: an item (or IPI) stream and a type stream, as this is what is |
61 | /// actually stored in the final PDB. We choose which records go where by |
62 | /// looking at the record kind. |
63 | class TypeStreamMerger { |
64 | public: |
65 | explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest) |
66 | : IndexMap(SourceToDest) { |
67 | // When dealing with precompiled headers objects, all data in SourceToDest |
68 | // belongs to the precompiled headers object, and is assumed to be already |
69 | // remapped to the target PDB. Any forthcoming type that will be merged in |
70 | // might potentially back-reference this data. We also don't want to resolve |
71 | // twice the types in the precompiled object. |
72 | CurIndex += SourceToDest.size(); |
73 | } |
74 | |
75 | static const TypeIndex Untranslated; |
76 | |
77 | // Local hashing entry points |
78 | Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds, |
79 | MergingTypeTableBuilder &DestTypes, |
80 | const CVTypeArray &IdsAndTypes, |
81 | std::optional<PCHMergerInfo> &PCHInfo); |
82 | Error mergeIdRecords(MergingTypeTableBuilder &Dest, |
83 | ArrayRef<TypeIndex> TypeSourceToDest, |
84 | const CVTypeArray &Ids); |
85 | Error mergeTypeRecords(MergingTypeTableBuilder &Dest, |
86 | const CVTypeArray &Types); |
87 | |
88 | // Global hashing entry points |
89 | Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds, |
90 | GlobalTypeTableBuilder &DestTypes, |
91 | const CVTypeArray &IdsAndTypes, |
92 | ArrayRef<GloballyHashedType> Hashes, |
93 | std::optional<PCHMergerInfo> &PCHInfo); |
94 | Error mergeIdRecords(GlobalTypeTableBuilder &Dest, |
95 | ArrayRef<TypeIndex> TypeSourceToDest, |
96 | const CVTypeArray &Ids, |
97 | ArrayRef<GloballyHashedType> Hashes); |
98 | Error mergeTypeRecords(GlobalTypeTableBuilder &Dest, const CVTypeArray &Types, |
99 | ArrayRef<GloballyHashedType> Hashes, |
100 | std::optional<PCHMergerInfo> &PCHInfo); |
101 | |
102 | private: |
103 | Error doit(const CVTypeArray &Types); |
104 | |
105 | Error remapAllTypes(const CVTypeArray &Types); |
106 | |
107 | Error remapType(const CVType &Type); |
108 | |
109 | void addMapping(TypeIndex Idx); |
110 | |
111 | inline bool remapTypeIndex(TypeIndex &Idx) { |
112 | // If we're mapping a pure index stream, then IndexMap only contains |
113 | // mappings from OldIdStream -> NewIdStream, in which case we will need to |
114 | // use the special mapping from OldTypeStream -> NewTypeStream which was |
115 | // computed externally. Regardless, we use this special map if and only if |
116 | // we are doing an id-only mapping. |
117 | if (!hasTypeStream()) |
118 | return remapIndex(Idx, Map: TypeLookup); |
119 | |
120 | assert(TypeLookup.empty()); |
121 | return remapIndex(Idx, Map: IndexMap); |
122 | } |
123 | inline bool remapItemIndex(TypeIndex &Idx) { |
124 | assert(hasIdStream()); |
125 | return remapIndex(Idx, Map: IndexMap); |
126 | } |
127 | |
128 | bool hasTypeStream() const { |
129 | return (UseGlobalHashes) ? (!!DestGlobalTypeStream) : (!!DestTypeStream); |
130 | } |
131 | |
132 | bool hasIdStream() const { |
133 | return (UseGlobalHashes) ? (!!DestGlobalIdStream) : (!!DestIdStream); |
134 | } |
135 | |
136 | ArrayRef<uint8_t> remapIndices(const CVType &OriginalType, |
137 | MutableArrayRef<uint8_t> Storage); |
138 | |
139 | inline bool remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map) { |
140 | if (LLVM_LIKELY(remapIndexSimple(Idx, Map))) |
141 | return true; |
142 | |
143 | return remapIndexFallback(Idx, Map); |
144 | } |
145 | |
146 | inline bool remapIndexSimple(TypeIndex &Idx, ArrayRef<TypeIndex> Map) const { |
147 | // Simple types are unchanged. |
148 | if (Idx.isSimple()) |
149 | return true; |
150 | |
151 | // Check if this type index refers to a record we've already translated |
152 | // successfully. If it refers to a type later in the stream or a record we |
153 | // had to defer, defer it until later pass. |
154 | unsigned MapPos = slotForIndex(Idx); |
155 | if (LLVM_UNLIKELY(MapPos >= Map.size() || Map[MapPos] == Untranslated)) |
156 | return false; |
157 | |
158 | Idx = Map[MapPos]; |
159 | return true; |
160 | } |
161 | |
162 | bool remapIndexFallback(TypeIndex &Idx, ArrayRef<TypeIndex> Map); |
163 | |
164 | Error errorCorruptRecord() const { |
165 | return llvm::make_error<CodeViewError>(Args: cv_error_code::corrupt_record); |
166 | } |
167 | |
168 | Expected<bool> shouldRemapType(const CVType &Type); |
169 | |
170 | std::optional<Error> LastError; |
171 | |
172 | bool UseGlobalHashes = false; |
173 | |
174 | bool IsSecondPass = false; |
175 | |
176 | unsigned NumBadIndices = 0; |
177 | |
178 | TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex}; |
179 | |
180 | MergingTypeTableBuilder *DestIdStream = nullptr; |
181 | MergingTypeTableBuilder *DestTypeStream = nullptr; |
182 | |
183 | GlobalTypeTableBuilder *DestGlobalIdStream = nullptr; |
184 | GlobalTypeTableBuilder *DestGlobalTypeStream = nullptr; |
185 | |
186 | ArrayRef<GloballyHashedType> GlobalHashes; |
187 | |
188 | // If we're only mapping id records, this array contains the mapping for |
189 | // type records. |
190 | ArrayRef<TypeIndex> TypeLookup; |
191 | |
192 | /// Map from source type index to destination type index. Indexed by source |
193 | /// type index minus 0x1000. |
194 | SmallVectorImpl<TypeIndex> &IndexMap; |
195 | |
196 | /// Temporary storage that we use to copy a record's data while re-writing |
197 | /// its type indices. |
198 | SmallVector<uint8_t, 256> RemapStorage; |
199 | |
200 | std::optional<PCHMergerInfo> PCHInfo; |
201 | }; |
202 | |
203 | } // end anonymous namespace |
204 | |
205 | const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated); |
206 | |
207 | void TypeStreamMerger::addMapping(TypeIndex Idx) { |
208 | if (!IsSecondPass) { |
209 | assert(IndexMap.size() == slotForIndex(CurIndex) && |
210 | "visitKnownRecord should add one index map entry" ); |
211 | IndexMap.push_back(Elt: Idx); |
212 | } else { |
213 | assert(slotForIndex(CurIndex) < IndexMap.size()); |
214 | IndexMap[slotForIndex(Idx: CurIndex)] = Idx; |
215 | } |
216 | } |
217 | |
218 | bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx, |
219 | ArrayRef<TypeIndex> Map) { |
220 | size_t MapPos = slotForIndex(Idx); |
221 | |
222 | // If this is the second pass and this index isn't in the map, then it points |
223 | // outside the current type stream, and this is a corrupt record. |
224 | if (IsSecondPass && MapPos >= Map.size()) { |
225 | // FIXME: Print a more useful error. We can give the current record and the |
226 | // index that we think its pointing to. |
227 | if (LastError) |
228 | LastError = joinErrors(E1: std::move(*LastError), E2: errorCorruptRecord()); |
229 | else |
230 | LastError = errorCorruptRecord(); |
231 | } |
232 | |
233 | ++NumBadIndices; |
234 | |
235 | // This type index is invalid. Remap this to "not translated by cvpack", |
236 | // and return failure. |
237 | Idx = Untranslated; |
238 | return false; |
239 | } |
240 | |
241 | // Local hashing entry points |
242 | Error TypeStreamMerger::mergeTypeRecords(MergingTypeTableBuilder &Dest, |
243 | const CVTypeArray &Types) { |
244 | DestTypeStream = &Dest; |
245 | UseGlobalHashes = false; |
246 | |
247 | return doit(Types); |
248 | } |
249 | |
250 | Error TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder &Dest, |
251 | ArrayRef<TypeIndex> TypeSourceToDest, |
252 | const CVTypeArray &Ids) { |
253 | DestIdStream = &Dest; |
254 | TypeLookup = TypeSourceToDest; |
255 | UseGlobalHashes = false; |
256 | |
257 | return doit(Types: Ids); |
258 | } |
259 | |
260 | Error TypeStreamMerger::mergeTypesAndIds( |
261 | MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes, |
262 | const CVTypeArray &IdsAndTypes, std::optional<PCHMergerInfo> &PCHInfo) { |
263 | DestIdStream = &DestIds; |
264 | DestTypeStream = &DestTypes; |
265 | UseGlobalHashes = false; |
266 | auto Err = doit(Types: IdsAndTypes); |
267 | PCHInfo = this->PCHInfo; |
268 | return Err; |
269 | } |
270 | |
271 | // Global hashing entry points |
272 | Error TypeStreamMerger::mergeTypeRecords( |
273 | GlobalTypeTableBuilder &Dest, const CVTypeArray &Types, |
274 | ArrayRef<GloballyHashedType> Hashes, |
275 | std::optional<PCHMergerInfo> &PCHInfo) { |
276 | DestGlobalTypeStream = &Dest; |
277 | UseGlobalHashes = true; |
278 | GlobalHashes = Hashes; |
279 | auto Err = doit(Types); |
280 | PCHInfo = this->PCHInfo; |
281 | return Err; |
282 | } |
283 | |
284 | Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest, |
285 | ArrayRef<TypeIndex> TypeSourceToDest, |
286 | const CVTypeArray &Ids, |
287 | ArrayRef<GloballyHashedType> Hashes) { |
288 | DestGlobalIdStream = &Dest; |
289 | TypeLookup = TypeSourceToDest; |
290 | UseGlobalHashes = true; |
291 | GlobalHashes = Hashes; |
292 | |
293 | return doit(Types: Ids); |
294 | } |
295 | |
296 | Error TypeStreamMerger::mergeTypesAndIds( |
297 | GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes, |
298 | const CVTypeArray &IdsAndTypes, ArrayRef<GloballyHashedType> Hashes, |
299 | std::optional<PCHMergerInfo> &PCHInfo) { |
300 | DestGlobalIdStream = &DestIds; |
301 | DestGlobalTypeStream = &DestTypes; |
302 | UseGlobalHashes = true; |
303 | GlobalHashes = Hashes; |
304 | auto Err = doit(Types: IdsAndTypes); |
305 | PCHInfo = this->PCHInfo; |
306 | return Err; |
307 | } |
308 | |
309 | Error TypeStreamMerger::(const CVTypeArray &Types) { |
310 | if (auto EC = remapAllTypes(Types)) |
311 | return EC; |
312 | |
313 | // If we found bad indices but no other errors, try doing another pass and see |
314 | // if we can resolve the indices that weren't in the map on the first pass. |
315 | // This may require multiple passes, but we should always make progress. MASM |
316 | // is the only known CodeView producer that makes type streams that aren't |
317 | // topologically sorted. The standard library contains MASM-produced objects, |
318 | // so this is important to handle correctly, but we don't have to be too |
319 | // efficient. MASM type streams are usually very small. |
320 | while (!LastError && NumBadIndices > 0) { |
321 | unsigned BadIndicesRemaining = NumBadIndices; |
322 | IsSecondPass = true; |
323 | NumBadIndices = 0; |
324 | CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex); |
325 | |
326 | if (auto EC = remapAllTypes(Types)) |
327 | return EC; |
328 | |
329 | assert(NumBadIndices <= BadIndicesRemaining && |
330 | "second pass found more bad indices" ); |
331 | if (!LastError && NumBadIndices == BadIndicesRemaining) { |
332 | return llvm::make_error<CodeViewError>( |
333 | Args: cv_error_code::corrupt_record, Args: "Input type graph contains cycles" ); |
334 | } |
335 | } |
336 | |
337 | if (LastError) |
338 | return std::move(*LastError); |
339 | return Error::success(); |
340 | } |
341 | |
342 | Error TypeStreamMerger::(const CVTypeArray &Types) { |
343 | BinaryStreamRef Stream = Types.getUnderlyingStream(); |
344 | ArrayRef<uint8_t> Buffer; |
345 | cantFail(Err: Stream.readBytes(Offset: 0, Size: Stream.getLength(), Buffer)); |
346 | |
347 | return forEachCodeViewRecord<CVType>( |
348 | StreamBuffer: Buffer, F: [this](const CVType &T) { return remapType(Type: T); }); |
349 | } |
350 | |
351 | Error TypeStreamMerger::remapType(const CVType &Type) { |
352 | auto R = shouldRemapType(Type); |
353 | if (!R) |
354 | return R.takeError(); |
355 | |
356 | TypeIndex DestIdx = Untranslated; |
357 | if (*R) { |
358 | auto DoSerialize = |
359 | [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> { |
360 | return remapIndices(OriginalType: Type, Storage); |
361 | }; |
362 | unsigned AlignedSize = alignTo(Value: Type.RecordData.size(), Align: 4); |
363 | |
364 | if (LLVM_LIKELY(UseGlobalHashes)) { |
365 | GlobalTypeTableBuilder &Dest = |
366 | isIdRecord(K: Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream; |
367 | GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()]; |
368 | DestIdx = Dest.insertRecordAs(Hash: H, RecordSize: AlignedSize, Create: DoSerialize); |
369 | } else { |
370 | MergingTypeTableBuilder &Dest = |
371 | isIdRecord(K: Type.kind()) ? *DestIdStream : *DestTypeStream; |
372 | |
373 | RemapStorage.resize(N: AlignedSize); |
374 | ArrayRef<uint8_t> Result = DoSerialize(RemapStorage); |
375 | if (!Result.empty()) |
376 | DestIdx = Dest.insertRecordBytes(Record&: Result); |
377 | } |
378 | } |
379 | addMapping(Idx: DestIdx); |
380 | |
381 | ++CurIndex; |
382 | assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) && |
383 | "visitKnownRecord should add one index map entry" ); |
384 | return Error::success(); |
385 | } |
386 | |
387 | ArrayRef<uint8_t> |
388 | TypeStreamMerger::remapIndices(const CVType &OriginalType, |
389 | MutableArrayRef<uint8_t> Storage) { |
390 | unsigned Align = OriginalType.RecordData.size() & 3; |
391 | assert(Storage.size() == alignTo(OriginalType.RecordData.size(), 4) && |
392 | "The storage buffer size is not a multiple of 4 bytes which will " |
393 | "cause misalignment in the output TPI stream!" ); |
394 | |
395 | SmallVector<TiReference, 4> Refs; |
396 | discoverTypeIndices(RecordData: OriginalType.RecordData, Refs); |
397 | if (Refs.empty() && Align == 0) |
398 | return OriginalType.RecordData; |
399 | |
400 | ::memcpy(dest: Storage.data(), src: OriginalType.RecordData.data(), |
401 | n: OriginalType.RecordData.size()); |
402 | |
403 | uint8_t *DestContent = Storage.data() + sizeof(RecordPrefix); |
404 | |
405 | for (auto &Ref : Refs) { |
406 | TypeIndex *DestTIs = |
407 | reinterpret_cast<TypeIndex *>(DestContent + Ref.Offset); |
408 | |
409 | for (size_t I = 0; I < Ref.Count; ++I) { |
410 | TypeIndex &TI = DestTIs[I]; |
411 | bool Success = (Ref.Kind == TiRefKind::IndexRef) ? remapItemIndex(Idx&: TI) |
412 | : remapTypeIndex(Idx&: TI); |
413 | if (LLVM_UNLIKELY(!Success)) |
414 | return {}; |
415 | } |
416 | } |
417 | |
418 | if (Align > 0) { |
419 | RecordPrefix * = |
420 | reinterpret_cast<RecordPrefix *>(Storage.data()); |
421 | StorageHeader->RecordLen += 4 - Align; |
422 | |
423 | DestContent = Storage.data() + OriginalType.RecordData.size(); |
424 | for (; Align < 4; ++Align) |
425 | *DestContent++ = LF_PAD4 - Align; |
426 | } |
427 | return Storage; |
428 | } |
429 | |
430 | Error llvm::codeview::mergeTypeRecords(MergingTypeTableBuilder &Dest, |
431 | SmallVectorImpl<TypeIndex> &SourceToDest, |
432 | const CVTypeArray &Types) { |
433 | TypeStreamMerger M(SourceToDest); |
434 | return M.mergeTypeRecords(Dest, Types); |
435 | } |
436 | |
437 | Error llvm::codeview::mergeIdRecords(MergingTypeTableBuilder &Dest, |
438 | ArrayRef<TypeIndex> TypeSourceToDest, |
439 | SmallVectorImpl<TypeIndex> &SourceToDest, |
440 | const CVTypeArray &Ids) { |
441 | TypeStreamMerger M(SourceToDest); |
442 | return M.mergeIdRecords(Dest, TypeSourceToDest, Ids); |
443 | } |
444 | |
445 | Error llvm::codeview::mergeTypeAndIdRecords( |
446 | MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes, |
447 | SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes, |
448 | std::optional<PCHMergerInfo> &PCHInfo) { |
449 | TypeStreamMerger M(SourceToDest); |
450 | return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, PCHInfo); |
451 | } |
452 | |
453 | Error llvm::codeview::mergeTypeAndIdRecords( |
454 | GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes, |
455 | SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes, |
456 | ArrayRef<GloballyHashedType> Hashes, |
457 | std::optional<PCHMergerInfo> &PCHInfo) { |
458 | TypeStreamMerger M(SourceToDest); |
459 | return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes, PCHInfo); |
460 | } |
461 | |
462 | Error llvm::codeview::mergeTypeRecords(GlobalTypeTableBuilder &Dest, |
463 | SmallVectorImpl<TypeIndex> &SourceToDest, |
464 | const CVTypeArray &Types, |
465 | ArrayRef<GloballyHashedType> Hashes, |
466 | std::optional<PCHMergerInfo> &PCHInfo) { |
467 | TypeStreamMerger M(SourceToDest); |
468 | return M.mergeTypeRecords(Dest, Types, Hashes, PCHInfo); |
469 | } |
470 | |
471 | Error llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder &Dest, |
472 | ArrayRef<TypeIndex> Types, |
473 | SmallVectorImpl<TypeIndex> &SourceToDest, |
474 | const CVTypeArray &Ids, |
475 | ArrayRef<GloballyHashedType> Hashes) { |
476 | TypeStreamMerger M(SourceToDest); |
477 | return M.mergeIdRecords(Dest, TypeSourceToDest: Types, Ids, Hashes); |
478 | } |
479 | |
480 | Expected<bool> TypeStreamMerger::shouldRemapType(const CVType &Type) { |
481 | // For object files containing precompiled types, we need to extract the |
482 | // signature, through EndPrecompRecord. This is done here for performance |
483 | // reasons, to avoid re-parsing the Types stream. |
484 | if (Type.kind() == LF_ENDPRECOMP) { |
485 | EndPrecompRecord EP; |
486 | if (auto EC = TypeDeserializer::deserializeAs(CVT&: const_cast<CVType &>(Type), |
487 | Record&: EP)) |
488 | return joinErrors(E1: std::move(EC), E2: errorCorruptRecord()); |
489 | // Only one record of this kind can appear in a OBJ. |
490 | if (PCHInfo) |
491 | return errorCorruptRecord(); |
492 | PCHInfo.emplace(args: PCHMergerInfo{.PCHSignature: EP.getSignature(), .EndPrecompIndex: CurIndex.toArrayIndex()}); |
493 | return false; |
494 | } |
495 | return true; |
496 | } |
497 | |