DebugTypes.cpp source code [llvm_projects/lld/COFF/DebugTypes.cpp]

1	//===- DebugTypes.cpp -----------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "DebugTypes.h"
10	#include "COFFLinkerContext.h"
11	#include "Chunks.h"
12	#include "Driver.h"
13	#include "InputFiles.h"
14	#include "PDB.h"
15	#include "TypeMerger.h"
16	#include "lld/Common/ErrorHandler.h"
17	#include "lld/Common/Memory.h"
18	#include "llvm/ADT/StringExtras.h"
19	#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
20	#include "llvm/DebugInfo/CodeView/TypeRecord.h"
21	#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
22	#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
23	#include "llvm/DebugInfo/PDB/GenericError.h"
24	#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
25	#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
26	#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
27	#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
28	#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
29	#include "llvm/Support/FormatVariadic.h"
30	#include "llvm/Support/Parallel.h"
31	#include "llvm/Support/Path.h"
32	#include "llvm/Support/TimeProfiler.h"
33
34	using namespace llvm;
35	using namespace llvm::codeview;
36	using namespace lld;
37	using namespace lld::coff;
38
39	namespace {
40	class TypeServerIpiSource;
41
42	// The TypeServerSource class represents a PDB type server, a file referenced by
43	// OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
44	// files, therefore there must be only once instance per OBJ lot. The file path
45	// is discovered from the dependent OBJ's debug type stream. The
46	// TypeServerSource object is then queued and loaded by the COFF Driver. The
47	// debug type stream for such PDB files will be merged first in the final PDB,
48	// before any dependent OBJ.
49	class TypeServerSource : public TpiSource {
50	public:
51	explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
52	: TpiSource (ctx, PDB, nullptr), pdbInputFile(f) {
53	if (f->loadErrorStr)
54	return;
55	pdb::PDBFile &file = f->session ->getPDBFile();
56	auto expectedInfo = file.getPDBInfoStream();
57	if (!expectedInfo)
58	return;
59	Guid = expectedInfo ->getGuid();
60	auto it = ctx.typeServerSourceMappings.emplace(args&: Guid, args: this);
61	if (!it.second) {
62	// If we hit here we have collision on Guid's in two PDB files.
63	// This can happen if the PDB Guid is invalid or if we are really
64	// unlucky. This should fall back on stright file-system lookup.
65	it.first ->second = nullptr;
66	}
67	}
68
69	Error mergeDebugT(TypeMerger *m) override;
70
71	void loadGHashes() override;
72	void remapTpiWithGHashes(GHashState *g) override;
73
74	bool isDependency() const override { return true; }
75
76	PDBInputFile pdbInputFile = nullptr*;
77
78	// TpiSource for IPI stream.
79	TypeServerIpiSource ipiSrc = nullptr*;
80
81	// The PDB signature GUID.
82	codeview::GUID Guid;
83	};
84
85	// Companion to TypeServerSource. Stores the index map for the IPI stream in the
86	// PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
87	// invariant of one type index space per source.
88	class TypeServerIpiSource : public TpiSource {
89	public:
90	explicit TypeServerIpiSource(COFFLinkerContext &ctx)
91	: TpiSource (ctx, PDBIpi, nullptr) {}
92
93	friend class TypeServerSource;
94
95	// All of the TpiSource methods are no-ops. The parent TypeServerSource
96	// handles both TPI and IPI.
97	Error mergeDebugT(TypeMerger m) override { return* Error::success(); }
98	void loadGHashes() override {}
99	void remapTpiWithGHashes(GHashState *g) override {}
100	bool isDependency() const override { return true; }
101	};
102
103	// This class represents the debug type stream of an OBJ file that depends on a
104	// PDB type server (see TypeServerSource).
105	class UseTypeServerSource : public TpiSource {
106	Expected<TypeServerSource *> getTypeServerSource();
107
108	public:
109	UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
110	: TpiSource (ctx, UsingPDB, f), typeServerDependency (ts) {}
111
112	Error mergeDebugT(TypeMerger *m) override;
113
114	// No need to load ghashes from /Zi objects.
115	void loadGHashes() override {}
116	void remapTpiWithGHashes(GHashState *g) override;
117
118	// Information about the PDB type server dependency, that needs to be loaded
119	// in before merging this OBJ.
120	TypeServer2Record typeServerDependency;
121	};
122
123	// This class represents the debug type stream of a Microsoft precompiled
124	// headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
125	// PDB, before any other OBJs that depend on this. Note that only MSVC generate
126	// such files, clang does not.
127	class PrecompSource : public TpiSource {
128	public:
129	PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource (ctx, PCH, f) {
130	// If the S_OBJNAME record contains the PCH signature, we'll register this
131	// source file right away.
132	registerMapping();
133	}
134
135	Error mergeDebugT(TypeMerger *m) override;
136
137	void loadGHashes() override;
138
139	bool isDependency() const override { return true; }
140
141	private:
142	void registerMapping();
143
144	// Whether this precomp OBJ was recorded in the precompSourceMappings map.
145	// Only happens if the file->pchSignature is valid.
146	bool registered = false;
147	};
148
149	// This class represents the debug type stream of an OBJ file that depends on a
150	// Microsoft precompiled headers OBJ (see PrecompSource).
151	class UsePrecompSource : public TpiSource {
152	public:
153	UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
154	: TpiSource (ctx, UsingPCH, f), precompDependency (precomp) {}
155
156	Error mergeDebugT(TypeMerger *m) override;
157
158	void loadGHashes() override;
159	void remapTpiWithGHashes(GHashState *g) override;
160
161	private:
162	Error mergeInPrecompHeaderObj();
163
164	PrecompSource *findObjByName(StringRef fileNameOnly);
165	PrecompSource findPrecompSource(ObjFile file, PrecompRecord &pr);
166	Expected<PrecompSource > findPrecompMap(ObjFile file, PrecompRecord &pr);
167
168	public:
169	// Information about the Precomp OBJ dependency, that needs to be loaded in
170	// before merging this OBJ.
171	PrecompRecord precompDependency;
172	};
173	} // namespace
174
175	TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
176	: ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
177	ctx.addTpiSource(tpi: this);
178	}
179
180	// Vtable key method.
181	TpiSource::~TpiSource() {
182	// Silence any assertions about unchecked errors.
183	consumeError(Err: std::move(typeMergingError));
184	}
185
186	TpiSource lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile file) {
187	return make<TpiSource>(args&: ctx, args: TpiSource::Regular, args&: file);
188	}
189
190	TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
191	PDBInputFile *pdbInputFile) {
192	// Type server sources come in pairs: the TPI stream, and the IPI stream.
193	auto *tpiSource = make<TypeServerSource>(args&: ctx, args&: pdbInputFile);
194	if (pdbInputFile->session ->getPDBFile().hasPDBIpiStream())
195	tpiSource->ipiSrc = make<TypeServerIpiSource>(args&: ctx);
196	return tpiSource;
197	}
198
199	TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
200	ObjFile *file,
201	TypeServer2Record ts) {
202	return make<UseTypeServerSource>(args&: ctx, args&: file, args&: ts);
203	}
204
205	TpiSource lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile file) {
206	return make<PrecompSource>(args&: ctx, args&: file);
207	}
208
209	TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
210	ObjFile *file,
211	PrecompRecord precomp) {
212	return make<UsePrecompSource>(args&: ctx, args&: file, args&: precomp);
213	}
214
215	bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
216	if (ti.isSimple())
217	return true;
218
219	// This can be an item index or a type index. Choose the appropriate map.
220	ArrayRef<TypeIndex> tpiOrIpiMap =
221	(refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
222	if (ti.toArrayIndex() >= tpiOrIpiMap.size())
223	return false;
224	ti = tpiOrIpiMap [ti.toArrayIndex()];
225	return true;
226	}
227
228	void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
229	ArrayRef<TiReference> typeRefs) {
230	MutableArrayRef<uint8_t> contents = rec.drop_front(N: sizeof(RecordPrefix));
231	for (const TiReference &ref : typeRefs) {
232	unsigned byteSize = ref.Count * sizeof(TypeIndex);
233	if (contents.size() < ref.Offset + byteSize)
234	fatal(msg: "symbol record too short");
235
236	MutableArrayRef<TypeIndex> indices(
237	reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
238	for (TypeIndex &ti : indices) {
239	if (!remapTypeIndex(ti, refKind: ref.Kind)) {
240	if (ctx.config.verbose) {
241	uint16_t kind =
242	reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
243	StringRef fname = file ? file->getName() : "<unknown PDB>";
244	log(msg: "failed to remap type index in record of kind 0x" +
245	utohexstr(X: kind) + " in " + fname + " with bad " +
246	(ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
247	" index 0x" + utohexstr(X: ti.getIndex()));
248	}
249	ti = TypeIndex (SimpleTypeKind::NotTranslated);
250	continue;
251	}
252	}
253	}
254	}
255
256	void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
257	// TODO: Handle errors similar to symbols.
258	SmallVector<TiReference, `32`> typeRefs;
259	discoverTypeIndices(Type: CVType (rec), Refs&: typeRefs);
260	remapRecord(rec, typeRefs);
261	}
262
263	bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
264	// Discover type index references in the record. Skip it if we don't
265	// know where they are.
266	SmallVector<TiReference, `32`> typeRefs;
267	if (!discoverTypeIndicesInSymbol(RecordData: rec, Refs&: typeRefs))
268	return false;
269	remapRecord(rec, typeRefs);
270	return true;
271	}
272
273	// A COFF .debug$H section is currently a clang extension. This function checks
274	// if a .debug$H section is in a format that we expect / understand, so that we
275	// can ignore any sections which are coincidentally also named .debug$H but do
276	// not contain a format we recognize.
277	static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
278	if (debugH.size() < sizeof(object::debug_h_header))
279	return false;
280	auto *header =
281	reinterpret_cast<const object::debug_h_header *>(debugH.data());
282	debugH = debugH.drop_front(N: sizeof(object::debug_h_header));
283	return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
284	header->Version == `0` &&
285	header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::BLAKE3) &&
286	(debugH.size() % `8` == `0`);
287	}
288
289	static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
290	SectionChunk *sec =
291	SectionChunk::findByName(sections: file->getDebugChunks(), name: ".debug$H");
292	if (!sec)
293	return std::nullopt;
294	ArrayRef<uint8_t> contents = sec->getContents();
295	if (!canUseDebugH(debugH: contents))
296	return std::nullopt;
297	return contents;
298	}
299
300	static ArrayRef<GloballyHashedType>
301	getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
302	assert(canUseDebugH(debugH));
303	debugH = debugH.drop_front(N: sizeof(object::debug_h_header));
304	uint32_t count = debugH.size() / sizeof(GloballyHashedType);
305	return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
306	}
307
308	// Merge .debug$T for a generic object file.
309	Error TpiSource::mergeDebugT(TypeMerger *m) {
310	assert(!ctx.config.debugGHashes &&
311	"use remapTpiWithGHashes when ghash is enabled");
312
313	CVTypeArray types;
314	BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
315	cantFail(Err: reader.readArray(Array&: types, Size: reader.getLength()));
316
317	// When dealing with PCH.OBJ, some indices were already merged.
318	unsigned nbHeadIndices = indexMapStorage.size();
319
320	std::optional<PCHMergerInfo> pchInfo;
321	if (auto err = mergeTypeAndIdRecords(DestIds&: m->idTable, DestTypes&: m->typeTable,
322	SourceToDest&: indexMapStorage, IdsAndTypes: types, PCHInfo&: pchInfo))
323	fatal(msg: "codeview::mergeTypeAndIdRecords failed: " +
324	toString(E: std::move(err)));
325	if (pchInfo) {
326	file->pchSignature = pchInfo ->PCHSignature;
327	endPrecompIdx = pchInfo ->EndPrecompIndex;
328	}
329
330	// In an object, there is only one mapping for both types and items.
331	tpiMap = indexMapStorage;
332	ipiMap = indexMapStorage;
333
334	if (ctx.config.showSummary) {
335	nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
336	nbTypeRecordsBytes = reader.getLength();
337	// Count how many times we saw each type record in our input. This
338	// calculation requires a second pass over the type records to classify each
339	// record as a type or index. This is slow, but this code executes when
340	// collecting statistics.
341	m->tpiCounts.resize(N: m->getTypeTable().size());
342	m->ipiCounts.resize(N: m->getIDTable().size());
343	uint32_t srcIdx = nbHeadIndices;
344	for (const CVType &ty : types) {
345	TypeIndex dstIdx = tpiMap [srcIdx++];
346	// Type merging may fail, so a complex source type may become the simple
347	// NotTranslated type, which cannot be used as an array index.
348	if (dstIdx.isSimple())
349	continue;
350	SmallVectorImpl<uint32_t> &counts =
351	isIdRecord(K: ty.kind()) ? m->ipiCounts : m->tpiCounts;
352	++counts [dstIdx.toArrayIndex()];
353	}
354	}
355
356	return Error::success();
357	}
358
359	// Merge types from a type server PDB.
360	Error TypeServerSource::mergeDebugT(TypeMerger *m) {
361	assert(!ctx.config.debugGHashes &&
362	"use remapTpiWithGHashes when ghash is enabled");
363
364	pdb::PDBFile &pdbFile = pdbInputFile->session ->getPDBFile();
365	Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
366	if (auto e = expectedTpi.takeError())
367	fatal(msg: "Type server does not have TPI stream: " + toString(E: std::move(e)));
368	pdb::TpiStream maybeIpi = nullptr*;
369	if (pdbFile.hasPDBIpiStream()) {
370	Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
371	if (auto e = expectedIpi.takeError())
372	fatal(msg: "Error getting type server IPI stream: " + toString(E: std::move(e)));
373	maybeIpi = &*expectedIpi;
374	}
375
376	// Merge TPI first, because the IPI stream will reference type indices.
377	if (auto err = mergeTypeRecords(Dest&: m->typeTable, SourceToDest&: indexMapStorage,
378	Types: expectedTpi ->typeArray()))
379	fatal(msg: "codeview::mergeTypeRecords failed: " + toString(E: std::move(err)));
380	tpiMap = indexMapStorage;
381
382	// Merge IPI.
383	if (maybeIpi) {
384	if (auto err = mergeIdRecords(Dest&: m->idTable, Types: tpiMap, SourceToDest&: ipiSrc->indexMapStorage,
385	Ids: maybeIpi->typeArray()))
386	fatal(msg: "codeview::mergeIdRecords failed: " + toString(E: std::move(err)));
387	ipiMap = ipiSrc->indexMapStorage;
388	}
389
390	if (ctx.config.showSummary) {
391	nbTypeRecords = tpiMap.size() + ipiMap.size();
392	nbTypeRecordsBytes =
393	expectedTpi ->typeArray().getUnderlyingStream().getLength() +
394	(maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
395	: `0`);
396
397	// Count how many times we saw each type record in our input. If a
398	// destination type index is present in the source to destination type index
399	// map, that means we saw it once in the input. Add it to our histogram.
400	m->tpiCounts.resize(N: m->getTypeTable().size());
401	m->ipiCounts.resize(N: m->getIDTable().size());
402	for (TypeIndex ti : tpiMap)
403	if (!ti.isSimple())
404	++m->tpiCounts [ti.toArrayIndex()];
405	for (TypeIndex ti : ipiMap)
406	if (!ti.isSimple())
407	++m->ipiCounts [ti.toArrayIndex()];
408	}
409
410	return Error::success();
411	}
412
413	Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
414	const codeview::GUID &tsId = typeServerDependency.getGuid();
415	StringRef tsPath = typeServerDependency.getName();
416
417	TypeServerSource tsSrc = nullptr*;
418	auto it = ctx.typeServerSourceMappings.find(x: tsId);
419	if (it != ctx.typeServerSourceMappings.end()) {
420	tsSrc = (TypeServerSource *)it ->second;
421	}
422	if (tsSrc == nullptr) {
423	// The file failed to load, lookup by name
424	PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, path: tsPath, fromFile: file);
425	if (!pdb)
426	return createFileError(F: tsPath, E: errorCodeToError(EC: std::error_code (
427	ENOENT, std::generic_category())));
428	// If an error occurred during loading, throw it now
429	if (pdb->loadErrorStr)
430	return createFileError(
431	F: tsPath, E: make_error<StringError>(Args&: *pdb->loadErrorStr,
432	Args: llvm::inconvertibleErrorCode()));
433
434	tsSrc = (TypeServerSource *)pdb->debugTypesObj;
435
436	// Just because a file with a matching name was found and it was an actual
437	// PDB file doesn't mean it matches. For it to match the InfoStream's GUID
438	// must match the GUID specified in the TypeServer2 record.
439	if (tsSrc->Guid != tsId) {
440	return createFileError(F: tsPath,
441	E: make_error<pdb::PDBError>(
442	Args: pdb::pdb_error_code::signature_out_of_date));
443	}
444	}
445	return tsSrc;
446	}
447
448	Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
449	Expected<TypeServerSource *> tsSrc = getTypeServerSource();
450	if (!tsSrc)
451	return tsSrc.takeError();
452
453	pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session ->getPDBFile();
454	auto expectedInfo = pdbSession.getPDBInfoStream();
455	if (!expectedInfo)
456	return expectedInfo.takeError();
457
458	// Reuse the type index map of the type server.
459	tpiMap = (*tsSrc)->tpiMap;
460	ipiMap = (*tsSrc)->ipiMap;
461	return Error::success();
462	}
463
464	static bool equalsPath(StringRef path1, StringRef path2) {
465	#if defined(_WIN32)
466	return path1.equals_insensitive(path2);
467	#else
468	return path1 == path2;
469	#endif
470	}
471
472	// Find by name an OBJ provided on the command line
473	PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
474	SmallString<`128`> currentPath;
475	for (auto kv : ctx.precompSourceMappings) {
476	StringRef currentFileName = sys::path::filename(path: kv.second->file->getName(),
477	style: sys::path::Style::windows);
478
479	// Compare based solely on the file name (link.exe behavior)
480	if (equalsPath(path1: currentFileName, path2: fileNameOnly))
481	return (PrecompSource *)kv.second;
482	}
483	return nullptr;
484	}
485
486	PrecompSource UsePrecompSource::findPrecompSource(ObjFile file,
487	PrecompRecord &pr) {
488	// Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
489	// records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
490	// the paths embedded in the OBJs are in the Windows format.
491	SmallString<`128`> prFileName =
492	sys::path::filename(path: pr.getPrecompFilePath(), style: sys::path::Style::windows);
493
494	auto it = ctx.precompSourceMappings.find(x: pr.getSignature());
495	if (it != ctx.precompSourceMappings.end()) {
496	return (PrecompSource *)it ->second;
497	}
498	// Lookup by name
499	return findObjByName(fileNameOnly: prFileName);
500	}
501
502	Expected<PrecompSource > UsePrecompSource::findPrecompMap(ObjFile file,
503	PrecompRecord &pr) {
504	PrecompSource *precomp = findPrecompSource(file, pr);
505
506	if (!precomp)
507	return createFileError(
508	F: pr.getPrecompFilePath(),
509	E: make_error<pdb::PDBError>(Args: pdb::pdb_error_code::no_matching_pch));
510
511	// Don't rely on the PCH signature to validate the concordance between the PCH
512	// and the OBJ that uses it. However we do validate here that the
513	// LF_ENDPRECOMP record index lines up with the number of type records
514	// LF_PRECOMP is expecting.
515	if (precomp->endPrecompIdx != pr.getTypesCount())
516	return createFileError(
517	F: toString(file),
518	E: make_error<pdb::PDBError>(Args: pdb::pdb_error_code::no_matching_pch));
519
520	return precomp;
521	}
522
523	/// Merges a precompiled headers TPI map into the current TPI map. The
524	/// precompiled headers object will also be loaded and remapped in the
525	/// process.
526	Error UsePrecompSource::mergeInPrecompHeaderObj() {
527	auto e = findPrecompMap(file, pr&: precompDependency);
528	if (!e)
529	return e.takeError();
530
531	PrecompSource precompSrc = e;
532	if (precompSrc->tpiMap.empty())
533	return Error::success();
534
535	assert(precompDependency.getStartTypeIndex() ==
536	TypeIndex::FirstNonSimpleIndex);
537	assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
538	// Use the previously remapped index map from the precompiled headers.
539	indexMapStorage.insert(I: indexMapStorage.begin(), From: precompSrc->tpiMap.begin(),
540	To: precompSrc->tpiMap.begin() +
541	precompDependency.getTypesCount());
542
543	return Error::success();
544	}
545
546	Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
547	// This object was compiled with /Yu, so process the corresponding
548	// precompiled headers object (/Yc) first. Some type indices in the current
549	// object are referencing data in the precompiled headers object, so we need
550	// both to be loaded.
551	if (Error e = mergeInPrecompHeaderObj())
552	return e;
553
554	return TpiSource::mergeDebugT(m);
555	}
556
557	Error PrecompSource::mergeDebugT(TypeMerger *m) {
558	// In some cases, the S_OBJNAME record doesn't contain the PCH signature.
559	// The signature comes later with the LF_ENDPRECOMP record, so we first need
560	// to merge in all the .PCH.OBJ file type records, before registering below.
561	if (Error e = TpiSource::mergeDebugT(m))
562	return e;
563
564	registerMapping();
565
566	return Error::success();
567	}
568
569	void PrecompSource::registerMapping() {
570	if (registered)
571	return;
572	if (file->pchSignature && *file->pchSignature) {
573	auto it = ctx.precompSourceMappings.emplace(args&: file->pchSignature, args: this*);
574	if (!it.second)
575	fatal(msg: "a PCH object with the same signature has already been provided (" +
576	toString(file: it.first ->second->file) + " and " + toString(file) + ")");
577	registered = true;
578	}
579	}
580
581	//===----------------------------------------------------------------------===//
582	// Parellel GHash type merging implementation.
583	//===----------------------------------------------------------------------===//
584
585	void TpiSource::loadGHashes() {
586	if (std::optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
587	ghashes = getHashesFromDebugH(debugH: *debugH);
588	ownedGHashes = false;
589	} else {
590	CVTypeArray types;
591	BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
592	cantFail(Err: reader.readArray(Array&: types, Size: reader.getLength()));
593	assignGHashesFromVector(hashVec: GloballyHashedType::hashTypes(Records&: types));
594	}
595
596	fillIsItemIndexFromDebugT();
597	}
598
599	// Copies ghashes from a vector into an array. These are long lived, so it's
600	// worth the time to copy these into an appropriately sized vector to reduce
601	// memory usage.
602	void TpiSource::assignGHashesFromVector(
603	std::vector<GloballyHashedType> &&hashVec) {
604	if (hashVec.empty())
605	return;
606	GloballyHashedType hashes = new* GloballyHashedType[hashVec.size()];
607	memcpy(dest: hashes, src: hashVec.data(), n: hashVec.size() * sizeof(GloballyHashedType));
608	ghashes = ArrayRef(hashes, hashVec.size());
609	ownedGHashes = true;
610	}
611
612	// Faster way to iterate type records. forEachTypeChecked is faster than
613	// iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
614	static void forEachTypeChecked(ArrayRef<uint8_t> types,
615	function_ref<void(const CVType &)> fn) {
616	checkError(
617	e: forEachCodeViewRecord<CVType>(StreamBuffer: types, F: [fn](const CVType &ty) -> Error {
618	fn (ty);
619	return Error::success();
620	}));
621	}
622
623	// Walk over file->debugTypes and fill in the isItemIndex bit vector.
624	// TODO: Store this information in .debug$H so that we don't have to recompute
625	// it. This is the main bottleneck slowing down parallel ghashing with one
626	// thread over single-threaded ghashing.
627	void TpiSource::fillIsItemIndexFromDebugT() {
628	uint32_t index = `0`;
629	isItemIndex.resize(N: ghashes.size());
630	forEachTypeChecked(types: file->debugTypes, fn: [&](const CVType &ty) {
631	if (isIdRecord(K: ty.kind()))
632	isItemIndex.set(index);
633	++index;
634	});
635	}
636
637	void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
638	// Decide if the merged type goes into TPI or IPI.
639	bool isItem = isIdRecord(K: ty.kind());
640	MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
641
642	// Copy the type into our mutable buffer.
643	assert(ty.length() <= codeview::MaxRecordLength);
644	size_t offset = merged.recs.size();
645	size_t newSize = alignTo(Value: ty.length(), Align: `4`);
646	merged.recs.resize(new_size: offset + newSize);
647	auto newRec = MutableArrayRef(&merged.recs [offset], newSize);
648	memcpy(dest: newRec.data(), src: ty.data().data(), n: newSize);
649
650	// Fix up the record prefix and padding bytes if it required resizing.
651	if (newSize != ty.length()) {
652	reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - `2`;
653	for (size_t i = ty.length(); i < newSize; ++i)
654	newRec [i] = LF_PAD0 + (newSize - i);
655	}
656
657	// Remap the type indices in the new record.
658	remapTypesInTypeRecord(rec: newRec);
659	uint32_t pdbHash = check(e: pdb::hashTypeRecord(Type: CVType (newRec)));
660	merged.recSizes.push_back(x: static_cast<uint16_t>(newSize));
661	merged.recHashes.push_back(x: pdbHash);
662
663	// Retain a mapping from PDB function id to PDB function type. This mapping is
664	// used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
665	// symbols.
666	if (ty.kind() == LF_FUNC_ID \|\| ty.kind() == LF_MFUNC_ID) {
667	bool success = ty.length() >= `12`;
668	TypeIndex funcId = curIndex;
669	if (success)
670	success &= remapTypeIndex(ti&: funcId, refKind: TiRefKind::IndexRef);
671	TypeIndex funcType =
672	*reinterpret_cast<const TypeIndex *>(&newRec.data()[`8`]);
673	if (success) {
674	funcIdToType.push_back(x: {funcId, funcType});
675	} else {
676	StringRef fname = file ? file->getName() : "<unknown PDB>";
677	warn(msg: "corrupt LF_[M]FUNC_ID record 0x" + utohexstr(X: curIndex.getIndex()) +
678	" in " + fname);
679	}
680	}
681	}
682
683	void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
684	TypeIndex beginIndex) {
685	// Re-sort the list of unique types by index.
686	if (kind == PDB)
687	assert(llvm::is_sorted(uniqueTypes));
688	else
689	llvm::sort(C&: uniqueTypes);
690
691	// Accumulate all the unique types into one buffer in mergedTypes.
692	uint32_t ghashIndex = `0`;
693	auto nextUniqueIndex = uniqueTypes.begin();
694	assert(mergedTpi.recs.empty());
695	assert(mergedIpi.recs.empty());
696
697	// Pre-compute the number of elements in advance to avoid std::vector resizes.
698	unsigned nbTpiRecs = `0`;
699	unsigned nbIpiRecs = `0`;
700	forEachTypeChecked(types: typeRecords, fn: [&](const CVType &ty) {
701	if (nextUniqueIndex != uniqueTypes.end() &&
702	*nextUniqueIndex == ghashIndex) {
703	assert(ty.length() <= codeview::MaxRecordLength);
704	size_t newSize = alignTo(Value: ty.length(), Align: `4`);
705	(isIdRecord(K: ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
706	++nextUniqueIndex;
707	}
708	++ghashIndex;
709	});
710	mergedTpi.recs.reserve(n: nbTpiRecs);
711	mergedIpi.recs.reserve(n: nbIpiRecs);
712
713	// Do the actual type merge.
714	ghashIndex = `0`;
715	nextUniqueIndex = uniqueTypes.begin();
716	forEachTypeChecked(types: typeRecords, fn: [&](const CVType &ty) {
717	if (nextUniqueIndex != uniqueTypes.end() &&
718	*nextUniqueIndex == ghashIndex) {
719	mergeTypeRecord(curIndex: beginIndex + ghashIndex, ty);
720	++nextUniqueIndex;
721	}
722	++ghashIndex;
723	});
724	assert(nextUniqueIndex == uniqueTypes.end() &&
725	"failed to merge all desired records");
726	assert(uniqueTypes.size() ==
727	mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
728	"missing desired record");
729	}
730
731	void TpiSource::remapTpiWithGHashes(GHashState *g) {
732	assert(ctx.config.debugGHashes && "ghashes must be enabled");
733	fillMapFromGHashes(m: g);
734	tpiMap = indexMapStorage;
735	ipiMap = indexMapStorage;
736	mergeUniqueTypeRecords(typeRecords: file->debugTypes);
737	// TODO: Free all unneeded ghash resources now that we have a full index map.
738
739	if (ctx.config.showSummary) {
740	nbTypeRecords = ghashes.size();
741	nbTypeRecordsBytes = file->debugTypes.size();
742	}
743	}
744
745	// PDBs do not actually store global hashes, so when merging a type server
746	// PDB we have to synthesize global hashes. To do this, we first synthesize
747	// global hashes for the TPI stream, since it is independent, then we
748	// synthesize hashes for the IPI stream, using the hashes for the TPI stream
749	// as inputs.
750	void TypeServerSource::loadGHashes() {
751	// Don't hash twice.
752	if (!ghashes.empty())
753	return;
754	pdb::PDBFile &pdbFile = pdbInputFile->session ->getPDBFile();
755
756	// Hash TPI stream.
757	Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
758	if (auto e = expectedTpi.takeError())
759	fatal(msg: "Type server does not have TPI stream: " + toString(E: std::move(e)));
760	assignGHashesFromVector(
761	hashVec: GloballyHashedType::hashTypes(Records: expectedTpi ->typeArray()));
762	isItemIndex.resize(N: ghashes.size());
763
764	// Hash IPI stream, which depends on TPI ghashes.
765	if (!pdbFile.hasPDBIpiStream())
766	return;
767	Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
768	if (auto e = expectedIpi.takeError())
769	fatal(msg: "error retrieving IPI stream: " + toString(E: std::move(e)));
770	ipiSrc->assignGHashesFromVector(
771	hashVec: GloballyHashedType::hashIds(Records: expectedIpi ->typeArray(), TypeHashes: ghashes));
772
773	// The IPI stream isItemIndex bitvector should be all ones.
774	ipiSrc->isItemIndex.resize(N: ipiSrc->ghashes.size());
775	ipiSrc->isItemIndex.set(I: `0`, E: ipiSrc->ghashes.size());
776	}
777
778	// Flatten discontiguous PDB type arrays to bytes so that we can use
779	// forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
780	// type servers is faster than iterating all object files compiled with /Z7 with
781	// CVTypeArray, which has high overheads due to the virtual interface of
782	// BinaryStream::readBytes.
783	static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
784	BinaryStreamRef stream = types.getUnderlyingStream();
785	ArrayRef<uint8_t> debugTypes;
786	checkError(e: stream.readBytes(Offset: `0`, Size: stream.getLength(), Buffer&: debugTypes));
787	return debugTypes;
788	}
789
790	// Merge types from a type server PDB.
791	void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
792	assert(ctx.config.debugGHashes && "ghashes must be enabled");
793
794	// IPI merging depends on TPI, so do TPI first, then do IPI. No need to
795	// propagate errors, those should've been handled during ghash loading.
796	pdb::PDBFile &pdbFile = pdbInputFile->session ->getPDBFile();
797	pdb::TpiStream &tpi = check(e: pdbFile.getPDBTpiStream());
798	fillMapFromGHashes(m: g);
799	tpiMap = indexMapStorage;
800	mergeUniqueTypeRecords(typeRecords: typeArrayToBytes(types: tpi.typeArray()));
801	if (pdbFile.hasPDBIpiStream()) {
802	pdb::TpiStream &ipi = check(e: pdbFile.getPDBIpiStream());
803	ipiSrc->indexMapStorage.resize(N: ipiSrc->ghashes.size());
804	ipiSrc->fillMapFromGHashes(m: g);
805	ipiMap = ipiSrc->indexMapStorage;
806	ipiSrc->tpiMap = tpiMap;
807	ipiSrc->ipiMap = ipiMap;
808	ipiSrc->mergeUniqueTypeRecords(typeRecords: typeArrayToBytes(types: ipi.typeArray()));
809
810	if (ctx.config.showSummary) {
811	nbTypeRecords = ipiSrc->ghashes.size();
812	nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
813	}
814	}
815
816	if (ctx.config.showSummary) {
817	nbTypeRecords += ghashes.size();
818	nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
819	}
820	}
821
822	void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
823	// No remapping to do with /Zi objects. Simply use the index map from the type
824	// server. Errors should have been reported earlier. Symbols from this object
825	// will be ignored.
826	Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
827	if (!maybeTsSrc) {
828	typeMergingError =
829	joinErrors(E1: std::move(typeMergingError), E2: maybeTsSrc.takeError());
830	return;
831	}
832	TypeServerSource tsSrc = maybeTsSrc;
833	tpiMap = tsSrc->tpiMap;
834	ipiMap = tsSrc->ipiMap;
835	}
836
837	void PrecompSource::loadGHashes() {
838	if (getDebugH(file)) {
839	warn(msg: "ignoring .debug$H section; pch with ghash is not implemented");
840	}
841
842	uint32_t ghashIdx = `0`;
843	std::vector<GloballyHashedType> hashVec;
844	forEachTypeChecked(types: file->debugTypes, fn: [&](const CVType &ty) {
845	// Remember the index of the LF_ENDPRECOMP record so it can be excluded from
846	// the PDB. There must be an entry in the list of ghashes so that the type
847	// indexes of the following records in the /Yc PCH object line up.
848	if (ty.kind() == LF_ENDPRECOMP) {
849	EndPrecompRecord endPrecomp;
850	cantFail(Err: TypeDeserializer::deserializeAs<EndPrecompRecord>(
851	CVT&: const_cast<CVType &>(ty), Record&: endPrecomp));
852	file->pchSignature = endPrecomp.getSignature();
853	registerMapping();
854	endPrecompIdx = ghashIdx;
855	}
856
857	hashVec.push_back(x: GloballyHashedType::hashType(Type: ty, PreviousTypes: hashVec, PreviousIds: hashVec));
858	isItemIndex.push_back(Val: isIdRecord(K: ty.kind()));
859	++ghashIdx;
860	});
861	assignGHashesFromVector(hashVec: std::move(hashVec));
862	}
863
864	void UsePrecompSource::loadGHashes() {
865	auto e = findPrecompMap(file, pr&: precompDependency);
866	if (!e) {
867	warn(msg: toString(E: e.takeError()));
868	return;
869	}
870
871	PrecompSource pchSrc = e;
872
873	// To compute ghashes of a /Yu object file, we need to build on the ghashes of
874	// the /Yc PCH object. After we are done hashing, discard the ghashes from the
875	// PCH source so we don't unnecessarily try to deduplicate them.
876	std::vector<GloballyHashedType> hashVec =
877	pchSrc->ghashes.take_front(N: precompDependency.getTypesCount());
878	forEachTypeChecked(types: file->debugTypes, fn: [&](const CVType &ty) {
879	hashVec.push_back(x: GloballyHashedType::hashType(Type: ty, PreviousTypes: hashVec, PreviousIds: hashVec));
880	isItemIndex.push_back(Val: isIdRecord(K: ty.kind()));
881	});
882	hashVec.erase(first: hashVec.begin(),
883	last: hashVec.begin() + precompDependency.getTypesCount());
884	assignGHashesFromVector(hashVec: std::move(hashVec));
885	}
886
887	void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
888	fillMapFromGHashes(m: g);
889	// This object was compiled with /Yu, so process the corresponding
890	// precompiled headers object (/Yc) first. Some type indices in the current
891	// object are referencing data in the precompiled headers object, so we need
892	// both to be loaded.
893	if (Error e = mergeInPrecompHeaderObj()) {
894	typeMergingError = joinErrors(E1: std::move(typeMergingError), E2: std::move(e));
895	return;
896	}
897
898	tpiMap = indexMapStorage;
899	ipiMap = indexMapStorage;
900	mergeUniqueTypeRecords(typeRecords: file->debugTypes,
901	beginIndex: TypeIndex (precompDependency.getStartTypeIndex() +
902	precompDependency.getTypesCount()));
903	if (ctx.config.showSummary) {
904	nbTypeRecords = ghashes.size();
905	nbTypeRecordsBytes = file->debugTypes.size();
906	}
907	}
908
909	namespace {
910	/// A concurrent hash table for global type hashing. It is based on this paper:
911	/// Concurrent Hash Tables: Fast and General(?)!
912	/// https://dl.acm.org/doi/10.1145/3309206
913	///
914	/// This hash table is meant to be used in two phases:
915	/// 1. concurrent insertions
916	/// 2. concurrent reads
917	/// It does not support lookup, deletion, or rehashing. It uses linear probing.
918	///
919	/// The paper describes storing a key-value pair in two machine words.
920	/// Generally, the values stored in this map are type indices, and we can use
921	/// those values to recover the ghash key from a side table. This allows us to
922	/// shrink the table entries further at the cost of some loads, and sidesteps
923	/// the need for a 128 bit atomic compare-and-swap operation.
924	///
925	/// During insertion, a priority function is used to decide which insertion
926	/// should be preferred. This ensures that the output is deterministic. For
927	/// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
928	///
929	class GHashCell;
930	struct GHashTable {
931	GHashCell table = nullptr*;
932	uint32_t tableSize = `0`;
933
934	GHashTable() = default;
935	~GHashTable();
936
937	/// Initialize the table with the given size. Because the table cannot be
938	/// resized, the initial size of the table must be large enough to contain all
939	/// inputs, or insertion may not be able to find an empty cell.
940	void init(uint32_t newTableSize);
941
942	/// Insert the cell with the given ghash into the table. Return the insertion
943	/// position in the table. It is safe for the caller to store the insertion
944	/// position because the table cannot be resized.
945	uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
946	GHashCell newCell);
947	};
948
949	/// A ghash table cell for deduplicating types from TpiSources.
950	class GHashCell {
951	// Force "data" to be 64-bit aligned; otherwise, some versions of clang
952	// will generate calls to libatomic when using some versions of libstdc++
953	// on 32-bit targets. (Also, in theory, there could be a target where
954	// new[] doesn't always return an 8-byte-aligned allocation.)
955	alignas(sizeof(uint64_t)) uint64_t data = `0`;
956
957	public:
958	GHashCell() = default;
959
960	// Construct data most to least significant so that sorting works well:
961	// - isItem
962	// - tpiSrcIdx
963	// - ghashIdx
964	// Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
965	// non-zero representation.
966	GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
967	: data((uint64_t(isItem) << `63U`) \| (uint64_t(tpiSrcIdx + `1`) << `32ULL`) \|
968	ghashIdx) {
969	assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
970	assert(ghashIdx == getGHashIdx() && "round trip failure");
971	}
972
973	explicit GHashCell(uint64_t data) : data(data) {}
974
975	// The empty cell is all zeros.
976	bool isEmpty() const { return data == `0ULL`; }
977
978	/// Extract the tpiSrcIdx.
979	uint32_t getTpiSrcIdx() const {
980	return ((uint32_t)(data >> `32U`) & `0x7FFFFFFF`) - `1`;
981	}
982
983	/// Extract the index into the ghash array of the TpiSource.
984	uint32_t getGHashIdx() const { return (uint32_t)data; }
985
986	bool isItem() const { return data & (`1ULL` << `63U`); }
987
988	/// Get the ghash key for this cell.
989	GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
990	return ctx.tpiSourceList [getTpiSrcIdx()]->ghashes [getGHashIdx()];
991	}
992
993	/// The priority function for the cell. The data is stored such that lower
994	/// tpiSrcIdx and ghashIdx values are preferred, which means that type record
995	/// from earlier sources are more likely to prevail.
996	friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
997	return l.data < r.data;
998	}
999	};
1000	} // namespace
1001
1002	namespace lld::coff {
1003	/// This type is just a wrapper around GHashTable with external linkage so it
1004	/// can be used from a header.
1005	struct GHashState {
1006	GHashTable table;
1007	};
1008	} // namespace lld::coff
1009
1010	GHashTable::~GHashTable() { delete[] table; }
1011
1012	void GHashTable::init(uint32_t newTableSize) {
1013	table = new GHashCell[newTableSize];
1014	memset(s: table, c: `0`, n: newTableSize * sizeof(GHashCell));
1015	tableSize = newTableSize;
1016	}
1017
1018	uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
1019	GHashCell newCell) {
1020	assert(!newCell.isEmpty() && "cannot insert empty cell value");
1021
1022	// FIXME: The low bytes of SHA1 have low entropy for short records, which
1023	// type records are. Swap the byte order for better entropy. A better ghash
1024	// won't need this.
1025	uint32_t startIdx =
1026	llvm::byteswap<uint64_t>(V: *reinterpret_cast<uint64_t *>(&ghash)) %
1027	tableSize;
1028
1029	// Do a linear probe starting at startIdx.
1030	uint32_t idx = startIdx;
1031	while (true) {
1032	// Run a compare and swap loop. There are four cases:
1033	// - cell is empty: CAS into place and return
1034	// - cell has matching key, earlier priority: do nothing, return
1035	// - cell has matching key, later priority: CAS into place and return
1036	// - cell has non-matching key: hash collision, probe next cell
1037	auto cellPtr = reinterpret_cast<std::atomic<GHashCell> >(&table[idx]);
1038	GHashCell oldCell(cellPtr->load());
1039	while (oldCell.isEmpty() \|\| oldCell.getGHash(ctx) == ghash) {
1040	// Check if there is an existing ghash entry with a higher priority
1041	// (earlier ordering). If so, this is a duplicate, we are done.
1042	if (!oldCell.isEmpty() && oldCell < newCell)
1043	return idx;
1044	// Either the cell is empty, or our value is higher priority. Try to
1045	// compare and swap. If it succeeds, we are done.
1046	if (cellPtr->compare_exchange_weak(e&: oldCell, i: newCell))
1047	return idx;
1048	// If the CAS failed, check this cell again.
1049	}
1050
1051	// Advance the probe. Wrap around to the beginning if we run off the end.
1052	++idx;
1053	idx = idx == tableSize ? `0` : idx;
1054	if (idx == startIdx) {
1055	// If this becomes an issue, we could mark failure and rehash from the
1056	// beginning with a bigger table. There is no difference between rehashing
1057	// internally and starting over.
1058	report_fatal_error(reason: "ghash table is full");
1059	}
1060	}
1061	llvm_unreachable("left infloop");
1062	}
1063
1064	TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
1065	: typeTable (alloc), idTable (alloc), ctx(c) {}
1066
1067	TypeMerger::~TypeMerger() = default;
1068
1069	void TypeMerger::mergeTypesWithGHash() {
1070	// Load ghashes. Do type servers and PCH objects first.
1071	{
1072	llvm::TimeTraceScope timeScope("Load GHASHes");
1073	ScopedTimer t1(ctx.loadGHashTimer);
1074	parallelForEach(R&: dependencySources,
1075	Fn: [&](TpiSource *source) { source->loadGHashes(); });
1076	parallelForEach(R&: objectSources,
1077	Fn: [&](TpiSource *source) { source->loadGHashes(); });
1078	}
1079
1080	llvm::TimeTraceScope timeScope("Merge types (GHASH)");
1081	ScopedTimer t2(ctx.mergeGHashTimer);
1082	GHashState ghashState;
1083
1084	// Estimate the size of hash table needed to deduplicate ghashes. This must
1085	// be larger than the number of unique types, or hash table insertion may not
1086	// be able to find a vacant slot. Summing the input types guarantees this, but
1087	// it is a gross overestimate. The table size could be reduced to save memory,
1088	// but it would require implementing rehashing, and this table is generally
1089	// small compared to total memory usage, at eight bytes per input type record,
1090	// and most input type records are larger than eight bytes.
1091	size_t tableSize = `0`;
1092	for (TpiSource *source : ctx.tpiSourceList)
1093	tableSize += source->ghashes.size();
1094
1095	// Cap the table size so that we can use 32-bit cell indices. Type indices are
1096	// also 32-bit, so this is an inherent PDB file format limit anyway.
1097	tableSize =
1098	std::min(a: size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, b: tableSize);
1099	ghashState.table.init(newTableSize: static_cast<uint32_t>(tableSize));
1100
1101	// Insert ghashes in parallel. During concurrent insertion, we cannot observe
1102	// the contents of the hash table cell, but we can remember the insertion
1103	// position. Because the table does not rehash, the position will not change
1104	// under insertion. After insertion is done, the value of the cell can be read
1105	// to retrieve the final PDB type index.
1106	parallelFor(Begin: `0`, End: ctx.tpiSourceList.size(), Fn: [&](size_t tpiSrcIdx) {
1107	TpiSource *source = ctx.tpiSourceList [tpiSrcIdx];
1108	source->indexMapStorage.resize(N: source->ghashes.size());
1109	for (uint32_t i = `0`, e = source->ghashes.size(); i < e; i++) {
1110	if (source->shouldOmitFromPdb(ghashIdx: i)) {
1111	source->indexMapStorage [i] = TypeIndex (SimpleTypeKind::NotTranslated);
1112	continue;
1113	}
1114	GloballyHashedType ghash = source->ghashes [i];
1115	bool isItem = source->isItemIndex.test(Idx: i);
1116	uint32_t cellIdx =
1117	ghashState.table.insert(ctx, ghash, newCell: GHashCell (isItem, tpiSrcIdx, i));
1118
1119	// Store the ghash cell index as a type index in indexMapStorage. Later
1120	// we will replace it with the PDB type index.
1121	source->indexMapStorage [i] = TypeIndex::fromArrayIndex(Index: cellIdx);
1122	}
1123	});
1124
1125	// Collect all non-empty cells and sort them. This will implicitly assign
1126	// destination type indices, and partition the entries into type records and
1127	// item records. It arranges types in this order:
1128	// - type records
1129	// - source 0, type 0...
1130	// - source 1, type 1...
1131	// - item records
1132	// - source 0, type 1...
1133	// - source 1, type 0...
1134	std::vector<GHashCell> entries;
1135	for (const GHashCell &cell : ArrayRef(ghashState.table.table, tableSize)) {
1136	if (!cell.isEmpty())
1137	entries.push_back(x: cell);
1138	}
1139	parallelSort(R&: entries, Comp: std::less<GHashCell>());
1140	log(msg: formatv(Fmt: "ghash table load factor: {0:p} (size {1} / capacity {2})\n",
1141	Vals: tableSize ? double(entries.size()) / tableSize : `0`,
1142	Vals: entries.size(), Vals&: tableSize));
1143
1144	// Find out how many type and item indices there are.
1145	auto mid = llvm::lower_bound(Range&: entries, Value: GHashCell (true, `0`, `0`));
1146	assert((mid == entries.end() \|\| mid->isItem()) &&
1147	(mid == entries.begin() \|\| !std::prev(mid)->isItem()) &&
1148	"midpoint is not midpoint");
1149	uint32_t numTypes = std::distance(first: entries.begin(), last: mid);
1150	uint32_t numItems = std::distance(first: mid, last: entries.end());
1151	log(msg: "Tpi record count: " + Twine (numTypes));
1152	log(msg: "Ipi record count: " + Twine (numItems));
1153
1154	// Make a list of the "unique" type records to merge for each tpi source. Type
1155	// merging will skip indices not on this list. Store the destination PDB type
1156	// index for these unique types in the tpiMap for each source. The entries for
1157	// non-unique types will be filled in prior to type merging.
1158	for (uint32_t i = `0`, e = entries.size(); i < e; ++i) {
1159	auto &cell = entries [i];
1160	uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
1161	TpiSource *source = ctx.tpiSourceList [tpiSrcIdx];
1162	source->uniqueTypes.push_back(x: cell.getGHashIdx());
1163
1164	// Update the ghash table to store the destination PDB type index in the
1165	// table.
1166	uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
1167	uint32_t ghashCellIndex =
1168	source->indexMapStorage [cell.getGHashIdx()].toArrayIndex();
1169	ghashState.table.table[ghashCellIndex] =
1170	GHashCell (cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
1171	}
1172
1173	// In parallel, remap all types.
1174	for (TpiSource *source : dependencySources)
1175	source->remapTpiWithGHashes(g: &ghashState);
1176	parallelForEach(R&: objectSources, Fn: [&](TpiSource *source) {
1177	source->remapTpiWithGHashes(g: &ghashState);
1178	});
1179
1180	// Build a global map of from function ID to function type.
1181	for (TpiSource *source : ctx.tpiSourceList) {
1182	for (auto idToType : source->funcIdToType)
1183	funcIdToType.insert(KV: idToType);
1184	source->funcIdToType.clear();
1185	}
1186
1187	clearGHashes();
1188	}
1189
1190	void TypeMerger::sortDependencies() {
1191	// Order dependencies first, but preserve the existing order.
1192	std::vector<TpiSource *> deps;
1193	std::vector<TpiSource *> objs;
1194	for (TpiSource *s : ctx.tpiSourceList)
1195	(s->isDependency() ? deps : objs).push_back(x: s);
1196	uint32_t numDeps = deps.size();
1197	uint32_t numObjs = objs.size();
1198	ctx.tpiSourceList = std::move(deps);
1199	ctx.tpiSourceList.insert(position: ctx.tpiSourceList.end(), first: objs.begin(), last: objs.end());
1200	for (uint32_t i = `0`, e = ctx.tpiSourceList.size(); i < e; ++i)
1201	ctx.tpiSourceList [i]->tpiSrcIdx = i;
1202	dependencySources = ArrayRef(ctx.tpiSourceList.data(), numDeps);
1203	objectSources = ArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
1204	}
1205
1206	/// Given the index into the ghash table for a particular type, return the type
1207	/// index for that type in the output PDB.
1208	static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
1209	uint32_t ghashCellIdx) {
1210	GHashCell cell = g->table.table[ghashCellIdx];
1211	return TypeIndex::fromArrayIndex(Index: cell.getGHashIdx());
1212	}
1213
1214	/// Free heap allocated ghashes.
1215	void TypeMerger::clearGHashes() {
1216	for (TpiSource *src : ctx.tpiSourceList) {
1217	if (src->ownedGHashes)
1218	delete[] src->ghashes.data();
1219	src->ghashes = {};
1220	src->isItemIndex.clear();
1221	src->uniqueTypes.clear();
1222	}
1223	}
1224
1225	// Fill in a TPI or IPI index map using ghashes. For each source type, use its
1226	// ghash to lookup its final type index in the PDB, and store that in the map.
1227	void TpiSource::fillMapFromGHashes(GHashState *g) {
1228	for (size_t i = `0`, e = ghashes.size(); i < e; ++i) {
1229	TypeIndex fakeCellIndex = indexMapStorage [i];
1230	if (fakeCellIndex.isSimple())
1231	indexMapStorage [i] = fakeCellIndex;
1232	else
1233	indexMapStorage [i] =
1234	loadPdbTypeIndexFromCell(g, ghashCellIdx: fakeCellIndex.toArrayIndex());
1235	}
1236	}
1237

Browse the source code of llvm_projects/lld/COFF/DebugTypes.cpp