DIEHash.cpp source code [llvm_projects/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp]

1	//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file contains support for DWARF4 hashing of DIEs.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "DIEHash.h"
14	#include "ByteStreamer.h"
15	#include "DwarfCompileUnit.h"
16	#include "DwarfDebug.h"
17	#include "llvm/ADT/ArrayRef.h"
18	#include "llvm/ADT/StringRef.h"
19	#include "llvm/BinaryFormat/Dwarf.h"
20	#include "llvm/CodeGen/AsmPrinter.h"
21	#include "llvm/Support/Debug.h"
22	#include "llvm/Support/raw_ostream.h"
23
24	using namespace llvm;
25
26	#define DEBUG_TYPE "dwarfdebug"
27
28	/// Grabs the string in whichever attribute is passed in and returns
29	/// a reference to it.
30	static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) {
31	// Iterate through all the attributes until we find the one we're
32	// looking for, if we can't find it return an empty string.
33	for (const auto &V : Die.values())
34	if (V.getAttribute() == Attr)
35	return V.getDIEString().getString();
36
37	return StringRef ("");
38	}
39
40	/// Adds the string in \p Str to the hash. This also hashes
41	/// a trailing NULL with the string.
42	void DIEHash::addString(StringRef Str) {
43	LLVM_DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");
44	Hash.update(Str);
45	Hash.update(Data: ArrayRef((uint8_t)`'\0'`));
46	}
47
48	// FIXME: The LEB128 routines are copied and only slightly modified out of
49	// LEB128.h.
50
51	/// Adds the unsigned in \p Value to the hash encoded as a ULEB128.
52	void DIEHash::addULEB128(uint64_t Value) {
53	LLVM_DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
54	do {
55	uint8_t Byte = Value & `0x7f`;
56	Value >>= `7`;
57	if (Value != `0`)
58	Byte \|= `0x80`; // Mark this byte to show that more bytes will follow.
59	Hash.update(Data: Byte);
60	} while (Value != `0`);
61	}
62
63	void DIEHash::addSLEB128(int64_t Value) {
64	LLVM_DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
65	bool More;
66	do {
67	uint8_t Byte = Value & `0x7f`;
68	Value >>= `7`;
69	More = !((((Value == `0`) && ((Byte & `0x40`) == `0`)) \|\|
70	((Value == -`1`) && ((Byte & `0x40`) != `0`))));
71	if (More)
72	Byte \|= `0x80`; // Mark this byte to show that more bytes will follow.
73	Hash.update(Data: Byte);
74	} while (More);
75	}
76
77	/// Including \p Parent adds the context of Parent to the hash..
78	void DIEHash::addParentContext(const DIE &Parent) {
79
80	LLVM_DEBUG(dbgs() << "Adding parent context to hash...\n");
81
82	// [7.27.2] For each surrounding type or namespace beginning with the
83	// outermost such construct...
84	SmallVector<const DIE *, `1`> Parents;
85	const DIE *Cur = &Parent;
86	while (Cur->getParent()) {
87	Parents.push_back(Elt: Cur);
88	Cur = Cur->getParent();
89	}
90	assert(Cur->getTag() == dwarf::DW_TAG_compile_unit \|\|
91	Cur->getTag() == dwarf::DW_TAG_type_unit);
92
93	// Reverse iterate over our list to go from the outermost construct to the
94	// innermost.
95	for (const DIE *Die : llvm::reverse(C&: Parents)) {
96	// ... Append the letter "C" to the sequence...
97	addULEB128(Value: `'C'`);
98
99	// ... Followed by the DWARF tag of the construct...
100	addULEB128(Value: Die->getTag());
101
102	// ... Then the name, taken from the DW_AT_name attribute.
103	StringRef Name = getDIEStringAttr(Die: *Die, Attr: dwarf::DW_AT_name);
104	LLVM_DEBUG(dbgs() << "... adding context: " << Name << "\n");
105	if (!Name.empty())
106	addString(Str: Name);
107	}
108	}
109
110	// Collect all of the attributes for a particular DIE in single structure.
111	void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) {
112
113	for (const auto &V : Die.values()) {
114	LLVM_DEBUG(dbgs() << "Attribute: "
115	<< dwarf::AttributeString(V.getAttribute())
116	<< " added.\n");
117	switch (V.getAttribute()) {
118	#define HANDLE_DIE_HASH_ATTR(NAME) \
119	case dwarf::NAME: \
120	Attrs.NAME = V; \
121	break;
122	#include "DIEHashAttributes.def"
123	default:
124	break;
125	}
126	}
127	}
128
129	void DIEHash::hashShallowTypeReference(dwarf::Attribute Attribute,
130	const DIE &Entry, StringRef Name) {
131	// append the letter 'N'
132	addULEB128(Value: `'N'`);
133
134	// the DWARF attribute code (DW_AT_type or DW_AT_friend),
135	addULEB128(Value: Attribute);
136
137	// the context of the tag,
138	if (const DIE *Parent = Entry.getParent())
139	addParentContext(Parent: *Parent);
140
141	// the letter 'E',
142	addULEB128(Value: `'E'`);
143
144	// and the name of the type.
145	addString(Str: Name);
146
147	// Currently DW_TAG_friends are not used by Clang, but if they do become so,
148	// here's the relevant spec text to implement:
149	//
150	// For DW_TAG_friend, if the referenced entry is the DW_TAG_subprogram,
151	// the context is omitted and the name to be used is the ABI-specific name
152	// of the subprogram (e.g., the mangled linker name).
153	}
154
155	void DIEHash::hashRepeatedTypeReference(dwarf::Attribute Attribute,
156	unsigned DieNumber) {
157	// a) If T is in the list of [previously hashed types], use the letter
158	// 'R' as the marker
159	addULEB128(Value: `'R'`);
160
161	addULEB128(Value: Attribute);
162
163	// and use the unsigned LEB128 encoding of [the index of T in the
164	// list] as the attribute value;
165	addULEB128(Value: DieNumber);
166	}
167
168	void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
169	const DIE &Entry) {
170	assert(Tag != dwarf::DW_TAG_friend && "No current LLVM clients emit friend "
171	"tags. Add support here when there's "
172	"a use case");
173	// Step 5
174	// If the tag in Step 3 is one of [the below tags]
175	if ((Tag == dwarf::DW_TAG_pointer_type \|\|
176	Tag == dwarf::DW_TAG_reference_type \|\|
177	Tag == dwarf::DW_TAG_rvalue_reference_type \|\|
178	Tag == dwarf::DW_TAG_ptr_to_member_type) &&
179	// and the referenced type (via the [below attributes])
180	// FIXME: This seems overly restrictive, and causes hash mismatches
181	// there's a decl/def difference in the containing type of a
182	// ptr_to_member_type, but it's what DWARF says, for some reason.
183	Attribute == dwarf::DW_AT_type) {
184	// ... has a DW_AT_name attribute,
185	StringRef Name = getDIEStringAttr(Die: Entry, Attr: dwarf::DW_AT_name);
186	if (!Name.empty()) {
187	hashShallowTypeReference(Attribute, Entry, Name);
188	return;
189	}
190	}
191
192	unsigned &DieNumber = Numbering [&Entry];
193	if (DieNumber) {
194	hashRepeatedTypeReference(Attribute, DieNumber);
195	return;
196	}
197
198	// otherwise, b) use the letter 'T' as the marker, ...
199	addULEB128(Value: `'T'`);
200
201	addULEB128(Value: Attribute);
202
203	// ... process the type T recursively by performing Steps 2 through 7, and
204	// use the result as the attribute value.
205	DieNumber = Numbering.size();
206	computeHash(Die: Entry);
207	}
208
209	void DIEHash::hashRawTypeReference(const DIE &Entry) {
210	unsigned &DieNumber = Numbering [&Entry];
211	if (DieNumber) {
212	addULEB128(Value: `'R'`);
213	addULEB128(Value: DieNumber);
214	return;
215	}
216	DieNumber = Numbering.size();
217	addULEB128(Value: `'T'`);
218	computeHash(Die: Entry);
219	}
220
221	// Hash all of the values in a block like set of values. This assumes that
222	// all of the data is going to be added as integers.
223	void DIEHash::hashBlockData(const DIE::const_value_range &Values) {
224	for (const auto &V : Values)
225	if (V.getType() == DIEValue::isBaseTypeRef) {
226	const DIE &C =
227	*CU->ExprRefedBaseTypes [V.getDIEBaseTypeRef().getIndex()].Die;
228	StringRef Name = getDIEStringAttr(Die: C, Attr: dwarf::DW_AT_name);
229	assert(!Name.empty() &&
230	"Base types referenced from DW_OP_convert should have a name");
231	hashNestedType(Die: C, Name);
232	} else
233	Hash.update(Data: (uint64_t)V.getDIEInteger().getValue());
234	}
235
236	// Hash the contents of a loclistptr class.
237	void DIEHash::hashLocList(const DIELocList &LocList) {
238	HashingByteStreamer Streamer(*this);
239	DwarfDebug &DD = *AP->getDwarfDebug();
240	const DebugLocStream &Locs = DD.getDebugLocs();
241	const DebugLocStream::List &List = Locs.getList(LI: LocList.getValue());
242	for (const DebugLocStream::Entry &Entry : Locs.getEntries(L: List))
243	DD.emitDebugLocEntry(Streamer, Entry, CU: List.CU);
244	}
245
246	// Hash an individual attribute \param Attr based on the type of attribute and
247	// the form.
248	void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) {
249	dwarf::Attribute Attribute = Value.getAttribute();
250
251	// Other attribute values use the letter 'A' as the marker, and the value
252	// consists of the form code (encoded as an unsigned LEB128 value) followed by
253	// the encoding of the value according to the form code. To ensure
254	// reproducibility of the signature, the set of forms used in the signature
255	// computation is limited to the following: DW_FORM_sdata, DW_FORM_flag,
256	// DW_FORM_string, and DW_FORM_block.
257
258	switch (Value.getType()) {
259	case DIEValue::isNone:
260	llvm_unreachable("Expected valid DIEValue");
261
262	// 7.27 Step 3
263	// ... An attribute that refers to another type entry T is processed as
264	// follows:
265	case DIEValue::isEntry:
266	hashDIEEntry(Attribute, Tag, Entry: Value.getDIEEntry().getEntry());
267	break;
268	case DIEValue::isInteger: {
269	addULEB128(Value: `'A'`);
270	addULEB128(Value: Attribute);
271	switch (Value.getForm()) {
272	case dwarf::DW_FORM_data1:
273	case dwarf::DW_FORM_data2:
274	case dwarf::DW_FORM_data4:
275	case dwarf::DW_FORM_data8:
276	case dwarf::DW_FORM_udata:
277	case dwarf::DW_FORM_sdata:
278	addULEB128(Value: dwarf::DW_FORM_sdata);
279	addSLEB128(Value: (int64_t)Value.getDIEInteger().getValue());
280	break;
281	// DW_FORM_flag_present is just flag with a value of one. We still give it a
282	// value so just use the value.
283	case dwarf::DW_FORM_flag_present:
284	case dwarf::DW_FORM_flag:
285	addULEB128(Value: dwarf::DW_FORM_flag);
286	addULEB128(Value: (int64_t)Value.getDIEInteger().getValue());
287	break;
288	default:
289	llvm_unreachable("Unknown integer form!");
290	}
291	break;
292	}
293	case DIEValue::isString:
294	addULEB128(Value: `'A'`);
295	addULEB128(Value: Attribute);
296	addULEB128(Value: dwarf::DW_FORM_string);
297	addString(Str: Value.getDIEString().getString());
298	break;
299	case DIEValue::isInlineString:
300	addULEB128(Value: `'A'`);
301	addULEB128(Value: Attribute);
302	addULEB128(Value: dwarf::DW_FORM_string);
303	addString(Str: Value.getDIEInlineString().getString());
304	break;
305	case DIEValue::isBlock:
306	case DIEValue::isLoc:
307	case DIEValue::isLocList:
308	addULEB128(Value: `'A'`);
309	addULEB128(Value: Attribute);
310	addULEB128(Value: dwarf::DW_FORM_block);
311	if (Value.getType() == DIEValue::isBlock) {
312	addULEB128(Value: Value.getDIEBlock().computeSize(FormParams: AP->getDwarfFormParams()));
313	hashBlockData(Values: Value.getDIEBlock().values());
314	} else if (Value.getType() == DIEValue::isLoc) {
315	addULEB128(Value: Value.getDIELoc().computeSize(FormParams: AP->getDwarfFormParams()));
316	hashBlockData(Values: Value.getDIELoc().values());
317	} else {
318	// We could add the block length, but that would take
319	// a bit of work and not add a lot of uniqueness
320	// to the hash in some way we could test.
321	hashLocList(LocList: Value.getDIELocList());
322	}
323	break;
324	// FIXME: It's uncertain whether or not we should handle this at the moment.
325	case DIEValue::isExpr:
326	case DIEValue::isLabel:
327	case DIEValue::isBaseTypeRef:
328	case DIEValue::isDelta:
329	case DIEValue::isAddrOffset:
330	llvm_unreachable("Add support for additional value types.");
331	}
332	}
333
334	// Go through the attributes from \param Attrs in the order specified in 7.27.4
335	// and hash them.
336	void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) {
337	#define HANDLE_DIE_HASH_ATTR(NAME) \
338	{ \
339	if (Attrs.NAME) \
340	hashAttribute(Attrs.NAME, Tag); \
341	}
342	#include "DIEHashAttributes.def"
343	// FIXME: Add the extended attributes.
344	}
345
346	// Add all of the attributes for \param Die to the hash.
347	void DIEHash::addAttributes(const DIE &Die) {
348	DIEAttrs Attrs = {};
349	collectAttributes(Die, Attrs);
350	hashAttributes(Attrs, Tag: Die.getTag());
351	}
352
353	void DIEHash::hashNestedType(const DIE &Die, StringRef Name) {
354	// 7.27 Step 7
355	// ... append the letter 'S',
356	addULEB128(Value: `'S'`);
357
358	// the tag of C,
359	addULEB128(Value: Die.getTag());
360
361	// and the name.
362	addString(Str: Name);
363	}
364
365	// Compute the hash of a DIE. This is based on the type signature computation
366	// given in section 7.27 of the DWARF4 standard. It is the md5 hash of a
367	// flattened description of the DIE.
368	void DIEHash::computeHash(const DIE &Die) {
369	// Append the letter 'D', followed by the DWARF tag of the DIE.
370	addULEB128(Value: `'D'`);
371	addULEB128(Value: Die.getTag());
372
373	// Add each of the attributes of the DIE.
374	addAttributes(Die);
375
376	// Then hash each of the children of the DIE.
377	for (const auto &C : Die.children()) {
378	// 7.27 Step 7
379	// If C is a nested type entry or a member function entry, ...
380	if (isType(T: C.getTag()) \|\| (C.getTag() == dwarf::DW_TAG_subprogram && isType(T: C.getParent()->getTag()))) {
381	StringRef Name = getDIEStringAttr(Die: C, Attr: dwarf::DW_AT_name);
382	// ... and has a DW_AT_name attribute
383	if (!Name.empty()) {
384	hashNestedType(Die: C, Name);
385	continue;
386	}
387	}
388	computeHash(Die: C);
389	}
390
391	// Following the last (or if there are no children), append a zero byte.
392	Hash.update(Data: ArrayRef((uint8_t)`'\0'`));
393	}
394
395	/// This is based on the type signature computation given in section 7.27 of the
396	/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
397	/// with the inclusion of the full CU and all top level CU entities.
398	// TODO: Initialize the type chain at 0 instead of 1 for CU signatures.
399	uint64_t DIEHash::computeCUSignature(StringRef DWOName, const DIE &Die) {
400	Numbering.clear();
401	Numbering [&Die] = `1`;
402
403	if (!DWOName.empty())
404	Hash.update(Str: DWOName);
405	// Hash the DIE.
406	computeHash(Die);
407
408	// Now return the result.
409	MD5::MD5Result Result;
410	Hash.final(Result);
411
412	// ... take the least significant 8 bytes and return those. Our MD5
413	// implementation always returns its results in little endian, so we actually
414	// need the "high" word.
415	return Result.high();
416	}
417
418	/// This is based on the type signature computation given in section 7.27 of the
419	/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
420	/// with the inclusion of additional forms not specifically called out in the
421	/// standard.
422	uint64_t DIEHash::computeTypeSignature(const DIE &Die) {
423	Numbering.clear();
424	Numbering [&Die] = `1`;
425
426	if (const DIE *Parent = Die.getParent())
427	addParentContext(Parent: *Parent);
428
429	// Hash the DIE.
430	computeHash(Die);
431
432	// Now return the result.
433	MD5::MD5Result Result;
434	Hash.final(Result);
435
436	// ... take the least significant 8 bytes and return those. Our MD5
437	// implementation always returns its results in little endian, so we actually
438	// need the "high" word.
439	return Result.high();
440	}
441

Browse the source code of llvm_projects/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp