1 | //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/IR/StructuralHash.h" |
10 | #include "llvm/IR/Function.h" |
11 | #include "llvm/IR/GlobalVariable.h" |
12 | #include "llvm/IR/InstrTypes.h" |
13 | #include "llvm/IR/Instructions.h" |
14 | #include "llvm/IR/IntrinsicInst.h" |
15 | #include "llvm/IR/Module.h" |
16 | |
17 | using namespace llvm; |
18 | |
19 | namespace { |
20 | |
21 | // Basic hashing mechanism to detect structural change to the IR, used to verify |
22 | // pass return status consistency with actual change. In addition to being used |
23 | // by the MergeFunctions pass. |
24 | |
25 | class StructuralHashImpl { |
26 | stable_hash Hash = 4; |
27 | |
28 | bool DetailedHash; |
29 | |
30 | // This random value acts as a block header, as otherwise the partition of |
31 | // opcodes into BBs wouldn't affect the hash, only the order of the opcodes. |
32 | static constexpr stable_hash = 45798; |
33 | static constexpr stable_hash = 0x62642d6b6b2d6b72; |
34 | static constexpr stable_hash = 23456; |
35 | |
36 | /// IgnoreOp is a function that returns true if the operand should be ignored. |
37 | IgnoreOperandFunc IgnoreOp = nullptr; |
38 | /// A mapping from instruction indices to instruction pointers. |
39 | /// The index represents the position of an instruction based on the order in |
40 | /// which it is first encountered. |
41 | std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr; |
42 | /// A mapping from pairs of instruction indices and operand indices |
43 | /// to the hashes of the operands. |
44 | std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr; |
45 | |
46 | /// Assign a unique ID to each Value in the order they are first seen. |
47 | DenseMap<const Value *, int> ValueToId; |
48 | |
49 | static stable_hash hashType(Type *ValueType) { |
50 | SmallVector<stable_hash> Hashes; |
51 | Hashes.emplace_back(Args: ValueType->getTypeID()); |
52 | if (ValueType->isIntegerTy()) |
53 | Hashes.emplace_back(Args: ValueType->getIntegerBitWidth()); |
54 | return stable_hash_combine(Buffer: Hashes); |
55 | } |
56 | |
57 | public: |
58 | StructuralHashImpl() = delete; |
59 | explicit StructuralHashImpl(bool DetailedHash, |
60 | IgnoreOperandFunc IgnoreOp = nullptr) |
61 | : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) { |
62 | if (IgnoreOp) { |
63 | IndexInstruction = std::make_unique<IndexInstrMap>(); |
64 | IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>(); |
65 | } |
66 | } |
67 | |
68 | static stable_hash hashAPInt(const APInt &I) { |
69 | SmallVector<stable_hash> Hashes; |
70 | Hashes.emplace_back(Args: I.getBitWidth()); |
71 | auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords()); |
72 | Hashes.append(in_start: RawVals.begin(), in_end: RawVals.end()); |
73 | return stable_hash_combine(Buffer: Hashes); |
74 | } |
75 | |
76 | static stable_hash hashAPFloat(const APFloat &F) { |
77 | return hashAPInt(I: F.bitcastToAPInt()); |
78 | } |
79 | |
80 | static stable_hash hashGlobalVariable(const GlobalVariable &GVar) { |
81 | if (!GVar.hasInitializer()) |
82 | return hashGlobalValue(GV: &GVar); |
83 | |
84 | // Hash the contents of a string. |
85 | if (GVar.getName().starts_with(Prefix: ".str" )) { |
86 | auto *C = GVar.getInitializer(); |
87 | if (const auto *Seq = dyn_cast<ConstantDataSequential>(Val: C)) |
88 | if (Seq->isString()) |
89 | return stable_hash_name(Name: Seq->getAsString()); |
90 | } |
91 | |
92 | // Hash structural contents of Objective-C metadata in specific sections. |
93 | // This can be extended to other metadata if needed. |
94 | static constexpr const char *SectionNames[] = { |
95 | "__cfstring" , "__cstring" , "__objc_classrefs" , |
96 | "__objc_methname" , "__objc_selrefs" , |
97 | }; |
98 | if (GVar.hasSection()) { |
99 | StringRef SectionName = GVar.getSection(); |
100 | for (const char *Name : SectionNames) |
101 | if (SectionName.contains(Other: Name)) |
102 | return hashConstant(C: GVar.getInitializer()); |
103 | } |
104 | |
105 | return hashGlobalValue(GV: &GVar); |
106 | } |
107 | |
108 | static stable_hash hashGlobalValue(const GlobalValue *GV) { |
109 | if (!GV->hasName()) |
110 | return 0; |
111 | return stable_hash_name(Name: GV->getName()); |
112 | } |
113 | |
114 | // Compute a hash for a Constant. This function is logically similar to |
115 | // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here |
116 | // we're interested in computing a hash rather than comparing two Constants. |
117 | // Some of the logic is simplified, e.g, we don't expand GEPOperator. |
118 | static stable_hash hashConstant(const Constant *C) { |
119 | SmallVector<stable_hash> Hashes; |
120 | |
121 | Type *Ty = C->getType(); |
122 | Hashes.emplace_back(Args: hashType(ValueType: Ty)); |
123 | |
124 | if (C->isNullValue()) { |
125 | Hashes.emplace_back(Args: static_cast<stable_hash>('N')); |
126 | return stable_hash_combine(Buffer: Hashes); |
127 | } |
128 | |
129 | if (auto *GVar = dyn_cast<GlobalVariable>(Val: C)) { |
130 | Hashes.emplace_back(Args: hashGlobalVariable(GVar: *GVar)); |
131 | return stable_hash_combine(Buffer: Hashes); |
132 | } |
133 | |
134 | if (auto *G = dyn_cast<GlobalValue>(Val: C)) { |
135 | Hashes.emplace_back(Args: hashGlobalValue(GV: G)); |
136 | return stable_hash_combine(Buffer: Hashes); |
137 | } |
138 | |
139 | if (const auto *Seq = dyn_cast<ConstantDataSequential>(Val: C)) { |
140 | if (Seq->isString()) { |
141 | Hashes.emplace_back(Args: stable_hash_name(Name: Seq->getAsString())); |
142 | return stable_hash_combine(Buffer: Hashes); |
143 | } |
144 | } |
145 | |
146 | switch (C->getValueID()) { |
147 | case Value::ConstantIntVal: { |
148 | const APInt &Int = cast<ConstantInt>(Val: C)->getValue(); |
149 | Hashes.emplace_back(Args: hashAPInt(I: Int)); |
150 | return stable_hash_combine(Buffer: Hashes); |
151 | } |
152 | case Value::ConstantFPVal: { |
153 | const APFloat &APF = cast<ConstantFP>(Val: C)->getValueAPF(); |
154 | Hashes.emplace_back(Args: hashAPFloat(F: APF)); |
155 | return stable_hash_combine(Buffer: Hashes); |
156 | } |
157 | case Value::ConstantArrayVal: |
158 | case Value::ConstantStructVal: |
159 | case Value::ConstantVectorVal: |
160 | case Value::ConstantExprVal: { |
161 | for (const auto &Op : C->operands()) { |
162 | auto H = hashConstant(C: cast<Constant>(Val: Op)); |
163 | Hashes.emplace_back(Args&: H); |
164 | } |
165 | return stable_hash_combine(Buffer: Hashes); |
166 | } |
167 | case Value::BlockAddressVal: { |
168 | const BlockAddress *BA = cast<BlockAddress>(Val: C); |
169 | auto H = hashGlobalValue(GV: BA->getFunction()); |
170 | Hashes.emplace_back(Args&: H); |
171 | return stable_hash_combine(Buffer: Hashes); |
172 | } |
173 | case Value::DSOLocalEquivalentVal: { |
174 | const auto *Equiv = cast<DSOLocalEquivalent>(Val: C); |
175 | auto H = hashGlobalValue(GV: Equiv->getGlobalValue()); |
176 | Hashes.emplace_back(Args&: H); |
177 | return stable_hash_combine(Buffer: Hashes); |
178 | } |
179 | default: |
180 | // Skip other types of constants for simplicity. |
181 | return stable_hash_combine(Buffer: Hashes); |
182 | } |
183 | } |
184 | |
185 | stable_hash hashValue(Value *V) { |
186 | // Check constant and return its hash. |
187 | Constant *C = dyn_cast<Constant>(Val: V); |
188 | if (C) |
189 | return hashConstant(C); |
190 | |
191 | // Hash argument number. |
192 | SmallVector<stable_hash> Hashes; |
193 | if (Argument *Arg = dyn_cast<Argument>(Val: V)) |
194 | Hashes.emplace_back(Args: Arg->getArgNo()); |
195 | |
196 | // Get an index (an insertion order) for the non-constant value. |
197 | auto [It, WasInserted] = ValueToId.try_emplace(Key: V, Args: ValueToId.size()); |
198 | Hashes.emplace_back(Args&: It->second); |
199 | |
200 | return stable_hash_combine(Buffer: Hashes); |
201 | } |
202 | |
203 | stable_hash hashOperand(Value *Operand) { |
204 | SmallVector<stable_hash> Hashes; |
205 | Hashes.emplace_back(Args: hashType(ValueType: Operand->getType())); |
206 | Hashes.emplace_back(Args: hashValue(V: Operand)); |
207 | return stable_hash_combine(Buffer: Hashes); |
208 | } |
209 | |
210 | stable_hash hashInstruction(const Instruction &Inst) { |
211 | SmallVector<stable_hash> Hashes; |
212 | Hashes.emplace_back(Args: Inst.getOpcode()); |
213 | |
214 | if (!DetailedHash) |
215 | return stable_hash_combine(Buffer: Hashes); |
216 | |
217 | Hashes.emplace_back(Args: hashType(ValueType: Inst.getType())); |
218 | |
219 | // Handle additional properties of specific instructions that cause |
220 | // semantic differences in the IR. |
221 | if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(Val: &Inst)) |
222 | Hashes.emplace_back(Args: ComparisonInstruction->getPredicate()); |
223 | |
224 | unsigned InstIdx = 0; |
225 | if (IndexInstruction) { |
226 | InstIdx = IndexInstruction->size(); |
227 | IndexInstruction->try_emplace(Key: InstIdx, Args: const_cast<Instruction *>(&Inst)); |
228 | } |
229 | |
230 | for (const auto [OpndIdx, Op] : enumerate(First: Inst.operands())) { |
231 | auto OpndHash = hashOperand(Operand: Op); |
232 | if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) { |
233 | assert(IndexOperandHashMap); |
234 | IndexOperandHashMap->try_emplace(Key: {InstIdx, OpndIdx}, Args&: OpndHash); |
235 | } else |
236 | Hashes.emplace_back(Args&: OpndHash); |
237 | } |
238 | |
239 | return stable_hash_combine(Buffer: Hashes); |
240 | } |
241 | |
242 | // A function hash is calculated by considering only the number of arguments |
243 | // and whether a function is varargs, the order of basic blocks (given by the |
244 | // successors of each basic block in depth first order), and the order of |
245 | // opcodes of each instruction within each of these basic blocks. This mirrors |
246 | // the strategy FunctionComparator::compare() uses to compare functions by |
247 | // walking the BBs in depth first order and comparing each instruction in |
248 | // sequence. Because this hash currently does not look at the operands, it is |
249 | // insensitive to things such as the target of calls and the constants used in |
250 | // the function, which makes it useful when possibly merging functions which |
251 | // are the same modulo constants and call targets. |
252 | // |
253 | // Note that different users of StructuralHash will want different behavior |
254 | // out of it (i.e., MergeFunctions will want something different from PM |
255 | // expensive checks for pass modification status). When modifying this |
256 | // function, most changes should be gated behind an option and enabled |
257 | // selectively. |
258 | void update(const Function &F) { |
259 | // Declarations don't affect analyses. |
260 | if (F.isDeclaration()) |
261 | return; |
262 | |
263 | SmallVector<stable_hash> Hashes; |
264 | Hashes.emplace_back(Args&: Hash); |
265 | Hashes.emplace_back(Args: FunctionHeaderHash); |
266 | |
267 | Hashes.emplace_back(Args: F.isVarArg()); |
268 | Hashes.emplace_back(Args: F.arg_size()); |
269 | |
270 | SmallVector<const BasicBlock *, 8> BBs; |
271 | SmallPtrSet<const BasicBlock *, 16> VisitedBBs; |
272 | |
273 | // Walk the blocks in the same order as |
274 | // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the |
275 | // function "structure." (BB and opcode sequence) |
276 | BBs.push_back(Elt: &F.getEntryBlock()); |
277 | VisitedBBs.insert(Ptr: BBs[0]); |
278 | while (!BBs.empty()) { |
279 | const BasicBlock *BB = BBs.pop_back_val(); |
280 | |
281 | Hashes.emplace_back(Args: BlockHeaderHash); |
282 | for (auto &Inst : *BB) |
283 | Hashes.emplace_back(Args: hashInstruction(Inst)); |
284 | |
285 | for (const BasicBlock *Succ : successors(BB)) |
286 | if (VisitedBBs.insert(Ptr: Succ).second) |
287 | BBs.push_back(Elt: Succ); |
288 | } |
289 | |
290 | // Update the combined hash in place. |
291 | Hash = stable_hash_combine(Buffer: Hashes); |
292 | } |
293 | |
294 | void update(const GlobalVariable &GV) { |
295 | // Declarations and used/compiler.used don't affect analyses. |
296 | // Since there are several `llvm.*` metadata, like `llvm.embedded.object`, |
297 | // we ignore anything with the `.llvm` prefix |
298 | if (GV.isDeclaration() || GV.getName().starts_with(Prefix: "llvm." )) |
299 | return; |
300 | SmallVector<stable_hash> Hashes; |
301 | Hashes.emplace_back(Args&: Hash); |
302 | Hashes.emplace_back(Args: GlobalHeaderHash); |
303 | Hashes.emplace_back(Args: GV.getValueType()->getTypeID()); |
304 | |
305 | // Update the combined hash in place. |
306 | Hash = stable_hash_combine(Buffer: Hashes); |
307 | } |
308 | |
309 | void update(const Module &M) { |
310 | for (const GlobalVariable &GV : M.globals()) |
311 | update(GV); |
312 | for (const Function &F : M) |
313 | update(F); |
314 | } |
315 | |
316 | uint64_t getHash() const { return Hash; } |
317 | |
318 | std::unique_ptr<IndexInstrMap> getIndexInstrMap() { |
319 | return std::move(IndexInstruction); |
320 | } |
321 | |
322 | std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() { |
323 | return std::move(IndexOperandHashMap); |
324 | } |
325 | }; |
326 | |
327 | } // namespace |
328 | |
329 | stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { |
330 | StructuralHashImpl H(DetailedHash); |
331 | H.update(F); |
332 | return H.getHash(); |
333 | } |
334 | |
335 | stable_hash llvm::StructuralHash(const GlobalVariable &GVar) { |
336 | return StructuralHashImpl::hashGlobalVariable(GVar); |
337 | } |
338 | |
339 | stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { |
340 | StructuralHashImpl H(DetailedHash); |
341 | H.update(M); |
342 | return H.getHash(); |
343 | } |
344 | |
345 | FunctionHashInfo |
346 | llvm::StructuralHashWithDifferences(const Function &F, |
347 | IgnoreOperandFunc IgnoreOp) { |
348 | StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp); |
349 | H.update(F); |
350 | return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(), |
351 | H.getIndexPairOpndHashMap()); |
352 | } |
353 | |