1//===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/IR/StructuralHash.h"
10#include "llvm/IR/Function.h"
11#include "llvm/IR/GlobalVariable.h"
12#include "llvm/IR/InstrTypes.h"
13#include "llvm/IR/Instructions.h"
14#include "llvm/IR/IntrinsicInst.h"
15#include "llvm/IR/Module.h"
16
17using namespace llvm;
18
19namespace {
20
21// Basic hashing mechanism to detect structural change to the IR, used to verify
22// pass return status consistency with actual change. In addition to being used
23// by the MergeFunctions pass.
24
25class StructuralHashImpl {
26 stable_hash Hash = 4;
27
28 bool DetailedHash;
29
30 // This random value acts as a block header, as otherwise the partition of
31 // opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
32 static constexpr stable_hash BlockHeaderHash = 45798;
33 static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
34 static constexpr stable_hash GlobalHeaderHash = 23456;
35
36 /// IgnoreOp is a function that returns true if the operand should be ignored.
37 IgnoreOperandFunc IgnoreOp = nullptr;
38 /// A mapping from instruction indices to instruction pointers.
39 /// The index represents the position of an instruction based on the order in
40 /// which it is first encountered.
41 std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr;
42 /// A mapping from pairs of instruction indices and operand indices
43 /// to the hashes of the operands.
44 std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr;
45
46 /// Assign a unique ID to each Value in the order they are first seen.
47 DenseMap<const Value *, int> ValueToId;
48
49 static stable_hash hashType(Type *ValueType) {
50 SmallVector<stable_hash> Hashes;
51 Hashes.emplace_back(Args: ValueType->getTypeID());
52 if (ValueType->isIntegerTy())
53 Hashes.emplace_back(Args: ValueType->getIntegerBitWidth());
54 return stable_hash_combine(Buffer: Hashes);
55 }
56
57public:
58 StructuralHashImpl() = delete;
59 explicit StructuralHashImpl(bool DetailedHash,
60 IgnoreOperandFunc IgnoreOp = nullptr)
61 : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) {
62 if (IgnoreOp) {
63 IndexInstruction = std::make_unique<IndexInstrMap>();
64 IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
65 }
66 }
67
68 static stable_hash hashAPInt(const APInt &I) {
69 SmallVector<stable_hash> Hashes;
70 Hashes.emplace_back(Args: I.getBitWidth());
71 auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords());
72 Hashes.append(in_start: RawVals.begin(), in_end: RawVals.end());
73 return stable_hash_combine(Buffer: Hashes);
74 }
75
76 static stable_hash hashAPFloat(const APFloat &F) {
77 return hashAPInt(I: F.bitcastToAPInt());
78 }
79
80 static stable_hash hashGlobalVariable(const GlobalVariable &GVar) {
81 if (!GVar.hasInitializer())
82 return hashGlobalValue(GV: &GVar);
83
84 // Hash the contents of a string.
85 if (GVar.getName().starts_with(Prefix: ".str")) {
86 auto *C = GVar.getInitializer();
87 if (const auto *Seq = dyn_cast<ConstantDataSequential>(Val: C))
88 if (Seq->isString())
89 return stable_hash_name(Name: Seq->getAsString());
90 }
91
92 // Hash structural contents of Objective-C metadata in specific sections.
93 // This can be extended to other metadata if needed.
94 static constexpr const char *SectionNames[] = {
95 "__cfstring", "__cstring", "__objc_classrefs",
96 "__objc_methname", "__objc_selrefs",
97 };
98 if (GVar.hasSection()) {
99 StringRef SectionName = GVar.getSection();
100 for (const char *Name : SectionNames)
101 if (SectionName.contains(Other: Name))
102 return hashConstant(C: GVar.getInitializer());
103 }
104
105 return hashGlobalValue(GV: &GVar);
106 }
107
108 static stable_hash hashGlobalValue(const GlobalValue *GV) {
109 if (!GV->hasName())
110 return 0;
111 return stable_hash_name(Name: GV->getName());
112 }
113
114 // Compute a hash for a Constant. This function is logically similar to
115 // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
116 // we're interested in computing a hash rather than comparing two Constants.
117 // Some of the logic is simplified, e.g, we don't expand GEPOperator.
118 static stable_hash hashConstant(const Constant *C) {
119 SmallVector<stable_hash> Hashes;
120
121 Type *Ty = C->getType();
122 Hashes.emplace_back(Args: hashType(ValueType: Ty));
123
124 if (C->isNullValue()) {
125 Hashes.emplace_back(Args: static_cast<stable_hash>('N'));
126 return stable_hash_combine(Buffer: Hashes);
127 }
128
129 if (auto *GVar = dyn_cast<GlobalVariable>(Val: C)) {
130 Hashes.emplace_back(Args: hashGlobalVariable(GVar: *GVar));
131 return stable_hash_combine(Buffer: Hashes);
132 }
133
134 if (auto *G = dyn_cast<GlobalValue>(Val: C)) {
135 Hashes.emplace_back(Args: hashGlobalValue(GV: G));
136 return stable_hash_combine(Buffer: Hashes);
137 }
138
139 if (const auto *Seq = dyn_cast<ConstantDataSequential>(Val: C)) {
140 if (Seq->isString()) {
141 Hashes.emplace_back(Args: stable_hash_name(Name: Seq->getAsString()));
142 return stable_hash_combine(Buffer: Hashes);
143 }
144 }
145
146 switch (C->getValueID()) {
147 case Value::ConstantIntVal: {
148 const APInt &Int = cast<ConstantInt>(Val: C)->getValue();
149 Hashes.emplace_back(Args: hashAPInt(I: Int));
150 return stable_hash_combine(Buffer: Hashes);
151 }
152 case Value::ConstantFPVal: {
153 const APFloat &APF = cast<ConstantFP>(Val: C)->getValueAPF();
154 Hashes.emplace_back(Args: hashAPFloat(F: APF));
155 return stable_hash_combine(Buffer: Hashes);
156 }
157 case Value::ConstantArrayVal:
158 case Value::ConstantStructVal:
159 case Value::ConstantVectorVal:
160 case Value::ConstantExprVal: {
161 for (const auto &Op : C->operands()) {
162 auto H = hashConstant(C: cast<Constant>(Val: Op));
163 Hashes.emplace_back(Args&: H);
164 }
165 return stable_hash_combine(Buffer: Hashes);
166 }
167 case Value::BlockAddressVal: {
168 const BlockAddress *BA = cast<BlockAddress>(Val: C);
169 auto H = hashGlobalValue(GV: BA->getFunction());
170 Hashes.emplace_back(Args&: H);
171 return stable_hash_combine(Buffer: Hashes);
172 }
173 case Value::DSOLocalEquivalentVal: {
174 const auto *Equiv = cast<DSOLocalEquivalent>(Val: C);
175 auto H = hashGlobalValue(GV: Equiv->getGlobalValue());
176 Hashes.emplace_back(Args&: H);
177 return stable_hash_combine(Buffer: Hashes);
178 }
179 default:
180 // Skip other types of constants for simplicity.
181 return stable_hash_combine(Buffer: Hashes);
182 }
183 }
184
185 stable_hash hashValue(Value *V) {
186 // Check constant and return its hash.
187 Constant *C = dyn_cast<Constant>(Val: V);
188 if (C)
189 return hashConstant(C);
190
191 // Hash argument number.
192 SmallVector<stable_hash> Hashes;
193 if (Argument *Arg = dyn_cast<Argument>(Val: V))
194 Hashes.emplace_back(Args: Arg->getArgNo());
195
196 // Get an index (an insertion order) for the non-constant value.
197 auto [It, WasInserted] = ValueToId.try_emplace(Key: V, Args: ValueToId.size());
198 Hashes.emplace_back(Args&: It->second);
199
200 return stable_hash_combine(Buffer: Hashes);
201 }
202
203 stable_hash hashOperand(Value *Operand) {
204 SmallVector<stable_hash> Hashes;
205 Hashes.emplace_back(Args: hashType(ValueType: Operand->getType()));
206 Hashes.emplace_back(Args: hashValue(V: Operand));
207 return stable_hash_combine(Buffer: Hashes);
208 }
209
210 stable_hash hashInstruction(const Instruction &Inst) {
211 SmallVector<stable_hash> Hashes;
212 Hashes.emplace_back(Args: Inst.getOpcode());
213
214 if (!DetailedHash)
215 return stable_hash_combine(Buffer: Hashes);
216
217 Hashes.emplace_back(Args: hashType(ValueType: Inst.getType()));
218
219 // Handle additional properties of specific instructions that cause
220 // semantic differences in the IR.
221 if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(Val: &Inst))
222 Hashes.emplace_back(Args: ComparisonInstruction->getPredicate());
223
224 unsigned InstIdx = 0;
225 if (IndexInstruction) {
226 InstIdx = IndexInstruction->size();
227 IndexInstruction->try_emplace(Key: InstIdx, Args: const_cast<Instruction *>(&Inst));
228 }
229
230 for (const auto [OpndIdx, Op] : enumerate(First: Inst.operands())) {
231 auto OpndHash = hashOperand(Operand: Op);
232 if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
233 assert(IndexOperandHashMap);
234 IndexOperandHashMap->try_emplace(Key: {InstIdx, OpndIdx}, Args&: OpndHash);
235 } else
236 Hashes.emplace_back(Args&: OpndHash);
237 }
238
239 return stable_hash_combine(Buffer: Hashes);
240 }
241
242 // A function hash is calculated by considering only the number of arguments
243 // and whether a function is varargs, the order of basic blocks (given by the
244 // successors of each basic block in depth first order), and the order of
245 // opcodes of each instruction within each of these basic blocks. This mirrors
246 // the strategy FunctionComparator::compare() uses to compare functions by
247 // walking the BBs in depth first order and comparing each instruction in
248 // sequence. Because this hash currently does not look at the operands, it is
249 // insensitive to things such as the target of calls and the constants used in
250 // the function, which makes it useful when possibly merging functions which
251 // are the same modulo constants and call targets.
252 //
253 // Note that different users of StructuralHash will want different behavior
254 // out of it (i.e., MergeFunctions will want something different from PM
255 // expensive checks for pass modification status). When modifying this
256 // function, most changes should be gated behind an option and enabled
257 // selectively.
258 void update(const Function &F) {
259 // Declarations don't affect analyses.
260 if (F.isDeclaration())
261 return;
262
263 SmallVector<stable_hash> Hashes;
264 Hashes.emplace_back(Args&: Hash);
265 Hashes.emplace_back(Args: FunctionHeaderHash);
266
267 Hashes.emplace_back(Args: F.isVarArg());
268 Hashes.emplace_back(Args: F.arg_size());
269
270 SmallVector<const BasicBlock *, 8> BBs;
271 SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
272
273 // Walk the blocks in the same order as
274 // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
275 // function "structure." (BB and opcode sequence)
276 BBs.push_back(Elt: &F.getEntryBlock());
277 VisitedBBs.insert(Ptr: BBs[0]);
278 while (!BBs.empty()) {
279 const BasicBlock *BB = BBs.pop_back_val();
280
281 Hashes.emplace_back(Args: BlockHeaderHash);
282 for (auto &Inst : *BB)
283 Hashes.emplace_back(Args: hashInstruction(Inst));
284
285 for (const BasicBlock *Succ : successors(BB))
286 if (VisitedBBs.insert(Ptr: Succ).second)
287 BBs.push_back(Elt: Succ);
288 }
289
290 // Update the combined hash in place.
291 Hash = stable_hash_combine(Buffer: Hashes);
292 }
293
294 void update(const GlobalVariable &GV) {
295 // Declarations and used/compiler.used don't affect analyses.
296 // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
297 // we ignore anything with the `.llvm` prefix
298 if (GV.isDeclaration() || GV.getName().starts_with(Prefix: "llvm."))
299 return;
300 SmallVector<stable_hash> Hashes;
301 Hashes.emplace_back(Args&: Hash);
302 Hashes.emplace_back(Args: GlobalHeaderHash);
303 Hashes.emplace_back(Args: GV.getValueType()->getTypeID());
304
305 // Update the combined hash in place.
306 Hash = stable_hash_combine(Buffer: Hashes);
307 }
308
309 void update(const Module &M) {
310 for (const GlobalVariable &GV : M.globals())
311 update(GV);
312 for (const Function &F : M)
313 update(F);
314 }
315
316 uint64_t getHash() const { return Hash; }
317
318 std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
319 return std::move(IndexInstruction);
320 }
321
322 std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
323 return std::move(IndexOperandHashMap);
324 }
325};
326
327} // namespace
328
329stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
330 StructuralHashImpl H(DetailedHash);
331 H.update(F);
332 return H.getHash();
333}
334
335stable_hash llvm::StructuralHash(const GlobalVariable &GVar) {
336 return StructuralHashImpl::hashGlobalVariable(GVar);
337}
338
339stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
340 StructuralHashImpl H(DetailedHash);
341 H.update(M);
342 return H.getHash();
343}
344
345FunctionHashInfo
346llvm::StructuralHashWithDifferences(const Function &F,
347 IgnoreOperandFunc IgnoreOp) {
348 StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp);
349 H.update(F);
350 return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(),
351 H.getIndexPairOpndHashMap());
352}
353