1 | //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/IR/StructuralHash.h" |
10 | #include "llvm/ADT/Hashing.h" |
11 | #include "llvm/IR/Function.h" |
12 | #include "llvm/IR/GlobalVariable.h" |
13 | #include "llvm/IR/InstrTypes.h" |
14 | #include "llvm/IR/Instructions.h" |
15 | #include "llvm/IR/IntrinsicInst.h" |
16 | #include "llvm/IR/Module.h" |
17 | |
18 | using namespace llvm; |
19 | |
20 | namespace { |
21 | |
22 | // Basic hashing mechanism to detect structural change to the IR, used to verify |
23 | // pass return status consistency with actual change. In addition to being used |
24 | // by the MergeFunctions pass. |
25 | |
26 | class StructuralHashImpl { |
27 | uint64_t Hash; |
28 | |
29 | void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(low: Hash, high: V); } |
30 | |
31 | // This will produce different values on 32-bit and 64-bit systens as |
32 | // hash_combine returns a size_t. However, this is only used for |
33 | // detailed hashing which, in-tree, only needs to distinguish between |
34 | // differences in functions. |
35 | template <typename T> void hashArbitaryType(const T &V) { |
36 | hash(V: hash_combine(V)); |
37 | } |
38 | |
39 | void hashType(Type *ValueType) { |
40 | hash(V: ValueType->getTypeID()); |
41 | if (ValueType->isIntegerTy()) |
42 | hash(V: ValueType->getIntegerBitWidth()); |
43 | } |
44 | |
45 | public: |
46 | StructuralHashImpl() : Hash(4) {} |
47 | |
48 | void updateOperand(Value *Operand) { |
49 | hashType(ValueType: Operand->getType()); |
50 | |
51 | // The cases enumerated below are not exhaustive and are only aimed to |
52 | // get decent coverage over the function. |
53 | if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: Operand)) { |
54 | hashArbitaryType(V: ConstInt->getValue()); |
55 | } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Val: Operand)) { |
56 | hashArbitaryType(V: ConstFP->getValue()); |
57 | } else if (Argument *Arg = dyn_cast<Argument>(Val: Operand)) { |
58 | hash(V: Arg->getArgNo()); |
59 | } else if (Function *Func = dyn_cast<Function>(Val: Operand)) { |
60 | // Hashing the name will be deterministic as LLVM's hashing infrastructure |
61 | // has explicit support for hashing strings and will not simply hash |
62 | // the pointer. |
63 | hashArbitaryType(V: Func->getName()); |
64 | } |
65 | } |
66 | |
67 | void updateInstruction(const Instruction &Inst, bool DetailedHash) { |
68 | hash(V: Inst.getOpcode()); |
69 | |
70 | if (!DetailedHash) |
71 | return; |
72 | |
73 | hashType(ValueType: Inst.getType()); |
74 | |
75 | // Handle additional properties of specific instructions that cause |
76 | // semantic differences in the IR. |
77 | if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(Val: &Inst)) |
78 | hash(V: ComparisonInstruction->getPredicate()); |
79 | |
80 | for (const auto &Op : Inst.operands()) |
81 | updateOperand(Operand: Op); |
82 | } |
83 | |
84 | // A function hash is calculated by considering only the number of arguments |
85 | // and whether a function is varargs, the order of basic blocks (given by the |
86 | // successors of each basic block in depth first order), and the order of |
87 | // opcodes of each instruction within each of these basic blocks. This mirrors |
88 | // the strategy FunctionComparator::compare() uses to compare functions by |
89 | // walking the BBs in depth first order and comparing each instruction in |
90 | // sequence. Because this hash currently does not look at the operands, it is |
91 | // insensitive to things such as the target of calls and the constants used in |
92 | // the function, which makes it useful when possibly merging functions which |
93 | // are the same modulo constants and call targets. |
94 | // |
95 | // Note that different users of StructuralHash will want different behavior |
96 | // out of it (i.e., MergeFunctions will want something different from PM |
97 | // expensive checks for pass modification status). When modifying this |
98 | // function, most changes should be gated behind an option and enabled |
99 | // selectively. |
100 | void update(const Function &F, bool DetailedHash) { |
101 | // Declarations don't affect analyses. |
102 | if (F.isDeclaration()) |
103 | return; |
104 | |
105 | hash(V: 0x62642d6b6b2d6b72); // Function header |
106 | |
107 | hash(V: F.isVarArg()); |
108 | hash(V: F.arg_size()); |
109 | |
110 | SmallVector<const BasicBlock *, 8> BBs; |
111 | SmallPtrSet<const BasicBlock *, 16> VisitedBBs; |
112 | |
113 | // Walk the blocks in the same order as |
114 | // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the |
115 | // function "structure." (BB and opcode sequence) |
116 | BBs.push_back(Elt: &F.getEntryBlock()); |
117 | VisitedBBs.insert(Ptr: BBs[0]); |
118 | while (!BBs.empty()) { |
119 | const BasicBlock *BB = BBs.pop_back_val(); |
120 | |
121 | // This random value acts as a block header, as otherwise the partition of |
122 | // opcodes into BBs wouldn't affect the hash, only the order of the |
123 | // opcodes |
124 | hash(V: 45798); |
125 | for (auto &Inst : *BB) |
126 | updateInstruction(Inst, DetailedHash); |
127 | |
128 | for (const BasicBlock *Succ : successors(BB)) |
129 | if (VisitedBBs.insert(Ptr: Succ).second) |
130 | BBs.push_back(Elt: Succ); |
131 | } |
132 | } |
133 | |
134 | void update(const GlobalVariable &GV) { |
135 | // Declarations and used/compiler.used don't affect analyses. |
136 | // Since there are several `llvm.*` metadata, like `llvm.embedded.object`, |
137 | // we ignore anything with the `.llvm` prefix |
138 | if (GV.isDeclaration() || GV.getName().starts_with(Prefix: "llvm." )) |
139 | return; |
140 | hash(V: 23456); // Global header |
141 | hash(V: GV.getValueType()->getTypeID()); |
142 | } |
143 | |
144 | void update(const Module &M, bool DetailedHash) { |
145 | for (const GlobalVariable &GV : M.globals()) |
146 | update(GV); |
147 | for (const Function &F : M) |
148 | update(F, DetailedHash); |
149 | } |
150 | |
151 | uint64_t getHash() const { return Hash; } |
152 | }; |
153 | |
154 | } // namespace |
155 | |
156 | IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) { |
157 | StructuralHashImpl H; |
158 | H.update(F, DetailedHash); |
159 | return H.getHash(); |
160 | } |
161 | |
162 | IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) { |
163 | StructuralHashImpl H; |
164 | H.update(M, DetailedHash); |
165 | return H.getHash(); |
166 | } |
167 | |