| 1 | //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "llvm/IR/StructuralHash.h" |
| 10 | #include "llvm/IR/Function.h" |
| 11 | #include "llvm/IR/GlobalVariable.h" |
| 12 | #include "llvm/IR/InstrTypes.h" |
| 13 | #include "llvm/IR/Instructions.h" |
| 14 | #include "llvm/IR/IntrinsicInst.h" |
| 15 | #include "llvm/IR/Module.h" |
| 16 | |
| 17 | using namespace llvm; |
| 18 | |
| 19 | namespace { |
| 20 | |
| 21 | // Basic hashing mechanism to detect structural change to the IR, used to verify |
| 22 | // pass return status consistency with actual change. In addition to being used |
| 23 | // by the MergeFunctions pass. |
| 24 | |
| 25 | class StructuralHashImpl { |
| 26 | stable_hash Hash = 4; |
| 27 | |
| 28 | bool DetailedHash; |
| 29 | |
| 30 | // This random value acts as a block header, as otherwise the partition of |
| 31 | // opcodes into BBs wouldn't affect the hash, only the order of the opcodes. |
| 32 | static constexpr stable_hash = 45798; |
| 33 | static constexpr stable_hash = 0x62642d6b6b2d6b72; |
| 34 | static constexpr stable_hash = 23456; |
| 35 | |
| 36 | /// IgnoreOp is a function that returns true if the operand should be ignored. |
| 37 | IgnoreOperandFunc IgnoreOp = nullptr; |
| 38 | /// A mapping from instruction indices to instruction pointers. |
| 39 | /// The index represents the position of an instruction based on the order in |
| 40 | /// which it is first encountered. |
| 41 | std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr; |
| 42 | /// A mapping from pairs of instruction indices and operand indices |
| 43 | /// to the hashes of the operands. |
| 44 | std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr; |
| 45 | |
| 46 | /// Assign a unique ID to each Value in the order they are first seen. |
| 47 | DenseMap<const Value *, int> ValueToId; |
| 48 | |
| 49 | static stable_hash hashType(Type *ValueType) { |
| 50 | SmallVector<stable_hash> Hashes; |
| 51 | Hashes.emplace_back(Args: ValueType->getTypeID()); |
| 52 | if (ValueType->isIntegerTy()) |
| 53 | Hashes.emplace_back(Args: ValueType->getIntegerBitWidth()); |
| 54 | return stable_hash_combine(Buffer: Hashes); |
| 55 | } |
| 56 | |
| 57 | public: |
| 58 | StructuralHashImpl() = delete; |
| 59 | explicit StructuralHashImpl(bool DetailedHash, |
| 60 | IgnoreOperandFunc IgnoreOp = nullptr) |
| 61 | : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) { |
| 62 | if (IgnoreOp) { |
| 63 | IndexInstruction = std::make_unique<IndexInstrMap>(); |
| 64 | IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>(); |
| 65 | } |
| 66 | } |
| 67 | |
| 68 | static stable_hash hashAPInt(const APInt &I) { |
| 69 | SmallVector<stable_hash> Hashes; |
| 70 | Hashes.emplace_back(Args: I.getBitWidth()); |
| 71 | auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords()); |
| 72 | Hashes.append(in_start: RawVals.begin(), in_end: RawVals.end()); |
| 73 | return stable_hash_combine(Buffer: Hashes); |
| 74 | } |
| 75 | |
| 76 | static stable_hash hashAPFloat(const APFloat &F) { |
| 77 | return hashAPInt(I: F.bitcastToAPInt()); |
| 78 | } |
| 79 | |
| 80 | static stable_hash hashGlobalVariable(const GlobalVariable &GVar) { |
| 81 | if (!GVar.hasInitializer()) |
| 82 | return hashGlobalValue(GV: &GVar); |
| 83 | |
| 84 | // Hash the contents of a string. |
| 85 | if (GVar.getName().starts_with(Prefix: ".str" )) { |
| 86 | auto *C = GVar.getInitializer(); |
| 87 | if (const auto *Seq = dyn_cast<ConstantDataSequential>(Val: C)) |
| 88 | if (Seq->isString()) |
| 89 | return stable_hash_name(Name: Seq->getAsString()); |
| 90 | } |
| 91 | |
| 92 | // Hash structural contents of Objective-C metadata in specific sections. |
| 93 | // This can be extended to other metadata if needed. |
| 94 | static constexpr const char *SectionNames[] = { |
| 95 | "__cfstring" , "__cstring" , "__objc_classrefs" , |
| 96 | "__objc_methname" , "__objc_selrefs" , |
| 97 | }; |
| 98 | if (GVar.hasSection()) { |
| 99 | StringRef SectionName = GVar.getSection(); |
| 100 | for (const char *Name : SectionNames) |
| 101 | if (SectionName.contains(Other: Name)) |
| 102 | return hashConstant(C: GVar.getInitializer()); |
| 103 | } |
| 104 | |
| 105 | return hashGlobalValue(GV: &GVar); |
| 106 | } |
| 107 | |
| 108 | static stable_hash hashGlobalValue(const GlobalValue *GV) { |
| 109 | if (!GV->hasName()) |
| 110 | return 0; |
| 111 | return stable_hash_name(Name: GV->getName()); |
| 112 | } |
| 113 | |
| 114 | // Compute a hash for a Constant. This function is logically similar to |
| 115 | // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here |
| 116 | // we're interested in computing a hash rather than comparing two Constants. |
| 117 | // Some of the logic is simplified, e.g, we don't expand GEPOperator. |
| 118 | static stable_hash hashConstant(const Constant *C) { |
| 119 | SmallVector<stable_hash> Hashes; |
| 120 | |
| 121 | Type *Ty = C->getType(); |
| 122 | Hashes.emplace_back(Args: hashType(ValueType: Ty)); |
| 123 | |
| 124 | if (C->isNullValue()) { |
| 125 | Hashes.emplace_back(Args: static_cast<stable_hash>('N')); |
| 126 | return stable_hash_combine(Buffer: Hashes); |
| 127 | } |
| 128 | |
| 129 | if (auto *GVar = dyn_cast<GlobalVariable>(Val: C)) { |
| 130 | Hashes.emplace_back(Args: hashGlobalVariable(GVar: *GVar)); |
| 131 | return stable_hash_combine(Buffer: Hashes); |
| 132 | } |
| 133 | |
| 134 | if (auto *G = dyn_cast<GlobalValue>(Val: C)) { |
| 135 | Hashes.emplace_back(Args: hashGlobalValue(GV: G)); |
| 136 | return stable_hash_combine(Buffer: Hashes); |
| 137 | } |
| 138 | |
| 139 | if (const auto *Seq = dyn_cast<ConstantDataSequential>(Val: C)) { |
| 140 | if (Seq->isString()) { |
| 141 | Hashes.emplace_back(Args: stable_hash_name(Name: Seq->getAsString())); |
| 142 | return stable_hash_combine(Buffer: Hashes); |
| 143 | } |
| 144 | } |
| 145 | |
| 146 | switch (C->getValueID()) { |
| 147 | case Value::ConstantIntVal: { |
| 148 | const APInt &Int = cast<ConstantInt>(Val: C)->getValue(); |
| 149 | Hashes.emplace_back(Args: hashAPInt(I: Int)); |
| 150 | return stable_hash_combine(Buffer: Hashes); |
| 151 | } |
| 152 | case Value::ConstantFPVal: { |
| 153 | const APFloat &APF = cast<ConstantFP>(Val: C)->getValueAPF(); |
| 154 | Hashes.emplace_back(Args: hashAPFloat(F: APF)); |
| 155 | return stable_hash_combine(Buffer: Hashes); |
| 156 | } |
| 157 | case Value::ConstantArrayVal: |
| 158 | case Value::ConstantStructVal: |
| 159 | case Value::ConstantVectorVal: |
| 160 | case Value::ConstantExprVal: { |
| 161 | for (const auto &Op : C->operands()) { |
| 162 | auto H = hashConstant(C: cast<Constant>(Val: Op)); |
| 163 | Hashes.emplace_back(Args&: H); |
| 164 | } |
| 165 | return stable_hash_combine(Buffer: Hashes); |
| 166 | } |
| 167 | case Value::BlockAddressVal: { |
| 168 | const BlockAddress *BA = cast<BlockAddress>(Val: C); |
| 169 | auto H = hashGlobalValue(GV: BA->getFunction()); |
| 170 | Hashes.emplace_back(Args&: H); |
| 171 | return stable_hash_combine(Buffer: Hashes); |
| 172 | } |
| 173 | case Value::DSOLocalEquivalentVal: { |
| 174 | const auto *Equiv = cast<DSOLocalEquivalent>(Val: C); |
| 175 | auto H = hashGlobalValue(GV: Equiv->getGlobalValue()); |
| 176 | Hashes.emplace_back(Args&: H); |
| 177 | return stable_hash_combine(Buffer: Hashes); |
| 178 | } |
| 179 | default: |
| 180 | // Skip other types of constants for simplicity. |
| 181 | return stable_hash_combine(Buffer: Hashes); |
| 182 | } |
| 183 | } |
| 184 | |
| 185 | stable_hash hashValue(Value *V) { |
| 186 | // Check constant and return its hash. |
| 187 | Constant *C = dyn_cast<Constant>(Val: V); |
| 188 | if (C) |
| 189 | return hashConstant(C); |
| 190 | |
| 191 | // Hash argument number. |
| 192 | SmallVector<stable_hash> Hashes; |
| 193 | if (Argument *Arg = dyn_cast<Argument>(Val: V)) |
| 194 | Hashes.emplace_back(Args: Arg->getArgNo()); |
| 195 | |
| 196 | // Get an index (an insertion order) for the non-constant value. |
| 197 | auto [It, WasInserted] = ValueToId.try_emplace(Key: V, Args: ValueToId.size()); |
| 198 | Hashes.emplace_back(Args&: It->second); |
| 199 | |
| 200 | return stable_hash_combine(Buffer: Hashes); |
| 201 | } |
| 202 | |
| 203 | stable_hash hashOperand(Value *Operand) { |
| 204 | SmallVector<stable_hash> Hashes; |
| 205 | Hashes.emplace_back(Args: hashType(ValueType: Operand->getType())); |
| 206 | Hashes.emplace_back(Args: hashValue(V: Operand)); |
| 207 | return stable_hash_combine(Buffer: Hashes); |
| 208 | } |
| 209 | |
| 210 | stable_hash hashInstruction(const Instruction &Inst) { |
| 211 | SmallVector<stable_hash> Hashes; |
| 212 | Hashes.emplace_back(Args: Inst.getOpcode()); |
| 213 | |
| 214 | if (!DetailedHash) |
| 215 | return stable_hash_combine(Buffer: Hashes); |
| 216 | |
| 217 | Hashes.emplace_back(Args: hashType(ValueType: Inst.getType())); |
| 218 | |
| 219 | // Handle additional properties of specific instructions that cause |
| 220 | // semantic differences in the IR. |
| 221 | if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(Val: &Inst)) |
| 222 | Hashes.emplace_back(Args: ComparisonInstruction->getPredicate()); |
| 223 | |
| 224 | unsigned InstIdx = 0; |
| 225 | if (IndexInstruction) { |
| 226 | InstIdx = IndexInstruction->size(); |
| 227 | IndexInstruction->try_emplace(Key: InstIdx, Args: const_cast<Instruction *>(&Inst)); |
| 228 | } |
| 229 | |
| 230 | for (const auto [OpndIdx, Op] : enumerate(First: Inst.operands())) { |
| 231 | auto OpndHash = hashOperand(Operand: Op); |
| 232 | if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) { |
| 233 | assert(IndexOperandHashMap); |
| 234 | IndexOperandHashMap->try_emplace(Key: {InstIdx, OpndIdx}, Args&: OpndHash); |
| 235 | } else |
| 236 | Hashes.emplace_back(Args&: OpndHash); |
| 237 | } |
| 238 | |
| 239 | return stable_hash_combine(Buffer: Hashes); |
| 240 | } |
| 241 | |
| 242 | // A function hash is calculated by considering only the number of arguments |
| 243 | // and whether a function is varargs, the order of basic blocks (given by the |
| 244 | // successors of each basic block in depth first order), and the order of |
| 245 | // opcodes of each instruction within each of these basic blocks. This mirrors |
| 246 | // the strategy FunctionComparator::compare() uses to compare functions by |
| 247 | // walking the BBs in depth first order and comparing each instruction in |
| 248 | // sequence. Because this hash currently does not look at the operands, it is |
| 249 | // insensitive to things such as the target of calls and the constants used in |
| 250 | // the function, which makes it useful when possibly merging functions which |
| 251 | // are the same modulo constants and call targets. |
| 252 | // |
| 253 | // Note that different users of StructuralHash will want different behavior |
| 254 | // out of it (i.e., MergeFunctions will want something different from PM |
| 255 | // expensive checks for pass modification status). When modifying this |
| 256 | // function, most changes should be gated behind an option and enabled |
| 257 | // selectively. |
| 258 | void update(const Function &F) { |
| 259 | // Declarations don't affect analyses. |
| 260 | if (F.isDeclaration()) |
| 261 | return; |
| 262 | |
| 263 | SmallVector<stable_hash> Hashes; |
| 264 | Hashes.emplace_back(Args&: Hash); |
| 265 | Hashes.emplace_back(Args: FunctionHeaderHash); |
| 266 | |
| 267 | Hashes.emplace_back(Args: F.isVarArg()); |
| 268 | Hashes.emplace_back(Args: F.arg_size()); |
| 269 | |
| 270 | SmallVector<const BasicBlock *, 8> BBs; |
| 271 | SmallPtrSet<const BasicBlock *, 16> VisitedBBs; |
| 272 | |
| 273 | // Walk the blocks in the same order as |
| 274 | // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the |
| 275 | // function "structure." (BB and opcode sequence) |
| 276 | BBs.push_back(Elt: &F.getEntryBlock()); |
| 277 | VisitedBBs.insert(Ptr: BBs[0]); |
| 278 | while (!BBs.empty()) { |
| 279 | const BasicBlock *BB = BBs.pop_back_val(); |
| 280 | |
| 281 | Hashes.emplace_back(Args: BlockHeaderHash); |
| 282 | for (auto &Inst : *BB) |
| 283 | Hashes.emplace_back(Args: hashInstruction(Inst)); |
| 284 | |
| 285 | for (const BasicBlock *Succ : successors(BB)) |
| 286 | if (VisitedBBs.insert(Ptr: Succ).second) |
| 287 | BBs.push_back(Elt: Succ); |
| 288 | } |
| 289 | |
| 290 | // Update the combined hash in place. |
| 291 | Hash = stable_hash_combine(Buffer: Hashes); |
| 292 | } |
| 293 | |
| 294 | void update(const GlobalVariable &GV) { |
| 295 | // Declarations and used/compiler.used don't affect analyses. |
| 296 | // Since there are several `llvm.*` metadata, like `llvm.embedded.object`, |
| 297 | // we ignore anything with the `.llvm` prefix |
| 298 | if (GV.isDeclaration() || GV.getName().starts_with(Prefix: "llvm." )) |
| 299 | return; |
| 300 | SmallVector<stable_hash> Hashes; |
| 301 | Hashes.emplace_back(Args&: Hash); |
| 302 | Hashes.emplace_back(Args: GlobalHeaderHash); |
| 303 | Hashes.emplace_back(Args: GV.getValueType()->getTypeID()); |
| 304 | |
| 305 | // Update the combined hash in place. |
| 306 | Hash = stable_hash_combine(Buffer: Hashes); |
| 307 | } |
| 308 | |
| 309 | void update(const Module &M) { |
| 310 | for (const GlobalVariable &GV : M.globals()) |
| 311 | update(GV); |
| 312 | for (const Function &F : M) |
| 313 | update(F); |
| 314 | } |
| 315 | |
| 316 | uint64_t getHash() const { return Hash; } |
| 317 | |
| 318 | std::unique_ptr<IndexInstrMap> getIndexInstrMap() { |
| 319 | return std::move(IndexInstruction); |
| 320 | } |
| 321 | |
| 322 | std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() { |
| 323 | return std::move(IndexOperandHashMap); |
| 324 | } |
| 325 | }; |
| 326 | |
| 327 | } // namespace |
| 328 | |
| 329 | stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { |
| 330 | StructuralHashImpl H(DetailedHash); |
| 331 | H.update(F); |
| 332 | return H.getHash(); |
| 333 | } |
| 334 | |
| 335 | stable_hash llvm::StructuralHash(const GlobalVariable &GVar) { |
| 336 | return StructuralHashImpl::hashGlobalVariable(GVar); |
| 337 | } |
| 338 | |
| 339 | stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { |
| 340 | StructuralHashImpl H(DetailedHash); |
| 341 | H.update(M); |
| 342 | return H.getHash(); |
| 343 | } |
| 344 | |
| 345 | FunctionHashInfo |
| 346 | llvm::StructuralHashWithDifferences(const Function &F, |
| 347 | IgnoreOperandFunc IgnoreOp) { |
| 348 | StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp); |
| 349 | H.update(F); |
| 350 | return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(), |
| 351 | H.getIndexPairOpndHashMap()); |
| 352 | } |
| 353 | |