| 1 | //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // \file |
| 10 | // Implementation file for the IRSimilarityIdentifier for identifying |
| 11 | // similarities in IR including the IRInstructionMapper. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "llvm/Analysis/IRSimilarityIdentifier.h" |
| 16 | #include "llvm/ADT/DenseMap.h" |
| 17 | #include "llvm/ADT/SetOperations.h" |
| 18 | #include "llvm/IR/Intrinsics.h" |
| 19 | #include "llvm/IR/Operator.h" |
| 20 | #include "llvm/IR/User.h" |
| 21 | #include "llvm/InitializePasses.h" |
| 22 | #include "llvm/Support/SuffixTree.h" |
| 23 | |
| 24 | using namespace llvm; |
| 25 | using namespace IRSimilarity; |
| 26 | |
| 27 | namespace llvm { |
| 28 | cl::opt<bool> |
| 29 | DisableBranches("no-ir-sim-branch-matching" , cl::init(Val: false), |
| 30 | cl::ReallyHidden, |
| 31 | cl::desc("disable similarity matching, and outlining, " |
| 32 | "across branches for debugging purposes." )); |
| 33 | |
| 34 | cl::opt<bool> |
| 35 | DisableIndirectCalls("no-ir-sim-indirect-calls" , cl::init(Val: false), |
| 36 | cl::ReallyHidden, |
| 37 | cl::desc("disable outlining indirect calls." )); |
| 38 | |
| 39 | static cl::opt<bool> |
| 40 | MatchCallsByName("ir-sim-calls-by-name" , cl::init(Val: false), cl::ReallyHidden, |
| 41 | cl::desc("only allow matching call instructions if the " |
| 42 | "name and type signature match." )); |
| 43 | |
| 44 | cl::opt<bool> |
| 45 | DisableIntrinsics("no-ir-sim-intrinsics" , cl::init(Val: false), cl::ReallyHidden, |
| 46 | cl::desc("Don't match or outline intrinsics" )); |
| 47 | } // namespace llvm |
| 48 | |
| 49 | IRInstructionData::IRInstructionData(Instruction &I, bool Legality, |
| 50 | IRInstructionDataList &IDList) |
| 51 | : Inst(&I), Legal(Legality), IDL(&IDList) { |
| 52 | initializeInstruction(); |
| 53 | } |
| 54 | |
| 55 | void IRInstructionData::initializeInstruction() { |
| 56 | // We check for whether we have a comparison instruction. If it is, we |
| 57 | // find the "less than" version of the predicate for consistency for |
| 58 | // comparison instructions throught the program. |
| 59 | if (CmpInst *C = dyn_cast<CmpInst>(Val: Inst)) { |
| 60 | CmpInst::Predicate Predicate = predicateForConsistency(CI: C); |
| 61 | if (Predicate != C->getPredicate()) |
| 62 | RevisedPredicate = Predicate; |
| 63 | } |
| 64 | |
| 65 | // Here we collect the operands and their types for determining whether |
| 66 | // the structure of the operand use matches between two different candidates. |
| 67 | for (Use &OI : Inst->operands()) { |
| 68 | if (isa<CmpInst>(Val: Inst) && RevisedPredicate) { |
| 69 | // If we have a CmpInst where the predicate is reversed, it means the |
| 70 | // operands must be reversed as well. |
| 71 | OperVals.insert(I: OperVals.begin(), Elt: OI.get()); |
| 72 | continue; |
| 73 | } |
| 74 | |
| 75 | OperVals.push_back(Elt: OI.get()); |
| 76 | } |
| 77 | |
| 78 | // We capture the incoming BasicBlocks as values as well as the incoming |
| 79 | // Values in order to check for structural similarity. |
| 80 | if (PHINode *PN = dyn_cast<PHINode>(Val: Inst)) |
| 81 | llvm::append_range(C&: OperVals, R: PN->blocks()); |
| 82 | } |
| 83 | |
| 84 | IRInstructionData::IRInstructionData(IRInstructionDataList &IDList) |
| 85 | : IDL(&IDList) {} |
| 86 | |
| 87 | void IRInstructionData::setBranchSuccessors( |
| 88 | DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { |
| 89 | assert(isa<BranchInst>(Inst) && "Instruction must be branch" ); |
| 90 | |
| 91 | BranchInst *BI = cast<BranchInst>(Val: Inst); |
| 92 | DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; |
| 93 | |
| 94 | BBNumIt = BasicBlockToInteger.find(Val: BI->getParent()); |
| 95 | assert(BBNumIt != BasicBlockToInteger.end() && |
| 96 | "Could not find location for BasicBlock!" ); |
| 97 | |
| 98 | int CurrentBlockNumber = static_cast<int>(BBNumIt->second); |
| 99 | |
| 100 | for (Value *V : getBlockOperVals()) { |
| 101 | BasicBlock *Successor = cast<BasicBlock>(Val: V); |
| 102 | BBNumIt = BasicBlockToInteger.find(Val: Successor); |
| 103 | assert(BBNumIt != BasicBlockToInteger.end() && |
| 104 | "Could not find number for BasicBlock!" ); |
| 105 | int OtherBlockNumber = static_cast<int>(BBNumIt->second); |
| 106 | |
| 107 | int Relative = OtherBlockNumber - CurrentBlockNumber; |
| 108 | RelativeBlockLocations.push_back(Elt: Relative); |
| 109 | } |
| 110 | } |
| 111 | |
| 112 | ArrayRef<Value *> IRInstructionData::getBlockOperVals() { |
| 113 | assert((isa<BranchInst>(Inst) || |
| 114 | isa<PHINode>(Inst)) && "Instruction must be branch or PHINode" ); |
| 115 | |
| 116 | if (BranchInst *BI = dyn_cast<BranchInst>(Val: Inst)) |
| 117 | return ArrayRef<Value *>( |
| 118 | std::next(x: OperVals.begin(), n: BI->isConditional() ? 1 : 0), |
| 119 | OperVals.end() |
| 120 | ); |
| 121 | |
| 122 | if (PHINode *PN = dyn_cast<PHINode>(Val: Inst)) |
| 123 | return ArrayRef<Value *>( |
| 124 | std::next(x: OperVals.begin(), n: PN->getNumIncomingValues()), |
| 125 | OperVals.end() |
| 126 | ); |
| 127 | |
| 128 | return ArrayRef<Value *>(); |
| 129 | } |
| 130 | |
| 131 | void IRInstructionData::setCalleeName(bool MatchByName) { |
| 132 | CallInst *CI = dyn_cast<CallInst>(Val: Inst); |
| 133 | assert(CI && "Instruction must be call" ); |
| 134 | |
| 135 | CalleeName = "" ; |
| 136 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: Inst)) { |
| 137 | // To hash intrinsics, we use the opcode, and types like the other |
| 138 | // instructions, but also, the Intrinsic ID, and the Name of the |
| 139 | // intrinsic. |
| 140 | Intrinsic::ID IntrinsicID = II->getIntrinsicID(); |
| 141 | FunctionType *FT = II->getFunctionType(); |
| 142 | // If there is an overloaded name, we have to use the complex version |
| 143 | // of getName to get the entire string. |
| 144 | if (Intrinsic::isOverloaded(id: IntrinsicID)) |
| 145 | CalleeName = |
| 146 | Intrinsic::getName(Id: IntrinsicID, Tys: FT->params(), M: II->getModule(), FT); |
| 147 | // If there is not an overloaded name, we only need to use this version. |
| 148 | else |
| 149 | CalleeName = Intrinsic::getName(id: IntrinsicID).str(); |
| 150 | |
| 151 | return; |
| 152 | } |
| 153 | |
| 154 | if (!CI->isIndirectCall() && MatchByName) |
| 155 | CalleeName = CI->getCalledFunction()->getName().str(); |
| 156 | } |
| 157 | |
| 158 | void IRInstructionData::setPHIPredecessors( |
| 159 | DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { |
| 160 | assert(isa<PHINode>(Inst) && "Instruction must be phi node" ); |
| 161 | |
| 162 | PHINode *PN = cast<PHINode>(Val: Inst); |
| 163 | DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; |
| 164 | |
| 165 | BBNumIt = BasicBlockToInteger.find(Val: PN->getParent()); |
| 166 | assert(BBNumIt != BasicBlockToInteger.end() && |
| 167 | "Could not find location for BasicBlock!" ); |
| 168 | |
| 169 | int CurrentBlockNumber = static_cast<int>(BBNumIt->second); |
| 170 | |
| 171 | // Convert the incoming blocks of the PHINode to an integer value, based on |
| 172 | // the relative distances between the current block and the incoming block. |
| 173 | for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) { |
| 174 | BasicBlock *Incoming = PN->getIncomingBlock(i: Idx); |
| 175 | BBNumIt = BasicBlockToInteger.find(Val: Incoming); |
| 176 | assert(BBNumIt != BasicBlockToInteger.end() && |
| 177 | "Could not find number for BasicBlock!" ); |
| 178 | int OtherBlockNumber = static_cast<int>(BBNumIt->second); |
| 179 | |
| 180 | int Relative = OtherBlockNumber - CurrentBlockNumber; |
| 181 | RelativeBlockLocations.push_back(Elt: Relative); |
| 182 | } |
| 183 | } |
| 184 | |
| 185 | CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { |
| 186 | switch (CI->getPredicate()) { |
| 187 | case CmpInst::FCMP_OGT: |
| 188 | case CmpInst::FCMP_UGT: |
| 189 | case CmpInst::FCMP_OGE: |
| 190 | case CmpInst::FCMP_UGE: |
| 191 | case CmpInst::ICMP_SGT: |
| 192 | case CmpInst::ICMP_UGT: |
| 193 | case CmpInst::ICMP_SGE: |
| 194 | case CmpInst::ICMP_UGE: |
| 195 | return CI->getSwappedPredicate(); |
| 196 | default: |
| 197 | return CI->getPredicate(); |
| 198 | } |
| 199 | } |
| 200 | |
| 201 | CmpInst::Predicate IRInstructionData::getPredicate() const { |
| 202 | assert(isa<CmpInst>(Inst) && |
| 203 | "Can only get a predicate from a compare instruction" ); |
| 204 | |
| 205 | if (RevisedPredicate) |
| 206 | return *RevisedPredicate; |
| 207 | |
| 208 | return cast<CmpInst>(Val: Inst)->getPredicate(); |
| 209 | } |
| 210 | |
| 211 | StringRef IRInstructionData::getCalleeName() const { |
| 212 | assert(isa<CallInst>(Inst) && |
| 213 | "Can only get a name from a call instruction" ); |
| 214 | |
| 215 | assert(CalleeName && "CalleeName has not been set" ); |
| 216 | |
| 217 | return *CalleeName; |
| 218 | } |
| 219 | |
| 220 | bool IRSimilarity::isClose(const IRInstructionData &A, |
| 221 | const IRInstructionData &B) { |
| 222 | |
| 223 | if (!A.Legal || !B.Legal) |
| 224 | return false; |
| 225 | |
| 226 | // Check if we are performing the same sort of operation on the same types |
| 227 | // but not on the same values. |
| 228 | if (!A.Inst->isSameOperationAs(I: B.Inst)) { |
| 229 | // If there is a predicate, this means that either there is a swapped |
| 230 | // predicate, or that the types are different, we want to make sure that |
| 231 | // the predicates are equivalent via swapping. |
| 232 | if (isa<CmpInst>(Val: A.Inst) && isa<CmpInst>(Val: B.Inst)) { |
| 233 | |
| 234 | if (A.getPredicate() != B.getPredicate()) |
| 235 | return false; |
| 236 | |
| 237 | // If the predicates are the same via swap, make sure that the types are |
| 238 | // still the same. |
| 239 | auto ZippedTypes = zip(t: A.OperVals, u: B.OperVals); |
| 240 | |
| 241 | return all_of( |
| 242 | Range&: ZippedTypes, P: [](std::tuple<llvm::Value *, llvm::Value *> R) { |
| 243 | return std::get<0>(t&: R)->getType() == std::get<1>(t&: R)->getType(); |
| 244 | }); |
| 245 | } |
| 246 | |
| 247 | return false; |
| 248 | } |
| 249 | |
| 250 | // Since any GEP Instruction operands after the first operand cannot be |
| 251 | // defined by a register, we must make sure that the operands after the first |
| 252 | // are the same in the two instructions |
| 253 | if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: A.Inst)) { |
| 254 | auto *OtherGEP = cast<GetElementPtrInst>(Val: B.Inst); |
| 255 | |
| 256 | // If the instructions do not have the same inbounds restrictions, we do |
| 257 | // not consider them the same. |
| 258 | if (GEP->isInBounds() != OtherGEP->isInBounds()) |
| 259 | return false; |
| 260 | |
| 261 | auto ZippedOperands = zip(t: GEP->indices(), u: OtherGEP->indices()); |
| 262 | |
| 263 | // We increment here since we do not care about the first instruction, |
| 264 | // we only care about the following operands since they must be the |
| 265 | // exact same to be considered similar. |
| 266 | return all_of(Range: drop_begin(RangeOrContainer&: ZippedOperands), |
| 267 | P: [](std::tuple<llvm::Use &, llvm::Use &> R) { |
| 268 | return std::get<0>(t&: R) == std::get<1>(t&: R); |
| 269 | }); |
| 270 | } |
| 271 | |
| 272 | // If the instructions are functions calls, we make sure that the function |
| 273 | // name is the same. We already know that the types are since is |
| 274 | // isSameOperationAs is true. |
| 275 | if (isa<CallInst>(Val: A.Inst) && isa<CallInst>(Val: B.Inst)) { |
| 276 | if (A.getCalleeName() != B.getCalleeName()) |
| 277 | return false; |
| 278 | } |
| 279 | |
| 280 | if (isa<BranchInst>(Val: A.Inst) && isa<BranchInst>(Val: B.Inst) && |
| 281 | A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size()) |
| 282 | return false; |
| 283 | |
| 284 | return true; |
| 285 | } |
| 286 | |
| 287 | // TODO: This is the same as the MachineOutliner, and should be consolidated |
| 288 | // into the same interface. |
| 289 | void IRInstructionMapper::convertToUnsignedVec( |
| 290 | BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, |
| 291 | std::vector<unsigned> &IntegerMapping) { |
| 292 | BasicBlock::iterator It = BB.begin(); |
| 293 | |
| 294 | std::vector<unsigned> IntegerMappingForBB; |
| 295 | std::vector<IRInstructionData *> InstrListForBB; |
| 296 | |
| 297 | for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { |
| 298 | switch (InstClassifier.visit(I&: *It)) { |
| 299 | case InstrType::Legal: |
| 300 | mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); |
| 301 | break; |
| 302 | case InstrType::Illegal: |
| 303 | mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); |
| 304 | break; |
| 305 | case InstrType::Invisible: |
| 306 | AddedIllegalLastTime = false; |
| 307 | break; |
| 308 | } |
| 309 | } |
| 310 | |
| 311 | if (AddedIllegalLastTime) |
| 312 | mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, End: true); |
| 313 | for (IRInstructionData *ID : InstrListForBB) |
| 314 | this->IDL->push_back(Node&: *ID); |
| 315 | llvm::append_range(C&: InstrList, R&: InstrListForBB); |
| 316 | llvm::append_range(C&: IntegerMapping, R&: IntegerMappingForBB); |
| 317 | } |
| 318 | |
| 319 | // TODO: This is the same as the MachineOutliner, and should be consolidated |
| 320 | // into the same interface. |
| 321 | unsigned IRInstructionMapper::mapToLegalUnsigned( |
| 322 | BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, |
| 323 | std::vector<IRInstructionData *> &InstrListForBB) { |
| 324 | // We added something legal, so we should unset the AddedLegalLastTime |
| 325 | // flag. |
| 326 | AddedIllegalLastTime = false; |
| 327 | |
| 328 | // If we have at least two adjacent legal instructions (which may have |
| 329 | // invisible instructions in between), remember that. |
| 330 | if (CanCombineWithPrevInstr) |
| 331 | HaveLegalRange = true; |
| 332 | CanCombineWithPrevInstr = true; |
| 333 | |
| 334 | // Get the integer for this instruction or give it the current |
| 335 | // LegalInstrNumber. |
| 336 | IRInstructionData *ID = allocateIRInstructionData(I&: *It, Legality: true, IDL&: *IDL); |
| 337 | InstrListForBB.push_back(x: ID); |
| 338 | |
| 339 | if (isa<BranchInst>(Val: *It)) |
| 340 | ID->setBranchSuccessors(BasicBlockToInteger); |
| 341 | |
| 342 | if (isa<CallInst>(Val: *It)) |
| 343 | ID->setCalleeName(EnableMatchCallsByName); |
| 344 | |
| 345 | if (isa<PHINode>(Val: *It)) |
| 346 | ID->setPHIPredecessors(BasicBlockToInteger); |
| 347 | |
| 348 | // Add to the instruction list |
| 349 | bool WasInserted; |
| 350 | DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator |
| 351 | ResultIt; |
| 352 | std::tie(args&: ResultIt, args&: WasInserted) = |
| 353 | InstructionIntegerMap.insert(KV: std::make_pair(x&: ID, y&: LegalInstrNumber)); |
| 354 | unsigned INumber = ResultIt->second; |
| 355 | |
| 356 | // There was an insertion. |
| 357 | if (WasInserted) |
| 358 | LegalInstrNumber++; |
| 359 | |
| 360 | IntegerMappingForBB.push_back(x: INumber); |
| 361 | |
| 362 | // Make sure we don't overflow or use any integers reserved by the DenseMap. |
| 363 | assert(LegalInstrNumber < IllegalInstrNumber && |
| 364 | "Instruction mapping overflow!" ); |
| 365 | |
| 366 | assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && |
| 367 | "Tried to assign DenseMap tombstone or empty key to instruction." ); |
| 368 | assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && |
| 369 | "Tried to assign DenseMap tombstone or empty key to instruction." ); |
| 370 | |
| 371 | return INumber; |
| 372 | } |
| 373 | |
| 374 | IRInstructionData * |
| 375 | IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, |
| 376 | IRInstructionDataList &IDL) { |
| 377 | return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); |
| 378 | } |
| 379 | |
| 380 | IRInstructionData * |
| 381 | IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) { |
| 382 | return new (InstDataAllocator->Allocate()) IRInstructionData(IDL); |
| 383 | } |
| 384 | |
| 385 | IRInstructionDataList * |
| 386 | IRInstructionMapper::allocateIRInstructionDataList() { |
| 387 | return new (IDLAllocator->Allocate()) IRInstructionDataList(); |
| 388 | } |
| 389 | |
| 390 | // TODO: This is the same as the MachineOutliner, and should be consolidated |
| 391 | // into the same interface. |
| 392 | unsigned IRInstructionMapper::mapToIllegalUnsigned( |
| 393 | BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, |
| 394 | std::vector<IRInstructionData *> &InstrListForBB, bool End) { |
| 395 | // Can't combine an illegal instruction. Set the flag. |
| 396 | CanCombineWithPrevInstr = false; |
| 397 | |
| 398 | // Only add one illegal number per range of legal numbers. |
| 399 | if (AddedIllegalLastTime) |
| 400 | return IllegalInstrNumber; |
| 401 | |
| 402 | IRInstructionData *ID = nullptr; |
| 403 | if (!End) |
| 404 | ID = allocateIRInstructionData(I&: *It, Legality: false, IDL&: *IDL); |
| 405 | else |
| 406 | ID = allocateIRInstructionData(IDL&: *IDL); |
| 407 | InstrListForBB.push_back(x: ID); |
| 408 | |
| 409 | // Remember that we added an illegal number last time. |
| 410 | AddedIllegalLastTime = true; |
| 411 | unsigned INumber = IllegalInstrNumber; |
| 412 | IntegerMappingForBB.push_back(x: IllegalInstrNumber--); |
| 413 | |
| 414 | assert(LegalInstrNumber < IllegalInstrNumber && |
| 415 | "Instruction mapping overflow!" ); |
| 416 | |
| 417 | assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && |
| 418 | "IllegalInstrNumber cannot be DenseMap tombstone or empty key!" ); |
| 419 | |
| 420 | assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && |
| 421 | "IllegalInstrNumber cannot be DenseMap tombstone or empty key!" ); |
| 422 | |
| 423 | return INumber; |
| 424 | } |
| 425 | |
| 426 | IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, |
| 427 | IRInstructionData *FirstInstIt, |
| 428 | IRInstructionData *LastInstIt) |
| 429 | : StartIdx(StartIdx), Len(Len) { |
| 430 | |
| 431 | assert(FirstInstIt != nullptr && "Instruction is nullptr!" ); |
| 432 | assert(LastInstIt != nullptr && "Instruction is nullptr!" ); |
| 433 | assert(StartIdx + Len > StartIdx && |
| 434 | "Overflow for IRSimilarityCandidate range?" ); |
| 435 | assert(Len - 1 == static_cast<unsigned>(std::distance( |
| 436 | iterator(FirstInstIt), iterator(LastInstIt))) && |
| 437 | "Length of the first and last IRInstructionData do not match the " |
| 438 | "given length" ); |
| 439 | |
| 440 | // We iterate over the given instructions, and map each unique value |
| 441 | // to a unique number in the IRSimilarityCandidate ValueToNumber and |
| 442 | // NumberToValue maps. A constant get its own value globally, the individual |
| 443 | // uses of the constants are not considered to be unique. |
| 444 | // |
| 445 | // IR: Mapping Added: |
| 446 | // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 |
| 447 | // %add2 = add i32 %a, %1 %add2 -> 4 |
| 448 | // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 |
| 449 | // |
| 450 | // when replace with global values, starting from 1, would be |
| 451 | // |
| 452 | // 3 = add i32 1, 2 |
| 453 | // 4 = add i32 1, 3 |
| 454 | // 6 = add i32 5, 2 |
| 455 | unsigned LocalValNumber = 1; |
| 456 | IRInstructionDataList::iterator ID = iterator(*FirstInstIt); |
| 457 | for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { |
| 458 | // Map the operand values to an unsigned integer if it does not already |
| 459 | // have an unsigned integer assigned to it. |
| 460 | for (Value *Arg : ID->OperVals) |
| 461 | if (ValueToNumber.try_emplace(Key: Arg, Args&: LocalValNumber).second) { |
| 462 | NumberToValue.try_emplace(Key: LocalValNumber, Args&: Arg); |
| 463 | LocalValNumber++; |
| 464 | } |
| 465 | |
| 466 | // Mapping the instructions to an unsigned integer if it is not already |
| 467 | // exist in the mapping. |
| 468 | if (ValueToNumber.try_emplace(Key: ID->Inst, Args&: LocalValNumber).second) { |
| 469 | NumberToValue.try_emplace(Key: LocalValNumber, Args&: ID->Inst); |
| 470 | LocalValNumber++; |
| 471 | } |
| 472 | } |
| 473 | |
| 474 | // Setting the first and last instruction data pointers for the candidate. If |
| 475 | // we got through the entire for loop without hitting an assert, we know |
| 476 | // that both of these instructions are not nullptrs. |
| 477 | FirstInst = FirstInstIt; |
| 478 | LastInst = LastInstIt; |
| 479 | |
| 480 | // Add the basic blocks contained in the set into the global value numbering. |
| 481 | DenseSet<BasicBlock *> BBSet; |
| 482 | getBasicBlocks(BBSet); |
| 483 | for (BasicBlock *BB : BBSet) { |
| 484 | if (ValueToNumber.try_emplace(Key: BB, Args&: LocalValNumber).second) { |
| 485 | NumberToValue.try_emplace(Key: LocalValNumber, Args&: BB); |
| 486 | LocalValNumber++; |
| 487 | } |
| 488 | } |
| 489 | } |
| 490 | |
| 491 | bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, |
| 492 | const IRSimilarityCandidate &B) { |
| 493 | if (A.getLength() != B.getLength()) |
| 494 | return false; |
| 495 | |
| 496 | auto InstrDataForBoth = |
| 497 | zip(t: make_range(x: A.begin(), y: A.end()), u: make_range(x: B.begin(), y: B.end())); |
| 498 | |
| 499 | return all_of(Range&: InstrDataForBoth, |
| 500 | P: [](std::tuple<IRInstructionData &, IRInstructionData &> R) { |
| 501 | IRInstructionData &A = std::get<0>(t&: R); |
| 502 | IRInstructionData &B = std::get<1>(t&: R); |
| 503 | if (!A.Legal || !B.Legal) |
| 504 | return false; |
| 505 | return isClose(A, B); |
| 506 | }); |
| 507 | } |
| 508 | |
| 509 | /// Determine if one or more of the assigned global value numbers for the |
| 510 | /// operands in \p TargetValueNumbers is in the current mapping set for operand |
| 511 | /// numbers in \p SourceOperands. The set of possible corresponding global |
| 512 | /// value numbers are replaced with the most recent version of compatible |
| 513 | /// values. |
| 514 | /// |
| 515 | /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global |
| 516 | /// value number for the source IRInstructionCandidate. |
| 517 | /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source |
| 518 | /// IRSimilarityCandidate global value numbers to a set of possible numbers in |
| 519 | /// the target. |
| 520 | /// \param [in] SourceOperands - The operands in the original |
| 521 | /// IRSimilarityCandidate in the current instruction. |
| 522 | /// \param [in] TargetValueNumbers - The global value numbers of the operands in |
| 523 | /// the corresponding Instruction in the other IRSimilarityCandidate. |
| 524 | /// \returns true if there exists a possible mapping between the source |
| 525 | /// Instruction operands and the target Instruction operands, and false if not. |
| 526 | static bool checkNumberingAndReplaceCommutative( |
| 527 | const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, |
| 528 | DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, |
| 529 | ArrayRef<Value *> &SourceOperands, |
| 530 | DenseSet<unsigned> &TargetValueNumbers){ |
| 531 | |
| 532 | DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; |
| 533 | |
| 534 | unsigned ArgVal; |
| 535 | bool WasInserted; |
| 536 | |
| 537 | // Iterate over the operands in the source IRSimilarityCandidate to determine |
| 538 | // whether there exists an operand in the other IRSimilarityCandidate that |
| 539 | // creates a valid mapping of Value to Value between the |
| 540 | // IRSimilarityCaniddates. |
| 541 | for (Value *V : SourceOperands) { |
| 542 | ArgVal = SourceValueToNumberMapping.find(Val: V)->second; |
| 543 | |
| 544 | // Instead of finding a current mapping, we attempt to insert a set. |
| 545 | std::tie(args&: ValueMappingIt, args&: WasInserted) = CurrentSrcTgtNumberMapping.insert( |
| 546 | KV: std::make_pair(x&: ArgVal, y&: TargetValueNumbers)); |
| 547 | |
| 548 | // We need to iterate over the items in other IRSimilarityCandidate's |
| 549 | // Instruction to determine whether there is a valid mapping of |
| 550 | // Value to Value. |
| 551 | DenseSet<unsigned> NewSet; |
| 552 | for (unsigned &Curr : ValueMappingIt->second) |
| 553 | // If we can find the value in the mapping, we add it to the new set. |
| 554 | if (TargetValueNumbers.contains(V: Curr)) |
| 555 | NewSet.insert(V: Curr); |
| 556 | |
| 557 | // If we could not find a Value, return 0. |
| 558 | if (NewSet.empty()) |
| 559 | return false; |
| 560 | |
| 561 | // Otherwise replace the old mapping with the newly constructed one. |
| 562 | if (NewSet.size() != ValueMappingIt->second.size()) |
| 563 | ValueMappingIt->second.swap(RHS&: NewSet); |
| 564 | |
| 565 | // We have reached no conclusions about the mapping, and cannot remove |
| 566 | // any items from the other operands, so we move to check the next operand. |
| 567 | if (ValueMappingIt->second.size() != 1) |
| 568 | continue; |
| 569 | |
| 570 | unsigned ValToRemove = *ValueMappingIt->second.begin(); |
| 571 | // When there is only one item left in the mapping for and operand, remove |
| 572 | // the value from the other operands. If it results in there being no |
| 573 | // mapping, return false, it means the mapping is wrong |
| 574 | for (Value *InnerV : SourceOperands) { |
| 575 | if (V == InnerV) |
| 576 | continue; |
| 577 | |
| 578 | unsigned InnerVal = SourceValueToNumberMapping.find(Val: InnerV)->second; |
| 579 | ValueMappingIt = CurrentSrcTgtNumberMapping.find(Val: InnerVal); |
| 580 | if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) |
| 581 | continue; |
| 582 | |
| 583 | ValueMappingIt->second.erase(V: ValToRemove); |
| 584 | if (ValueMappingIt->second.empty()) |
| 585 | return false; |
| 586 | } |
| 587 | } |
| 588 | |
| 589 | return true; |
| 590 | } |
| 591 | |
| 592 | /// Determine if operand number \p TargetArgVal is in the current mapping set |
| 593 | /// for operand number \p SourceArgVal. |
| 594 | /// |
| 595 | /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global |
| 596 | /// value numbers from source IRSimilarityCandidate to target |
| 597 | /// IRSimilarityCandidate. |
| 598 | /// \param [in] SourceArgVal The global value number for an operand in the |
| 599 | /// in the original candidate. |
| 600 | /// \param [in] TargetArgVal The global value number for the corresponding |
| 601 | /// operand in the other candidate. |
| 602 | /// \returns True if there exists a mapping and false if not. |
| 603 | bool checkNumberingAndReplace( |
| 604 | DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, |
| 605 | unsigned SourceArgVal, unsigned TargetArgVal) { |
| 606 | // We are given two unsigned integers representing the global values of |
| 607 | // the operands in different IRSimilarityCandidates and a current mapping |
| 608 | // between the two. |
| 609 | // |
| 610 | // Source Operand GVN: 1 |
| 611 | // Target Operand GVN: 2 |
| 612 | // CurrentMapping: {1: {1, 2}} |
| 613 | // |
| 614 | // Since we have mapping, and the target operand is contained in the set, we |
| 615 | // update it to: |
| 616 | // CurrentMapping: {1: {2}} |
| 617 | // and can return true. But, if the mapping was |
| 618 | // CurrentMapping: {1: {3}} |
| 619 | // we would return false. |
| 620 | |
| 621 | bool WasInserted; |
| 622 | DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; |
| 623 | |
| 624 | std::tie(args&: Val, args&: WasInserted) = CurrentSrcTgtNumberMapping.insert( |
| 625 | KV: std::make_pair(x&: SourceArgVal, y: DenseSet<unsigned>({TargetArgVal}))); |
| 626 | |
| 627 | // If we created a new mapping, then we are done. |
| 628 | if (WasInserted) |
| 629 | return true; |
| 630 | |
| 631 | // If there is more than one option in the mapping set, and the target value |
| 632 | // is included in the mapping set replace that set with one that only includes |
| 633 | // the target value, as it is the only valid mapping via the non commutative |
| 634 | // instruction. |
| 635 | |
| 636 | DenseSet<unsigned> &TargetSet = Val->second; |
| 637 | if (TargetSet.size() > 1 && TargetSet.contains(V: TargetArgVal)) { |
| 638 | TargetSet.clear(); |
| 639 | TargetSet.insert(V: TargetArgVal); |
| 640 | return true; |
| 641 | } |
| 642 | |
| 643 | // Return true if we can find the value in the set. |
| 644 | return TargetSet.contains(V: TargetArgVal); |
| 645 | } |
| 646 | |
| 647 | bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( |
| 648 | OperandMapping A, OperandMapping B) { |
| 649 | // Iterators to keep track of where we are in the operands for each |
| 650 | // Instruction. |
| 651 | ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); |
| 652 | ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); |
| 653 | unsigned OperandLength = A.OperVals.size(); |
| 654 | |
| 655 | // For each operand, get the value numbering and ensure it is consistent. |
| 656 | for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { |
| 657 | unsigned OperValA = A.IRSC.ValueToNumber.find(Val: *VItA)->second; |
| 658 | unsigned OperValB = B.IRSC.ValueToNumber.find(Val: *VItB)->second; |
| 659 | |
| 660 | // Attempt to add a set with only the target value. If there is no mapping |
| 661 | // we can create it here. |
| 662 | // |
| 663 | // For an instruction like a subtraction: |
| 664 | // IRSimilarityCandidateA: IRSimilarityCandidateB: |
| 665 | // %resultA = sub %a, %b %resultB = sub %d, %e |
| 666 | // |
| 667 | // We map %a -> %d and %b -> %e. |
| 668 | // |
| 669 | // And check to see whether their mapping is consistent in |
| 670 | // checkNumberingAndReplace. |
| 671 | |
| 672 | if (!checkNumberingAndReplace(CurrentSrcTgtNumberMapping&: A.ValueNumberMapping, SourceArgVal: OperValA, TargetArgVal: OperValB)) |
| 673 | return false; |
| 674 | |
| 675 | if (!checkNumberingAndReplace(CurrentSrcTgtNumberMapping&: B.ValueNumberMapping, SourceArgVal: OperValB, TargetArgVal: OperValA)) |
| 676 | return false; |
| 677 | } |
| 678 | return true; |
| 679 | } |
| 680 | |
| 681 | bool IRSimilarityCandidate::compareCommutativeOperandMapping( |
| 682 | OperandMapping A, OperandMapping B) { |
| 683 | DenseSet<unsigned> ValueNumbersA; |
| 684 | DenseSet<unsigned> ValueNumbersB; |
| 685 | |
| 686 | ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); |
| 687 | ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); |
| 688 | unsigned OperandLength = A.OperVals.size(); |
| 689 | |
| 690 | // Find the value number sets for the operands. |
| 691 | for (unsigned Idx = 0; Idx < OperandLength; |
| 692 | Idx++, VItA++, VItB++) { |
| 693 | ValueNumbersA.insert(V: A.IRSC.ValueToNumber.find(Val: *VItA)->second); |
| 694 | ValueNumbersB.insert(V: B.IRSC.ValueToNumber.find(Val: *VItB)->second); |
| 695 | } |
| 696 | |
| 697 | // Iterate over the operands in the first IRSimilarityCandidate and make sure |
| 698 | // there exists a possible mapping with the operands in the second |
| 699 | // IRSimilarityCandidate. |
| 700 | if (!checkNumberingAndReplaceCommutative(SourceValueToNumberMapping: A.IRSC.ValueToNumber, |
| 701 | CurrentSrcTgtNumberMapping&: A.ValueNumberMapping, SourceOperands&: A.OperVals, |
| 702 | TargetValueNumbers&: ValueNumbersB)) |
| 703 | return false; |
| 704 | |
| 705 | // Iterate over the operands in the second IRSimilarityCandidate and make sure |
| 706 | // there exists a possible mapping with the operands in the first |
| 707 | // IRSimilarityCandidate. |
| 708 | if (!checkNumberingAndReplaceCommutative(SourceValueToNumberMapping: B.IRSC.ValueToNumber, |
| 709 | CurrentSrcTgtNumberMapping&: B.ValueNumberMapping, SourceOperands&: B.OperVals, |
| 710 | TargetValueNumbers&: ValueNumbersA)) |
| 711 | return false; |
| 712 | |
| 713 | return true; |
| 714 | } |
| 715 | |
| 716 | bool IRSimilarityCandidate::compareAssignmentMapping( |
| 717 | const unsigned InstValA, const unsigned &InstValB, |
| 718 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, |
| 719 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { |
| 720 | DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; |
| 721 | bool WasInserted; |
| 722 | std::tie(args&: ValueMappingIt, args&: WasInserted) = ValueNumberMappingA.insert( |
| 723 | KV: std::make_pair(x: InstValA, y: DenseSet<unsigned>({InstValB}))); |
| 724 | if (!WasInserted && !ValueMappingIt->second.contains(V: InstValB)) |
| 725 | return false; |
| 726 | else if (ValueMappingIt->second.size() != 1) { |
| 727 | for (unsigned OtherVal : ValueMappingIt->second) { |
| 728 | if (OtherVal == InstValB) |
| 729 | continue; |
| 730 | auto OtherValIt = ValueNumberMappingA.find(Val: OtherVal); |
| 731 | if (OtherValIt == ValueNumberMappingA.end()) |
| 732 | continue; |
| 733 | OtherValIt->second.erase(V: InstValA); |
| 734 | } |
| 735 | ValueNumberMappingA.erase(I: ValueMappingIt); |
| 736 | std::tie(args&: ValueMappingIt, args&: WasInserted) = ValueNumberMappingA.insert( |
| 737 | KV: std::make_pair(x: InstValA, y: DenseSet<unsigned>({InstValB}))); |
| 738 | } |
| 739 | |
| 740 | return true; |
| 741 | } |
| 742 | |
| 743 | bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A, |
| 744 | RelativeLocMapping B) { |
| 745 | // Get the basic blocks the label refers to. |
| 746 | BasicBlock *ABB = cast<BasicBlock>(Val: A.OperVal); |
| 747 | BasicBlock *BBB = cast<BasicBlock>(Val: B.OperVal); |
| 748 | |
| 749 | // Get the basic blocks contained in each region. |
| 750 | DenseSet<BasicBlock *> BasicBlockA; |
| 751 | DenseSet<BasicBlock *> BasicBlockB; |
| 752 | A.IRSC.getBasicBlocks(BBSet&: BasicBlockA); |
| 753 | B.IRSC.getBasicBlocks(BBSet&: BasicBlockB); |
| 754 | |
| 755 | // Determine if the block is contained in the region. |
| 756 | bool AContained = BasicBlockA.contains(V: ABB); |
| 757 | bool BContained = BasicBlockB.contains(V: BBB); |
| 758 | |
| 759 | // Both blocks need to be contained in the region, or both need to be outside |
| 760 | // the region. |
| 761 | if (AContained != BContained) |
| 762 | return false; |
| 763 | |
| 764 | // If both are contained, then we need to make sure that the relative |
| 765 | // distance to the target blocks are the same. |
| 766 | if (AContained) |
| 767 | return A.RelativeLocation == B.RelativeLocation; |
| 768 | return true; |
| 769 | } |
| 770 | |
| 771 | bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, |
| 772 | const IRSimilarityCandidate &B) { |
| 773 | DenseMap<unsigned, DenseSet<unsigned>> MappingA; |
| 774 | DenseMap<unsigned, DenseSet<unsigned>> MappingB; |
| 775 | return IRSimilarityCandidate::compareStructure(A, B, ValueNumberMappingA&: MappingA, ValueNumberMappingB&: MappingB); |
| 776 | } |
| 777 | |
| 778 | typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &, |
| 779 | SmallVector<int, 4> &, ArrayRef<Value *> &, |
| 780 | ArrayRef<Value *> &> |
| 781 | ZippedRelativeLocationsT; |
| 782 | |
| 783 | bool IRSimilarityCandidate::compareStructure( |
| 784 | const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, |
| 785 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, |
| 786 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { |
| 787 | if (A.getLength() != B.getLength()) |
| 788 | return false; |
| 789 | |
| 790 | if (A.ValueToNumber.size() != B.ValueToNumber.size()) |
| 791 | return false; |
| 792 | |
| 793 | iterator ItA = A.begin(); |
| 794 | iterator ItB = B.begin(); |
| 795 | |
| 796 | // These ValueNumber Mapping sets create a create a mapping between the values |
| 797 | // in one candidate to values in the other candidate. If we create a set with |
| 798 | // one element, and that same element maps to the original element in the |
| 799 | // candidate we have a good mapping. |
| 800 | |
| 801 | // Iterate over the instructions contained in each candidate |
| 802 | unsigned SectionLength = A.getStartIdx() + A.getLength(); |
| 803 | for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; |
| 804 | ItA++, ItB++, Loc++) { |
| 805 | // Make sure the instructions are similar to one another. |
| 806 | if (!isClose(A: *ItA, B: *ItB)) |
| 807 | return false; |
| 808 | |
| 809 | Instruction *IA = ItA->Inst; |
| 810 | Instruction *IB = ItB->Inst; |
| 811 | |
| 812 | if (!ItA->Legal || !ItB->Legal) |
| 813 | return false; |
| 814 | |
| 815 | // Get the operand sets for the instructions. |
| 816 | ArrayRef<Value *> OperValsA = ItA->OperVals; |
| 817 | ArrayRef<Value *> OperValsB = ItB->OperVals; |
| 818 | |
| 819 | unsigned InstValA = A.ValueToNumber.find(Val: IA)->second; |
| 820 | unsigned InstValB = B.ValueToNumber.find(Val: IB)->second; |
| 821 | |
| 822 | // Ensure that the mappings for the instructions exists. |
| 823 | if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA, |
| 824 | ValueNumberMappingB)) |
| 825 | return false; |
| 826 | |
| 827 | if (!compareAssignmentMapping(InstValA: InstValB, InstValB: InstValA, ValueNumberMappingA&: ValueNumberMappingB, |
| 828 | ValueNumberMappingB&: ValueNumberMappingA)) |
| 829 | return false; |
| 830 | |
| 831 | // We have different paths for commutative instructions and non-commutative |
| 832 | // instructions since commutative instructions could allow multiple mappings |
| 833 | // to certain values. |
| 834 | if (IA->isCommutative() && !isa<FPMathOperator>(Val: IA) && |
| 835 | !isa<IntrinsicInst>(Val: IA)) { |
| 836 | if (!compareCommutativeOperandMapping( |
| 837 | A: {.IRSC: A, .OperVals: OperValsA, .ValueNumberMapping: ValueNumberMappingA}, |
| 838 | B: {.IRSC: B, .OperVals: OperValsB, .ValueNumberMapping: ValueNumberMappingB})) |
| 839 | return false; |
| 840 | continue; |
| 841 | } |
| 842 | |
| 843 | // Handle the non-commutative cases. |
| 844 | if (!compareNonCommutativeOperandMapping( |
| 845 | A: {.IRSC: A, .OperVals: OperValsA, .ValueNumberMapping: ValueNumberMappingA}, |
| 846 | B: {.IRSC: B, .OperVals: OperValsB, .ValueNumberMapping: ValueNumberMappingB})) |
| 847 | return false; |
| 848 | |
| 849 | // Here we check that between two corresponding instructions, |
| 850 | // when referring to a basic block in the same region, the |
| 851 | // relative locations are the same. And, that the instructions refer to |
| 852 | // basic blocks outside the region in the same corresponding locations. |
| 853 | |
| 854 | // We are able to make the assumption about blocks outside of the region |
| 855 | // since the target block labels are considered values and will follow the |
| 856 | // same number matching that we defined for the other instructions in the |
| 857 | // region. So, at this point, in each location we target a specific block |
| 858 | // outside the region, we are targeting a corresponding block in each |
| 859 | // analagous location in the region we are comparing to. |
| 860 | if (!(isa<BranchInst>(Val: IA) && isa<BranchInst>(Val: IB)) && |
| 861 | !(isa<PHINode>(Val: IA) && isa<PHINode>(Val: IB))) |
| 862 | continue; |
| 863 | |
| 864 | SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations; |
| 865 | SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations; |
| 866 | ArrayRef<Value *> ABL = ItA->getBlockOperVals(); |
| 867 | ArrayRef<Value *> BBL = ItB->getBlockOperVals(); |
| 868 | |
| 869 | // Check to make sure that the number of operands, and branching locations |
| 870 | // between BranchInsts is the same. |
| 871 | if (RelBlockLocsA.size() != RelBlockLocsB.size() && |
| 872 | ABL.size() != BBL.size()) |
| 873 | return false; |
| 874 | |
| 875 | assert(RelBlockLocsA.size() == ABL.size() && |
| 876 | "Block information vectors not the same size." ); |
| 877 | assert(RelBlockLocsB.size() == BBL.size() && |
| 878 | "Block information vectors not the same size." ); |
| 879 | |
| 880 | ZippedRelativeLocationsT ZippedRelativeLocations = |
| 881 | zip(t&: RelBlockLocsA, u&: RelBlockLocsB, args&: ABL, args&: BBL); |
| 882 | if (any_of(Range&: ZippedRelativeLocations, |
| 883 | P: [&A, &B](std::tuple<int, int, Value *, Value *> R) { |
| 884 | return !checkRelativeLocations( |
| 885 | A: {.IRSC: A, .RelativeLocation: std::get<0>(t&: R), .OperVal: std::get<2>(t&: R)}, |
| 886 | B: {.IRSC: B, .RelativeLocation: std::get<1>(t&: R), .OperVal: std::get<3>(t&: R)}); |
| 887 | })) |
| 888 | return false; |
| 889 | } |
| 890 | return true; |
| 891 | } |
| 892 | |
| 893 | bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, |
| 894 | const IRSimilarityCandidate &B) { |
| 895 | auto DoesOverlap = [](const IRSimilarityCandidate &X, |
| 896 | const IRSimilarityCandidate &Y) { |
| 897 | // Check: |
| 898 | // XXXXXX X starts before Y ends |
| 899 | // YYYYYYY Y starts after X starts |
| 900 | return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; |
| 901 | }; |
| 902 | |
| 903 | return DoesOverlap(A, B) || DoesOverlap(B, A); |
| 904 | } |
| 905 | |
| 906 | void IRSimilarityIdentifier::populateMapper( |
| 907 | Module &M, std::vector<IRInstructionData *> &InstrList, |
| 908 | std::vector<unsigned> &IntegerMapping) { |
| 909 | |
| 910 | std::vector<IRInstructionData *> InstrListForModule; |
| 911 | std::vector<unsigned> IntegerMappingForModule; |
| 912 | // Iterate over the functions in the module to map each Instruction in each |
| 913 | // BasicBlock to an unsigned integer. |
| 914 | Mapper.initializeForBBs(M); |
| 915 | |
| 916 | for (Function &F : M) { |
| 917 | |
| 918 | if (F.empty()) |
| 919 | continue; |
| 920 | |
| 921 | for (BasicBlock &BB : F) { |
| 922 | |
| 923 | // BB has potential to have similarity since it has a size greater than 2 |
| 924 | // and can therefore match other regions greater than 2. Map it to a list |
| 925 | // of unsigned integers. |
| 926 | Mapper.convertToUnsignedVec(BB, InstrList&: InstrListForModule, |
| 927 | IntegerMapping&: IntegerMappingForModule); |
| 928 | } |
| 929 | |
| 930 | BasicBlock::iterator It = F.begin()->end(); |
| 931 | Mapper.mapToIllegalUnsigned(It, IntegerMappingForBB&: IntegerMappingForModule, InstrListForBB&: InstrListForModule, |
| 932 | End: true); |
| 933 | if (InstrListForModule.size() > 0) |
| 934 | Mapper.IDL->push_back(Node&: *InstrListForModule.back()); |
| 935 | } |
| 936 | |
| 937 | // Insert the InstrListForModule at the end of the overall InstrList so that |
| 938 | // we can have a long InstrList for the entire set of Modules being analyzed. |
| 939 | llvm::append_range(C&: InstrList, R&: InstrListForModule); |
| 940 | // Do the same as above, but for IntegerMapping. |
| 941 | llvm::append_range(C&: IntegerMapping, R&: IntegerMappingForModule); |
| 942 | } |
| 943 | |
| 944 | void IRSimilarityIdentifier::populateMapper( |
| 945 | ArrayRef<std::unique_ptr<Module>> &Modules, |
| 946 | std::vector<IRInstructionData *> &InstrList, |
| 947 | std::vector<unsigned> &IntegerMapping) { |
| 948 | |
| 949 | // Iterate over, and map the instructions in each module. |
| 950 | for (const std::unique_ptr<Module> &M : Modules) |
| 951 | populateMapper(M&: *M, InstrList, IntegerMapping); |
| 952 | } |
| 953 | |
| 954 | /// From a repeated subsequence, find all the different instances of the |
| 955 | /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from |
| 956 | /// the IRInstructionData in subsequence. |
| 957 | /// |
| 958 | /// \param [in] Mapper - The instruction mapper for basic correctness checks. |
| 959 | /// \param [in] InstrList - The vector that holds the instruction data. |
| 960 | /// \param [in] IntegerMapping - The vector that holds the mapped integers. |
| 961 | /// \param [out] CandsForRepSubstring - The vector to store the generated |
| 962 | /// IRSimilarityCandidates. |
| 963 | static void createCandidatesFromSuffixTree( |
| 964 | const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList, |
| 965 | std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, |
| 966 | std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { |
| 967 | |
| 968 | unsigned StringLen = RS.Length; |
| 969 | if (StringLen < 2) |
| 970 | return; |
| 971 | |
| 972 | // Create an IRSimilarityCandidate for instance of this subsequence \p RS. |
| 973 | for (const unsigned &StartIdx : RS.StartIndices) { |
| 974 | unsigned EndIdx = StartIdx + StringLen - 1; |
| 975 | |
| 976 | // Check that this subsequence does not contain an illegal instruction. |
| 977 | bool ContainsIllegal = false; |
| 978 | for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { |
| 979 | unsigned Key = IntegerMapping[CurrIdx]; |
| 980 | if (Key > Mapper.IllegalInstrNumber) { |
| 981 | ContainsIllegal = true; |
| 982 | break; |
| 983 | } |
| 984 | } |
| 985 | |
| 986 | // If we have an illegal instruction, we should not create an |
| 987 | // IRSimilarityCandidate for this region. |
| 988 | if (ContainsIllegal) |
| 989 | continue; |
| 990 | |
| 991 | // We are getting iterators to the instructions in this region of code |
| 992 | // by advancing the start and end indices from the start of the |
| 993 | // InstrList. |
| 994 | std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); |
| 995 | std::advance(i&: StartIt, n: StartIdx); |
| 996 | std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); |
| 997 | std::advance(i&: EndIt, n: EndIdx); |
| 998 | |
| 999 | CandsForRepSubstring.emplace_back(args: StartIdx, args&: StringLen, args&: *StartIt, args&: *EndIt); |
| 1000 | } |
| 1001 | } |
| 1002 | |
| 1003 | void IRSimilarityCandidate::createCanonicalRelationFrom( |
| 1004 | IRSimilarityCandidate &SourceCand, |
| 1005 | DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping, |
| 1006 | DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) { |
| 1007 | assert(SourceCand.CanonNumToNumber.size() != 0 && |
| 1008 | "Base canonical relationship is empty!" ); |
| 1009 | assert(SourceCand.NumberToCanonNum.size() != 0 && |
| 1010 | "Base canonical relationship is empty!" ); |
| 1011 | |
| 1012 | assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty" ); |
| 1013 | assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty" ); |
| 1014 | |
| 1015 | DenseSet<unsigned> UsedGVNs; |
| 1016 | // Iterate over the mappings provided from this candidate to SourceCand. We |
| 1017 | // are then able to map the GVN in this candidate to the same canonical number |
| 1018 | // given to the corresponding GVN in SourceCand. |
| 1019 | for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) { |
| 1020 | unsigned SourceGVN = GVNMapping.first; |
| 1021 | |
| 1022 | assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!" ); |
| 1023 | |
| 1024 | unsigned ResultGVN; |
| 1025 | // We need special handling if we have more than one potential value. This |
| 1026 | // means that there are at least two GVNs that could correspond to this GVN. |
| 1027 | // This could lead to potential swapping later on, so we make a decision |
| 1028 | // here to ensure a one-to-one mapping. |
| 1029 | if (GVNMapping.second.size() > 1) { |
| 1030 | bool Found = false; |
| 1031 | for (unsigned Val : GVNMapping.second) { |
| 1032 | // We make sure the target value number hasn't already been reserved. |
| 1033 | if (UsedGVNs.contains(V: Val)) |
| 1034 | continue; |
| 1035 | |
| 1036 | // We make sure that the opposite mapping is still consistent. |
| 1037 | DenseMap<unsigned, DenseSet<unsigned>>::iterator It = |
| 1038 | FromSourceMapping.find(Val); |
| 1039 | |
| 1040 | if (!It->second.contains(V: SourceGVN)) |
| 1041 | continue; |
| 1042 | |
| 1043 | // We pick the first item that satisfies these conditions. |
| 1044 | Found = true; |
| 1045 | ResultGVN = Val; |
| 1046 | break; |
| 1047 | } |
| 1048 | |
| 1049 | assert(Found && "Could not find matching value for source GVN" ); |
| 1050 | (void)Found; |
| 1051 | |
| 1052 | } else |
| 1053 | ResultGVN = *GVNMapping.second.begin(); |
| 1054 | |
| 1055 | // Whatever GVN is found, we mark it as used. |
| 1056 | UsedGVNs.insert(V: ResultGVN); |
| 1057 | |
| 1058 | unsigned CanonNum = *SourceCand.getCanonicalNum(N: ResultGVN); |
| 1059 | CanonNumToNumber.insert(KV: std::make_pair(x&: CanonNum, y&: SourceGVN)); |
| 1060 | NumberToCanonNum.insert(KV: std::make_pair(x&: SourceGVN, y&: CanonNum)); |
| 1061 | } |
| 1062 | |
| 1063 | DenseSet<BasicBlock *> BBSet; |
| 1064 | getBasicBlocks(BBSet); |
| 1065 | // Find canonical numbers for the BasicBlocks in the current candidate. |
| 1066 | // This is done by finding the corresponding value for the first instruction |
| 1067 | // in the block in the current candidate, finding the matching value in the |
| 1068 | // source candidate. Then by finding the parent of this value, use the |
| 1069 | // canonical number of the block in the source candidate for the canonical |
| 1070 | // number in the current candidate. |
| 1071 | for (BasicBlock *BB : BBSet) { |
| 1072 | unsigned BBGVNForCurrCand = ValueToNumber.find(Val: BB)->second; |
| 1073 | |
| 1074 | // We can skip the BasicBlock if the canonical numbering has already been |
| 1075 | // found in a separate instruction. |
| 1076 | if (NumberToCanonNum.contains(Val: BBGVNForCurrCand)) |
| 1077 | continue; |
| 1078 | |
| 1079 | // If the basic block is the starting block, then the shared instruction may |
| 1080 | // not be the first instruction in the block, it will be the first |
| 1081 | // instruction in the similarity region. |
| 1082 | Value *FirstOutlineInst = BB == getStartBB() |
| 1083 | ? frontInstruction() |
| 1084 | : &*BB->instructionsWithoutDebug().begin(); |
| 1085 | |
| 1086 | unsigned FirstInstGVN = *getGVN(V: FirstOutlineInst); |
| 1087 | unsigned FirstInstCanonNum = *getCanonicalNum(N: FirstInstGVN); |
| 1088 | unsigned SourceGVN = *SourceCand.fromCanonicalNum(N: FirstInstCanonNum); |
| 1089 | Value *SourceV = *SourceCand.fromGVN(Num: SourceGVN); |
| 1090 | BasicBlock *SourceBB = cast<Instruction>(Val: SourceV)->getParent(); |
| 1091 | unsigned SourceBBGVN = *SourceCand.getGVN(V: SourceBB); |
| 1092 | unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(N: SourceBBGVN); |
| 1093 | CanonNumToNumber.insert(KV: std::make_pair(x&: SourceCanonBBGVN, y&: BBGVNForCurrCand)); |
| 1094 | NumberToCanonNum.insert(KV: std::make_pair(x&: BBGVNForCurrCand, y&: SourceCanonBBGVN)); |
| 1095 | } |
| 1096 | } |
| 1097 | |
| 1098 | void IRSimilarityCandidate::createCanonicalRelationFrom( |
| 1099 | IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge, |
| 1100 | IRSimilarityCandidate &TargetCandLarge) { |
| 1101 | assert(!SourceCand.CanonNumToNumber.empty() && |
| 1102 | "Canonical Relationship is non-empty" ); |
| 1103 | assert(!SourceCand.NumberToCanonNum.empty() && |
| 1104 | "Canonical Relationship is non-empty" ); |
| 1105 | |
| 1106 | assert(!SourceCandLarge.CanonNumToNumber.empty() && |
| 1107 | "Canonical Relationship is non-empty" ); |
| 1108 | assert(!SourceCandLarge.NumberToCanonNum.empty() && |
| 1109 | "Canonical Relationship is non-empty" ); |
| 1110 | |
| 1111 | assert(!TargetCandLarge.CanonNumToNumber.empty() && |
| 1112 | "Canonical Relationship is non-empty" ); |
| 1113 | assert(!TargetCandLarge.NumberToCanonNum.empty() && |
| 1114 | "Canonical Relationship is non-empty" ); |
| 1115 | |
| 1116 | assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty" ); |
| 1117 | assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty" ); |
| 1118 | |
| 1119 | // We're going to use the larger candidates as a "bridge" to create the |
| 1120 | // canonical number for the target candidate since we have idetified two |
| 1121 | // candidates as subsequences of larger sequences, and therefore must be |
| 1122 | // structurally similar. |
| 1123 | for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) { |
| 1124 | Value *CurrVal = ValueNumPair.first; |
| 1125 | unsigned TargetCandGVN = ValueNumPair.second; |
| 1126 | |
| 1127 | // Find the numbering in the large candidate that surrounds the |
| 1128 | // current candidate. |
| 1129 | std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(V: CurrVal); |
| 1130 | assert(OLargeTargetGVN.has_value() && "GVN not found for Value" ); |
| 1131 | |
| 1132 | // Get the canonical numbering in the large target candidate. |
| 1133 | std::optional<unsigned> OTargetCandCanon = |
| 1134 | TargetCandLarge.getCanonicalNum(N: OLargeTargetGVN.value()); |
| 1135 | assert(OTargetCandCanon.has_value() && |
| 1136 | "Canononical Number not found for GVN" ); |
| 1137 | |
| 1138 | // Get the GVN in the large source candidate from the canonical numbering. |
| 1139 | std::optional<unsigned> OLargeSourceGVN = |
| 1140 | SourceCandLarge.fromCanonicalNum(N: OTargetCandCanon.value()); |
| 1141 | assert(OLargeSourceGVN.has_value() && |
| 1142 | "GVN Number not found for Canonical Number" ); |
| 1143 | |
| 1144 | // Get the Value from the GVN in the large source candidate. |
| 1145 | std::optional<Value *> OLargeSourceV = |
| 1146 | SourceCandLarge.fromGVN(Num: OLargeSourceGVN.value()); |
| 1147 | assert(OLargeSourceV.has_value() && "Value not found for GVN" ); |
| 1148 | |
| 1149 | // Get the GVN number for the Value in the source candidate. |
| 1150 | std::optional<unsigned> OSourceGVN = |
| 1151 | SourceCand.getGVN(V: OLargeSourceV.value()); |
| 1152 | assert(OSourceGVN.has_value() && "GVN Number not found for Value" ); |
| 1153 | |
| 1154 | // Get the canonical numbering from the GVN/ |
| 1155 | std::optional<unsigned> OSourceCanon = |
| 1156 | SourceCand.getCanonicalNum(N: OSourceGVN.value()); |
| 1157 | assert(OSourceCanon.has_value() && "Canon Number not found for GVN" ); |
| 1158 | |
| 1159 | // Insert the canonical numbering and GVN pair into their respective |
| 1160 | // mappings. |
| 1161 | CanonNumToNumber.insert( |
| 1162 | KV: std::make_pair(x&: OSourceCanon.value(), y&: TargetCandGVN)); |
| 1163 | NumberToCanonNum.insert( |
| 1164 | KV: std::make_pair(x&: TargetCandGVN, y&: OSourceCanon.value())); |
| 1165 | } |
| 1166 | } |
| 1167 | |
| 1168 | void IRSimilarityCandidate::createCanonicalMappingFor( |
| 1169 | IRSimilarityCandidate &CurrCand) { |
| 1170 | assert(CurrCand.CanonNumToNumber.size() == 0 && |
| 1171 | "Canonical Relationship is non-empty" ); |
| 1172 | assert(CurrCand.NumberToCanonNum.size() == 0 && |
| 1173 | "Canonical Relationship is non-empty" ); |
| 1174 | |
| 1175 | unsigned CanonNum = 0; |
| 1176 | // Iterate over the value numbers found, the order does not matter in this |
| 1177 | // case. |
| 1178 | for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) { |
| 1179 | CurrCand.NumberToCanonNum.insert(KV: std::make_pair(x&: NumToVal.first, y&: CanonNum)); |
| 1180 | CurrCand.CanonNumToNumber.insert(KV: std::make_pair(x&: CanonNum, y&: NumToVal.first)); |
| 1181 | CanonNum++; |
| 1182 | } |
| 1183 | } |
| 1184 | |
| 1185 | /// Look for larger IRSimilarityCandidates From the previously matched |
| 1186 | /// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is |
| 1187 | /// an overlap, return a pair of structurally similar, larger |
| 1188 | /// IRSimilarityCandidates. |
| 1189 | /// |
| 1190 | /// \param [in] CandA - The first candidate we are trying to determine the |
| 1191 | /// structure of. |
| 1192 | /// \param [in] CandB - The second candidate we are trying to determine the |
| 1193 | /// structure of. |
| 1194 | /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in |
| 1195 | /// a circuit to the IRSimilarityCandidates that include this instruction. |
| 1196 | /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a |
| 1197 | /// number representing the structural group assigned to it. |
| 1198 | static std::optional< |
| 1199 | std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> |
| 1200 | CheckLargerCands( |
| 1201 | IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, |
| 1202 | DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand, |
| 1203 | DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) { |
| 1204 | DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA; |
| 1205 | DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB; |
| 1206 | DenseSet<unsigned> IncludedGroupsA; |
| 1207 | DenseSet<unsigned> IncludedGroupsB; |
| 1208 | |
| 1209 | // Find the overall similarity group numbers that fully contain the candidate, |
| 1210 | // and record the larger candidate for each group. |
| 1211 | auto IdxToCandidateIt = IndexToIncludedCand.find(Val: CandA.getStartIdx()); |
| 1212 | std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> |
| 1213 | Result; |
| 1214 | |
| 1215 | unsigned CandAStart = CandA.getStartIdx(); |
| 1216 | unsigned CandAEnd = CandA.getEndIdx(); |
| 1217 | unsigned CandBStart = CandB.getStartIdx(); |
| 1218 | unsigned CandBEnd = CandB.getEndIdx(); |
| 1219 | if (IdxToCandidateIt == IndexToIncludedCand.end()) |
| 1220 | return Result; |
| 1221 | for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { |
| 1222 | if (MatchedCand->getStartIdx() > CandAStart || |
| 1223 | (MatchedCand->getEndIdx() < CandAEnd)) |
| 1224 | continue; |
| 1225 | unsigned GroupNum = CandToGroup.find(Val: MatchedCand)->second; |
| 1226 | IncludedGroupAndCandA.insert(KV: std::make_pair(x&: GroupNum, y&: MatchedCand)); |
| 1227 | IncludedGroupsA.insert(V: GroupNum); |
| 1228 | } |
| 1229 | |
| 1230 | // Find the overall similarity group numbers that fully contain the next |
| 1231 | // candidate, and record the larger candidate for each group. |
| 1232 | IdxToCandidateIt = IndexToIncludedCand.find(Val: CandBStart); |
| 1233 | if (IdxToCandidateIt == IndexToIncludedCand.end()) |
| 1234 | return Result; |
| 1235 | for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { |
| 1236 | if (MatchedCand->getStartIdx() > CandBStart || |
| 1237 | MatchedCand->getEndIdx() < CandBEnd) |
| 1238 | continue; |
| 1239 | unsigned GroupNum = CandToGroup.find(Val: MatchedCand)->second; |
| 1240 | IncludedGroupAndCandB.insert(KV: std::make_pair(x&: GroupNum, y&: MatchedCand)); |
| 1241 | IncludedGroupsB.insert(V: GroupNum); |
| 1242 | } |
| 1243 | |
| 1244 | // Find the intersection between the two groups, these are the groups where |
| 1245 | // the larger candidates exist. |
| 1246 | set_intersect(S1&: IncludedGroupsA, S2: IncludedGroupsB); |
| 1247 | |
| 1248 | // If there is no intersection between the sets, then we cannot determine |
| 1249 | // whether or not there is a match. |
| 1250 | if (IncludedGroupsA.empty()) |
| 1251 | return Result; |
| 1252 | |
| 1253 | // Create a pair that contains the larger candidates. |
| 1254 | auto ItA = IncludedGroupAndCandA.find(Val: *IncludedGroupsA.begin()); |
| 1255 | auto ItB = IncludedGroupAndCandB.find(Val: *IncludedGroupsA.begin()); |
| 1256 | Result = std::make_pair(x&: ItA->second, y&: ItB->second); |
| 1257 | return Result; |
| 1258 | } |
| 1259 | |
| 1260 | /// From the list of IRSimilarityCandidates, perform a comparison between each |
| 1261 | /// IRSimilarityCandidate to determine if there are overlapping |
| 1262 | /// IRInstructionData, or if they do not have the same structure. |
| 1263 | /// |
| 1264 | /// \param [in] CandsForRepSubstring - The vector containing the |
| 1265 | /// IRSimilarityCandidates. |
| 1266 | /// \param [out] StructuralGroups - the mapping of unsigned integers to vector |
| 1267 | /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the |
| 1268 | /// vector are structurally similar to one another. |
| 1269 | /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in |
| 1270 | /// a circuit to the IRSimilarityCandidates that include this instruction. |
| 1271 | /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a |
| 1272 | /// number representing the structural group assigned to it. |
| 1273 | static void findCandidateStructures( |
| 1274 | std::vector<IRSimilarityCandidate> &CandsForRepSubstring, |
| 1275 | DenseMap<unsigned, SimilarityGroup> &StructuralGroups, |
| 1276 | DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand, |
| 1277 | DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup |
| 1278 | ) { |
| 1279 | std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, |
| 1280 | InnerCandEndIt; |
| 1281 | |
| 1282 | // IRSimilarityCandidates each have a structure for operand use. It is |
| 1283 | // possible that two instances of the same subsequences have different |
| 1284 | // structure. Each type of structure found is assigned a number. This |
| 1285 | // DenseMap maps an IRSimilarityCandidate to which type of similarity |
| 1286 | // discovered it fits within. |
| 1287 | DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; |
| 1288 | |
| 1289 | // Find the compatibility from each candidate to the others to determine |
| 1290 | // which candidates overlap and which have the same structure by mapping |
| 1291 | // each structure to a different group. |
| 1292 | bool SameStructure; |
| 1293 | bool Inserted; |
| 1294 | unsigned CurrentGroupNum = 0; |
| 1295 | unsigned OuterGroupNum; |
| 1296 | DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; |
| 1297 | DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; |
| 1298 | DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; |
| 1299 | |
| 1300 | // Iterate over the candidates to determine its structural and overlapping |
| 1301 | // compatibility with other instructions |
| 1302 | DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; |
| 1303 | DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; |
| 1304 | for (CandIt = CandsForRepSubstring.begin(), |
| 1305 | CandEndIt = CandsForRepSubstring.end(); |
| 1306 | CandIt != CandEndIt; CandIt++) { |
| 1307 | |
| 1308 | // Determine if it has an assigned structural group already. |
| 1309 | // If not, we assign it one, and add it to our mapping. |
| 1310 | std::tie(args&: CandToGroupIt, args&: Inserted) = |
| 1311 | CandToGroup.try_emplace(Key: &*CandIt, Args&: CurrentGroupNum); |
| 1312 | if (Inserted) |
| 1313 | ++CurrentGroupNum; |
| 1314 | |
| 1315 | // Get the structural group number from the iterator. |
| 1316 | OuterGroupNum = CandToGroupIt->second; |
| 1317 | |
| 1318 | // Check if we already have a list of IRSimilarityCandidates for the current |
| 1319 | // structural group. Create one if one does not exist. |
| 1320 | CurrentGroupPair = StructuralGroups.find(Val: OuterGroupNum); |
| 1321 | if (CurrentGroupPair == StructuralGroups.end()) { |
| 1322 | IRSimilarityCandidate::createCanonicalMappingFor(CurrCand&: *CandIt); |
| 1323 | std::tie(args&: CurrentGroupPair, args&: Inserted) = StructuralGroups.insert( |
| 1324 | KV: std::make_pair(x&: OuterGroupNum, y: SimilarityGroup({*CandIt}))); |
| 1325 | } |
| 1326 | |
| 1327 | // Iterate over the IRSimilarityCandidates following the current |
| 1328 | // IRSimilarityCandidate in the list to determine whether the two |
| 1329 | // IRSimilarityCandidates are compatible. This is so we do not repeat pairs |
| 1330 | // of IRSimilarityCandidates. |
| 1331 | for (InnerCandIt = std::next(x: CandIt), |
| 1332 | InnerCandEndIt = CandsForRepSubstring.end(); |
| 1333 | InnerCandIt != InnerCandEndIt; InnerCandIt++) { |
| 1334 | |
| 1335 | // We check if the inner item has a group already, if it does, we skip it. |
| 1336 | CandToGroupItInner = CandToGroup.find(Val: &*InnerCandIt); |
| 1337 | if (CandToGroupItInner != CandToGroup.end()) |
| 1338 | continue; |
| 1339 | |
| 1340 | // Check if we have found structural similarity between two candidates |
| 1341 | // that fully contains the first and second candidates. |
| 1342 | std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> |
| 1343 | LargerPair = CheckLargerCands( |
| 1344 | CandA&: *CandIt, CandB&: *InnerCandIt, IndexToIncludedCand, CandToGroup&: CandToOverallGroup); |
| 1345 | |
| 1346 | // If a pair was found, it means that we can assume that these smaller |
| 1347 | // substrings are also structurally similar. Use the larger candidates to |
| 1348 | // determine the canonical mapping between the two sections. |
| 1349 | if (LargerPair.has_value()) { |
| 1350 | SameStructure = true; |
| 1351 | InnerCandIt->createCanonicalRelationFrom( |
| 1352 | SourceCand&: *CandIt, SourceCandLarge&: *LargerPair.value().first, TargetCandLarge&: *LargerPair.value().second); |
| 1353 | CandToGroup.insert(KV: std::make_pair(x: &*InnerCandIt, y&: OuterGroupNum)); |
| 1354 | CurrentGroupPair->second.push_back(x: *InnerCandIt); |
| 1355 | continue; |
| 1356 | } |
| 1357 | |
| 1358 | // Otherwise we determine if they have the same structure and add it to |
| 1359 | // vector if they match. |
| 1360 | ValueNumberMappingA.clear(); |
| 1361 | ValueNumberMappingB.clear(); |
| 1362 | SameStructure = IRSimilarityCandidate::compareStructure( |
| 1363 | A: *CandIt, B: *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB); |
| 1364 | if (!SameStructure) |
| 1365 | continue; |
| 1366 | |
| 1367 | InnerCandIt->createCanonicalRelationFrom(SourceCand&: *CandIt, ToSourceMapping&: ValueNumberMappingA, |
| 1368 | FromSourceMapping&: ValueNumberMappingB); |
| 1369 | CandToGroup.insert(KV: std::make_pair(x: &*InnerCandIt, y&: OuterGroupNum)); |
| 1370 | CurrentGroupPair->second.push_back(x: *InnerCandIt); |
| 1371 | } |
| 1372 | } |
| 1373 | } |
| 1374 | |
| 1375 | void IRSimilarityIdentifier::findCandidates( |
| 1376 | std::vector<IRInstructionData *> &InstrList, |
| 1377 | std::vector<unsigned> &IntegerMapping) { |
| 1378 | SuffixTree ST(IntegerMapping); |
| 1379 | |
| 1380 | std::vector<IRSimilarityCandidate> CandsForRepSubstring; |
| 1381 | std::vector<SimilarityGroup> NewCandidateGroups; |
| 1382 | |
| 1383 | DenseMap<unsigned, SimilarityGroup> StructuralGroups; |
| 1384 | DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand; |
| 1385 | DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; |
| 1386 | |
| 1387 | // Iterate over the subsequences found by the Suffix Tree to create |
| 1388 | // IRSimilarityCandidates for each repeated subsequence and determine which |
| 1389 | // instances are structurally similar to one another. |
| 1390 | |
| 1391 | // Sort the suffix tree from longest substring to shortest. |
| 1392 | std::vector<SuffixTree::RepeatedSubstring> RSes; |
| 1393 | for (SuffixTree::RepeatedSubstring &RS : ST) |
| 1394 | RSes.push_back(x: RS); |
| 1395 | |
| 1396 | llvm::stable_sort(Range&: RSes, C: [](const SuffixTree::RepeatedSubstring &LHS, |
| 1397 | const SuffixTree::RepeatedSubstring &RHS) { |
| 1398 | return LHS.Length > RHS.Length; |
| 1399 | }); |
| 1400 | for (SuffixTree::RepeatedSubstring &RS : RSes) { |
| 1401 | createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS, |
| 1402 | CandsForRepSubstring); |
| 1403 | |
| 1404 | if (CandsForRepSubstring.size() < 2) |
| 1405 | continue; |
| 1406 | |
| 1407 | findCandidateStructures(CandsForRepSubstring, StructuralGroups, |
| 1408 | IndexToIncludedCand, CandToOverallGroup&: CandToGroup); |
| 1409 | for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) { |
| 1410 | // We only add the group if it contains more than one |
| 1411 | // IRSimilarityCandidate. If there is only one, that means there is no |
| 1412 | // other repeated subsequence with the same structure. |
| 1413 | if (Group.second.size() > 1) { |
| 1414 | SimilarityCandidates->push_back(x: Group.second); |
| 1415 | // Iterate over each candidate in the group, and add an entry for each |
| 1416 | // instruction included with a mapping to a set of |
| 1417 | // IRSimilarityCandidates that include that instruction. |
| 1418 | for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) { |
| 1419 | for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx(); |
| 1420 | Idx <= Edx; ++Idx) |
| 1421 | IndexToIncludedCand[Idx].insert(V: &IRCand); |
| 1422 | // Add mapping of candidate to the overall similarity group number. |
| 1423 | CandToGroup.insert( |
| 1424 | KV: std::make_pair(x: &IRCand, y: SimilarityCandidates->size() - 1)); |
| 1425 | } |
| 1426 | } |
| 1427 | } |
| 1428 | |
| 1429 | CandsForRepSubstring.clear(); |
| 1430 | StructuralGroups.clear(); |
| 1431 | NewCandidateGroups.clear(); |
| 1432 | } |
| 1433 | } |
| 1434 | |
| 1435 | SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( |
| 1436 | ArrayRef<std::unique_ptr<Module>> Modules) { |
| 1437 | resetSimilarityCandidates(); |
| 1438 | |
| 1439 | std::vector<IRInstructionData *> InstrList; |
| 1440 | std::vector<unsigned> IntegerMapping; |
| 1441 | Mapper.InstClassifier.EnableBranches = this->EnableBranches; |
| 1442 | Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; |
| 1443 | Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; |
| 1444 | Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; |
| 1445 | Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; |
| 1446 | |
| 1447 | populateMapper(Modules, InstrList, IntegerMapping); |
| 1448 | findCandidates(InstrList, IntegerMapping); |
| 1449 | |
| 1450 | return *SimilarityCandidates; |
| 1451 | } |
| 1452 | |
| 1453 | SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { |
| 1454 | resetSimilarityCandidates(); |
| 1455 | Mapper.InstClassifier.EnableBranches = this->EnableBranches; |
| 1456 | Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; |
| 1457 | Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; |
| 1458 | Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; |
| 1459 | Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; |
| 1460 | |
| 1461 | std::vector<IRInstructionData *> InstrList; |
| 1462 | std::vector<unsigned> IntegerMapping; |
| 1463 | |
| 1464 | populateMapper(M, InstrList, IntegerMapping); |
| 1465 | findCandidates(InstrList, IntegerMapping); |
| 1466 | |
| 1467 | return *SimilarityCandidates; |
| 1468 | } |
| 1469 | |
| 1470 | INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier" , |
| 1471 | "ir-similarity-identifier" , false, true) |
| 1472 | |
| 1473 | IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() |
| 1474 | : ModulePass(ID) {} |
| 1475 | |
| 1476 | bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { |
| 1477 | IRSI.reset(p: new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, |
| 1478 | MatchCallsByName, !DisableIntrinsics, |
| 1479 | false)); |
| 1480 | return false; |
| 1481 | } |
| 1482 | |
| 1483 | bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { |
| 1484 | IRSI.reset(); |
| 1485 | return false; |
| 1486 | } |
| 1487 | |
| 1488 | bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { |
| 1489 | IRSI->findSimilarity(M); |
| 1490 | return false; |
| 1491 | } |
| 1492 | |
| 1493 | AnalysisKey IRSimilarityAnalysis::Key; |
| 1494 | IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, |
| 1495 | ModuleAnalysisManager &) { |
| 1496 | auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, |
| 1497 | MatchCallsByName, !DisableIntrinsics, |
| 1498 | false); |
| 1499 | IRSI.findSimilarity(M); |
| 1500 | return IRSI; |
| 1501 | } |
| 1502 | |
| 1503 | PreservedAnalyses |
| 1504 | IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { |
| 1505 | IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(IR&: M); |
| 1506 | std::optional<SimilarityGroupList> &SimilarityCandidatesOpt = |
| 1507 | IRSI.getSimilarity(); |
| 1508 | |
| 1509 | for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { |
| 1510 | OS << CandVec.size() << " candidates of length " |
| 1511 | << CandVec.begin()->getLength() << ". Found in: \n" ; |
| 1512 | for (IRSimilarityCandidate &Cand : CandVec) { |
| 1513 | OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() |
| 1514 | << ", Basic Block: " ; |
| 1515 | if (Cand.front()->Inst->getParent()->getName().str() == "" ) |
| 1516 | OS << "(unnamed)" ; |
| 1517 | else |
| 1518 | OS << Cand.front()->Inst->getParent()->getName().str(); |
| 1519 | OS << "\n Start Instruction: " ; |
| 1520 | Cand.frontInstruction()->print(O&: OS); |
| 1521 | OS << "\n End Instruction: " ; |
| 1522 | Cand.backInstruction()->print(O&: OS); |
| 1523 | OS << "\n" ; |
| 1524 | } |
| 1525 | } |
| 1526 | |
| 1527 | return PreservedAnalyses::all(); |
| 1528 | } |
| 1529 | |
| 1530 | char IRSimilarityIdentifierWrapperPass::ID = 0; |
| 1531 | |