| 1 | //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | // | 
|---|
| 9 | // \file | 
|---|
| 10 | // Implementation file for the IRSimilarityIdentifier for identifying | 
|---|
| 11 | // similarities in IR including the IRInstructionMapper. | 
|---|
| 12 | // | 
|---|
| 13 | //===----------------------------------------------------------------------===// | 
|---|
| 14 |  | 
|---|
| 15 | #include "llvm/Analysis/IRSimilarityIdentifier.h" | 
|---|
| 16 | #include "llvm/ADT/DenseMap.h" | 
|---|
| 17 | #include "llvm/ADT/SetOperations.h" | 
|---|
| 18 | #include "llvm/IR/Intrinsics.h" | 
|---|
| 19 | #include "llvm/IR/Operator.h" | 
|---|
| 20 | #include "llvm/IR/User.h" | 
|---|
| 21 | #include "llvm/InitializePasses.h" | 
|---|
| 22 | #include "llvm/Support/SuffixTree.h" | 
|---|
| 23 |  | 
|---|
| 24 | using namespace llvm; | 
|---|
| 25 | using namespace IRSimilarity; | 
|---|
| 26 |  | 
|---|
| 27 | namespace llvm { | 
|---|
| 28 | cl::opt<bool> | 
|---|
| 29 | DisableBranches( "no-ir-sim-branch-matching", cl::init(Val: false), | 
|---|
| 30 | cl::ReallyHidden, | 
|---|
| 31 | cl::desc( "disable similarity matching, and outlining, " | 
|---|
| 32 | "across branches for debugging purposes.")); | 
|---|
| 33 |  | 
|---|
| 34 | cl::opt<bool> | 
|---|
| 35 | DisableIndirectCalls( "no-ir-sim-indirect-calls", cl::init(Val: false), | 
|---|
| 36 | cl::ReallyHidden, | 
|---|
| 37 | cl::desc( "disable outlining indirect calls.")); | 
|---|
| 38 |  | 
|---|
| 39 | static cl::opt<bool> | 
|---|
| 40 | MatchCallsByName( "ir-sim-calls-by-name", cl::init(Val: false), cl::ReallyHidden, | 
|---|
| 41 | cl::desc( "only allow matching call instructions if the " | 
|---|
| 42 | "name and type signature match.")); | 
|---|
| 43 |  | 
|---|
| 44 | cl::opt<bool> | 
|---|
| 45 | DisableIntrinsics( "no-ir-sim-intrinsics", cl::init(Val: false), cl::ReallyHidden, | 
|---|
| 46 | cl::desc( "Don't match or outline intrinsics")); | 
|---|
| 47 | } // namespace llvm | 
|---|
| 48 |  | 
|---|
| 49 | IRInstructionData::IRInstructionData(Instruction &I, bool Legality, | 
|---|
| 50 | IRInstructionDataList &IDList) | 
|---|
| 51 | : Inst(&I), Legal(Legality), IDL(&IDList) { | 
|---|
| 52 | initializeInstruction(); | 
|---|
| 53 | } | 
|---|
| 54 |  | 
|---|
| 55 | void IRInstructionData::initializeInstruction() { | 
|---|
| 56 | // We check for whether we have a comparison instruction.  If it is, we | 
|---|
| 57 | // find the "less than" version of the predicate for consistency for | 
|---|
| 58 | // comparison instructions throught the program. | 
|---|
| 59 | if (CmpInst *C = dyn_cast<CmpInst>(Val: Inst)) { | 
|---|
| 60 | CmpInst::Predicate Predicate = predicateForConsistency(CI: C); | 
|---|
| 61 | if (Predicate != C->getPredicate()) | 
|---|
| 62 | RevisedPredicate = Predicate; | 
|---|
| 63 | } | 
|---|
| 64 |  | 
|---|
| 65 | // Here we collect the operands and their types for determining whether | 
|---|
| 66 | // the structure of the operand use matches between two different candidates. | 
|---|
| 67 | for (Use &OI : Inst->operands()) { | 
|---|
| 68 | if (isa<CmpInst>(Val: Inst) && RevisedPredicate) { | 
|---|
| 69 | // If we have a CmpInst where the predicate is reversed, it means the | 
|---|
| 70 | // operands must be reversed as well. | 
|---|
| 71 | OperVals.insert(I: OperVals.begin(), Elt: OI.get()); | 
|---|
| 72 | continue; | 
|---|
| 73 | } | 
|---|
| 74 |  | 
|---|
| 75 | OperVals.push_back(Elt: OI.get()); | 
|---|
| 76 | } | 
|---|
| 77 |  | 
|---|
| 78 | // We capture the incoming BasicBlocks as values as well as the incoming | 
|---|
| 79 | // Values in order to check for structural similarity. | 
|---|
| 80 | if (PHINode *PN = dyn_cast<PHINode>(Val: Inst)) | 
|---|
| 81 | llvm::append_range(C&: OperVals, R: PN->blocks()); | 
|---|
| 82 | } | 
|---|
| 83 |  | 
|---|
| 84 | IRInstructionData::IRInstructionData(IRInstructionDataList &IDList) | 
|---|
| 85 | : IDL(&IDList) {} | 
|---|
| 86 |  | 
|---|
| 87 | void IRInstructionData::setBranchSuccessors( | 
|---|
| 88 | DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { | 
|---|
| 89 | assert(isa<BranchInst>(Inst) && "Instruction must be branch"); | 
|---|
| 90 |  | 
|---|
| 91 | BranchInst *BI = cast<BranchInst>(Val: Inst); | 
|---|
| 92 | DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; | 
|---|
| 93 |  | 
|---|
| 94 | BBNumIt = BasicBlockToInteger.find(Val: BI->getParent()); | 
|---|
| 95 | assert(BBNumIt != BasicBlockToInteger.end() && | 
|---|
| 96 | "Could not find location for BasicBlock!"); | 
|---|
| 97 |  | 
|---|
| 98 | int CurrentBlockNumber = static_cast<int>(BBNumIt->second); | 
|---|
| 99 |  | 
|---|
| 100 | for (Value *V : getBlockOperVals()) { | 
|---|
| 101 | BasicBlock *Successor = cast<BasicBlock>(Val: V); | 
|---|
| 102 | BBNumIt = BasicBlockToInteger.find(Val: Successor); | 
|---|
| 103 | assert(BBNumIt != BasicBlockToInteger.end() && | 
|---|
| 104 | "Could not find number for BasicBlock!"); | 
|---|
| 105 | int OtherBlockNumber = static_cast<int>(BBNumIt->second); | 
|---|
| 106 |  | 
|---|
| 107 | int Relative = OtherBlockNumber - CurrentBlockNumber; | 
|---|
| 108 | RelativeBlockLocations.push_back(Elt: Relative); | 
|---|
| 109 | } | 
|---|
| 110 | } | 
|---|
| 111 |  | 
|---|
| 112 | ArrayRef<Value *> IRInstructionData::getBlockOperVals() { | 
|---|
| 113 | assert((isa<BranchInst>(Inst) || | 
|---|
| 114 | isa<PHINode>(Inst)) && "Instruction must be branch or PHINode"); | 
|---|
| 115 |  | 
|---|
| 116 | if (BranchInst *BI = dyn_cast<BranchInst>(Val: Inst)) | 
|---|
| 117 | return ArrayRef<Value *>( | 
|---|
| 118 | std::next(x: OperVals.begin(), n: BI->isConditional() ? 1 : 0), | 
|---|
| 119 | OperVals.end() | 
|---|
| 120 | ); | 
|---|
| 121 |  | 
|---|
| 122 | if (PHINode *PN = dyn_cast<PHINode>(Val: Inst)) | 
|---|
| 123 | return ArrayRef<Value *>( | 
|---|
| 124 | std::next(x: OperVals.begin(), n: PN->getNumIncomingValues()), | 
|---|
| 125 | OperVals.end() | 
|---|
| 126 | ); | 
|---|
| 127 |  | 
|---|
| 128 | return ArrayRef<Value *>(); | 
|---|
| 129 | } | 
|---|
| 130 |  | 
|---|
| 131 | void IRInstructionData::setCalleeName(bool MatchByName) { | 
|---|
| 132 | CallInst *CI = dyn_cast<CallInst>(Val: Inst); | 
|---|
| 133 | assert(CI && "Instruction must be call"); | 
|---|
| 134 |  | 
|---|
| 135 | CalleeName = ""; | 
|---|
| 136 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: Inst)) { | 
|---|
| 137 | // To hash intrinsics, we use the opcode, and types like the other | 
|---|
| 138 | // instructions, but also, the Intrinsic ID, and the Name of the | 
|---|
| 139 | // intrinsic. | 
|---|
| 140 | Intrinsic::ID IntrinsicID = II->getIntrinsicID(); | 
|---|
| 141 | FunctionType *FT = II->getFunctionType(); | 
|---|
| 142 | // If there is an overloaded name, we have to use the complex version | 
|---|
| 143 | // of getName to get the entire string. | 
|---|
| 144 | if (Intrinsic::isOverloaded(id: IntrinsicID)) | 
|---|
| 145 | CalleeName = | 
|---|
| 146 | Intrinsic::getName(Id: IntrinsicID, Tys: FT->params(), M: II->getModule(), FT); | 
|---|
| 147 | // If there is not an overloaded name, we only need to use this version. | 
|---|
| 148 | else | 
|---|
| 149 | CalleeName = Intrinsic::getName(id: IntrinsicID).str(); | 
|---|
| 150 |  | 
|---|
| 151 | return; | 
|---|
| 152 | } | 
|---|
| 153 |  | 
|---|
| 154 | if (!CI->isIndirectCall() && MatchByName) | 
|---|
| 155 | CalleeName = CI->getCalledFunction()->getName().str(); | 
|---|
| 156 | } | 
|---|
| 157 |  | 
|---|
| 158 | void IRInstructionData::setPHIPredecessors( | 
|---|
| 159 | DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { | 
|---|
| 160 | assert(isa<PHINode>(Inst) && "Instruction must be phi node"); | 
|---|
| 161 |  | 
|---|
| 162 | PHINode *PN = cast<PHINode>(Val: Inst); | 
|---|
| 163 | DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; | 
|---|
| 164 |  | 
|---|
| 165 | BBNumIt = BasicBlockToInteger.find(Val: PN->getParent()); | 
|---|
| 166 | assert(BBNumIt != BasicBlockToInteger.end() && | 
|---|
| 167 | "Could not find location for BasicBlock!"); | 
|---|
| 168 |  | 
|---|
| 169 | int CurrentBlockNumber = static_cast<int>(BBNumIt->second); | 
|---|
| 170 |  | 
|---|
| 171 | // Convert the incoming blocks of the PHINode to an integer value, based on | 
|---|
| 172 | // the relative distances between the current block and the incoming block. | 
|---|
| 173 | for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) { | 
|---|
| 174 | BasicBlock *Incoming = PN->getIncomingBlock(i: Idx); | 
|---|
| 175 | BBNumIt = BasicBlockToInteger.find(Val: Incoming); | 
|---|
| 176 | assert(BBNumIt != BasicBlockToInteger.end() && | 
|---|
| 177 | "Could not find number for BasicBlock!"); | 
|---|
| 178 | int OtherBlockNumber = static_cast<int>(BBNumIt->second); | 
|---|
| 179 |  | 
|---|
| 180 | int Relative = OtherBlockNumber - CurrentBlockNumber; | 
|---|
| 181 | RelativeBlockLocations.push_back(Elt: Relative); | 
|---|
| 182 | } | 
|---|
| 183 | } | 
|---|
| 184 |  | 
|---|
| 185 | CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { | 
|---|
| 186 | switch (CI->getPredicate()) { | 
|---|
| 187 | case CmpInst::FCMP_OGT: | 
|---|
| 188 | case CmpInst::FCMP_UGT: | 
|---|
| 189 | case CmpInst::FCMP_OGE: | 
|---|
| 190 | case CmpInst::FCMP_UGE: | 
|---|
| 191 | case CmpInst::ICMP_SGT: | 
|---|
| 192 | case CmpInst::ICMP_UGT: | 
|---|
| 193 | case CmpInst::ICMP_SGE: | 
|---|
| 194 | case CmpInst::ICMP_UGE: | 
|---|
| 195 | return CI->getSwappedPredicate(); | 
|---|
| 196 | default: | 
|---|
| 197 | return CI->getPredicate(); | 
|---|
| 198 | } | 
|---|
| 199 | } | 
|---|
| 200 |  | 
|---|
| 201 | CmpInst::Predicate IRInstructionData::getPredicate() const { | 
|---|
| 202 | assert(isa<CmpInst>(Inst) && | 
|---|
| 203 | "Can only get a predicate from a compare instruction"); | 
|---|
| 204 |  | 
|---|
| 205 | if (RevisedPredicate) | 
|---|
| 206 | return *RevisedPredicate; | 
|---|
| 207 |  | 
|---|
| 208 | return cast<CmpInst>(Val: Inst)->getPredicate(); | 
|---|
| 209 | } | 
|---|
| 210 |  | 
|---|
| 211 | StringRef IRInstructionData::getCalleeName() const { | 
|---|
| 212 | assert(isa<CallInst>(Inst) && | 
|---|
| 213 | "Can only get a name from a call instruction"); | 
|---|
| 214 |  | 
|---|
| 215 | assert(CalleeName && "CalleeName has not been set"); | 
|---|
| 216 |  | 
|---|
| 217 | return *CalleeName; | 
|---|
| 218 | } | 
|---|
| 219 |  | 
|---|
| 220 | bool IRSimilarity::isClose(const IRInstructionData &A, | 
|---|
| 221 | const IRInstructionData &B) { | 
|---|
| 222 |  | 
|---|
| 223 | if (!A.Legal || !B.Legal) | 
|---|
| 224 | return false; | 
|---|
| 225 |  | 
|---|
| 226 | // Check if we are performing the same sort of operation on the same types | 
|---|
| 227 | // but not on the same values. | 
|---|
| 228 | if (!A.Inst->isSameOperationAs(I: B.Inst)) { | 
|---|
| 229 | // If there is a predicate, this means that either there is a swapped | 
|---|
| 230 | // predicate, or that the types are different, we want to make sure that | 
|---|
| 231 | // the predicates are equivalent via swapping. | 
|---|
| 232 | if (isa<CmpInst>(Val: A.Inst) && isa<CmpInst>(Val: B.Inst)) { | 
|---|
| 233 |  | 
|---|
| 234 | if (A.getPredicate() != B.getPredicate()) | 
|---|
| 235 | return false; | 
|---|
| 236 |  | 
|---|
| 237 | // If the predicates are the same via swap, make sure that the types are | 
|---|
| 238 | // still the same. | 
|---|
| 239 | auto ZippedTypes = zip(t: A.OperVals, u: B.OperVals); | 
|---|
| 240 |  | 
|---|
| 241 | return all_of( | 
|---|
| 242 | Range&: ZippedTypes, P: [](std::tuple<llvm::Value *, llvm::Value *> R) { | 
|---|
| 243 | return std::get<0>(t&: R)->getType() == std::get<1>(t&: R)->getType(); | 
|---|
| 244 | }); | 
|---|
| 245 | } | 
|---|
| 246 |  | 
|---|
| 247 | return false; | 
|---|
| 248 | } | 
|---|
| 249 |  | 
|---|
| 250 | // Since any GEP Instruction operands after the first operand cannot be | 
|---|
| 251 | // defined by a register, we must make sure that the operands after the first | 
|---|
| 252 | // are the same in the two instructions | 
|---|
| 253 | if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: A.Inst)) { | 
|---|
| 254 | auto *OtherGEP = cast<GetElementPtrInst>(Val: B.Inst); | 
|---|
| 255 |  | 
|---|
| 256 | // If the instructions do not have the same inbounds restrictions, we do | 
|---|
| 257 | // not consider them the same. | 
|---|
| 258 | if (GEP->isInBounds() != OtherGEP->isInBounds()) | 
|---|
| 259 | return false; | 
|---|
| 260 |  | 
|---|
| 261 | auto ZippedOperands = zip(t: GEP->indices(), u: OtherGEP->indices()); | 
|---|
| 262 |  | 
|---|
| 263 | // We increment here since we do not care about the first instruction, | 
|---|
| 264 | // we only care about the following operands since they must be the | 
|---|
| 265 | // exact same to be considered similar. | 
|---|
| 266 | return all_of(Range: drop_begin(RangeOrContainer&: ZippedOperands), | 
|---|
| 267 | P: [](std::tuple<llvm::Use &, llvm::Use &> R) { | 
|---|
| 268 | return std::get<0>(t&: R) == std::get<1>(t&: R); | 
|---|
| 269 | }); | 
|---|
| 270 | } | 
|---|
| 271 |  | 
|---|
| 272 | // If the instructions are functions calls, we make sure that the function | 
|---|
| 273 | // name is the same.  We already know that the types are since is | 
|---|
| 274 | // isSameOperationAs is true. | 
|---|
| 275 | if (isa<CallInst>(Val: A.Inst) && isa<CallInst>(Val: B.Inst)) { | 
|---|
| 276 | if (A.getCalleeName() != B.getCalleeName()) | 
|---|
| 277 | return false; | 
|---|
| 278 | } | 
|---|
| 279 |  | 
|---|
| 280 | if (isa<BranchInst>(Val: A.Inst) && isa<BranchInst>(Val: B.Inst) && | 
|---|
| 281 | A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size()) | 
|---|
| 282 | return false; | 
|---|
| 283 |  | 
|---|
| 284 | return true; | 
|---|
| 285 | } | 
|---|
| 286 |  | 
|---|
| 287 | // TODO: This is the same as the MachineOutliner, and should be consolidated | 
|---|
| 288 | // into the same interface. | 
|---|
| 289 | void IRInstructionMapper::convertToUnsignedVec( | 
|---|
| 290 | BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, | 
|---|
| 291 | std::vector<unsigned> &IntegerMapping) { | 
|---|
| 292 | BasicBlock::iterator It = BB.begin(); | 
|---|
| 293 |  | 
|---|
| 294 | std::vector<unsigned> IntegerMappingForBB; | 
|---|
| 295 | std::vector<IRInstructionData *> InstrListForBB; | 
|---|
| 296 |  | 
|---|
| 297 | for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { | 
|---|
| 298 | switch (InstClassifier.visit(I&: *It)) { | 
|---|
| 299 | case InstrType::Legal: | 
|---|
| 300 | mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); | 
|---|
| 301 | break; | 
|---|
| 302 | case InstrType::Illegal: | 
|---|
| 303 | mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); | 
|---|
| 304 | break; | 
|---|
| 305 | case InstrType::Invisible: | 
|---|
| 306 | AddedIllegalLastTime = false; | 
|---|
| 307 | break; | 
|---|
| 308 | } | 
|---|
| 309 | } | 
|---|
| 310 |  | 
|---|
| 311 | if (AddedIllegalLastTime) | 
|---|
| 312 | mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, End: true); | 
|---|
| 313 | for (IRInstructionData *ID : InstrListForBB) | 
|---|
| 314 | this->IDL->push_back(Node&: *ID); | 
|---|
| 315 | llvm::append_range(C&: InstrList, R&: InstrListForBB); | 
|---|
| 316 | llvm::append_range(C&: IntegerMapping, R&: IntegerMappingForBB); | 
|---|
| 317 | } | 
|---|
| 318 |  | 
|---|
| 319 | // TODO: This is the same as the MachineOutliner, and should be consolidated | 
|---|
| 320 | // into the same interface. | 
|---|
| 321 | unsigned IRInstructionMapper::mapToLegalUnsigned( | 
|---|
| 322 | BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, | 
|---|
| 323 | std::vector<IRInstructionData *> &InstrListForBB) { | 
|---|
| 324 | // We added something legal, so we should unset the AddedLegalLastTime | 
|---|
| 325 | // flag. | 
|---|
| 326 | AddedIllegalLastTime = false; | 
|---|
| 327 |  | 
|---|
| 328 | // If we have at least two adjacent legal instructions (which may have | 
|---|
| 329 | // invisible instructions in between), remember that. | 
|---|
| 330 | if (CanCombineWithPrevInstr) | 
|---|
| 331 | HaveLegalRange = true; | 
|---|
| 332 | CanCombineWithPrevInstr = true; | 
|---|
| 333 |  | 
|---|
| 334 | // Get the integer for this instruction or give it the current | 
|---|
| 335 | // LegalInstrNumber. | 
|---|
| 336 | IRInstructionData *ID = allocateIRInstructionData(I&: *It, Legality: true, IDL&: *IDL); | 
|---|
| 337 | InstrListForBB.push_back(x: ID); | 
|---|
| 338 |  | 
|---|
| 339 | if (isa<BranchInst>(Val: *It)) | 
|---|
| 340 | ID->setBranchSuccessors(BasicBlockToInteger); | 
|---|
| 341 |  | 
|---|
| 342 | if (isa<CallInst>(Val: *It)) | 
|---|
| 343 | ID->setCalleeName(EnableMatchCallsByName); | 
|---|
| 344 |  | 
|---|
| 345 | if (isa<PHINode>(Val: *It)) | 
|---|
| 346 | ID->setPHIPredecessors(BasicBlockToInteger); | 
|---|
| 347 |  | 
|---|
| 348 | // Add to the instruction list | 
|---|
| 349 | bool WasInserted; | 
|---|
| 350 | DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator | 
|---|
| 351 | ResultIt; | 
|---|
| 352 | std::tie(args&: ResultIt, args&: WasInserted) = | 
|---|
| 353 | InstructionIntegerMap.insert(KV: std::make_pair(x&: ID, y&: LegalInstrNumber)); | 
|---|
| 354 | unsigned INumber = ResultIt->second; | 
|---|
| 355 |  | 
|---|
| 356 | // There was an insertion. | 
|---|
| 357 | if (WasInserted) | 
|---|
| 358 | LegalInstrNumber++; | 
|---|
| 359 |  | 
|---|
| 360 | IntegerMappingForBB.push_back(x: INumber); | 
|---|
| 361 |  | 
|---|
| 362 | // Make sure we don't overflow or use any integers reserved by the DenseMap. | 
|---|
| 363 | assert(LegalInstrNumber < IllegalInstrNumber && | 
|---|
| 364 | "Instruction mapping overflow!"); | 
|---|
| 365 |  | 
|---|
| 366 | assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && | 
|---|
| 367 | "Tried to assign DenseMap tombstone or empty key to instruction."); | 
|---|
| 368 | assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && | 
|---|
| 369 | "Tried to assign DenseMap tombstone or empty key to instruction."); | 
|---|
| 370 |  | 
|---|
| 371 | return INumber; | 
|---|
| 372 | } | 
|---|
| 373 |  | 
|---|
| 374 | IRInstructionData * | 
|---|
| 375 | IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, | 
|---|
| 376 | IRInstructionDataList &IDL) { | 
|---|
| 377 | return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); | 
|---|
| 378 | } | 
|---|
| 379 |  | 
|---|
| 380 | IRInstructionData * | 
|---|
| 381 | IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) { | 
|---|
| 382 | return new (InstDataAllocator->Allocate()) IRInstructionData(IDL); | 
|---|
| 383 | } | 
|---|
| 384 |  | 
|---|
| 385 | IRInstructionDataList * | 
|---|
| 386 | IRInstructionMapper::allocateIRInstructionDataList() { | 
|---|
| 387 | return new (IDLAllocator->Allocate()) IRInstructionDataList(); | 
|---|
| 388 | } | 
|---|
| 389 |  | 
|---|
| 390 | // TODO: This is the same as the MachineOutliner, and should be consolidated | 
|---|
| 391 | // into the same interface. | 
|---|
| 392 | unsigned IRInstructionMapper::mapToIllegalUnsigned( | 
|---|
| 393 | BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, | 
|---|
| 394 | std::vector<IRInstructionData *> &InstrListForBB, bool End) { | 
|---|
| 395 | // Can't combine an illegal instruction. Set the flag. | 
|---|
| 396 | CanCombineWithPrevInstr = false; | 
|---|
| 397 |  | 
|---|
| 398 | // Only add one illegal number per range of legal numbers. | 
|---|
| 399 | if (AddedIllegalLastTime) | 
|---|
| 400 | return IllegalInstrNumber; | 
|---|
| 401 |  | 
|---|
| 402 | IRInstructionData *ID = nullptr; | 
|---|
| 403 | if (!End) | 
|---|
| 404 | ID = allocateIRInstructionData(I&: *It, Legality: false, IDL&: *IDL); | 
|---|
| 405 | else | 
|---|
| 406 | ID = allocateIRInstructionData(IDL&: *IDL); | 
|---|
| 407 | InstrListForBB.push_back(x: ID); | 
|---|
| 408 |  | 
|---|
| 409 | // Remember that we added an illegal number last time. | 
|---|
| 410 | AddedIllegalLastTime = true; | 
|---|
| 411 | unsigned INumber = IllegalInstrNumber; | 
|---|
| 412 | IntegerMappingForBB.push_back(x: IllegalInstrNumber--); | 
|---|
| 413 |  | 
|---|
| 414 | assert(LegalInstrNumber < IllegalInstrNumber && | 
|---|
| 415 | "Instruction mapping overflow!"); | 
|---|
| 416 |  | 
|---|
| 417 | assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && | 
|---|
| 418 | "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); | 
|---|
| 419 |  | 
|---|
| 420 | assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && | 
|---|
| 421 | "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); | 
|---|
| 422 |  | 
|---|
| 423 | return INumber; | 
|---|
| 424 | } | 
|---|
| 425 |  | 
|---|
| 426 | IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, | 
|---|
| 427 | IRInstructionData *FirstInstIt, | 
|---|
| 428 | IRInstructionData *LastInstIt) | 
|---|
| 429 | : StartIdx(StartIdx), Len(Len) { | 
|---|
| 430 |  | 
|---|
| 431 | assert(FirstInstIt != nullptr && "Instruction is nullptr!"); | 
|---|
| 432 | assert(LastInstIt != nullptr && "Instruction is nullptr!"); | 
|---|
| 433 | assert(StartIdx + Len > StartIdx && | 
|---|
| 434 | "Overflow for IRSimilarityCandidate range?"); | 
|---|
| 435 | assert(Len - 1 == static_cast<unsigned>(std::distance( | 
|---|
| 436 | iterator(FirstInstIt), iterator(LastInstIt))) && | 
|---|
| 437 | "Length of the first and last IRInstructionData do not match the " | 
|---|
| 438 | "given length"); | 
|---|
| 439 |  | 
|---|
| 440 | // We iterate over the given instructions, and map each unique value | 
|---|
| 441 | // to a unique number in the IRSimilarityCandidate ValueToNumber and | 
|---|
| 442 | // NumberToValue maps.  A constant get its own value globally, the individual | 
|---|
| 443 | // uses of the constants are not considered to be unique. | 
|---|
| 444 | // | 
|---|
| 445 | // IR:                    Mapping Added: | 
|---|
| 446 | // %add1 = add i32 %a, c1    %add1 -> 3, %a -> 1, c1 -> 2 | 
|---|
| 447 | // %add2 = add i32 %a, %1    %add2 -> 4 | 
|---|
| 448 | // %add3 = add i32 c2, c1    %add3 -> 6, c2 -> 5 | 
|---|
| 449 | // | 
|---|
| 450 | // when replace with global values, starting from 1, would be | 
|---|
| 451 | // | 
|---|
| 452 | // 3 = add i32 1, 2 | 
|---|
| 453 | // 4 = add i32 1, 3 | 
|---|
| 454 | // 6 = add i32 5, 2 | 
|---|
| 455 | unsigned LocalValNumber = 1; | 
|---|
| 456 | IRInstructionDataList::iterator ID = iterator(*FirstInstIt); | 
|---|
| 457 | for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { | 
|---|
| 458 | // Map the operand values to an unsigned integer if it does not already | 
|---|
| 459 | // have an unsigned integer assigned to it. | 
|---|
| 460 | for (Value *Arg : ID->OperVals) | 
|---|
| 461 | if (ValueToNumber.try_emplace(Key: Arg, Args&: LocalValNumber).second) { | 
|---|
| 462 | NumberToValue.try_emplace(Key: LocalValNumber, Args&: Arg); | 
|---|
| 463 | LocalValNumber++; | 
|---|
| 464 | } | 
|---|
| 465 |  | 
|---|
| 466 | // Mapping the instructions to an unsigned integer if it is not already | 
|---|
| 467 | // exist in the mapping. | 
|---|
| 468 | if (ValueToNumber.try_emplace(Key: ID->Inst, Args&: LocalValNumber).second) { | 
|---|
| 469 | NumberToValue.try_emplace(Key: LocalValNumber, Args&: ID->Inst); | 
|---|
| 470 | LocalValNumber++; | 
|---|
| 471 | } | 
|---|
| 472 | } | 
|---|
| 473 |  | 
|---|
| 474 | // Setting the first and last instruction data pointers for the candidate.  If | 
|---|
| 475 | // we got through the entire for loop without hitting an assert, we know | 
|---|
| 476 | // that both of these instructions are not nullptrs. | 
|---|
| 477 | FirstInst = FirstInstIt; | 
|---|
| 478 | LastInst = LastInstIt; | 
|---|
| 479 |  | 
|---|
| 480 | // Add the basic blocks contained in the set into the global value numbering. | 
|---|
| 481 | DenseSet<BasicBlock *> BBSet; | 
|---|
| 482 | getBasicBlocks(BBSet); | 
|---|
| 483 | for (BasicBlock *BB : BBSet) { | 
|---|
| 484 | if (ValueToNumber.try_emplace(Key: BB, Args&: LocalValNumber).second) { | 
|---|
| 485 | NumberToValue.try_emplace(Key: LocalValNumber, Args&: BB); | 
|---|
| 486 | LocalValNumber++; | 
|---|
| 487 | } | 
|---|
| 488 | } | 
|---|
| 489 | } | 
|---|
| 490 |  | 
|---|
| 491 | bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, | 
|---|
| 492 | const IRSimilarityCandidate &B) { | 
|---|
| 493 | if (A.getLength() != B.getLength()) | 
|---|
| 494 | return false; | 
|---|
| 495 |  | 
|---|
| 496 | auto InstrDataForBoth = | 
|---|
| 497 | zip(t: make_range(x: A.begin(), y: A.end()), u: make_range(x: B.begin(), y: B.end())); | 
|---|
| 498 |  | 
|---|
| 499 | return all_of(Range&: InstrDataForBoth, | 
|---|
| 500 | P: [](std::tuple<IRInstructionData &, IRInstructionData &> R) { | 
|---|
| 501 | IRInstructionData &A = std::get<0>(t&: R); | 
|---|
| 502 | IRInstructionData &B = std::get<1>(t&: R); | 
|---|
| 503 | if (!A.Legal || !B.Legal) | 
|---|
| 504 | return false; | 
|---|
| 505 | return isClose(A, B); | 
|---|
| 506 | }); | 
|---|
| 507 | } | 
|---|
| 508 |  | 
|---|
| 509 | /// Determine if one or more of the assigned global value numbers for the | 
|---|
| 510 | /// operands in \p TargetValueNumbers is in the current mapping set for operand | 
|---|
| 511 | /// numbers in \p SourceOperands.  The set of possible corresponding global | 
|---|
| 512 | /// value numbers are replaced with the most recent version of compatible | 
|---|
| 513 | /// values. | 
|---|
| 514 | /// | 
|---|
| 515 | /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global | 
|---|
| 516 | /// value number for the source IRInstructionCandidate. | 
|---|
| 517 | /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source | 
|---|
| 518 | /// IRSimilarityCandidate global value numbers to a set of possible numbers in | 
|---|
| 519 | /// the target. | 
|---|
| 520 | /// \param [in] SourceOperands - The operands in the original | 
|---|
| 521 | /// IRSimilarityCandidate in the current instruction. | 
|---|
| 522 | /// \param [in] TargetValueNumbers - The global value numbers of the operands in | 
|---|
| 523 | /// the corresponding Instruction in the other IRSimilarityCandidate. | 
|---|
| 524 | /// \returns true if there exists a possible mapping between the source | 
|---|
| 525 | /// Instruction operands and the target Instruction operands, and false if not. | 
|---|
| 526 | static bool checkNumberingAndReplaceCommutative( | 
|---|
| 527 | const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, | 
|---|
| 528 | DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, | 
|---|
| 529 | ArrayRef<Value *> &SourceOperands, | 
|---|
| 530 | DenseSet<unsigned> &TargetValueNumbers){ | 
|---|
| 531 |  | 
|---|
| 532 | DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; | 
|---|
| 533 |  | 
|---|
| 534 | unsigned ArgVal; | 
|---|
| 535 | bool WasInserted; | 
|---|
| 536 |  | 
|---|
| 537 | // Iterate over the operands in the source IRSimilarityCandidate to determine | 
|---|
| 538 | // whether there exists an operand in the other IRSimilarityCandidate that | 
|---|
| 539 | // creates a valid mapping of Value to Value between the | 
|---|
| 540 | // IRSimilarityCaniddates. | 
|---|
| 541 | for (Value *V : SourceOperands) { | 
|---|
| 542 | ArgVal = SourceValueToNumberMapping.find(Val: V)->second; | 
|---|
| 543 |  | 
|---|
| 544 | // Instead of finding a current mapping, we attempt to insert a set. | 
|---|
| 545 | std::tie(args&: ValueMappingIt, args&: WasInserted) = CurrentSrcTgtNumberMapping.insert( | 
|---|
| 546 | KV: std::make_pair(x&: ArgVal, y&: TargetValueNumbers)); | 
|---|
| 547 |  | 
|---|
| 548 | // We need to iterate over the items in other IRSimilarityCandidate's | 
|---|
| 549 | // Instruction to determine whether there is a valid mapping of | 
|---|
| 550 | // Value to Value. | 
|---|
| 551 | DenseSet<unsigned> NewSet; | 
|---|
| 552 | for (unsigned &Curr : ValueMappingIt->second) | 
|---|
| 553 | // If we can find the value in the mapping, we add it to the new set. | 
|---|
| 554 | if (TargetValueNumbers.contains(V: Curr)) | 
|---|
| 555 | NewSet.insert(V: Curr); | 
|---|
| 556 |  | 
|---|
| 557 | // If we could not find a Value, return 0. | 
|---|
| 558 | if (NewSet.empty()) | 
|---|
| 559 | return false; | 
|---|
| 560 |  | 
|---|
| 561 | // Otherwise replace the old mapping with the newly constructed one. | 
|---|
| 562 | if (NewSet.size() != ValueMappingIt->second.size()) | 
|---|
| 563 | ValueMappingIt->second.swap(RHS&: NewSet); | 
|---|
| 564 |  | 
|---|
| 565 | // We have reached no conclusions about the mapping, and cannot remove | 
|---|
| 566 | // any items from the other operands, so we move to check the next operand. | 
|---|
| 567 | if (ValueMappingIt->second.size() != 1) | 
|---|
| 568 | continue; | 
|---|
| 569 |  | 
|---|
| 570 | unsigned ValToRemove = *ValueMappingIt->second.begin(); | 
|---|
| 571 | // When there is only one item left in the mapping for and operand, remove | 
|---|
| 572 | // the value from the other operands.  If it results in there being no | 
|---|
| 573 | // mapping, return false, it means the mapping is wrong | 
|---|
| 574 | for (Value *InnerV : SourceOperands) { | 
|---|
| 575 | if (V == InnerV) | 
|---|
| 576 | continue; | 
|---|
| 577 |  | 
|---|
| 578 | unsigned InnerVal = SourceValueToNumberMapping.find(Val: InnerV)->second; | 
|---|
| 579 | ValueMappingIt = CurrentSrcTgtNumberMapping.find(Val: InnerVal); | 
|---|
| 580 | if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) | 
|---|
| 581 | continue; | 
|---|
| 582 |  | 
|---|
| 583 | ValueMappingIt->second.erase(V: ValToRemove); | 
|---|
| 584 | if (ValueMappingIt->second.empty()) | 
|---|
| 585 | return false; | 
|---|
| 586 | } | 
|---|
| 587 | } | 
|---|
| 588 |  | 
|---|
| 589 | return true; | 
|---|
| 590 | } | 
|---|
| 591 |  | 
|---|
| 592 | /// Determine if operand number \p TargetArgVal is in the current mapping set | 
|---|
| 593 | /// for operand number \p SourceArgVal. | 
|---|
| 594 | /// | 
|---|
| 595 | /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global | 
|---|
| 596 | /// value numbers from source IRSimilarityCandidate to target | 
|---|
| 597 | /// IRSimilarityCandidate. | 
|---|
| 598 | /// \param [in] SourceArgVal The global value number for an operand in the | 
|---|
| 599 | /// in the original candidate. | 
|---|
| 600 | /// \param [in] TargetArgVal The global value number for the corresponding | 
|---|
| 601 | /// operand in the other candidate. | 
|---|
| 602 | /// \returns True if there exists a mapping and false if not. | 
|---|
| 603 | bool checkNumberingAndReplace( | 
|---|
| 604 | DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, | 
|---|
| 605 | unsigned SourceArgVal, unsigned TargetArgVal) { | 
|---|
| 606 | // We are given two unsigned integers representing the global values of | 
|---|
| 607 | // the operands in different IRSimilarityCandidates and a current mapping | 
|---|
| 608 | // between the two. | 
|---|
| 609 | // | 
|---|
| 610 | // Source Operand GVN: 1 | 
|---|
| 611 | // Target Operand GVN: 2 | 
|---|
| 612 | // CurrentMapping: {1: {1, 2}} | 
|---|
| 613 | // | 
|---|
| 614 | // Since we have mapping, and the target operand is contained in the set, we | 
|---|
| 615 | // update it to: | 
|---|
| 616 | // CurrentMapping: {1: {2}} | 
|---|
| 617 | // and can return true. But, if the mapping was | 
|---|
| 618 | // CurrentMapping: {1: {3}} | 
|---|
| 619 | // we would return false. | 
|---|
| 620 |  | 
|---|
| 621 | bool WasInserted; | 
|---|
| 622 | DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; | 
|---|
| 623 |  | 
|---|
| 624 | std::tie(args&: Val, args&: WasInserted) = CurrentSrcTgtNumberMapping.insert( | 
|---|
| 625 | KV: std::make_pair(x&: SourceArgVal, y: DenseSet<unsigned>({TargetArgVal}))); | 
|---|
| 626 |  | 
|---|
| 627 | // If we created a new mapping, then we are done. | 
|---|
| 628 | if (WasInserted) | 
|---|
| 629 | return true; | 
|---|
| 630 |  | 
|---|
| 631 | // If there is more than one option in the mapping set, and the target value | 
|---|
| 632 | // is included in the mapping set replace that set with one that only includes | 
|---|
| 633 | // the target value, as it is the only valid mapping via the non commutative | 
|---|
| 634 | // instruction. | 
|---|
| 635 |  | 
|---|
| 636 | DenseSet<unsigned> &TargetSet = Val->second; | 
|---|
| 637 | if (TargetSet.size() > 1 && TargetSet.contains(V: TargetArgVal)) { | 
|---|
| 638 | TargetSet.clear(); | 
|---|
| 639 | TargetSet.insert(V: TargetArgVal); | 
|---|
| 640 | return true; | 
|---|
| 641 | } | 
|---|
| 642 |  | 
|---|
| 643 | // Return true if we can find the value in the set. | 
|---|
| 644 | return TargetSet.contains(V: TargetArgVal); | 
|---|
| 645 | } | 
|---|
| 646 |  | 
|---|
| 647 | bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( | 
|---|
| 648 | OperandMapping A, OperandMapping B) { | 
|---|
| 649 | // Iterators to keep track of where we are in the operands for each | 
|---|
| 650 | // Instruction. | 
|---|
| 651 | ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); | 
|---|
| 652 | ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); | 
|---|
| 653 | unsigned OperandLength = A.OperVals.size(); | 
|---|
| 654 |  | 
|---|
| 655 | // For each operand, get the value numbering and ensure it is consistent. | 
|---|
| 656 | for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { | 
|---|
| 657 | unsigned OperValA = A.IRSC.ValueToNumber.find(Val: *VItA)->second; | 
|---|
| 658 | unsigned OperValB = B.IRSC.ValueToNumber.find(Val: *VItB)->second; | 
|---|
| 659 |  | 
|---|
| 660 | // Attempt to add a set with only the target value.  If there is no mapping | 
|---|
| 661 | // we can create it here. | 
|---|
| 662 | // | 
|---|
| 663 | // For an instruction like a subtraction: | 
|---|
| 664 | // IRSimilarityCandidateA:  IRSimilarityCandidateB: | 
|---|
| 665 | // %resultA = sub %a, %b    %resultB = sub %d, %e | 
|---|
| 666 | // | 
|---|
| 667 | // We map %a -> %d and %b -> %e. | 
|---|
| 668 | // | 
|---|
| 669 | // And check to see whether their mapping is consistent in | 
|---|
| 670 | // checkNumberingAndReplace. | 
|---|
| 671 |  | 
|---|
| 672 | if (!checkNumberingAndReplace(CurrentSrcTgtNumberMapping&: A.ValueNumberMapping, SourceArgVal: OperValA, TargetArgVal: OperValB)) | 
|---|
| 673 | return false; | 
|---|
| 674 |  | 
|---|
| 675 | if (!checkNumberingAndReplace(CurrentSrcTgtNumberMapping&: B.ValueNumberMapping, SourceArgVal: OperValB, TargetArgVal: OperValA)) | 
|---|
| 676 | return false; | 
|---|
| 677 | } | 
|---|
| 678 | return true; | 
|---|
| 679 | } | 
|---|
| 680 |  | 
|---|
| 681 | bool IRSimilarityCandidate::compareCommutativeOperandMapping( | 
|---|
| 682 | OperandMapping A, OperandMapping B) { | 
|---|
| 683 | DenseSet<unsigned> ValueNumbersA; | 
|---|
| 684 | DenseSet<unsigned> ValueNumbersB; | 
|---|
| 685 |  | 
|---|
| 686 | ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); | 
|---|
| 687 | ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); | 
|---|
| 688 | unsigned OperandLength = A.OperVals.size(); | 
|---|
| 689 |  | 
|---|
| 690 | // Find the value number sets for the operands. | 
|---|
| 691 | for (unsigned Idx = 0; Idx < OperandLength; | 
|---|
| 692 | Idx++, VItA++, VItB++) { | 
|---|
| 693 | ValueNumbersA.insert(V: A.IRSC.ValueToNumber.find(Val: *VItA)->second); | 
|---|
| 694 | ValueNumbersB.insert(V: B.IRSC.ValueToNumber.find(Val: *VItB)->second); | 
|---|
| 695 | } | 
|---|
| 696 |  | 
|---|
| 697 | // Iterate over the operands in the first IRSimilarityCandidate and make sure | 
|---|
| 698 | // there exists a possible mapping with the operands in the second | 
|---|
| 699 | // IRSimilarityCandidate. | 
|---|
| 700 | if (!checkNumberingAndReplaceCommutative(SourceValueToNumberMapping: A.IRSC.ValueToNumber, | 
|---|
| 701 | CurrentSrcTgtNumberMapping&: A.ValueNumberMapping, SourceOperands&: A.OperVals, | 
|---|
| 702 | TargetValueNumbers&: ValueNumbersB)) | 
|---|
| 703 | return false; | 
|---|
| 704 |  | 
|---|
| 705 | // Iterate over the operands in the second IRSimilarityCandidate and make sure | 
|---|
| 706 | // there exists a possible mapping with the operands in the first | 
|---|
| 707 | // IRSimilarityCandidate. | 
|---|
| 708 | if (!checkNumberingAndReplaceCommutative(SourceValueToNumberMapping: B.IRSC.ValueToNumber, | 
|---|
| 709 | CurrentSrcTgtNumberMapping&: B.ValueNumberMapping, SourceOperands&: B.OperVals, | 
|---|
| 710 | TargetValueNumbers&: ValueNumbersA)) | 
|---|
| 711 | return false; | 
|---|
| 712 |  | 
|---|
| 713 | return true; | 
|---|
| 714 | } | 
|---|
| 715 |  | 
|---|
| 716 | bool IRSimilarityCandidate::compareAssignmentMapping( | 
|---|
| 717 | const unsigned InstValA, const unsigned &InstValB, | 
|---|
| 718 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, | 
|---|
| 719 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { | 
|---|
| 720 | DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; | 
|---|
| 721 | bool WasInserted; | 
|---|
| 722 | std::tie(args&: ValueMappingIt, args&: WasInserted) = ValueNumberMappingA.insert( | 
|---|
| 723 | KV: std::make_pair(x: InstValA, y: DenseSet<unsigned>({InstValB}))); | 
|---|
| 724 | if (!WasInserted && !ValueMappingIt->second.contains(V: InstValB)) | 
|---|
| 725 | return false; | 
|---|
| 726 | else if (ValueMappingIt->second.size() != 1) { | 
|---|
| 727 | for (unsigned OtherVal : ValueMappingIt->second) { | 
|---|
| 728 | if (OtherVal == InstValB) | 
|---|
| 729 | continue; | 
|---|
| 730 | auto OtherValIt = ValueNumberMappingA.find(Val: OtherVal); | 
|---|
| 731 | if (OtherValIt == ValueNumberMappingA.end()) | 
|---|
| 732 | continue; | 
|---|
| 733 | OtherValIt->second.erase(V: InstValA); | 
|---|
| 734 | } | 
|---|
| 735 | ValueNumberMappingA.erase(I: ValueMappingIt); | 
|---|
| 736 | std::tie(args&: ValueMappingIt, args&: WasInserted) = ValueNumberMappingA.insert( | 
|---|
| 737 | KV: std::make_pair(x: InstValA, y: DenseSet<unsigned>({InstValB}))); | 
|---|
| 738 | } | 
|---|
| 739 |  | 
|---|
| 740 | return true; | 
|---|
| 741 | } | 
|---|
| 742 |  | 
|---|
| 743 | bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A, | 
|---|
| 744 | RelativeLocMapping B) { | 
|---|
| 745 | // Get the basic blocks the label refers to. | 
|---|
| 746 | BasicBlock *ABB = cast<BasicBlock>(Val: A.OperVal); | 
|---|
| 747 | BasicBlock *BBB = cast<BasicBlock>(Val: B.OperVal); | 
|---|
| 748 |  | 
|---|
| 749 | // Get the basic blocks contained in each region. | 
|---|
| 750 | DenseSet<BasicBlock *> BasicBlockA; | 
|---|
| 751 | DenseSet<BasicBlock *> BasicBlockB; | 
|---|
| 752 | A.IRSC.getBasicBlocks(BBSet&: BasicBlockA); | 
|---|
| 753 | B.IRSC.getBasicBlocks(BBSet&: BasicBlockB); | 
|---|
| 754 |  | 
|---|
| 755 | // Determine if the block is contained in the region. | 
|---|
| 756 | bool AContained = BasicBlockA.contains(V: ABB); | 
|---|
| 757 | bool BContained = BasicBlockB.contains(V: BBB); | 
|---|
| 758 |  | 
|---|
| 759 | // Both blocks need to be contained in the region, or both need to be outside | 
|---|
| 760 | // the region. | 
|---|
| 761 | if (AContained != BContained) | 
|---|
| 762 | return false; | 
|---|
| 763 |  | 
|---|
| 764 | // If both are contained, then we need to make sure that the relative | 
|---|
| 765 | // distance to the target blocks are the same. | 
|---|
| 766 | if (AContained) | 
|---|
| 767 | return A.RelativeLocation == B.RelativeLocation; | 
|---|
| 768 | return true; | 
|---|
| 769 | } | 
|---|
| 770 |  | 
|---|
| 771 | bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, | 
|---|
| 772 | const IRSimilarityCandidate &B) { | 
|---|
| 773 | DenseMap<unsigned, DenseSet<unsigned>> MappingA; | 
|---|
| 774 | DenseMap<unsigned, DenseSet<unsigned>> MappingB; | 
|---|
| 775 | return IRSimilarityCandidate::compareStructure(A, B, ValueNumberMappingA&: MappingA, ValueNumberMappingB&: MappingB); | 
|---|
| 776 | } | 
|---|
| 777 |  | 
|---|
| 778 | typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &, | 
|---|
| 779 | SmallVector<int, 4> &, ArrayRef<Value *> &, | 
|---|
| 780 | ArrayRef<Value *> &> | 
|---|
| 781 | ZippedRelativeLocationsT; | 
|---|
| 782 |  | 
|---|
| 783 | bool IRSimilarityCandidate::compareStructure( | 
|---|
| 784 | const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, | 
|---|
| 785 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, | 
|---|
| 786 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { | 
|---|
| 787 | if (A.getLength() != B.getLength()) | 
|---|
| 788 | return false; | 
|---|
| 789 |  | 
|---|
| 790 | if (A.ValueToNumber.size() != B.ValueToNumber.size()) | 
|---|
| 791 | return false; | 
|---|
| 792 |  | 
|---|
| 793 | iterator ItA = A.begin(); | 
|---|
| 794 | iterator ItB = B.begin(); | 
|---|
| 795 |  | 
|---|
| 796 | // These ValueNumber Mapping sets create a create a mapping between the values | 
|---|
| 797 | // in one candidate to values in the other candidate.  If we create a set with | 
|---|
| 798 | // one element, and that same element maps to the original element in the | 
|---|
| 799 | // candidate we have a good mapping. | 
|---|
| 800 |  | 
|---|
| 801 | // Iterate over the instructions contained in each candidate | 
|---|
| 802 | unsigned SectionLength = A.getStartIdx() + A.getLength(); | 
|---|
| 803 | for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; | 
|---|
| 804 | ItA++, ItB++, Loc++) { | 
|---|
| 805 | // Make sure the instructions are similar to one another. | 
|---|
| 806 | if (!isClose(A: *ItA, B: *ItB)) | 
|---|
| 807 | return false; | 
|---|
| 808 |  | 
|---|
| 809 | Instruction *IA = ItA->Inst; | 
|---|
| 810 | Instruction *IB = ItB->Inst; | 
|---|
| 811 |  | 
|---|
| 812 | if (!ItA->Legal || !ItB->Legal) | 
|---|
| 813 | return false; | 
|---|
| 814 |  | 
|---|
| 815 | // Get the operand sets for the instructions. | 
|---|
| 816 | ArrayRef<Value *> OperValsA = ItA->OperVals; | 
|---|
| 817 | ArrayRef<Value *> OperValsB = ItB->OperVals; | 
|---|
| 818 |  | 
|---|
| 819 | unsigned InstValA = A.ValueToNumber.find(Val: IA)->second; | 
|---|
| 820 | unsigned InstValB = B.ValueToNumber.find(Val: IB)->second; | 
|---|
| 821 |  | 
|---|
| 822 | // Ensure that the mappings for the instructions exists. | 
|---|
| 823 | if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA, | 
|---|
| 824 | ValueNumberMappingB)) | 
|---|
| 825 | return false; | 
|---|
| 826 |  | 
|---|
| 827 | if (!compareAssignmentMapping(InstValA: InstValB, InstValB: InstValA, ValueNumberMappingA&: ValueNumberMappingB, | 
|---|
| 828 | ValueNumberMappingB&: ValueNumberMappingA)) | 
|---|
| 829 | return false; | 
|---|
| 830 |  | 
|---|
| 831 | // We have different paths for commutative instructions and non-commutative | 
|---|
| 832 | // instructions since commutative instructions could allow multiple mappings | 
|---|
| 833 | // to certain values. | 
|---|
| 834 | if (IA->isCommutative() && !isa<FPMathOperator>(Val: IA) && | 
|---|
| 835 | !isa<IntrinsicInst>(Val: IA)) { | 
|---|
| 836 | if (!compareCommutativeOperandMapping( | 
|---|
| 837 | A: {.IRSC: A, .OperVals: OperValsA, .ValueNumberMapping: ValueNumberMappingA}, | 
|---|
| 838 | B: {.IRSC: B, .OperVals: OperValsB, .ValueNumberMapping: ValueNumberMappingB})) | 
|---|
| 839 | return false; | 
|---|
| 840 | continue; | 
|---|
| 841 | } | 
|---|
| 842 |  | 
|---|
| 843 | // Handle the non-commutative cases. | 
|---|
| 844 | if (!compareNonCommutativeOperandMapping( | 
|---|
| 845 | A: {.IRSC: A, .OperVals: OperValsA, .ValueNumberMapping: ValueNumberMappingA}, | 
|---|
| 846 | B: {.IRSC: B, .OperVals: OperValsB, .ValueNumberMapping: ValueNumberMappingB})) | 
|---|
| 847 | return false; | 
|---|
| 848 |  | 
|---|
| 849 | // Here we check that between two corresponding instructions, | 
|---|
| 850 | // when referring to a basic block in the same region, the | 
|---|
| 851 | // relative locations are the same. And, that the instructions refer to | 
|---|
| 852 | // basic blocks outside the region in the same corresponding locations. | 
|---|
| 853 |  | 
|---|
| 854 | // We are able to make the assumption about blocks outside of the region | 
|---|
| 855 | // since the target block labels are considered values and will follow the | 
|---|
| 856 | // same number matching that we defined for the other instructions in the | 
|---|
| 857 | // region.  So, at this point, in each location we target a specific block | 
|---|
| 858 | // outside the region, we are targeting a corresponding block in each | 
|---|
| 859 | // analagous location in the region we are comparing to. | 
|---|
| 860 | if (!(isa<BranchInst>(Val: IA) && isa<BranchInst>(Val: IB)) && | 
|---|
| 861 | !(isa<PHINode>(Val: IA) && isa<PHINode>(Val: IB))) | 
|---|
| 862 | continue; | 
|---|
| 863 |  | 
|---|
| 864 | SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations; | 
|---|
| 865 | SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations; | 
|---|
| 866 | ArrayRef<Value *> ABL = ItA->getBlockOperVals(); | 
|---|
| 867 | ArrayRef<Value *> BBL = ItB->getBlockOperVals(); | 
|---|
| 868 |  | 
|---|
| 869 | // Check to make sure that the number of operands, and branching locations | 
|---|
| 870 | // between BranchInsts is the same. | 
|---|
| 871 | if (RelBlockLocsA.size() != RelBlockLocsB.size() && | 
|---|
| 872 | ABL.size() != BBL.size()) | 
|---|
| 873 | return false; | 
|---|
| 874 |  | 
|---|
| 875 | assert(RelBlockLocsA.size() == ABL.size() && | 
|---|
| 876 | "Block information vectors not the same size."); | 
|---|
| 877 | assert(RelBlockLocsB.size() == BBL.size() && | 
|---|
| 878 | "Block information vectors not the same size."); | 
|---|
| 879 |  | 
|---|
| 880 | ZippedRelativeLocationsT ZippedRelativeLocations = | 
|---|
| 881 | zip(t&: RelBlockLocsA, u&: RelBlockLocsB, args&: ABL, args&: BBL); | 
|---|
| 882 | if (any_of(Range&: ZippedRelativeLocations, | 
|---|
| 883 | P: [&A, &B](std::tuple<int, int, Value *, Value *> R) { | 
|---|
| 884 | return !checkRelativeLocations( | 
|---|
| 885 | A: {.IRSC: A, .RelativeLocation: std::get<0>(t&: R), .OperVal: std::get<2>(t&: R)}, | 
|---|
| 886 | B: {.IRSC: B, .RelativeLocation: std::get<1>(t&: R), .OperVal: std::get<3>(t&: R)}); | 
|---|
| 887 | })) | 
|---|
| 888 | return false; | 
|---|
| 889 | } | 
|---|
| 890 | return true; | 
|---|
| 891 | } | 
|---|
| 892 |  | 
|---|
| 893 | bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, | 
|---|
| 894 | const IRSimilarityCandidate &B) { | 
|---|
| 895 | auto DoesOverlap = [](const IRSimilarityCandidate &X, | 
|---|
| 896 | const IRSimilarityCandidate &Y) { | 
|---|
| 897 | // Check: | 
|---|
| 898 | // XXXXXX        X starts before Y ends | 
|---|
| 899 | //      YYYYYYY  Y starts after X starts | 
|---|
| 900 | return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; | 
|---|
| 901 | }; | 
|---|
| 902 |  | 
|---|
| 903 | return DoesOverlap(A, B) || DoesOverlap(B, A); | 
|---|
| 904 | } | 
|---|
| 905 |  | 
|---|
| 906 | void IRSimilarityIdentifier::populateMapper( | 
|---|
| 907 | Module &M, std::vector<IRInstructionData *> &InstrList, | 
|---|
| 908 | std::vector<unsigned> &IntegerMapping) { | 
|---|
| 909 |  | 
|---|
| 910 | std::vector<IRInstructionData *> InstrListForModule; | 
|---|
| 911 | std::vector<unsigned> IntegerMappingForModule; | 
|---|
| 912 | // Iterate over the functions in the module to map each Instruction in each | 
|---|
| 913 | // BasicBlock to an unsigned integer. | 
|---|
| 914 | Mapper.initializeForBBs(M); | 
|---|
| 915 |  | 
|---|
| 916 | for (Function &F : M) { | 
|---|
| 917 |  | 
|---|
| 918 | if (F.empty()) | 
|---|
| 919 | continue; | 
|---|
| 920 |  | 
|---|
| 921 | for (BasicBlock &BB : F) { | 
|---|
| 922 |  | 
|---|
| 923 | // BB has potential to have similarity since it has a size greater than 2 | 
|---|
| 924 | // and can therefore match other regions greater than 2. Map it to a list | 
|---|
| 925 | // of unsigned integers. | 
|---|
| 926 | Mapper.convertToUnsignedVec(BB, InstrList&: InstrListForModule, | 
|---|
| 927 | IntegerMapping&: IntegerMappingForModule); | 
|---|
| 928 | } | 
|---|
| 929 |  | 
|---|
| 930 | BasicBlock::iterator It = F.begin()->end(); | 
|---|
| 931 | Mapper.mapToIllegalUnsigned(It, IntegerMappingForBB&: IntegerMappingForModule, InstrListForBB&: InstrListForModule, | 
|---|
| 932 | End: true); | 
|---|
| 933 | if (InstrListForModule.size() > 0) | 
|---|
| 934 | Mapper.IDL->push_back(Node&: *InstrListForModule.back()); | 
|---|
| 935 | } | 
|---|
| 936 |  | 
|---|
| 937 | // Insert the InstrListForModule at the end of the overall InstrList so that | 
|---|
| 938 | // we can have a long InstrList for the entire set of Modules being analyzed. | 
|---|
| 939 | llvm::append_range(C&: InstrList, R&: InstrListForModule); | 
|---|
| 940 | // Do the same as above, but for IntegerMapping. | 
|---|
| 941 | llvm::append_range(C&: IntegerMapping, R&: IntegerMappingForModule); | 
|---|
| 942 | } | 
|---|
| 943 |  | 
|---|
| 944 | void IRSimilarityIdentifier::populateMapper( | 
|---|
| 945 | ArrayRef<std::unique_ptr<Module>> &Modules, | 
|---|
| 946 | std::vector<IRInstructionData *> &InstrList, | 
|---|
| 947 | std::vector<unsigned> &IntegerMapping) { | 
|---|
| 948 |  | 
|---|
| 949 | // Iterate over, and map the instructions in each module. | 
|---|
| 950 | for (const std::unique_ptr<Module> &M : Modules) | 
|---|
| 951 | populateMapper(M&: *M, InstrList, IntegerMapping); | 
|---|
| 952 | } | 
|---|
| 953 |  | 
|---|
| 954 | /// From a repeated subsequence, find all the different instances of the | 
|---|
| 955 | /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from | 
|---|
| 956 | /// the IRInstructionData in subsequence. | 
|---|
| 957 | /// | 
|---|
| 958 | /// \param [in] Mapper - The instruction mapper for basic correctness checks. | 
|---|
| 959 | /// \param [in] InstrList - The vector that holds the instruction data. | 
|---|
| 960 | /// \param [in] IntegerMapping - The vector that holds the mapped integers. | 
|---|
| 961 | /// \param [out] CandsForRepSubstring - The vector to store the generated | 
|---|
| 962 | /// IRSimilarityCandidates. | 
|---|
| 963 | static void createCandidatesFromSuffixTree( | 
|---|
| 964 | const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList, | 
|---|
| 965 | std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, | 
|---|
| 966 | std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { | 
|---|
| 967 |  | 
|---|
| 968 | unsigned StringLen = RS.Length; | 
|---|
| 969 | if (StringLen < 2) | 
|---|
| 970 | return; | 
|---|
| 971 |  | 
|---|
| 972 | // Create an IRSimilarityCandidate for instance of this subsequence \p RS. | 
|---|
| 973 | for (const unsigned &StartIdx : RS.StartIndices) { | 
|---|
| 974 | unsigned EndIdx = StartIdx + StringLen - 1; | 
|---|
| 975 |  | 
|---|
| 976 | // Check that this subsequence does not contain an illegal instruction. | 
|---|
| 977 | bool ContainsIllegal = false; | 
|---|
| 978 | for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { | 
|---|
| 979 | unsigned Key = IntegerMapping[CurrIdx]; | 
|---|
| 980 | if (Key > Mapper.IllegalInstrNumber) { | 
|---|
| 981 | ContainsIllegal = true; | 
|---|
| 982 | break; | 
|---|
| 983 | } | 
|---|
| 984 | } | 
|---|
| 985 |  | 
|---|
| 986 | // If we have an illegal instruction, we should not create an | 
|---|
| 987 | // IRSimilarityCandidate for this region. | 
|---|
| 988 | if (ContainsIllegal) | 
|---|
| 989 | continue; | 
|---|
| 990 |  | 
|---|
| 991 | // We are getting iterators to the instructions in this region of code | 
|---|
| 992 | // by advancing the start and end indices from the start of the | 
|---|
| 993 | // InstrList. | 
|---|
| 994 | std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); | 
|---|
| 995 | std::advance(i&: StartIt, n: StartIdx); | 
|---|
| 996 | std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); | 
|---|
| 997 | std::advance(i&: EndIt, n: EndIdx); | 
|---|
| 998 |  | 
|---|
| 999 | CandsForRepSubstring.emplace_back(args: StartIdx, args&: StringLen, args&: *StartIt, args&: *EndIt); | 
|---|
| 1000 | } | 
|---|
| 1001 | } | 
|---|
| 1002 |  | 
|---|
| 1003 | void IRSimilarityCandidate::createCanonicalRelationFrom( | 
|---|
| 1004 | IRSimilarityCandidate &SourceCand, | 
|---|
| 1005 | DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping, | 
|---|
| 1006 | DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) { | 
|---|
| 1007 | assert(SourceCand.CanonNumToNumber.size() != 0 && | 
|---|
| 1008 | "Base canonical relationship is empty!"); | 
|---|
| 1009 | assert(SourceCand.NumberToCanonNum.size() != 0 && | 
|---|
| 1010 | "Base canonical relationship is empty!"); | 
|---|
| 1011 |  | 
|---|
| 1012 | assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty"); | 
|---|
| 1013 | assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty"); | 
|---|
| 1014 |  | 
|---|
| 1015 | DenseSet<unsigned> UsedGVNs; | 
|---|
| 1016 | // Iterate over the mappings provided from this candidate to SourceCand.  We | 
|---|
| 1017 | // are then able to map the GVN in this candidate to the same canonical number | 
|---|
| 1018 | // given to the corresponding GVN in SourceCand. | 
|---|
| 1019 | for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) { | 
|---|
| 1020 | unsigned SourceGVN = GVNMapping.first; | 
|---|
| 1021 |  | 
|---|
| 1022 | assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!"); | 
|---|
| 1023 |  | 
|---|
| 1024 | unsigned ResultGVN; | 
|---|
| 1025 | // We need special handling if we have more than one potential value.  This | 
|---|
| 1026 | // means that there are at least two GVNs that could correspond to this GVN. | 
|---|
| 1027 | // This could lead to potential swapping later on, so we make a decision | 
|---|
| 1028 | // here to ensure a one-to-one mapping. | 
|---|
| 1029 | if (GVNMapping.second.size() > 1) { | 
|---|
| 1030 | bool Found = false; | 
|---|
| 1031 | for (unsigned Val : GVNMapping.second) { | 
|---|
| 1032 | // We make sure the target value number hasn't already been reserved. | 
|---|
| 1033 | if (UsedGVNs.contains(V: Val)) | 
|---|
| 1034 | continue; | 
|---|
| 1035 |  | 
|---|
| 1036 | // We make sure that the opposite mapping is still consistent. | 
|---|
| 1037 | DenseMap<unsigned, DenseSet<unsigned>>::iterator It = | 
|---|
| 1038 | FromSourceMapping.find(Val); | 
|---|
| 1039 |  | 
|---|
| 1040 | if (!It->second.contains(V: SourceGVN)) | 
|---|
| 1041 | continue; | 
|---|
| 1042 |  | 
|---|
| 1043 | // We pick the first item that satisfies these conditions. | 
|---|
| 1044 | Found = true; | 
|---|
| 1045 | ResultGVN = Val; | 
|---|
| 1046 | break; | 
|---|
| 1047 | } | 
|---|
| 1048 |  | 
|---|
| 1049 | assert(Found && "Could not find matching value for source GVN"); | 
|---|
| 1050 | (void)Found; | 
|---|
| 1051 |  | 
|---|
| 1052 | } else | 
|---|
| 1053 | ResultGVN = *GVNMapping.second.begin(); | 
|---|
| 1054 |  | 
|---|
| 1055 | // Whatever GVN is found, we mark it as used. | 
|---|
| 1056 | UsedGVNs.insert(V: ResultGVN); | 
|---|
| 1057 |  | 
|---|
| 1058 | unsigned CanonNum = *SourceCand.getCanonicalNum(N: ResultGVN); | 
|---|
| 1059 | CanonNumToNumber.insert(KV: std::make_pair(x&: CanonNum, y&: SourceGVN)); | 
|---|
| 1060 | NumberToCanonNum.insert(KV: std::make_pair(x&: SourceGVN, y&: CanonNum)); | 
|---|
| 1061 | } | 
|---|
| 1062 |  | 
|---|
| 1063 | DenseSet<BasicBlock *> BBSet; | 
|---|
| 1064 | getBasicBlocks(BBSet); | 
|---|
| 1065 | // Find canonical numbers for the BasicBlocks in the current candidate. | 
|---|
| 1066 | // This is done by finding the corresponding value for the first instruction | 
|---|
| 1067 | // in the block in the current candidate, finding the matching value in the | 
|---|
| 1068 | // source candidate.  Then by finding the parent of this value, use the | 
|---|
| 1069 | // canonical number of the block in the source candidate for the canonical | 
|---|
| 1070 | // number in the current candidate. | 
|---|
| 1071 | for (BasicBlock *BB : BBSet) { | 
|---|
| 1072 | unsigned BBGVNForCurrCand = ValueToNumber.find(Val: BB)->second; | 
|---|
| 1073 |  | 
|---|
| 1074 | // We can skip the BasicBlock if the canonical numbering has already been | 
|---|
| 1075 | // found in a separate instruction. | 
|---|
| 1076 | if (NumberToCanonNum.contains(Val: BBGVNForCurrCand)) | 
|---|
| 1077 | continue; | 
|---|
| 1078 |  | 
|---|
| 1079 | // If the basic block is the starting block, then the shared instruction may | 
|---|
| 1080 | // not be the first instruction in the block, it will be the first | 
|---|
| 1081 | // instruction in the similarity region. | 
|---|
| 1082 | Value *FirstOutlineInst = BB == getStartBB() | 
|---|
| 1083 | ? frontInstruction() | 
|---|
| 1084 | : &*BB->instructionsWithoutDebug().begin(); | 
|---|
| 1085 |  | 
|---|
| 1086 | unsigned FirstInstGVN = *getGVN(V: FirstOutlineInst); | 
|---|
| 1087 | unsigned FirstInstCanonNum = *getCanonicalNum(N: FirstInstGVN); | 
|---|
| 1088 | unsigned SourceGVN = *SourceCand.fromCanonicalNum(N: FirstInstCanonNum); | 
|---|
| 1089 | Value *SourceV = *SourceCand.fromGVN(Num: SourceGVN); | 
|---|
| 1090 | BasicBlock *SourceBB = cast<Instruction>(Val: SourceV)->getParent(); | 
|---|
| 1091 | unsigned SourceBBGVN = *SourceCand.getGVN(V: SourceBB); | 
|---|
| 1092 | unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(N: SourceBBGVN); | 
|---|
| 1093 | CanonNumToNumber.insert(KV: std::make_pair(x&: SourceCanonBBGVN, y&: BBGVNForCurrCand)); | 
|---|
| 1094 | NumberToCanonNum.insert(KV: std::make_pair(x&: BBGVNForCurrCand, y&: SourceCanonBBGVN)); | 
|---|
| 1095 | } | 
|---|
| 1096 | } | 
|---|
| 1097 |  | 
|---|
| 1098 | void IRSimilarityCandidate::createCanonicalRelationFrom( | 
|---|
| 1099 | IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge, | 
|---|
| 1100 | IRSimilarityCandidate &TargetCandLarge) { | 
|---|
| 1101 | assert(!SourceCand.CanonNumToNumber.empty() && | 
|---|
| 1102 | "Canonical Relationship is non-empty"); | 
|---|
| 1103 | assert(!SourceCand.NumberToCanonNum.empty() && | 
|---|
| 1104 | "Canonical Relationship is non-empty"); | 
|---|
| 1105 |  | 
|---|
| 1106 | assert(!SourceCandLarge.CanonNumToNumber.empty() && | 
|---|
| 1107 | "Canonical Relationship is non-empty"); | 
|---|
| 1108 | assert(!SourceCandLarge.NumberToCanonNum.empty() && | 
|---|
| 1109 | "Canonical Relationship is non-empty"); | 
|---|
| 1110 |  | 
|---|
| 1111 | assert(!TargetCandLarge.CanonNumToNumber.empty() && | 
|---|
| 1112 | "Canonical Relationship is non-empty"); | 
|---|
| 1113 | assert(!TargetCandLarge.NumberToCanonNum.empty() && | 
|---|
| 1114 | "Canonical Relationship is non-empty"); | 
|---|
| 1115 |  | 
|---|
| 1116 | assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty"); | 
|---|
| 1117 | assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty"); | 
|---|
| 1118 |  | 
|---|
| 1119 | // We're going to use the larger candidates as a "bridge" to create the | 
|---|
| 1120 | // canonical number for the target candidate since we have idetified two | 
|---|
| 1121 | // candidates as subsequences of larger sequences, and therefore must be | 
|---|
| 1122 | // structurally similar. | 
|---|
| 1123 | for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) { | 
|---|
| 1124 | Value *CurrVal = ValueNumPair.first; | 
|---|
| 1125 | unsigned TargetCandGVN = ValueNumPair.second; | 
|---|
| 1126 |  | 
|---|
| 1127 | // Find the numbering in the large candidate that surrounds the | 
|---|
| 1128 | // current candidate. | 
|---|
| 1129 | std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(V: CurrVal); | 
|---|
| 1130 | assert(OLargeTargetGVN.has_value() && "GVN not found for Value"); | 
|---|
| 1131 |  | 
|---|
| 1132 | // Get the canonical numbering in the large target candidate. | 
|---|
| 1133 | std::optional<unsigned> OTargetCandCanon = | 
|---|
| 1134 | TargetCandLarge.getCanonicalNum(N: OLargeTargetGVN.value()); | 
|---|
| 1135 | assert(OTargetCandCanon.has_value() && | 
|---|
| 1136 | "Canononical Number not found for GVN"); | 
|---|
| 1137 |  | 
|---|
| 1138 | // Get the GVN in the large source candidate from the canonical numbering. | 
|---|
| 1139 | std::optional<unsigned> OLargeSourceGVN = | 
|---|
| 1140 | SourceCandLarge.fromCanonicalNum(N: OTargetCandCanon.value()); | 
|---|
| 1141 | assert(OLargeSourceGVN.has_value() && | 
|---|
| 1142 | "GVN Number not found for Canonical Number"); | 
|---|
| 1143 |  | 
|---|
| 1144 | // Get the Value from the GVN in the large source candidate. | 
|---|
| 1145 | std::optional<Value *> OLargeSourceV = | 
|---|
| 1146 | SourceCandLarge.fromGVN(Num: OLargeSourceGVN.value()); | 
|---|
| 1147 | assert(OLargeSourceV.has_value() && "Value not found for GVN"); | 
|---|
| 1148 |  | 
|---|
| 1149 | // Get the GVN number for the Value in the source candidate. | 
|---|
| 1150 | std::optional<unsigned> OSourceGVN = | 
|---|
| 1151 | SourceCand.getGVN(V: OLargeSourceV.value()); | 
|---|
| 1152 | assert(OSourceGVN.has_value() && "GVN Number not found for Value"); | 
|---|
| 1153 |  | 
|---|
| 1154 | // Get the canonical numbering from the GVN/ | 
|---|
| 1155 | std::optional<unsigned> OSourceCanon = | 
|---|
| 1156 | SourceCand.getCanonicalNum(N: OSourceGVN.value()); | 
|---|
| 1157 | assert(OSourceCanon.has_value() && "Canon Number not found for GVN"); | 
|---|
| 1158 |  | 
|---|
| 1159 | // Insert the canonical numbering and GVN pair into their respective | 
|---|
| 1160 | // mappings. | 
|---|
| 1161 | CanonNumToNumber.insert( | 
|---|
| 1162 | KV: std::make_pair(x&: OSourceCanon.value(), y&: TargetCandGVN)); | 
|---|
| 1163 | NumberToCanonNum.insert( | 
|---|
| 1164 | KV: std::make_pair(x&: TargetCandGVN, y&: OSourceCanon.value())); | 
|---|
| 1165 | } | 
|---|
| 1166 | } | 
|---|
| 1167 |  | 
|---|
| 1168 | void IRSimilarityCandidate::createCanonicalMappingFor( | 
|---|
| 1169 | IRSimilarityCandidate &CurrCand) { | 
|---|
| 1170 | assert(CurrCand.CanonNumToNumber.size() == 0 && | 
|---|
| 1171 | "Canonical Relationship is non-empty"); | 
|---|
| 1172 | assert(CurrCand.NumberToCanonNum.size() == 0 && | 
|---|
| 1173 | "Canonical Relationship is non-empty"); | 
|---|
| 1174 |  | 
|---|
| 1175 | unsigned CanonNum = 0; | 
|---|
| 1176 | // Iterate over the value numbers found, the order does not matter in this | 
|---|
| 1177 | // case. | 
|---|
| 1178 | for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) { | 
|---|
| 1179 | CurrCand.NumberToCanonNum.insert(KV: std::make_pair(x&: NumToVal.first, y&: CanonNum)); | 
|---|
| 1180 | CurrCand.CanonNumToNumber.insert(KV: std::make_pair(x&: CanonNum, y&: NumToVal.first)); | 
|---|
| 1181 | CanonNum++; | 
|---|
| 1182 | } | 
|---|
| 1183 | } | 
|---|
| 1184 |  | 
|---|
| 1185 | /// Look for larger IRSimilarityCandidates From the previously matched | 
|---|
| 1186 | /// IRSimilarityCandidates that fully contain \p CandA or \p CandB.  If there is | 
|---|
| 1187 | /// an overlap, return a pair of structurally similar, larger | 
|---|
| 1188 | /// IRSimilarityCandidates. | 
|---|
| 1189 | /// | 
|---|
| 1190 | /// \param [in] CandA - The first candidate we are trying to determine the | 
|---|
| 1191 | /// structure of. | 
|---|
| 1192 | /// \param [in] CandB - The second candidate we are trying to determine the | 
|---|
| 1193 | /// structure of. | 
|---|
| 1194 | /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in | 
|---|
| 1195 | /// a circuit to the IRSimilarityCandidates that include this instruction. | 
|---|
| 1196 | /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a | 
|---|
| 1197 | /// number representing the structural group assigned to it. | 
|---|
| 1198 | static std::optional< | 
|---|
| 1199 | std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> | 
|---|
| 1200 | CheckLargerCands( | 
|---|
| 1201 | IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, | 
|---|
| 1202 | DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand, | 
|---|
| 1203 | DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) { | 
|---|
| 1204 | DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA; | 
|---|
| 1205 | DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB; | 
|---|
| 1206 | DenseSet<unsigned> IncludedGroupsA; | 
|---|
| 1207 | DenseSet<unsigned> IncludedGroupsB; | 
|---|
| 1208 |  | 
|---|
| 1209 | // Find the overall similarity group numbers that fully contain the candidate, | 
|---|
| 1210 | // and record the larger candidate for each group. | 
|---|
| 1211 | auto IdxToCandidateIt = IndexToIncludedCand.find(Val: CandA.getStartIdx()); | 
|---|
| 1212 | std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> | 
|---|
| 1213 | Result; | 
|---|
| 1214 |  | 
|---|
| 1215 | unsigned CandAStart = CandA.getStartIdx(); | 
|---|
| 1216 | unsigned CandAEnd = CandA.getEndIdx(); | 
|---|
| 1217 | unsigned CandBStart = CandB.getStartIdx(); | 
|---|
| 1218 | unsigned CandBEnd = CandB.getEndIdx(); | 
|---|
| 1219 | if (IdxToCandidateIt == IndexToIncludedCand.end()) | 
|---|
| 1220 | return Result; | 
|---|
| 1221 | for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { | 
|---|
| 1222 | if (MatchedCand->getStartIdx() > CandAStart || | 
|---|
| 1223 | (MatchedCand->getEndIdx() < CandAEnd)) | 
|---|
| 1224 | continue; | 
|---|
| 1225 | unsigned GroupNum = CandToGroup.find(Val: MatchedCand)->second; | 
|---|
| 1226 | IncludedGroupAndCandA.insert(KV: std::make_pair(x&: GroupNum, y&: MatchedCand)); | 
|---|
| 1227 | IncludedGroupsA.insert(V: GroupNum); | 
|---|
| 1228 | } | 
|---|
| 1229 |  | 
|---|
| 1230 | // Find the overall similarity group numbers that fully contain the next | 
|---|
| 1231 | // candidate, and record the larger candidate for each group. | 
|---|
| 1232 | IdxToCandidateIt = IndexToIncludedCand.find(Val: CandBStart); | 
|---|
| 1233 | if (IdxToCandidateIt == IndexToIncludedCand.end()) | 
|---|
| 1234 | return Result; | 
|---|
| 1235 | for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { | 
|---|
| 1236 | if (MatchedCand->getStartIdx() > CandBStart || | 
|---|
| 1237 | MatchedCand->getEndIdx() < CandBEnd) | 
|---|
| 1238 | continue; | 
|---|
| 1239 | unsigned GroupNum = CandToGroup.find(Val: MatchedCand)->second; | 
|---|
| 1240 | IncludedGroupAndCandB.insert(KV: std::make_pair(x&: GroupNum, y&: MatchedCand)); | 
|---|
| 1241 | IncludedGroupsB.insert(V: GroupNum); | 
|---|
| 1242 | } | 
|---|
| 1243 |  | 
|---|
| 1244 | // Find the intersection between the two groups, these are the groups where | 
|---|
| 1245 | // the larger candidates exist. | 
|---|
| 1246 | set_intersect(S1&: IncludedGroupsA, S2: IncludedGroupsB); | 
|---|
| 1247 |  | 
|---|
| 1248 | // If there is no intersection between the sets, then we cannot determine | 
|---|
| 1249 | // whether or not there is a match. | 
|---|
| 1250 | if (IncludedGroupsA.empty()) | 
|---|
| 1251 | return Result; | 
|---|
| 1252 |  | 
|---|
| 1253 | // Create a pair that contains the larger candidates. | 
|---|
| 1254 | auto ItA = IncludedGroupAndCandA.find(Val: *IncludedGroupsA.begin()); | 
|---|
| 1255 | auto ItB = IncludedGroupAndCandB.find(Val: *IncludedGroupsA.begin()); | 
|---|
| 1256 | Result = std::make_pair(x&: ItA->second, y&: ItB->second); | 
|---|
| 1257 | return Result; | 
|---|
| 1258 | } | 
|---|
| 1259 |  | 
|---|
| 1260 | /// From the list of IRSimilarityCandidates, perform a comparison between each | 
|---|
| 1261 | /// IRSimilarityCandidate to determine if there are overlapping | 
|---|
| 1262 | /// IRInstructionData, or if they do not have the same structure. | 
|---|
| 1263 | /// | 
|---|
| 1264 | /// \param [in] CandsForRepSubstring - The vector containing the | 
|---|
| 1265 | /// IRSimilarityCandidates. | 
|---|
| 1266 | /// \param [out] StructuralGroups - the mapping of unsigned integers to vector | 
|---|
| 1267 | /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the | 
|---|
| 1268 | /// vector are structurally similar to one another. | 
|---|
| 1269 | /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in | 
|---|
| 1270 | /// a circuit to the IRSimilarityCandidates that include this instruction. | 
|---|
| 1271 | /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a | 
|---|
| 1272 | /// number representing the structural group assigned to it. | 
|---|
| 1273 | static void findCandidateStructures( | 
|---|
| 1274 | std::vector<IRSimilarityCandidate> &CandsForRepSubstring, | 
|---|
| 1275 | DenseMap<unsigned, SimilarityGroup> &StructuralGroups, | 
|---|
| 1276 | DenseMap<unsigned,  DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand, | 
|---|
| 1277 | DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup | 
|---|
| 1278 | ) { | 
|---|
| 1279 | std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, | 
|---|
| 1280 | InnerCandEndIt; | 
|---|
| 1281 |  | 
|---|
| 1282 | // IRSimilarityCandidates each have a structure for operand use.  It is | 
|---|
| 1283 | // possible that two instances of the same subsequences have different | 
|---|
| 1284 | // structure. Each type of structure found is assigned a number.  This | 
|---|
| 1285 | // DenseMap maps an IRSimilarityCandidate to which type of similarity | 
|---|
| 1286 | // discovered it fits within. | 
|---|
| 1287 | DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; | 
|---|
| 1288 |  | 
|---|
| 1289 | // Find the compatibility from each candidate to the others to determine | 
|---|
| 1290 | // which candidates overlap and which have the same structure by mapping | 
|---|
| 1291 | // each structure to a different group. | 
|---|
| 1292 | bool SameStructure; | 
|---|
| 1293 | bool Inserted; | 
|---|
| 1294 | unsigned CurrentGroupNum = 0; | 
|---|
| 1295 | unsigned OuterGroupNum; | 
|---|
| 1296 | DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; | 
|---|
| 1297 | DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; | 
|---|
| 1298 | DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; | 
|---|
| 1299 |  | 
|---|
| 1300 | // Iterate over the candidates to determine its structural and overlapping | 
|---|
| 1301 | // compatibility with other instructions | 
|---|
| 1302 | DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; | 
|---|
| 1303 | DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; | 
|---|
| 1304 | for (CandIt = CandsForRepSubstring.begin(), | 
|---|
| 1305 | CandEndIt = CandsForRepSubstring.end(); | 
|---|
| 1306 | CandIt != CandEndIt; CandIt++) { | 
|---|
| 1307 |  | 
|---|
| 1308 | // Determine if it has an assigned structural group already. | 
|---|
| 1309 | // If not, we assign it one, and add it to our mapping. | 
|---|
| 1310 | std::tie(args&: CandToGroupIt, args&: Inserted) = | 
|---|
| 1311 | CandToGroup.try_emplace(Key: &*CandIt, Args&: CurrentGroupNum); | 
|---|
| 1312 | if (Inserted) | 
|---|
| 1313 | ++CurrentGroupNum; | 
|---|
| 1314 |  | 
|---|
| 1315 | // Get the structural group number from the iterator. | 
|---|
| 1316 | OuterGroupNum = CandToGroupIt->second; | 
|---|
| 1317 |  | 
|---|
| 1318 | // Check if we already have a list of IRSimilarityCandidates for the current | 
|---|
| 1319 | // structural group.  Create one if one does not exist. | 
|---|
| 1320 | CurrentGroupPair = StructuralGroups.find(Val: OuterGroupNum); | 
|---|
| 1321 | if (CurrentGroupPair == StructuralGroups.end()) { | 
|---|
| 1322 | IRSimilarityCandidate::createCanonicalMappingFor(CurrCand&: *CandIt); | 
|---|
| 1323 | std::tie(args&: CurrentGroupPair, args&: Inserted) = StructuralGroups.insert( | 
|---|
| 1324 | KV: std::make_pair(x&: OuterGroupNum, y: SimilarityGroup({*CandIt}))); | 
|---|
| 1325 | } | 
|---|
| 1326 |  | 
|---|
| 1327 | // Iterate over the IRSimilarityCandidates following the current | 
|---|
| 1328 | // IRSimilarityCandidate in the list to determine whether the two | 
|---|
| 1329 | // IRSimilarityCandidates are compatible.  This is so we do not repeat pairs | 
|---|
| 1330 | // of IRSimilarityCandidates. | 
|---|
| 1331 | for (InnerCandIt = std::next(x: CandIt), | 
|---|
| 1332 | InnerCandEndIt = CandsForRepSubstring.end(); | 
|---|
| 1333 | InnerCandIt != InnerCandEndIt; InnerCandIt++) { | 
|---|
| 1334 |  | 
|---|
| 1335 | // We check if the inner item has a group already, if it does, we skip it. | 
|---|
| 1336 | CandToGroupItInner = CandToGroup.find(Val: &*InnerCandIt); | 
|---|
| 1337 | if (CandToGroupItInner != CandToGroup.end()) | 
|---|
| 1338 | continue; | 
|---|
| 1339 |  | 
|---|
| 1340 | // Check if we have found structural similarity between two candidates | 
|---|
| 1341 | // that fully contains the first and second candidates. | 
|---|
| 1342 | std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> | 
|---|
| 1343 | LargerPair = CheckLargerCands( | 
|---|
| 1344 | CandA&: *CandIt, CandB&: *InnerCandIt, IndexToIncludedCand, CandToGroup&: CandToOverallGroup); | 
|---|
| 1345 |  | 
|---|
| 1346 | // If a pair was found, it means that we can assume that these smaller | 
|---|
| 1347 | // substrings are also structurally similar.  Use the larger candidates to | 
|---|
| 1348 | // determine the canonical mapping between the two sections. | 
|---|
| 1349 | if (LargerPair.has_value()) { | 
|---|
| 1350 | SameStructure = true; | 
|---|
| 1351 | InnerCandIt->createCanonicalRelationFrom( | 
|---|
| 1352 | SourceCand&: *CandIt, SourceCandLarge&: *LargerPair.value().first, TargetCandLarge&: *LargerPair.value().second); | 
|---|
| 1353 | CandToGroup.insert(KV: std::make_pair(x: &*InnerCandIt, y&: OuterGroupNum)); | 
|---|
| 1354 | CurrentGroupPair->second.push_back(x: *InnerCandIt); | 
|---|
| 1355 | continue; | 
|---|
| 1356 | } | 
|---|
| 1357 |  | 
|---|
| 1358 | // Otherwise we determine if they have the same structure and add it to | 
|---|
| 1359 | // vector if they match. | 
|---|
| 1360 | ValueNumberMappingA.clear(); | 
|---|
| 1361 | ValueNumberMappingB.clear(); | 
|---|
| 1362 | SameStructure = IRSimilarityCandidate::compareStructure( | 
|---|
| 1363 | A: *CandIt, B: *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB); | 
|---|
| 1364 | if (!SameStructure) | 
|---|
| 1365 | continue; | 
|---|
| 1366 |  | 
|---|
| 1367 | InnerCandIt->createCanonicalRelationFrom(SourceCand&: *CandIt, ToSourceMapping&: ValueNumberMappingA, | 
|---|
| 1368 | FromSourceMapping&: ValueNumberMappingB); | 
|---|
| 1369 | CandToGroup.insert(KV: std::make_pair(x: &*InnerCandIt, y&: OuterGroupNum)); | 
|---|
| 1370 | CurrentGroupPair->second.push_back(x: *InnerCandIt); | 
|---|
| 1371 | } | 
|---|
| 1372 | } | 
|---|
| 1373 | } | 
|---|
| 1374 |  | 
|---|
| 1375 | void IRSimilarityIdentifier::findCandidates( | 
|---|
| 1376 | std::vector<IRInstructionData *> &InstrList, | 
|---|
| 1377 | std::vector<unsigned> &IntegerMapping) { | 
|---|
| 1378 | SuffixTree ST(IntegerMapping); | 
|---|
| 1379 |  | 
|---|
| 1380 | std::vector<IRSimilarityCandidate> CandsForRepSubstring; | 
|---|
| 1381 | std::vector<SimilarityGroup> NewCandidateGroups; | 
|---|
| 1382 |  | 
|---|
| 1383 | DenseMap<unsigned, SimilarityGroup> StructuralGroups; | 
|---|
| 1384 | DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand; | 
|---|
| 1385 | DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; | 
|---|
| 1386 |  | 
|---|
| 1387 | // Iterate over the subsequences found by the Suffix Tree to create | 
|---|
| 1388 | // IRSimilarityCandidates for each repeated subsequence and determine which | 
|---|
| 1389 | // instances are structurally similar to one another. | 
|---|
| 1390 |  | 
|---|
| 1391 | // Sort the suffix tree from longest substring to shortest. | 
|---|
| 1392 | std::vector<SuffixTree::RepeatedSubstring> RSes; | 
|---|
| 1393 | for (SuffixTree::RepeatedSubstring &RS : ST) | 
|---|
| 1394 | RSes.push_back(x: RS); | 
|---|
| 1395 |  | 
|---|
| 1396 | llvm::stable_sort(Range&: RSes, C: [](const SuffixTree::RepeatedSubstring &LHS, | 
|---|
| 1397 | const SuffixTree::RepeatedSubstring &RHS) { | 
|---|
| 1398 | return LHS.Length > RHS.Length; | 
|---|
| 1399 | }); | 
|---|
| 1400 | for (SuffixTree::RepeatedSubstring &RS : RSes) { | 
|---|
| 1401 | createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS, | 
|---|
| 1402 | CandsForRepSubstring); | 
|---|
| 1403 |  | 
|---|
| 1404 | if (CandsForRepSubstring.size() < 2) | 
|---|
| 1405 | continue; | 
|---|
| 1406 |  | 
|---|
| 1407 | findCandidateStructures(CandsForRepSubstring, StructuralGroups, | 
|---|
| 1408 | IndexToIncludedCand, CandToOverallGroup&: CandToGroup); | 
|---|
| 1409 | for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) { | 
|---|
| 1410 | // We only add the group if it contains more than one | 
|---|
| 1411 | // IRSimilarityCandidate.  If there is only one, that means there is no | 
|---|
| 1412 | // other repeated subsequence with the same structure. | 
|---|
| 1413 | if (Group.second.size() > 1) { | 
|---|
| 1414 | SimilarityCandidates->push_back(x: Group.second); | 
|---|
| 1415 | // Iterate over each candidate in the group, and add an entry for each | 
|---|
| 1416 | // instruction included with a mapping to a set of | 
|---|
| 1417 | // IRSimilarityCandidates that include that instruction. | 
|---|
| 1418 | for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) { | 
|---|
| 1419 | for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx(); | 
|---|
| 1420 | Idx <= Edx; ++Idx) | 
|---|
| 1421 | IndexToIncludedCand[Idx].insert(V: &IRCand); | 
|---|
| 1422 | // Add mapping of candidate to the overall similarity group number. | 
|---|
| 1423 | CandToGroup.insert( | 
|---|
| 1424 | KV: std::make_pair(x: &IRCand, y: SimilarityCandidates->size() - 1)); | 
|---|
| 1425 | } | 
|---|
| 1426 | } | 
|---|
| 1427 | } | 
|---|
| 1428 |  | 
|---|
| 1429 | CandsForRepSubstring.clear(); | 
|---|
| 1430 | StructuralGroups.clear(); | 
|---|
| 1431 | NewCandidateGroups.clear(); | 
|---|
| 1432 | } | 
|---|
| 1433 | } | 
|---|
| 1434 |  | 
|---|
| 1435 | SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( | 
|---|
| 1436 | ArrayRef<std::unique_ptr<Module>> Modules) { | 
|---|
| 1437 | resetSimilarityCandidates(); | 
|---|
| 1438 |  | 
|---|
| 1439 | std::vector<IRInstructionData *> InstrList; | 
|---|
| 1440 | std::vector<unsigned> IntegerMapping; | 
|---|
| 1441 | Mapper.InstClassifier.EnableBranches = this->EnableBranches; | 
|---|
| 1442 | Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; | 
|---|
| 1443 | Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; | 
|---|
| 1444 | Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; | 
|---|
| 1445 | Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; | 
|---|
| 1446 |  | 
|---|
| 1447 | populateMapper(Modules, InstrList, IntegerMapping); | 
|---|
| 1448 | findCandidates(InstrList, IntegerMapping); | 
|---|
| 1449 |  | 
|---|
| 1450 | return *SimilarityCandidates; | 
|---|
| 1451 | } | 
|---|
| 1452 |  | 
|---|
| 1453 | SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { | 
|---|
| 1454 | resetSimilarityCandidates(); | 
|---|
| 1455 | Mapper.InstClassifier.EnableBranches = this->EnableBranches; | 
|---|
| 1456 | Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; | 
|---|
| 1457 | Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; | 
|---|
| 1458 | Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; | 
|---|
| 1459 | Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; | 
|---|
| 1460 |  | 
|---|
| 1461 | std::vector<IRInstructionData *> InstrList; | 
|---|
| 1462 | std::vector<unsigned> IntegerMapping; | 
|---|
| 1463 |  | 
|---|
| 1464 | populateMapper(M, InstrList, IntegerMapping); | 
|---|
| 1465 | findCandidates(InstrList, IntegerMapping); | 
|---|
| 1466 |  | 
|---|
| 1467 | return *SimilarityCandidates; | 
|---|
| 1468 | } | 
|---|
| 1469 |  | 
|---|
| 1470 | INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", | 
|---|
| 1471 | "ir-similarity-identifier", false, true) | 
|---|
| 1472 |  | 
|---|
| 1473 | IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() | 
|---|
| 1474 | : ModulePass(ID) {} | 
|---|
| 1475 |  | 
|---|
| 1476 | bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { | 
|---|
| 1477 | IRSI.reset(p: new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, | 
|---|
| 1478 | MatchCallsByName, !DisableIntrinsics, | 
|---|
| 1479 | false)); | 
|---|
| 1480 | return false; | 
|---|
| 1481 | } | 
|---|
| 1482 |  | 
|---|
| 1483 | bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { | 
|---|
| 1484 | IRSI.reset(); | 
|---|
| 1485 | return false; | 
|---|
| 1486 | } | 
|---|
| 1487 |  | 
|---|
| 1488 | bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { | 
|---|
| 1489 | IRSI->findSimilarity(M); | 
|---|
| 1490 | return false; | 
|---|
| 1491 | } | 
|---|
| 1492 |  | 
|---|
| 1493 | AnalysisKey IRSimilarityAnalysis::Key; | 
|---|
| 1494 | IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, | 
|---|
| 1495 | ModuleAnalysisManager &) { | 
|---|
| 1496 | auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, | 
|---|
| 1497 | MatchCallsByName, !DisableIntrinsics, | 
|---|
| 1498 | false); | 
|---|
| 1499 | IRSI.findSimilarity(M); | 
|---|
| 1500 | return IRSI; | 
|---|
| 1501 | } | 
|---|
| 1502 |  | 
|---|
| 1503 | PreservedAnalyses | 
|---|
| 1504 | IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { | 
|---|
| 1505 | IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(IR&: M); | 
|---|
| 1506 | std::optional<SimilarityGroupList> &SimilarityCandidatesOpt = | 
|---|
| 1507 | IRSI.getSimilarity(); | 
|---|
| 1508 |  | 
|---|
| 1509 | for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { | 
|---|
| 1510 | OS << CandVec.size() << " candidates of length " | 
|---|
| 1511 | << CandVec.begin()->getLength() << ".  Found in: \n"; | 
|---|
| 1512 | for (IRSimilarityCandidate &Cand : CandVec) { | 
|---|
| 1513 | OS << "  Function: "<< Cand.front()->Inst->getFunction()->getName().str() | 
|---|
| 1514 | << ", Basic Block: "; | 
|---|
| 1515 | if (Cand.front()->Inst->getParent()->getName().str() == "") | 
|---|
| 1516 | OS << "(unnamed)"; | 
|---|
| 1517 | else | 
|---|
| 1518 | OS << Cand.front()->Inst->getParent()->getName().str(); | 
|---|
| 1519 | OS << "\n    Start Instruction: "; | 
|---|
| 1520 | Cand.frontInstruction()->print(O&: OS); | 
|---|
| 1521 | OS << "\n      End Instruction: "; | 
|---|
| 1522 | Cand.backInstruction()->print(O&: OS); | 
|---|
| 1523 | OS << "\n"; | 
|---|
| 1524 | } | 
|---|
| 1525 | } | 
|---|
| 1526 |  | 
|---|
| 1527 | return PreservedAnalyses::all(); | 
|---|
| 1528 | } | 
|---|
| 1529 |  | 
|---|
| 1530 | char IRSimilarityIdentifierWrapperPass::ID = 0; | 
|---|
| 1531 |  | 
|---|