1 | //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // \file |
10 | // Implementation file for the IRSimilarityIdentifier for identifying |
11 | // similarities in IR including the IRInstructionMapper. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "llvm/Analysis/IRSimilarityIdentifier.h" |
16 | #include "llvm/ADT/DenseMap.h" |
17 | #include "llvm/ADT/SetOperations.h" |
18 | #include "llvm/IR/Intrinsics.h" |
19 | #include "llvm/IR/Operator.h" |
20 | #include "llvm/IR/User.h" |
21 | #include "llvm/InitializePasses.h" |
22 | #include "llvm/Support/SuffixTree.h" |
23 | |
24 | using namespace llvm; |
25 | using namespace IRSimilarity; |
26 | |
27 | namespace llvm { |
28 | cl::opt<bool> |
29 | DisableBranches("no-ir-sim-branch-matching" , cl::init(Val: false), |
30 | cl::ReallyHidden, |
31 | cl::desc("disable similarity matching, and outlining, " |
32 | "across branches for debugging purposes." )); |
33 | |
34 | cl::opt<bool> |
35 | DisableIndirectCalls("no-ir-sim-indirect-calls" , cl::init(Val: false), |
36 | cl::ReallyHidden, |
37 | cl::desc("disable outlining indirect calls." )); |
38 | |
39 | cl::opt<bool> |
40 | MatchCallsByName("ir-sim-calls-by-name" , cl::init(Val: false), cl::ReallyHidden, |
41 | cl::desc("only allow matching call instructions if the " |
42 | "name and type signature match." )); |
43 | |
44 | cl::opt<bool> |
45 | DisableIntrinsics("no-ir-sim-intrinsics" , cl::init(Val: false), cl::ReallyHidden, |
46 | cl::desc("Don't match or outline intrinsics" )); |
47 | } // namespace llvm |
48 | |
49 | IRInstructionData::IRInstructionData(Instruction &I, bool Legality, |
50 | IRInstructionDataList &IDList) |
51 | : Inst(&I), Legal(Legality), IDL(&IDList) { |
52 | initializeInstruction(); |
53 | } |
54 | |
55 | void IRInstructionData::initializeInstruction() { |
56 | // We check for whether we have a comparison instruction. If it is, we |
57 | // find the "less than" version of the predicate for consistency for |
58 | // comparison instructions throught the program. |
59 | if (CmpInst *C = dyn_cast<CmpInst>(Val: Inst)) { |
60 | CmpInst::Predicate Predicate = predicateForConsistency(CI: C); |
61 | if (Predicate != C->getPredicate()) |
62 | RevisedPredicate = Predicate; |
63 | } |
64 | |
65 | // Here we collect the operands and their types for determining whether |
66 | // the structure of the operand use matches between two different candidates. |
67 | for (Use &OI : Inst->operands()) { |
68 | if (isa<CmpInst>(Val: Inst) && RevisedPredicate) { |
69 | // If we have a CmpInst where the predicate is reversed, it means the |
70 | // operands must be reversed as well. |
71 | OperVals.insert(I: OperVals.begin(), Elt: OI.get()); |
72 | continue; |
73 | } |
74 | |
75 | OperVals.push_back(Elt: OI.get()); |
76 | } |
77 | |
78 | // We capture the incoming BasicBlocks as values as well as the incoming |
79 | // Values in order to check for structural similarity. |
80 | if (PHINode *PN = dyn_cast<PHINode>(Val: Inst)) |
81 | for (BasicBlock *BB : PN->blocks()) |
82 | OperVals.push_back(Elt: BB); |
83 | } |
84 | |
85 | IRInstructionData::IRInstructionData(IRInstructionDataList &IDList) |
86 | : IDL(&IDList) {} |
87 | |
88 | void IRInstructionData::setBranchSuccessors( |
89 | DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { |
90 | assert(isa<BranchInst>(Inst) && "Instruction must be branch" ); |
91 | |
92 | BranchInst *BI = cast<BranchInst>(Val: Inst); |
93 | DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; |
94 | |
95 | BBNumIt = BasicBlockToInteger.find(Val: BI->getParent()); |
96 | assert(BBNumIt != BasicBlockToInteger.end() && |
97 | "Could not find location for BasicBlock!" ); |
98 | |
99 | int CurrentBlockNumber = static_cast<int>(BBNumIt->second); |
100 | |
101 | for (Value *V : getBlockOperVals()) { |
102 | BasicBlock *Successor = cast<BasicBlock>(Val: V); |
103 | BBNumIt = BasicBlockToInteger.find(Val: Successor); |
104 | assert(BBNumIt != BasicBlockToInteger.end() && |
105 | "Could not find number for BasicBlock!" ); |
106 | int OtherBlockNumber = static_cast<int>(BBNumIt->second); |
107 | |
108 | int Relative = OtherBlockNumber - CurrentBlockNumber; |
109 | RelativeBlockLocations.push_back(Elt: Relative); |
110 | } |
111 | } |
112 | |
113 | ArrayRef<Value *> IRInstructionData::getBlockOperVals() { |
114 | assert((isa<BranchInst>(Inst) || |
115 | isa<PHINode>(Inst)) && "Instruction must be branch or PHINode" ); |
116 | |
117 | if (BranchInst *BI = dyn_cast<BranchInst>(Val: Inst)) |
118 | return ArrayRef<Value *>( |
119 | std::next(x: OperVals.begin(), n: BI->isConditional() ? 1 : 0), |
120 | OperVals.end() |
121 | ); |
122 | |
123 | if (PHINode *PN = dyn_cast<PHINode>(Val: Inst)) |
124 | return ArrayRef<Value *>( |
125 | std::next(x: OperVals.begin(), n: PN->getNumIncomingValues()), |
126 | OperVals.end() |
127 | ); |
128 | |
129 | return ArrayRef<Value *>(); |
130 | } |
131 | |
132 | void IRInstructionData::setCalleeName(bool MatchByName) { |
133 | CallInst *CI = dyn_cast<CallInst>(Val: Inst); |
134 | assert(CI && "Instruction must be call" ); |
135 | |
136 | CalleeName = "" ; |
137 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: Inst)) { |
138 | // To hash intrinsics, we use the opcode, and types like the other |
139 | // instructions, but also, the Intrinsic ID, and the Name of the |
140 | // intrinsic. |
141 | Intrinsic::ID IntrinsicID = II->getIntrinsicID(); |
142 | FunctionType *FT = II->getFunctionType(); |
143 | // If there is an overloaded name, we have to use the complex version |
144 | // of getName to get the entire string. |
145 | if (Intrinsic::isOverloaded(id: IntrinsicID)) |
146 | CalleeName = |
147 | Intrinsic::getName(Id: IntrinsicID, Tys: FT->params(), M: II->getModule(), FT); |
148 | // If there is not an overloaded name, we only need to use this version. |
149 | else |
150 | CalleeName = Intrinsic::getName(id: IntrinsicID).str(); |
151 | |
152 | return; |
153 | } |
154 | |
155 | if (!CI->isIndirectCall() && MatchByName) |
156 | CalleeName = CI->getCalledFunction()->getName().str(); |
157 | } |
158 | |
159 | void IRInstructionData::setPHIPredecessors( |
160 | DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { |
161 | assert(isa<PHINode>(Inst) && "Instruction must be phi node" ); |
162 | |
163 | PHINode *PN = cast<PHINode>(Val: Inst); |
164 | DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; |
165 | |
166 | BBNumIt = BasicBlockToInteger.find(Val: PN->getParent()); |
167 | assert(BBNumIt != BasicBlockToInteger.end() && |
168 | "Could not find location for BasicBlock!" ); |
169 | |
170 | int CurrentBlockNumber = static_cast<int>(BBNumIt->second); |
171 | |
172 | // Convert the incoming blocks of the PHINode to an integer value, based on |
173 | // the relative distances between the current block and the incoming block. |
174 | for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) { |
175 | BasicBlock *Incoming = PN->getIncomingBlock(i: Idx); |
176 | BBNumIt = BasicBlockToInteger.find(Val: Incoming); |
177 | assert(BBNumIt != BasicBlockToInteger.end() && |
178 | "Could not find number for BasicBlock!" ); |
179 | int OtherBlockNumber = static_cast<int>(BBNumIt->second); |
180 | |
181 | int Relative = OtherBlockNumber - CurrentBlockNumber; |
182 | RelativeBlockLocations.push_back(Elt: Relative); |
183 | } |
184 | } |
185 | |
186 | CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { |
187 | switch (CI->getPredicate()) { |
188 | case CmpInst::FCMP_OGT: |
189 | case CmpInst::FCMP_UGT: |
190 | case CmpInst::FCMP_OGE: |
191 | case CmpInst::FCMP_UGE: |
192 | case CmpInst::ICMP_SGT: |
193 | case CmpInst::ICMP_UGT: |
194 | case CmpInst::ICMP_SGE: |
195 | case CmpInst::ICMP_UGE: |
196 | return CI->getSwappedPredicate(); |
197 | default: |
198 | return CI->getPredicate(); |
199 | } |
200 | } |
201 | |
202 | CmpInst::Predicate IRInstructionData::getPredicate() const { |
203 | assert(isa<CmpInst>(Inst) && |
204 | "Can only get a predicate from a compare instruction" ); |
205 | |
206 | if (RevisedPredicate) |
207 | return *RevisedPredicate; |
208 | |
209 | return cast<CmpInst>(Val: Inst)->getPredicate(); |
210 | } |
211 | |
212 | StringRef IRInstructionData::getCalleeName() const { |
213 | assert(isa<CallInst>(Inst) && |
214 | "Can only get a name from a call instruction" ); |
215 | |
216 | assert(CalleeName && "CalleeName has not been set" ); |
217 | |
218 | return *CalleeName; |
219 | } |
220 | |
221 | bool IRSimilarity::isClose(const IRInstructionData &A, |
222 | const IRInstructionData &B) { |
223 | |
224 | if (!A.Legal || !B.Legal) |
225 | return false; |
226 | |
227 | // Check if we are performing the same sort of operation on the same types |
228 | // but not on the same values. |
229 | if (!A.Inst->isSameOperationAs(I: B.Inst)) { |
230 | // If there is a predicate, this means that either there is a swapped |
231 | // predicate, or that the types are different, we want to make sure that |
232 | // the predicates are equivalent via swapping. |
233 | if (isa<CmpInst>(Val: A.Inst) && isa<CmpInst>(Val: B.Inst)) { |
234 | |
235 | if (A.getPredicate() != B.getPredicate()) |
236 | return false; |
237 | |
238 | // If the predicates are the same via swap, make sure that the types are |
239 | // still the same. |
240 | auto ZippedTypes = zip(t: A.OperVals, u: B.OperVals); |
241 | |
242 | return all_of( |
243 | Range&: ZippedTypes, P: [](std::tuple<llvm::Value *, llvm::Value *> R) { |
244 | return std::get<0>(t&: R)->getType() == std::get<1>(t&: R)->getType(); |
245 | }); |
246 | } |
247 | |
248 | return false; |
249 | } |
250 | |
251 | // Since any GEP Instruction operands after the first operand cannot be |
252 | // defined by a register, we must make sure that the operands after the first |
253 | // are the same in the two instructions |
254 | if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: A.Inst)) { |
255 | auto *OtherGEP = cast<GetElementPtrInst>(Val: B.Inst); |
256 | |
257 | // If the instructions do not have the same inbounds restrictions, we do |
258 | // not consider them the same. |
259 | if (GEP->isInBounds() != OtherGEP->isInBounds()) |
260 | return false; |
261 | |
262 | auto ZippedOperands = zip(t: GEP->indices(), u: OtherGEP->indices()); |
263 | |
264 | // We increment here since we do not care about the first instruction, |
265 | // we only care about the following operands since they must be the |
266 | // exact same to be considered similar. |
267 | return all_of(Range: drop_begin(RangeOrContainer&: ZippedOperands), |
268 | P: [](std::tuple<llvm::Use &, llvm::Use &> R) { |
269 | return std::get<0>(t&: R) == std::get<1>(t&: R); |
270 | }); |
271 | } |
272 | |
273 | // If the instructions are functions calls, we make sure that the function |
274 | // name is the same. We already know that the types are since is |
275 | // isSameOperationAs is true. |
276 | if (isa<CallInst>(Val: A.Inst) && isa<CallInst>(Val: B.Inst)) { |
277 | if (A.getCalleeName() != B.getCalleeName()) |
278 | return false; |
279 | } |
280 | |
281 | if (isa<BranchInst>(Val: A.Inst) && isa<BranchInst>(Val: B.Inst) && |
282 | A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size()) |
283 | return false; |
284 | |
285 | return true; |
286 | } |
287 | |
288 | // TODO: This is the same as the MachineOutliner, and should be consolidated |
289 | // into the same interface. |
290 | void IRInstructionMapper::convertToUnsignedVec( |
291 | BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, |
292 | std::vector<unsigned> &IntegerMapping) { |
293 | BasicBlock::iterator It = BB.begin(); |
294 | |
295 | std::vector<unsigned> IntegerMappingForBB; |
296 | std::vector<IRInstructionData *> InstrListForBB; |
297 | |
298 | for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { |
299 | switch (InstClassifier.visit(I&: *It)) { |
300 | case InstrType::Legal: |
301 | mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); |
302 | break; |
303 | case InstrType::Illegal: |
304 | mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); |
305 | break; |
306 | case InstrType::Invisible: |
307 | AddedIllegalLastTime = false; |
308 | break; |
309 | } |
310 | } |
311 | |
312 | if (AddedIllegalLastTime) |
313 | mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, End: true); |
314 | for (IRInstructionData *ID : InstrListForBB) |
315 | this->IDL->push_back(Node&: *ID); |
316 | llvm::append_range(C&: InstrList, R&: InstrListForBB); |
317 | llvm::append_range(C&: IntegerMapping, R&: IntegerMappingForBB); |
318 | } |
319 | |
320 | // TODO: This is the same as the MachineOutliner, and should be consolidated |
321 | // into the same interface. |
322 | unsigned IRInstructionMapper::mapToLegalUnsigned( |
323 | BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, |
324 | std::vector<IRInstructionData *> &InstrListForBB) { |
325 | // We added something legal, so we should unset the AddedLegalLastTime |
326 | // flag. |
327 | AddedIllegalLastTime = false; |
328 | |
329 | // If we have at least two adjacent legal instructions (which may have |
330 | // invisible instructions in between), remember that. |
331 | if (CanCombineWithPrevInstr) |
332 | HaveLegalRange = true; |
333 | CanCombineWithPrevInstr = true; |
334 | |
335 | // Get the integer for this instruction or give it the current |
336 | // LegalInstrNumber. |
337 | IRInstructionData *ID = allocateIRInstructionData(I&: *It, Legality: true, IDL&: *IDL); |
338 | InstrListForBB.push_back(x: ID); |
339 | |
340 | if (isa<BranchInst>(Val: *It)) |
341 | ID->setBranchSuccessors(BasicBlockToInteger); |
342 | |
343 | if (isa<CallInst>(Val: *It)) |
344 | ID->setCalleeName(EnableMatchCallsByName); |
345 | |
346 | if (isa<PHINode>(Val: *It)) |
347 | ID->setPHIPredecessors(BasicBlockToInteger); |
348 | |
349 | // Add to the instruction list |
350 | bool WasInserted; |
351 | DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator |
352 | ResultIt; |
353 | std::tie(args&: ResultIt, args&: WasInserted) = |
354 | InstructionIntegerMap.insert(KV: std::make_pair(x&: ID, y&: LegalInstrNumber)); |
355 | unsigned INumber = ResultIt->second; |
356 | |
357 | // There was an insertion. |
358 | if (WasInserted) |
359 | LegalInstrNumber++; |
360 | |
361 | IntegerMappingForBB.push_back(x: INumber); |
362 | |
363 | // Make sure we don't overflow or use any integers reserved by the DenseMap. |
364 | assert(LegalInstrNumber < IllegalInstrNumber && |
365 | "Instruction mapping overflow!" ); |
366 | |
367 | assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && |
368 | "Tried to assign DenseMap tombstone or empty key to instruction." ); |
369 | assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && |
370 | "Tried to assign DenseMap tombstone or empty key to instruction." ); |
371 | |
372 | return INumber; |
373 | } |
374 | |
375 | IRInstructionData * |
376 | IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, |
377 | IRInstructionDataList &IDL) { |
378 | return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); |
379 | } |
380 | |
381 | IRInstructionData * |
382 | IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) { |
383 | return new (InstDataAllocator->Allocate()) IRInstructionData(IDL); |
384 | } |
385 | |
386 | IRInstructionDataList * |
387 | IRInstructionMapper::allocateIRInstructionDataList() { |
388 | return new (IDLAllocator->Allocate()) IRInstructionDataList(); |
389 | } |
390 | |
391 | // TODO: This is the same as the MachineOutliner, and should be consolidated |
392 | // into the same interface. |
393 | unsigned IRInstructionMapper::mapToIllegalUnsigned( |
394 | BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, |
395 | std::vector<IRInstructionData *> &InstrListForBB, bool End) { |
396 | // Can't combine an illegal instruction. Set the flag. |
397 | CanCombineWithPrevInstr = false; |
398 | |
399 | // Only add one illegal number per range of legal numbers. |
400 | if (AddedIllegalLastTime) |
401 | return IllegalInstrNumber; |
402 | |
403 | IRInstructionData *ID = nullptr; |
404 | if (!End) |
405 | ID = allocateIRInstructionData(I&: *It, Legality: false, IDL&: *IDL); |
406 | else |
407 | ID = allocateIRInstructionData(IDL&: *IDL); |
408 | InstrListForBB.push_back(x: ID); |
409 | |
410 | // Remember that we added an illegal number last time. |
411 | AddedIllegalLastTime = true; |
412 | unsigned INumber = IllegalInstrNumber; |
413 | IntegerMappingForBB.push_back(x: IllegalInstrNumber--); |
414 | |
415 | assert(LegalInstrNumber < IllegalInstrNumber && |
416 | "Instruction mapping overflow!" ); |
417 | |
418 | assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && |
419 | "IllegalInstrNumber cannot be DenseMap tombstone or empty key!" ); |
420 | |
421 | assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && |
422 | "IllegalInstrNumber cannot be DenseMap tombstone or empty key!" ); |
423 | |
424 | return INumber; |
425 | } |
426 | |
427 | IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, |
428 | IRInstructionData *FirstInstIt, |
429 | IRInstructionData *LastInstIt) |
430 | : StartIdx(StartIdx), Len(Len) { |
431 | |
432 | assert(FirstInstIt != nullptr && "Instruction is nullptr!" ); |
433 | assert(LastInstIt != nullptr && "Instruction is nullptr!" ); |
434 | assert(StartIdx + Len > StartIdx && |
435 | "Overflow for IRSimilarityCandidate range?" ); |
436 | assert(Len - 1 == static_cast<unsigned>(std::distance( |
437 | iterator(FirstInstIt), iterator(LastInstIt))) && |
438 | "Length of the first and last IRInstructionData do not match the " |
439 | "given length" ); |
440 | |
441 | // We iterate over the given instructions, and map each unique value |
442 | // to a unique number in the IRSimilarityCandidate ValueToNumber and |
443 | // NumberToValue maps. A constant get its own value globally, the individual |
444 | // uses of the constants are not considered to be unique. |
445 | // |
446 | // IR: Mapping Added: |
447 | // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 |
448 | // %add2 = add i32 %a, %1 %add2 -> 4 |
449 | // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 |
450 | // |
451 | // when replace with global values, starting from 1, would be |
452 | // |
453 | // 3 = add i32 1, 2 |
454 | // 4 = add i32 1, 3 |
455 | // 6 = add i32 5, 2 |
456 | unsigned LocalValNumber = 1; |
457 | IRInstructionDataList::iterator ID = iterator(*FirstInstIt); |
458 | for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { |
459 | // Map the operand values to an unsigned integer if it does not already |
460 | // have an unsigned integer assigned to it. |
461 | for (Value *Arg : ID->OperVals) |
462 | if (!ValueToNumber.contains(Val: Arg)) { |
463 | ValueToNumber.try_emplace(Key: Arg, Args&: LocalValNumber); |
464 | NumberToValue.try_emplace(Key: LocalValNumber, Args&: Arg); |
465 | LocalValNumber++; |
466 | } |
467 | |
468 | // Mapping the instructions to an unsigned integer if it is not already |
469 | // exist in the mapping. |
470 | if (!ValueToNumber.contains(Val: ID->Inst)) { |
471 | ValueToNumber.try_emplace(Key: ID->Inst, Args&: LocalValNumber); |
472 | NumberToValue.try_emplace(Key: LocalValNumber, Args&: ID->Inst); |
473 | LocalValNumber++; |
474 | } |
475 | } |
476 | |
477 | // Setting the first and last instruction data pointers for the candidate. If |
478 | // we got through the entire for loop without hitting an assert, we know |
479 | // that both of these instructions are not nullptrs. |
480 | FirstInst = FirstInstIt; |
481 | LastInst = LastInstIt; |
482 | |
483 | // Add the basic blocks contained in the set into the global value numbering. |
484 | DenseSet<BasicBlock *> BBSet; |
485 | getBasicBlocks(BBSet); |
486 | for (BasicBlock *BB : BBSet) { |
487 | if (ValueToNumber.contains(Val: BB)) |
488 | continue; |
489 | |
490 | ValueToNumber.try_emplace(Key: BB, Args&: LocalValNumber); |
491 | NumberToValue.try_emplace(Key: LocalValNumber, Args&: BB); |
492 | LocalValNumber++; |
493 | } |
494 | } |
495 | |
496 | bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, |
497 | const IRSimilarityCandidate &B) { |
498 | if (A.getLength() != B.getLength()) |
499 | return false; |
500 | |
501 | auto InstrDataForBoth = |
502 | zip(t: make_range(x: A.begin(), y: A.end()), u: make_range(x: B.begin(), y: B.end())); |
503 | |
504 | return all_of(Range&: InstrDataForBoth, |
505 | P: [](std::tuple<IRInstructionData &, IRInstructionData &> R) { |
506 | IRInstructionData &A = std::get<0>(t&: R); |
507 | IRInstructionData &B = std::get<1>(t&: R); |
508 | if (!A.Legal || !B.Legal) |
509 | return false; |
510 | return isClose(A, B); |
511 | }); |
512 | } |
513 | |
514 | /// Determine if one or more of the assigned global value numbers for the |
515 | /// operands in \p TargetValueNumbers is in the current mapping set for operand |
516 | /// numbers in \p SourceOperands. The set of possible corresponding global |
517 | /// value numbers are replaced with the most recent version of compatible |
518 | /// values. |
519 | /// |
520 | /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global |
521 | /// value number for the source IRInstructionCandidate. |
522 | /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source |
523 | /// IRSimilarityCandidate global value numbers to a set of possible numbers in |
524 | /// the target. |
525 | /// \param [in] SourceOperands - The operands in the original |
526 | /// IRSimilarityCandidate in the current instruction. |
527 | /// \param [in] TargetValueNumbers - The global value numbers of the operands in |
528 | /// the corresponding Instruction in the other IRSimilarityCandidate. |
529 | /// \returns true if there exists a possible mapping between the source |
530 | /// Instruction operands and the target Instruction operands, and false if not. |
531 | static bool checkNumberingAndReplaceCommutative( |
532 | const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, |
533 | DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, |
534 | ArrayRef<Value *> &SourceOperands, |
535 | DenseSet<unsigned> &TargetValueNumbers){ |
536 | |
537 | DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; |
538 | |
539 | unsigned ArgVal; |
540 | bool WasInserted; |
541 | |
542 | // Iterate over the operands in the source IRSimilarityCandidate to determine |
543 | // whether there exists an operand in the other IRSimilarityCandidate that |
544 | // creates a valid mapping of Value to Value between the |
545 | // IRSimilarityCaniddates. |
546 | for (Value *V : SourceOperands) { |
547 | ArgVal = SourceValueToNumberMapping.find(Val: V)->second; |
548 | |
549 | // Instead of finding a current mapping, we attempt to insert a set. |
550 | std::tie(args&: ValueMappingIt, args&: WasInserted) = CurrentSrcTgtNumberMapping.insert( |
551 | KV: std::make_pair(x&: ArgVal, y&: TargetValueNumbers)); |
552 | |
553 | // We need to iterate over the items in other IRSimilarityCandidate's |
554 | // Instruction to determine whether there is a valid mapping of |
555 | // Value to Value. |
556 | DenseSet<unsigned> NewSet; |
557 | for (unsigned &Curr : ValueMappingIt->second) |
558 | // If we can find the value in the mapping, we add it to the new set. |
559 | if (TargetValueNumbers.contains(V: Curr)) |
560 | NewSet.insert(V: Curr); |
561 | |
562 | // If we could not find a Value, return 0. |
563 | if (NewSet.empty()) |
564 | return false; |
565 | |
566 | // Otherwise replace the old mapping with the newly constructed one. |
567 | if (NewSet.size() != ValueMappingIt->second.size()) |
568 | ValueMappingIt->second.swap(RHS&: NewSet); |
569 | |
570 | // We have reached no conclusions about the mapping, and cannot remove |
571 | // any items from the other operands, so we move to check the next operand. |
572 | if (ValueMappingIt->second.size() != 1) |
573 | continue; |
574 | |
575 | unsigned ValToRemove = *ValueMappingIt->second.begin(); |
576 | // When there is only one item left in the mapping for and operand, remove |
577 | // the value from the other operands. If it results in there being no |
578 | // mapping, return false, it means the mapping is wrong |
579 | for (Value *InnerV : SourceOperands) { |
580 | if (V == InnerV) |
581 | continue; |
582 | |
583 | unsigned InnerVal = SourceValueToNumberMapping.find(Val: InnerV)->second; |
584 | ValueMappingIt = CurrentSrcTgtNumberMapping.find(Val: InnerVal); |
585 | if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) |
586 | continue; |
587 | |
588 | ValueMappingIt->second.erase(V: ValToRemove); |
589 | if (ValueMappingIt->second.empty()) |
590 | return false; |
591 | } |
592 | } |
593 | |
594 | return true; |
595 | } |
596 | |
597 | /// Determine if operand number \p TargetArgVal is in the current mapping set |
598 | /// for operand number \p SourceArgVal. |
599 | /// |
600 | /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global |
601 | /// value numbers from source IRSimilarityCandidate to target |
602 | /// IRSimilarityCandidate. |
603 | /// \param [in] SourceArgVal The global value number for an operand in the |
604 | /// in the original candidate. |
605 | /// \param [in] TargetArgVal The global value number for the corresponding |
606 | /// operand in the other candidate. |
607 | /// \returns True if there exists a mapping and false if not. |
608 | bool checkNumberingAndReplace( |
609 | DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, |
610 | unsigned SourceArgVal, unsigned TargetArgVal) { |
611 | // We are given two unsigned integers representing the global values of |
612 | // the operands in different IRSimilarityCandidates and a current mapping |
613 | // between the two. |
614 | // |
615 | // Source Operand GVN: 1 |
616 | // Target Operand GVN: 2 |
617 | // CurrentMapping: {1: {1, 2}} |
618 | // |
619 | // Since we have mapping, and the target operand is contained in the set, we |
620 | // update it to: |
621 | // CurrentMapping: {1: {2}} |
622 | // and can return true. But, if the mapping was |
623 | // CurrentMapping: {1: {3}} |
624 | // we would return false. |
625 | |
626 | bool WasInserted; |
627 | DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; |
628 | |
629 | std::tie(args&: Val, args&: WasInserted) = CurrentSrcTgtNumberMapping.insert( |
630 | KV: std::make_pair(x&: SourceArgVal, y: DenseSet<unsigned>({TargetArgVal}))); |
631 | |
632 | // If we created a new mapping, then we are done. |
633 | if (WasInserted) |
634 | return true; |
635 | |
636 | // If there is more than one option in the mapping set, and the target value |
637 | // is included in the mapping set replace that set with one that only includes |
638 | // the target value, as it is the only valid mapping via the non commutative |
639 | // instruction. |
640 | |
641 | DenseSet<unsigned> &TargetSet = Val->second; |
642 | if (TargetSet.size() > 1 && TargetSet.contains(V: TargetArgVal)) { |
643 | TargetSet.clear(); |
644 | TargetSet.insert(V: TargetArgVal); |
645 | return true; |
646 | } |
647 | |
648 | // Return true if we can find the value in the set. |
649 | return TargetSet.contains(V: TargetArgVal); |
650 | } |
651 | |
652 | bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( |
653 | OperandMapping A, OperandMapping B) { |
654 | // Iterators to keep track of where we are in the operands for each |
655 | // Instruction. |
656 | ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); |
657 | ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); |
658 | unsigned OperandLength = A.OperVals.size(); |
659 | |
660 | // For each operand, get the value numbering and ensure it is consistent. |
661 | for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { |
662 | unsigned OperValA = A.IRSC.ValueToNumber.find(Val: *VItA)->second; |
663 | unsigned OperValB = B.IRSC.ValueToNumber.find(Val: *VItB)->second; |
664 | |
665 | // Attempt to add a set with only the target value. If there is no mapping |
666 | // we can create it here. |
667 | // |
668 | // For an instruction like a subtraction: |
669 | // IRSimilarityCandidateA: IRSimilarityCandidateB: |
670 | // %resultA = sub %a, %b %resultB = sub %d, %e |
671 | // |
672 | // We map %a -> %d and %b -> %e. |
673 | // |
674 | // And check to see whether their mapping is consistent in |
675 | // checkNumberingAndReplace. |
676 | |
677 | if (!checkNumberingAndReplace(CurrentSrcTgtNumberMapping&: A.ValueNumberMapping, SourceArgVal: OperValA, TargetArgVal: OperValB)) |
678 | return false; |
679 | |
680 | if (!checkNumberingAndReplace(CurrentSrcTgtNumberMapping&: B.ValueNumberMapping, SourceArgVal: OperValB, TargetArgVal: OperValA)) |
681 | return false; |
682 | } |
683 | return true; |
684 | } |
685 | |
686 | bool IRSimilarityCandidate::compareCommutativeOperandMapping( |
687 | OperandMapping A, OperandMapping B) { |
688 | DenseSet<unsigned> ValueNumbersA; |
689 | DenseSet<unsigned> ValueNumbersB; |
690 | |
691 | ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); |
692 | ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); |
693 | unsigned OperandLength = A.OperVals.size(); |
694 | |
695 | // Find the value number sets for the operands. |
696 | for (unsigned Idx = 0; Idx < OperandLength; |
697 | Idx++, VItA++, VItB++) { |
698 | ValueNumbersA.insert(V: A.IRSC.ValueToNumber.find(Val: *VItA)->second); |
699 | ValueNumbersB.insert(V: B.IRSC.ValueToNumber.find(Val: *VItB)->second); |
700 | } |
701 | |
702 | // Iterate over the operands in the first IRSimilarityCandidate and make sure |
703 | // there exists a possible mapping with the operands in the second |
704 | // IRSimilarityCandidate. |
705 | if (!checkNumberingAndReplaceCommutative(SourceValueToNumberMapping: A.IRSC.ValueToNumber, |
706 | CurrentSrcTgtNumberMapping&: A.ValueNumberMapping, SourceOperands&: A.OperVals, |
707 | TargetValueNumbers&: ValueNumbersB)) |
708 | return false; |
709 | |
710 | // Iterate over the operands in the second IRSimilarityCandidate and make sure |
711 | // there exists a possible mapping with the operands in the first |
712 | // IRSimilarityCandidate. |
713 | if (!checkNumberingAndReplaceCommutative(SourceValueToNumberMapping: B.IRSC.ValueToNumber, |
714 | CurrentSrcTgtNumberMapping&: B.ValueNumberMapping, SourceOperands&: B.OperVals, |
715 | TargetValueNumbers&: ValueNumbersA)) |
716 | return false; |
717 | |
718 | return true; |
719 | } |
720 | |
721 | bool IRSimilarityCandidate::compareAssignmentMapping( |
722 | const unsigned InstValA, const unsigned &InstValB, |
723 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, |
724 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { |
725 | DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; |
726 | bool WasInserted; |
727 | std::tie(args&: ValueMappingIt, args&: WasInserted) = ValueNumberMappingA.insert( |
728 | KV: std::make_pair(x: InstValA, y: DenseSet<unsigned>({InstValB}))); |
729 | if (!WasInserted && !ValueMappingIt->second.contains(V: InstValB)) |
730 | return false; |
731 | else if (ValueMappingIt->second.size() != 1) { |
732 | for (unsigned OtherVal : ValueMappingIt->second) { |
733 | if (OtherVal == InstValB) |
734 | continue; |
735 | if (!ValueNumberMappingA.contains(Val: OtherVal)) |
736 | continue; |
737 | if (!ValueNumberMappingA[OtherVal].contains(V: InstValA)) |
738 | continue; |
739 | ValueNumberMappingA[OtherVal].erase(V: InstValA); |
740 | } |
741 | ValueNumberMappingA.erase(I: ValueMappingIt); |
742 | std::tie(args&: ValueMappingIt, args&: WasInserted) = ValueNumberMappingA.insert( |
743 | KV: std::make_pair(x: InstValA, y: DenseSet<unsigned>({InstValB}))); |
744 | } |
745 | |
746 | return true; |
747 | } |
748 | |
749 | bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A, |
750 | RelativeLocMapping B) { |
751 | // Get the basic blocks the label refers to. |
752 | BasicBlock *ABB = cast<BasicBlock>(Val: A.OperVal); |
753 | BasicBlock *BBB = cast<BasicBlock>(Val: B.OperVal); |
754 | |
755 | // Get the basic blocks contained in each region. |
756 | DenseSet<BasicBlock *> BasicBlockA; |
757 | DenseSet<BasicBlock *> BasicBlockB; |
758 | A.IRSC.getBasicBlocks(BBSet&: BasicBlockA); |
759 | B.IRSC.getBasicBlocks(BBSet&: BasicBlockB); |
760 | |
761 | // Determine if the block is contained in the region. |
762 | bool AContained = BasicBlockA.contains(V: ABB); |
763 | bool BContained = BasicBlockB.contains(V: BBB); |
764 | |
765 | // Both blocks need to be contained in the region, or both need to be outside |
766 | // the region. |
767 | if (AContained != BContained) |
768 | return false; |
769 | |
770 | // If both are contained, then we need to make sure that the relative |
771 | // distance to the target blocks are the same. |
772 | if (AContained) |
773 | return A.RelativeLocation == B.RelativeLocation; |
774 | return true; |
775 | } |
776 | |
777 | bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, |
778 | const IRSimilarityCandidate &B) { |
779 | DenseMap<unsigned, DenseSet<unsigned>> MappingA; |
780 | DenseMap<unsigned, DenseSet<unsigned>> MappingB; |
781 | return IRSimilarityCandidate::compareStructure(A, B, ValueNumberMappingA&: MappingA, ValueNumberMappingB&: MappingB); |
782 | } |
783 | |
784 | typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &, |
785 | SmallVector<int, 4> &, ArrayRef<Value *> &, |
786 | ArrayRef<Value *> &> |
787 | ZippedRelativeLocationsT; |
788 | |
789 | bool IRSimilarityCandidate::compareStructure( |
790 | const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, |
791 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, |
792 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { |
793 | if (A.getLength() != B.getLength()) |
794 | return false; |
795 | |
796 | if (A.ValueToNumber.size() != B.ValueToNumber.size()) |
797 | return false; |
798 | |
799 | iterator ItA = A.begin(); |
800 | iterator ItB = B.begin(); |
801 | |
802 | // These ValueNumber Mapping sets create a create a mapping between the values |
803 | // in one candidate to values in the other candidate. If we create a set with |
804 | // one element, and that same element maps to the original element in the |
805 | // candidate we have a good mapping. |
806 | |
807 | // Iterate over the instructions contained in each candidate |
808 | unsigned SectionLength = A.getStartIdx() + A.getLength(); |
809 | for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; |
810 | ItA++, ItB++, Loc++) { |
811 | // Make sure the instructions are similar to one another. |
812 | if (!isClose(A: *ItA, B: *ItB)) |
813 | return false; |
814 | |
815 | Instruction *IA = ItA->Inst; |
816 | Instruction *IB = ItB->Inst; |
817 | |
818 | if (!ItA->Legal || !ItB->Legal) |
819 | return false; |
820 | |
821 | // Get the operand sets for the instructions. |
822 | ArrayRef<Value *> OperValsA = ItA->OperVals; |
823 | ArrayRef<Value *> OperValsB = ItB->OperVals; |
824 | |
825 | unsigned InstValA = A.ValueToNumber.find(Val: IA)->second; |
826 | unsigned InstValB = B.ValueToNumber.find(Val: IB)->second; |
827 | |
828 | // Ensure that the mappings for the instructions exists. |
829 | if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA, |
830 | ValueNumberMappingB)) |
831 | return false; |
832 | |
833 | if (!compareAssignmentMapping(InstValA: InstValB, InstValB: InstValA, ValueNumberMappingA&: ValueNumberMappingB, |
834 | ValueNumberMappingB&: ValueNumberMappingA)) |
835 | return false; |
836 | |
837 | // We have different paths for commutative instructions and non-commutative |
838 | // instructions since commutative instructions could allow multiple mappings |
839 | // to certain values. |
840 | if (IA->isCommutative() && !isa<FPMathOperator>(Val: IA) && |
841 | !isa<IntrinsicInst>(Val: IA)) { |
842 | if (!compareCommutativeOperandMapping( |
843 | A: {.IRSC: A, .OperVals: OperValsA, .ValueNumberMapping: ValueNumberMappingA}, |
844 | B: {.IRSC: B, .OperVals: OperValsB, .ValueNumberMapping: ValueNumberMappingB})) |
845 | return false; |
846 | continue; |
847 | } |
848 | |
849 | // Handle the non-commutative cases. |
850 | if (!compareNonCommutativeOperandMapping( |
851 | A: {.IRSC: A, .OperVals: OperValsA, .ValueNumberMapping: ValueNumberMappingA}, |
852 | B: {.IRSC: B, .OperVals: OperValsB, .ValueNumberMapping: ValueNumberMappingB})) |
853 | return false; |
854 | |
855 | // Here we check that between two corresponding instructions, |
856 | // when referring to a basic block in the same region, the |
857 | // relative locations are the same. And, that the instructions refer to |
858 | // basic blocks outside the region in the same corresponding locations. |
859 | |
860 | // We are able to make the assumption about blocks outside of the region |
861 | // since the target block labels are considered values and will follow the |
862 | // same number matching that we defined for the other instructions in the |
863 | // region. So, at this point, in each location we target a specific block |
864 | // outside the region, we are targeting a corresponding block in each |
865 | // analagous location in the region we are comparing to. |
866 | if (!(isa<BranchInst>(Val: IA) && isa<BranchInst>(Val: IB)) && |
867 | !(isa<PHINode>(Val: IA) && isa<PHINode>(Val: IB))) |
868 | continue; |
869 | |
870 | SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations; |
871 | SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations; |
872 | ArrayRef<Value *> ABL = ItA->getBlockOperVals(); |
873 | ArrayRef<Value *> BBL = ItB->getBlockOperVals(); |
874 | |
875 | // Check to make sure that the number of operands, and branching locations |
876 | // between BranchInsts is the same. |
877 | if (RelBlockLocsA.size() != RelBlockLocsB.size() && |
878 | ABL.size() != BBL.size()) |
879 | return false; |
880 | |
881 | assert(RelBlockLocsA.size() == ABL.size() && |
882 | "Block information vectors not the same size." ); |
883 | assert(RelBlockLocsB.size() == BBL.size() && |
884 | "Block information vectors not the same size." ); |
885 | |
886 | ZippedRelativeLocationsT ZippedRelativeLocations = |
887 | zip(t&: RelBlockLocsA, u&: RelBlockLocsB, args&: ABL, args&: BBL); |
888 | if (any_of(Range&: ZippedRelativeLocations, |
889 | P: [&A, &B](std::tuple<int, int, Value *, Value *> R) { |
890 | return !checkRelativeLocations( |
891 | A: {.IRSC: A, .RelativeLocation: std::get<0>(t&: R), .OperVal: std::get<2>(t&: R)}, |
892 | B: {.IRSC: B, .RelativeLocation: std::get<1>(t&: R), .OperVal: std::get<3>(t&: R)}); |
893 | })) |
894 | return false; |
895 | } |
896 | return true; |
897 | } |
898 | |
899 | bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, |
900 | const IRSimilarityCandidate &B) { |
901 | auto DoesOverlap = [](const IRSimilarityCandidate &X, |
902 | const IRSimilarityCandidate &Y) { |
903 | // Check: |
904 | // XXXXXX X starts before Y ends |
905 | // YYYYYYY Y starts after X starts |
906 | return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; |
907 | }; |
908 | |
909 | return DoesOverlap(A, B) || DoesOverlap(B, A); |
910 | } |
911 | |
912 | void IRSimilarityIdentifier::populateMapper( |
913 | Module &M, std::vector<IRInstructionData *> &InstrList, |
914 | std::vector<unsigned> &IntegerMapping) { |
915 | |
916 | std::vector<IRInstructionData *> InstrListForModule; |
917 | std::vector<unsigned> IntegerMappingForModule; |
918 | // Iterate over the functions in the module to map each Instruction in each |
919 | // BasicBlock to an unsigned integer. |
920 | Mapper.initializeForBBs(M); |
921 | |
922 | for (Function &F : M) { |
923 | |
924 | if (F.empty()) |
925 | continue; |
926 | |
927 | for (BasicBlock &BB : F) { |
928 | |
929 | // BB has potential to have similarity since it has a size greater than 2 |
930 | // and can therefore match other regions greater than 2. Map it to a list |
931 | // of unsigned integers. |
932 | Mapper.convertToUnsignedVec(BB, InstrList&: InstrListForModule, |
933 | IntegerMapping&: IntegerMappingForModule); |
934 | } |
935 | |
936 | BasicBlock::iterator It = F.begin()->end(); |
937 | Mapper.mapToIllegalUnsigned(It, IntegerMappingForBB&: IntegerMappingForModule, InstrListForBB&: InstrListForModule, |
938 | End: true); |
939 | if (InstrListForModule.size() > 0) |
940 | Mapper.IDL->push_back(Node&: *InstrListForModule.back()); |
941 | } |
942 | |
943 | // Insert the InstrListForModule at the end of the overall InstrList so that |
944 | // we can have a long InstrList for the entire set of Modules being analyzed. |
945 | llvm::append_range(C&: InstrList, R&: InstrListForModule); |
946 | // Do the same as above, but for IntegerMapping. |
947 | llvm::append_range(C&: IntegerMapping, R&: IntegerMappingForModule); |
948 | } |
949 | |
950 | void IRSimilarityIdentifier::populateMapper( |
951 | ArrayRef<std::unique_ptr<Module>> &Modules, |
952 | std::vector<IRInstructionData *> &InstrList, |
953 | std::vector<unsigned> &IntegerMapping) { |
954 | |
955 | // Iterate over, and map the instructions in each module. |
956 | for (const std::unique_ptr<Module> &M : Modules) |
957 | populateMapper(M&: *M, InstrList, IntegerMapping); |
958 | } |
959 | |
960 | /// From a repeated subsequence, find all the different instances of the |
961 | /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from |
962 | /// the IRInstructionData in subsequence. |
963 | /// |
964 | /// \param [in] Mapper - The instruction mapper for basic correctness checks. |
965 | /// \param [in] InstrList - The vector that holds the instruction data. |
966 | /// \param [in] IntegerMapping - The vector that holds the mapped integers. |
967 | /// \param [out] CandsForRepSubstring - The vector to store the generated |
968 | /// IRSimilarityCandidates. |
969 | static void createCandidatesFromSuffixTree( |
970 | const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList, |
971 | std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, |
972 | std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { |
973 | |
974 | unsigned StringLen = RS.Length; |
975 | if (StringLen < 2) |
976 | return; |
977 | |
978 | // Create an IRSimilarityCandidate for instance of this subsequence \p RS. |
979 | for (const unsigned &StartIdx : RS.StartIndices) { |
980 | unsigned EndIdx = StartIdx + StringLen - 1; |
981 | |
982 | // Check that this subsequence does not contain an illegal instruction. |
983 | bool ContainsIllegal = false; |
984 | for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { |
985 | unsigned Key = IntegerMapping[CurrIdx]; |
986 | if (Key > Mapper.IllegalInstrNumber) { |
987 | ContainsIllegal = true; |
988 | break; |
989 | } |
990 | } |
991 | |
992 | // If we have an illegal instruction, we should not create an |
993 | // IRSimilarityCandidate for this region. |
994 | if (ContainsIllegal) |
995 | continue; |
996 | |
997 | // We are getting iterators to the instructions in this region of code |
998 | // by advancing the start and end indices from the start of the |
999 | // InstrList. |
1000 | std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); |
1001 | std::advance(i&: StartIt, n: StartIdx); |
1002 | std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); |
1003 | std::advance(i&: EndIt, n: EndIdx); |
1004 | |
1005 | CandsForRepSubstring.emplace_back(args: StartIdx, args&: StringLen, args&: *StartIt, args&: *EndIt); |
1006 | } |
1007 | } |
1008 | |
1009 | void IRSimilarityCandidate::createCanonicalRelationFrom( |
1010 | IRSimilarityCandidate &SourceCand, |
1011 | DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping, |
1012 | DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) { |
1013 | assert(SourceCand.CanonNumToNumber.size() != 0 && |
1014 | "Base canonical relationship is empty!" ); |
1015 | assert(SourceCand.NumberToCanonNum.size() != 0 && |
1016 | "Base canonical relationship is empty!" ); |
1017 | |
1018 | assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty" ); |
1019 | assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty" ); |
1020 | |
1021 | DenseSet<unsigned> UsedGVNs; |
1022 | // Iterate over the mappings provided from this candidate to SourceCand. We |
1023 | // are then able to map the GVN in this candidate to the same canonical number |
1024 | // given to the corresponding GVN in SourceCand. |
1025 | for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) { |
1026 | unsigned SourceGVN = GVNMapping.first; |
1027 | |
1028 | assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!" ); |
1029 | |
1030 | unsigned ResultGVN; |
1031 | // We need special handling if we have more than one potential value. This |
1032 | // means that there are at least two GVNs that could correspond to this GVN. |
1033 | // This could lead to potential swapping later on, so we make a decision |
1034 | // here to ensure a one-to-one mapping. |
1035 | if (GVNMapping.second.size() > 1) { |
1036 | bool Found = false; |
1037 | for (unsigned Val : GVNMapping.second) { |
1038 | // We make sure the target value number hasn't already been reserved. |
1039 | if (UsedGVNs.contains(V: Val)) |
1040 | continue; |
1041 | |
1042 | // We make sure that the opposite mapping is still consistent. |
1043 | DenseMap<unsigned, DenseSet<unsigned>>::iterator It = |
1044 | FromSourceMapping.find(Val); |
1045 | |
1046 | if (!It->second.contains(V: SourceGVN)) |
1047 | continue; |
1048 | |
1049 | // We pick the first item that satisfies these conditions. |
1050 | Found = true; |
1051 | ResultGVN = Val; |
1052 | break; |
1053 | } |
1054 | |
1055 | assert(Found && "Could not find matching value for source GVN" ); |
1056 | (void)Found; |
1057 | |
1058 | } else |
1059 | ResultGVN = *GVNMapping.second.begin(); |
1060 | |
1061 | // Whatever GVN is found, we mark it as used. |
1062 | UsedGVNs.insert(V: ResultGVN); |
1063 | |
1064 | unsigned CanonNum = *SourceCand.getCanonicalNum(N: ResultGVN); |
1065 | CanonNumToNumber.insert(KV: std::make_pair(x&: CanonNum, y&: SourceGVN)); |
1066 | NumberToCanonNum.insert(KV: std::make_pair(x&: SourceGVN, y&: CanonNum)); |
1067 | } |
1068 | |
1069 | DenseSet<BasicBlock *> BBSet; |
1070 | getBasicBlocks(BBSet); |
1071 | // Find canonical numbers for the BasicBlocks in the current candidate. |
1072 | // This is done by finding the corresponding value for the first instruction |
1073 | // in the block in the current candidate, finding the matching value in the |
1074 | // source candidate. Then by finding the parent of this value, use the |
1075 | // canonical number of the block in the source candidate for the canonical |
1076 | // number in the current candidate. |
1077 | for (BasicBlock *BB : BBSet) { |
1078 | unsigned BBGVNForCurrCand = ValueToNumber.find(Val: BB)->second; |
1079 | |
1080 | // We can skip the BasicBlock if the canonical numbering has already been |
1081 | // found in a separate instruction. |
1082 | if (NumberToCanonNum.contains(Val: BBGVNForCurrCand)) |
1083 | continue; |
1084 | |
1085 | // If the basic block is the starting block, then the shared instruction may |
1086 | // not be the first instruction in the block, it will be the first |
1087 | // instruction in the similarity region. |
1088 | Value *FirstOutlineInst = BB == getStartBB() |
1089 | ? frontInstruction() |
1090 | : &*BB->instructionsWithoutDebug().begin(); |
1091 | |
1092 | unsigned FirstInstGVN = *getGVN(V: FirstOutlineInst); |
1093 | unsigned FirstInstCanonNum = *getCanonicalNum(N: FirstInstGVN); |
1094 | unsigned SourceGVN = *SourceCand.fromCanonicalNum(N: FirstInstCanonNum); |
1095 | Value *SourceV = *SourceCand.fromGVN(Num: SourceGVN); |
1096 | BasicBlock *SourceBB = cast<Instruction>(Val: SourceV)->getParent(); |
1097 | unsigned SourceBBGVN = *SourceCand.getGVN(V: SourceBB); |
1098 | unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(N: SourceBBGVN); |
1099 | CanonNumToNumber.insert(KV: std::make_pair(x&: SourceCanonBBGVN, y&: BBGVNForCurrCand)); |
1100 | NumberToCanonNum.insert(KV: std::make_pair(x&: BBGVNForCurrCand, y&: SourceCanonBBGVN)); |
1101 | } |
1102 | } |
1103 | |
1104 | void IRSimilarityCandidate::createCanonicalRelationFrom( |
1105 | IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge, |
1106 | IRSimilarityCandidate &TargetCandLarge) { |
1107 | assert(!SourceCand.CanonNumToNumber.empty() && |
1108 | "Canonical Relationship is non-empty" ); |
1109 | assert(!SourceCand.NumberToCanonNum.empty() && |
1110 | "Canonical Relationship is non-empty" ); |
1111 | |
1112 | assert(!SourceCandLarge.CanonNumToNumber.empty() && |
1113 | "Canonical Relationship is non-empty" ); |
1114 | assert(!SourceCandLarge.NumberToCanonNum.empty() && |
1115 | "Canonical Relationship is non-empty" ); |
1116 | |
1117 | assert(!TargetCandLarge.CanonNumToNumber.empty() && |
1118 | "Canonical Relationship is non-empty" ); |
1119 | assert(!TargetCandLarge.NumberToCanonNum.empty() && |
1120 | "Canonical Relationship is non-empty" ); |
1121 | |
1122 | assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty" ); |
1123 | assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty" ); |
1124 | |
1125 | // We're going to use the larger candidates as a "bridge" to create the |
1126 | // canonical number for the target candidate since we have idetified two |
1127 | // candidates as subsequences of larger sequences, and therefore must be |
1128 | // structurally similar. |
1129 | for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) { |
1130 | Value *CurrVal = ValueNumPair.first; |
1131 | unsigned TargetCandGVN = ValueNumPair.second; |
1132 | |
1133 | // Find the numbering in the large candidate that surrounds the |
1134 | // current candidate. |
1135 | std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(V: CurrVal); |
1136 | assert(OLargeTargetGVN.has_value() && "GVN not found for Value" ); |
1137 | |
1138 | // Get the canonical numbering in the large target candidate. |
1139 | std::optional<unsigned> OTargetCandCanon = |
1140 | TargetCandLarge.getCanonicalNum(N: OLargeTargetGVN.value()); |
1141 | assert(OTargetCandCanon.has_value() && |
1142 | "Canononical Number not found for GVN" ); |
1143 | |
1144 | // Get the GVN in the large source candidate from the canonical numbering. |
1145 | std::optional<unsigned> OLargeSourceGVN = |
1146 | SourceCandLarge.fromCanonicalNum(N: OTargetCandCanon.value()); |
1147 | assert(OLargeSourceGVN.has_value() && |
1148 | "GVN Number not found for Canonical Number" ); |
1149 | |
1150 | // Get the Value from the GVN in the large source candidate. |
1151 | std::optional<Value *> OLargeSourceV = |
1152 | SourceCandLarge.fromGVN(Num: OLargeSourceGVN.value()); |
1153 | assert(OLargeSourceV.has_value() && "Value not found for GVN" ); |
1154 | |
1155 | // Get the GVN number for the Value in the source candidate. |
1156 | std::optional<unsigned> OSourceGVN = |
1157 | SourceCand.getGVN(V: OLargeSourceV.value()); |
1158 | assert(OSourceGVN.has_value() && "GVN Number not found for Value" ); |
1159 | |
1160 | // Get the canonical numbering from the GVN/ |
1161 | std::optional<unsigned> OSourceCanon = |
1162 | SourceCand.getCanonicalNum(N: OSourceGVN.value()); |
1163 | assert(OSourceCanon.has_value() && "Canon Number not found for GVN" ); |
1164 | |
1165 | // Insert the canonical numbering and GVN pair into their respective |
1166 | // mappings. |
1167 | CanonNumToNumber.insert( |
1168 | KV: std::make_pair(x&: OSourceCanon.value(), y&: TargetCandGVN)); |
1169 | NumberToCanonNum.insert( |
1170 | KV: std::make_pair(x&: TargetCandGVN, y&: OSourceCanon.value())); |
1171 | } |
1172 | } |
1173 | |
1174 | void IRSimilarityCandidate::createCanonicalMappingFor( |
1175 | IRSimilarityCandidate &CurrCand) { |
1176 | assert(CurrCand.CanonNumToNumber.size() == 0 && |
1177 | "Canonical Relationship is non-empty" ); |
1178 | assert(CurrCand.NumberToCanonNum.size() == 0 && |
1179 | "Canonical Relationship is non-empty" ); |
1180 | |
1181 | unsigned CanonNum = 0; |
1182 | // Iterate over the value numbers found, the order does not matter in this |
1183 | // case. |
1184 | for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) { |
1185 | CurrCand.NumberToCanonNum.insert(KV: std::make_pair(x&: NumToVal.first, y&: CanonNum)); |
1186 | CurrCand.CanonNumToNumber.insert(KV: std::make_pair(x&: CanonNum, y&: NumToVal.first)); |
1187 | CanonNum++; |
1188 | } |
1189 | } |
1190 | |
1191 | /// Look for larger IRSimilarityCandidates From the previously matched |
1192 | /// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is |
1193 | /// an overlap, return a pair of structurally similar, larger |
1194 | /// IRSimilarityCandidates. |
1195 | /// |
1196 | /// \param [in] CandA - The first candidate we are trying to determine the |
1197 | /// structure of. |
1198 | /// \param [in] CandB - The second candidate we are trying to determine the |
1199 | /// structure of. |
1200 | /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in |
1201 | /// a circuit to the IRSimilarityCandidates that include this instruction. |
1202 | /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a |
1203 | /// number representing the structural group assigned to it. |
1204 | static std::optional< |
1205 | std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> |
1206 | CheckLargerCands( |
1207 | IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, |
1208 | DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand, |
1209 | DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) { |
1210 | DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA; |
1211 | DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB; |
1212 | DenseSet<unsigned> IncludedGroupsA; |
1213 | DenseSet<unsigned> IncludedGroupsB; |
1214 | |
1215 | // Find the overall similarity group numbers that fully contain the candidate, |
1216 | // and record the larger candidate for each group. |
1217 | auto IdxToCandidateIt = IndexToIncludedCand.find(Val: CandA.getStartIdx()); |
1218 | std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> |
1219 | Result; |
1220 | |
1221 | unsigned CandAStart = CandA.getStartIdx(); |
1222 | unsigned CandAEnd = CandA.getEndIdx(); |
1223 | unsigned CandBStart = CandB.getStartIdx(); |
1224 | unsigned CandBEnd = CandB.getEndIdx(); |
1225 | if (IdxToCandidateIt == IndexToIncludedCand.end()) |
1226 | return Result; |
1227 | for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { |
1228 | if (MatchedCand->getStartIdx() > CandAStart || |
1229 | (MatchedCand->getEndIdx() < CandAEnd)) |
1230 | continue; |
1231 | unsigned GroupNum = CandToGroup.find(Val: MatchedCand)->second; |
1232 | IncludedGroupAndCandA.insert(KV: std::make_pair(x&: GroupNum, y&: MatchedCand)); |
1233 | IncludedGroupsA.insert(V: GroupNum); |
1234 | } |
1235 | |
1236 | // Find the overall similarity group numbers that fully contain the next |
1237 | // candidate, and record the larger candidate for each group. |
1238 | IdxToCandidateIt = IndexToIncludedCand.find(Val: CandBStart); |
1239 | if (IdxToCandidateIt == IndexToIncludedCand.end()) |
1240 | return Result; |
1241 | for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { |
1242 | if (MatchedCand->getStartIdx() > CandBStart || |
1243 | MatchedCand->getEndIdx() < CandBEnd) |
1244 | continue; |
1245 | unsigned GroupNum = CandToGroup.find(Val: MatchedCand)->second; |
1246 | IncludedGroupAndCandB.insert(KV: std::make_pair(x&: GroupNum, y&: MatchedCand)); |
1247 | IncludedGroupsB.insert(V: GroupNum); |
1248 | } |
1249 | |
1250 | // Find the intersection between the two groups, these are the groups where |
1251 | // the larger candidates exist. |
1252 | set_intersect(S1&: IncludedGroupsA, S2: IncludedGroupsB); |
1253 | |
1254 | // If there is no intersection between the sets, then we cannot determine |
1255 | // whether or not there is a match. |
1256 | if (IncludedGroupsA.empty()) |
1257 | return Result; |
1258 | |
1259 | // Create a pair that contains the larger candidates. |
1260 | auto ItA = IncludedGroupAndCandA.find(Val: *IncludedGroupsA.begin()); |
1261 | auto ItB = IncludedGroupAndCandB.find(Val: *IncludedGroupsA.begin()); |
1262 | Result = std::make_pair(x&: ItA->second, y&: ItB->second); |
1263 | return Result; |
1264 | } |
1265 | |
1266 | /// From the list of IRSimilarityCandidates, perform a comparison between each |
1267 | /// IRSimilarityCandidate to determine if there are overlapping |
1268 | /// IRInstructionData, or if they do not have the same structure. |
1269 | /// |
1270 | /// \param [in] CandsForRepSubstring - The vector containing the |
1271 | /// IRSimilarityCandidates. |
1272 | /// \param [out] StructuralGroups - the mapping of unsigned integers to vector |
1273 | /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the |
1274 | /// vector are structurally similar to one another. |
1275 | /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in |
1276 | /// a circuit to the IRSimilarityCandidates that include this instruction. |
1277 | /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a |
1278 | /// number representing the structural group assigned to it. |
1279 | static void findCandidateStructures( |
1280 | std::vector<IRSimilarityCandidate> &CandsForRepSubstring, |
1281 | DenseMap<unsigned, SimilarityGroup> &StructuralGroups, |
1282 | DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand, |
1283 | DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup |
1284 | ) { |
1285 | std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, |
1286 | InnerCandEndIt; |
1287 | |
1288 | // IRSimilarityCandidates each have a structure for operand use. It is |
1289 | // possible that two instances of the same subsequences have different |
1290 | // structure. Each type of structure found is assigned a number. This |
1291 | // DenseMap maps an IRSimilarityCandidate to which type of similarity |
1292 | // discovered it fits within. |
1293 | DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; |
1294 | |
1295 | // Find the compatibility from each candidate to the others to determine |
1296 | // which candidates overlap and which have the same structure by mapping |
1297 | // each structure to a different group. |
1298 | bool SameStructure; |
1299 | bool Inserted; |
1300 | unsigned CurrentGroupNum = 0; |
1301 | unsigned OuterGroupNum; |
1302 | DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; |
1303 | DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; |
1304 | DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; |
1305 | |
1306 | // Iterate over the candidates to determine its structural and overlapping |
1307 | // compatibility with other instructions |
1308 | DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; |
1309 | DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; |
1310 | for (CandIt = CandsForRepSubstring.begin(), |
1311 | CandEndIt = CandsForRepSubstring.end(); |
1312 | CandIt != CandEndIt; CandIt++) { |
1313 | |
1314 | // Determine if it has an assigned structural group already. |
1315 | CandToGroupIt = CandToGroup.find(Val: &*CandIt); |
1316 | if (CandToGroupIt == CandToGroup.end()) { |
1317 | // If not, we assign it one, and add it to our mapping. |
1318 | std::tie(args&: CandToGroupIt, args&: Inserted) = |
1319 | CandToGroup.insert(KV: std::make_pair(x: &*CandIt, y: CurrentGroupNum++)); |
1320 | } |
1321 | |
1322 | // Get the structural group number from the iterator. |
1323 | OuterGroupNum = CandToGroupIt->second; |
1324 | |
1325 | // Check if we already have a list of IRSimilarityCandidates for the current |
1326 | // structural group. Create one if one does not exist. |
1327 | CurrentGroupPair = StructuralGroups.find(Val: OuterGroupNum); |
1328 | if (CurrentGroupPair == StructuralGroups.end()) { |
1329 | IRSimilarityCandidate::createCanonicalMappingFor(CurrCand&: *CandIt); |
1330 | std::tie(args&: CurrentGroupPair, args&: Inserted) = StructuralGroups.insert( |
1331 | KV: std::make_pair(x&: OuterGroupNum, y: SimilarityGroup({*CandIt}))); |
1332 | } |
1333 | |
1334 | // Iterate over the IRSimilarityCandidates following the current |
1335 | // IRSimilarityCandidate in the list to determine whether the two |
1336 | // IRSimilarityCandidates are compatible. This is so we do not repeat pairs |
1337 | // of IRSimilarityCandidates. |
1338 | for (InnerCandIt = std::next(x: CandIt), |
1339 | InnerCandEndIt = CandsForRepSubstring.end(); |
1340 | InnerCandIt != InnerCandEndIt; InnerCandIt++) { |
1341 | |
1342 | // We check if the inner item has a group already, if it does, we skip it. |
1343 | CandToGroupItInner = CandToGroup.find(Val: &*InnerCandIt); |
1344 | if (CandToGroupItInner != CandToGroup.end()) |
1345 | continue; |
1346 | |
1347 | // Check if we have found structural similarity between two candidates |
1348 | // that fully contains the first and second candidates. |
1349 | std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> |
1350 | LargerPair = CheckLargerCands( |
1351 | CandA&: *CandIt, CandB&: *InnerCandIt, IndexToIncludedCand, CandToGroup&: CandToOverallGroup); |
1352 | |
1353 | // If a pair was found, it means that we can assume that these smaller |
1354 | // substrings are also structurally similar. Use the larger candidates to |
1355 | // determine the canonical mapping between the two sections. |
1356 | if (LargerPair.has_value()) { |
1357 | SameStructure = true; |
1358 | InnerCandIt->createCanonicalRelationFrom( |
1359 | SourceCand&: *CandIt, SourceCandLarge&: *LargerPair.value().first, TargetCandLarge&: *LargerPair.value().second); |
1360 | CandToGroup.insert(KV: std::make_pair(x: &*InnerCandIt, y&: OuterGroupNum)); |
1361 | CurrentGroupPair->second.push_back(x: *InnerCandIt); |
1362 | continue; |
1363 | } |
1364 | |
1365 | // Otherwise we determine if they have the same structure and add it to |
1366 | // vector if they match. |
1367 | ValueNumberMappingA.clear(); |
1368 | ValueNumberMappingB.clear(); |
1369 | SameStructure = IRSimilarityCandidate::compareStructure( |
1370 | A: *CandIt, B: *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB); |
1371 | if (!SameStructure) |
1372 | continue; |
1373 | |
1374 | InnerCandIt->createCanonicalRelationFrom(SourceCand&: *CandIt, ToSourceMapping&: ValueNumberMappingA, |
1375 | FromSourceMapping&: ValueNumberMappingB); |
1376 | CandToGroup.insert(KV: std::make_pair(x: &*InnerCandIt, y&: OuterGroupNum)); |
1377 | CurrentGroupPair->second.push_back(x: *InnerCandIt); |
1378 | } |
1379 | } |
1380 | } |
1381 | |
1382 | void IRSimilarityIdentifier::findCandidates( |
1383 | std::vector<IRInstructionData *> &InstrList, |
1384 | std::vector<unsigned> &IntegerMapping) { |
1385 | SuffixTree ST(IntegerMapping); |
1386 | |
1387 | std::vector<IRSimilarityCandidate> CandsForRepSubstring; |
1388 | std::vector<SimilarityGroup> NewCandidateGroups; |
1389 | |
1390 | DenseMap<unsigned, SimilarityGroup> StructuralGroups; |
1391 | DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand; |
1392 | DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; |
1393 | |
1394 | // Iterate over the subsequences found by the Suffix Tree to create |
1395 | // IRSimilarityCandidates for each repeated subsequence and determine which |
1396 | // instances are structurally similar to one another. |
1397 | |
1398 | // Sort the suffix tree from longest substring to shortest. |
1399 | std::vector<SuffixTree::RepeatedSubstring> RSes; |
1400 | for (SuffixTree::RepeatedSubstring &RS : ST) |
1401 | RSes.push_back(x: RS); |
1402 | |
1403 | llvm::stable_sort(Range&: RSes, C: [](const SuffixTree::RepeatedSubstring &LHS, |
1404 | const SuffixTree::RepeatedSubstring &RHS) { |
1405 | return LHS.Length > RHS.Length; |
1406 | }); |
1407 | for (SuffixTree::RepeatedSubstring &RS : RSes) { |
1408 | createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS, |
1409 | CandsForRepSubstring); |
1410 | |
1411 | if (CandsForRepSubstring.size() < 2) |
1412 | continue; |
1413 | |
1414 | findCandidateStructures(CandsForRepSubstring, StructuralGroups, |
1415 | IndexToIncludedCand, CandToOverallGroup&: CandToGroup); |
1416 | for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) { |
1417 | // We only add the group if it contains more than one |
1418 | // IRSimilarityCandidate. If there is only one, that means there is no |
1419 | // other repeated subsequence with the same structure. |
1420 | if (Group.second.size() > 1) { |
1421 | SimilarityCandidates->push_back(x: Group.second); |
1422 | // Iterate over each candidate in the group, and add an entry for each |
1423 | // instruction included with a mapping to a set of |
1424 | // IRSimilarityCandidates that include that instruction. |
1425 | for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) { |
1426 | for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx(); |
1427 | Idx <= Edx; ++Idx) { |
1428 | DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>>::iterator |
1429 | IdIt; |
1430 | IdIt = IndexToIncludedCand.find(Val: Idx); |
1431 | bool Inserted = false; |
1432 | if (IdIt == IndexToIncludedCand.end()) |
1433 | std::tie(args&: IdIt, args&: Inserted) = IndexToIncludedCand.insert( |
1434 | KV: std::make_pair(x&: Idx, y: DenseSet<IRSimilarityCandidate *>())); |
1435 | IdIt->second.insert(V: &IRCand); |
1436 | } |
1437 | // Add mapping of candidate to the overall similarity group number. |
1438 | CandToGroup.insert( |
1439 | KV: std::make_pair(x: &IRCand, y: SimilarityCandidates->size() - 1)); |
1440 | } |
1441 | } |
1442 | } |
1443 | |
1444 | CandsForRepSubstring.clear(); |
1445 | StructuralGroups.clear(); |
1446 | NewCandidateGroups.clear(); |
1447 | } |
1448 | } |
1449 | |
1450 | SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( |
1451 | ArrayRef<std::unique_ptr<Module>> Modules) { |
1452 | resetSimilarityCandidates(); |
1453 | |
1454 | std::vector<IRInstructionData *> InstrList; |
1455 | std::vector<unsigned> IntegerMapping; |
1456 | Mapper.InstClassifier.EnableBranches = this->EnableBranches; |
1457 | Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; |
1458 | Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; |
1459 | Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; |
1460 | Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; |
1461 | |
1462 | populateMapper(Modules, InstrList, IntegerMapping); |
1463 | findCandidates(InstrList, IntegerMapping); |
1464 | |
1465 | return *SimilarityCandidates; |
1466 | } |
1467 | |
1468 | SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { |
1469 | resetSimilarityCandidates(); |
1470 | Mapper.InstClassifier.EnableBranches = this->EnableBranches; |
1471 | Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; |
1472 | Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; |
1473 | Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; |
1474 | Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; |
1475 | |
1476 | std::vector<IRInstructionData *> InstrList; |
1477 | std::vector<unsigned> IntegerMapping; |
1478 | |
1479 | populateMapper(M, InstrList, IntegerMapping); |
1480 | findCandidates(InstrList, IntegerMapping); |
1481 | |
1482 | return *SimilarityCandidates; |
1483 | } |
1484 | |
1485 | INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier" , |
1486 | "ir-similarity-identifier" , false, true) |
1487 | |
1488 | IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() |
1489 | : ModulePass(ID) { |
1490 | initializeIRSimilarityIdentifierWrapperPassPass( |
1491 | Registry&: *PassRegistry::getPassRegistry()); |
1492 | } |
1493 | |
1494 | bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { |
1495 | IRSI.reset(p: new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, |
1496 | MatchCallsByName, !DisableIntrinsics, |
1497 | false)); |
1498 | return false; |
1499 | } |
1500 | |
1501 | bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { |
1502 | IRSI.reset(); |
1503 | return false; |
1504 | } |
1505 | |
1506 | bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { |
1507 | IRSI->findSimilarity(M); |
1508 | return false; |
1509 | } |
1510 | |
1511 | AnalysisKey IRSimilarityAnalysis::Key; |
1512 | IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, |
1513 | ModuleAnalysisManager &) { |
1514 | auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, |
1515 | MatchCallsByName, !DisableIntrinsics, |
1516 | false); |
1517 | IRSI.findSimilarity(M); |
1518 | return IRSI; |
1519 | } |
1520 | |
1521 | PreservedAnalyses |
1522 | IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { |
1523 | IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(IR&: M); |
1524 | std::optional<SimilarityGroupList> &SimilarityCandidatesOpt = |
1525 | IRSI.getSimilarity(); |
1526 | |
1527 | for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { |
1528 | OS << CandVec.size() << " candidates of length " |
1529 | << CandVec.begin()->getLength() << ". Found in: \n" ; |
1530 | for (IRSimilarityCandidate &Cand : CandVec) { |
1531 | OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() |
1532 | << ", Basic Block: " ; |
1533 | if (Cand.front()->Inst->getParent()->getName().str() == "" ) |
1534 | OS << "(unnamed)" ; |
1535 | else |
1536 | OS << Cand.front()->Inst->getParent()->getName().str(); |
1537 | OS << "\n Start Instruction: " ; |
1538 | Cand.frontInstruction()->print(O&: OS); |
1539 | OS << "\n End Instruction: " ; |
1540 | Cand.backInstruction()->print(O&: OS); |
1541 | OS << "\n" ; |
1542 | } |
1543 | } |
1544 | |
1545 | return PreservedAnalyses::all(); |
1546 | } |
1547 | |
1548 | char IRSimilarityIdentifierWrapperPass::ID = 0; |
1549 | |