1 | //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // \file |
10 | // Implementation file for the IRSimilarityIdentifier for identifying |
11 | // similarities in IR including the IRInstructionMapper. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "llvm/Analysis/IRSimilarityIdentifier.h" |
16 | #include "llvm/ADT/DenseMap.h" |
17 | #include "llvm/ADT/SetOperations.h" |
18 | #include "llvm/IR/Intrinsics.h" |
19 | #include "llvm/IR/Operator.h" |
20 | #include "llvm/IR/User.h" |
21 | #include "llvm/InitializePasses.h" |
22 | #include "llvm/Support/SuffixTree.h" |
23 | |
24 | using namespace llvm; |
25 | using namespace IRSimilarity; |
26 | |
27 | namespace llvm { |
28 | cl::opt<bool> |
29 | DisableBranches("no-ir-sim-branch-matching" , cl::init(Val: false), |
30 | cl::ReallyHidden, |
31 | cl::desc("disable similarity matching, and outlining, " |
32 | "across branches for debugging purposes." )); |
33 | |
34 | cl::opt<bool> |
35 | DisableIndirectCalls("no-ir-sim-indirect-calls" , cl::init(Val: false), |
36 | cl::ReallyHidden, |
37 | cl::desc("disable outlining indirect calls." )); |
38 | |
39 | static cl::opt<bool> |
40 | MatchCallsByName("ir-sim-calls-by-name" , cl::init(Val: false), cl::ReallyHidden, |
41 | cl::desc("only allow matching call instructions if the " |
42 | "name and type signature match." )); |
43 | |
44 | cl::opt<bool> |
45 | DisableIntrinsics("no-ir-sim-intrinsics" , cl::init(Val: false), cl::ReallyHidden, |
46 | cl::desc("Don't match or outline intrinsics" )); |
47 | } // namespace llvm |
48 | |
49 | IRInstructionData::IRInstructionData(Instruction &I, bool Legality, |
50 | IRInstructionDataList &IDList) |
51 | : Inst(&I), Legal(Legality), IDL(&IDList) { |
52 | initializeInstruction(); |
53 | } |
54 | |
55 | void IRInstructionData::initializeInstruction() { |
56 | // We check for whether we have a comparison instruction. If it is, we |
57 | // find the "less than" version of the predicate for consistency for |
58 | // comparison instructions throught the program. |
59 | if (CmpInst *C = dyn_cast<CmpInst>(Val: Inst)) { |
60 | CmpInst::Predicate Predicate = predicateForConsistency(CI: C); |
61 | if (Predicate != C->getPredicate()) |
62 | RevisedPredicate = Predicate; |
63 | } |
64 | |
65 | // Here we collect the operands and their types for determining whether |
66 | // the structure of the operand use matches between two different candidates. |
67 | for (Use &OI : Inst->operands()) { |
68 | if (isa<CmpInst>(Val: Inst) && RevisedPredicate) { |
69 | // If we have a CmpInst where the predicate is reversed, it means the |
70 | // operands must be reversed as well. |
71 | OperVals.insert(I: OperVals.begin(), Elt: OI.get()); |
72 | continue; |
73 | } |
74 | |
75 | OperVals.push_back(Elt: OI.get()); |
76 | } |
77 | |
78 | // We capture the incoming BasicBlocks as values as well as the incoming |
79 | // Values in order to check for structural similarity. |
80 | if (PHINode *PN = dyn_cast<PHINode>(Val: Inst)) |
81 | llvm::append_range(C&: OperVals, R: PN->blocks()); |
82 | } |
83 | |
84 | IRInstructionData::IRInstructionData(IRInstructionDataList &IDList) |
85 | : IDL(&IDList) {} |
86 | |
87 | void IRInstructionData::setBranchSuccessors( |
88 | DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { |
89 | assert(isa<BranchInst>(Inst) && "Instruction must be branch" ); |
90 | |
91 | BranchInst *BI = cast<BranchInst>(Val: Inst); |
92 | DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; |
93 | |
94 | BBNumIt = BasicBlockToInteger.find(Val: BI->getParent()); |
95 | assert(BBNumIt != BasicBlockToInteger.end() && |
96 | "Could not find location for BasicBlock!" ); |
97 | |
98 | int CurrentBlockNumber = static_cast<int>(BBNumIt->second); |
99 | |
100 | for (Value *V : getBlockOperVals()) { |
101 | BasicBlock *Successor = cast<BasicBlock>(Val: V); |
102 | BBNumIt = BasicBlockToInteger.find(Val: Successor); |
103 | assert(BBNumIt != BasicBlockToInteger.end() && |
104 | "Could not find number for BasicBlock!" ); |
105 | int OtherBlockNumber = static_cast<int>(BBNumIt->second); |
106 | |
107 | int Relative = OtherBlockNumber - CurrentBlockNumber; |
108 | RelativeBlockLocations.push_back(Elt: Relative); |
109 | } |
110 | } |
111 | |
112 | ArrayRef<Value *> IRInstructionData::getBlockOperVals() { |
113 | assert((isa<BranchInst>(Inst) || |
114 | isa<PHINode>(Inst)) && "Instruction must be branch or PHINode" ); |
115 | |
116 | if (BranchInst *BI = dyn_cast<BranchInst>(Val: Inst)) |
117 | return ArrayRef<Value *>( |
118 | std::next(x: OperVals.begin(), n: BI->isConditional() ? 1 : 0), |
119 | OperVals.end() |
120 | ); |
121 | |
122 | if (PHINode *PN = dyn_cast<PHINode>(Val: Inst)) |
123 | return ArrayRef<Value *>( |
124 | std::next(x: OperVals.begin(), n: PN->getNumIncomingValues()), |
125 | OperVals.end() |
126 | ); |
127 | |
128 | return ArrayRef<Value *>(); |
129 | } |
130 | |
131 | void IRInstructionData::setCalleeName(bool MatchByName) { |
132 | CallInst *CI = dyn_cast<CallInst>(Val: Inst); |
133 | assert(CI && "Instruction must be call" ); |
134 | |
135 | CalleeName = "" ; |
136 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: Inst)) { |
137 | // To hash intrinsics, we use the opcode, and types like the other |
138 | // instructions, but also, the Intrinsic ID, and the Name of the |
139 | // intrinsic. |
140 | Intrinsic::ID IntrinsicID = II->getIntrinsicID(); |
141 | FunctionType *FT = II->getFunctionType(); |
142 | // If there is an overloaded name, we have to use the complex version |
143 | // of getName to get the entire string. |
144 | if (Intrinsic::isOverloaded(id: IntrinsicID)) |
145 | CalleeName = |
146 | Intrinsic::getName(Id: IntrinsicID, Tys: FT->params(), M: II->getModule(), FT); |
147 | // If there is not an overloaded name, we only need to use this version. |
148 | else |
149 | CalleeName = Intrinsic::getName(id: IntrinsicID).str(); |
150 | |
151 | return; |
152 | } |
153 | |
154 | if (!CI->isIndirectCall() && MatchByName) |
155 | CalleeName = CI->getCalledFunction()->getName().str(); |
156 | } |
157 | |
158 | void IRInstructionData::setPHIPredecessors( |
159 | DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { |
160 | assert(isa<PHINode>(Inst) && "Instruction must be phi node" ); |
161 | |
162 | PHINode *PN = cast<PHINode>(Val: Inst); |
163 | DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; |
164 | |
165 | BBNumIt = BasicBlockToInteger.find(Val: PN->getParent()); |
166 | assert(BBNumIt != BasicBlockToInteger.end() && |
167 | "Could not find location for BasicBlock!" ); |
168 | |
169 | int CurrentBlockNumber = static_cast<int>(BBNumIt->second); |
170 | |
171 | // Convert the incoming blocks of the PHINode to an integer value, based on |
172 | // the relative distances between the current block and the incoming block. |
173 | for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) { |
174 | BasicBlock *Incoming = PN->getIncomingBlock(i: Idx); |
175 | BBNumIt = BasicBlockToInteger.find(Val: Incoming); |
176 | assert(BBNumIt != BasicBlockToInteger.end() && |
177 | "Could not find number for BasicBlock!" ); |
178 | int OtherBlockNumber = static_cast<int>(BBNumIt->second); |
179 | |
180 | int Relative = OtherBlockNumber - CurrentBlockNumber; |
181 | RelativeBlockLocations.push_back(Elt: Relative); |
182 | } |
183 | } |
184 | |
185 | CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { |
186 | switch (CI->getPredicate()) { |
187 | case CmpInst::FCMP_OGT: |
188 | case CmpInst::FCMP_UGT: |
189 | case CmpInst::FCMP_OGE: |
190 | case CmpInst::FCMP_UGE: |
191 | case CmpInst::ICMP_SGT: |
192 | case CmpInst::ICMP_UGT: |
193 | case CmpInst::ICMP_SGE: |
194 | case CmpInst::ICMP_UGE: |
195 | return CI->getSwappedPredicate(); |
196 | default: |
197 | return CI->getPredicate(); |
198 | } |
199 | } |
200 | |
201 | CmpInst::Predicate IRInstructionData::getPredicate() const { |
202 | assert(isa<CmpInst>(Inst) && |
203 | "Can only get a predicate from a compare instruction" ); |
204 | |
205 | if (RevisedPredicate) |
206 | return *RevisedPredicate; |
207 | |
208 | return cast<CmpInst>(Val: Inst)->getPredicate(); |
209 | } |
210 | |
211 | StringRef IRInstructionData::getCalleeName() const { |
212 | assert(isa<CallInst>(Inst) && |
213 | "Can only get a name from a call instruction" ); |
214 | |
215 | assert(CalleeName && "CalleeName has not been set" ); |
216 | |
217 | return *CalleeName; |
218 | } |
219 | |
220 | bool IRSimilarity::isClose(const IRInstructionData &A, |
221 | const IRInstructionData &B) { |
222 | |
223 | if (!A.Legal || !B.Legal) |
224 | return false; |
225 | |
226 | // Check if we are performing the same sort of operation on the same types |
227 | // but not on the same values. |
228 | if (!A.Inst->isSameOperationAs(I: B.Inst)) { |
229 | // If there is a predicate, this means that either there is a swapped |
230 | // predicate, or that the types are different, we want to make sure that |
231 | // the predicates are equivalent via swapping. |
232 | if (isa<CmpInst>(Val: A.Inst) && isa<CmpInst>(Val: B.Inst)) { |
233 | |
234 | if (A.getPredicate() != B.getPredicate()) |
235 | return false; |
236 | |
237 | // If the predicates are the same via swap, make sure that the types are |
238 | // still the same. |
239 | auto ZippedTypes = zip(t: A.OperVals, u: B.OperVals); |
240 | |
241 | return all_of( |
242 | Range&: ZippedTypes, P: [](std::tuple<llvm::Value *, llvm::Value *> R) { |
243 | return std::get<0>(t&: R)->getType() == std::get<1>(t&: R)->getType(); |
244 | }); |
245 | } |
246 | |
247 | return false; |
248 | } |
249 | |
250 | // Since any GEP Instruction operands after the first operand cannot be |
251 | // defined by a register, we must make sure that the operands after the first |
252 | // are the same in the two instructions |
253 | if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: A.Inst)) { |
254 | auto *OtherGEP = cast<GetElementPtrInst>(Val: B.Inst); |
255 | |
256 | // If the instructions do not have the same inbounds restrictions, we do |
257 | // not consider them the same. |
258 | if (GEP->isInBounds() != OtherGEP->isInBounds()) |
259 | return false; |
260 | |
261 | auto ZippedOperands = zip(t: GEP->indices(), u: OtherGEP->indices()); |
262 | |
263 | // We increment here since we do not care about the first instruction, |
264 | // we only care about the following operands since they must be the |
265 | // exact same to be considered similar. |
266 | return all_of(Range: drop_begin(RangeOrContainer&: ZippedOperands), |
267 | P: [](std::tuple<llvm::Use &, llvm::Use &> R) { |
268 | return std::get<0>(t&: R) == std::get<1>(t&: R); |
269 | }); |
270 | } |
271 | |
272 | // If the instructions are functions calls, we make sure that the function |
273 | // name is the same. We already know that the types are since is |
274 | // isSameOperationAs is true. |
275 | if (isa<CallInst>(Val: A.Inst) && isa<CallInst>(Val: B.Inst)) { |
276 | if (A.getCalleeName() != B.getCalleeName()) |
277 | return false; |
278 | } |
279 | |
280 | if (isa<BranchInst>(Val: A.Inst) && isa<BranchInst>(Val: B.Inst) && |
281 | A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size()) |
282 | return false; |
283 | |
284 | return true; |
285 | } |
286 | |
287 | // TODO: This is the same as the MachineOutliner, and should be consolidated |
288 | // into the same interface. |
289 | void IRInstructionMapper::convertToUnsignedVec( |
290 | BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, |
291 | std::vector<unsigned> &IntegerMapping) { |
292 | BasicBlock::iterator It = BB.begin(); |
293 | |
294 | std::vector<unsigned> IntegerMappingForBB; |
295 | std::vector<IRInstructionData *> InstrListForBB; |
296 | |
297 | for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { |
298 | switch (InstClassifier.visit(I&: *It)) { |
299 | case InstrType::Legal: |
300 | mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); |
301 | break; |
302 | case InstrType::Illegal: |
303 | mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); |
304 | break; |
305 | case InstrType::Invisible: |
306 | AddedIllegalLastTime = false; |
307 | break; |
308 | } |
309 | } |
310 | |
311 | if (AddedIllegalLastTime) |
312 | mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, End: true); |
313 | for (IRInstructionData *ID : InstrListForBB) |
314 | this->IDL->push_back(Node&: *ID); |
315 | llvm::append_range(C&: InstrList, R&: InstrListForBB); |
316 | llvm::append_range(C&: IntegerMapping, R&: IntegerMappingForBB); |
317 | } |
318 | |
319 | // TODO: This is the same as the MachineOutliner, and should be consolidated |
320 | // into the same interface. |
321 | unsigned IRInstructionMapper::mapToLegalUnsigned( |
322 | BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, |
323 | std::vector<IRInstructionData *> &InstrListForBB) { |
324 | // We added something legal, so we should unset the AddedLegalLastTime |
325 | // flag. |
326 | AddedIllegalLastTime = false; |
327 | |
328 | // If we have at least two adjacent legal instructions (which may have |
329 | // invisible instructions in between), remember that. |
330 | if (CanCombineWithPrevInstr) |
331 | HaveLegalRange = true; |
332 | CanCombineWithPrevInstr = true; |
333 | |
334 | // Get the integer for this instruction or give it the current |
335 | // LegalInstrNumber. |
336 | IRInstructionData *ID = allocateIRInstructionData(I&: *It, Legality: true, IDL&: *IDL); |
337 | InstrListForBB.push_back(x: ID); |
338 | |
339 | if (isa<BranchInst>(Val: *It)) |
340 | ID->setBranchSuccessors(BasicBlockToInteger); |
341 | |
342 | if (isa<CallInst>(Val: *It)) |
343 | ID->setCalleeName(EnableMatchCallsByName); |
344 | |
345 | if (isa<PHINode>(Val: *It)) |
346 | ID->setPHIPredecessors(BasicBlockToInteger); |
347 | |
348 | // Add to the instruction list |
349 | bool WasInserted; |
350 | DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator |
351 | ResultIt; |
352 | std::tie(args&: ResultIt, args&: WasInserted) = |
353 | InstructionIntegerMap.insert(KV: std::make_pair(x&: ID, y&: LegalInstrNumber)); |
354 | unsigned INumber = ResultIt->second; |
355 | |
356 | // There was an insertion. |
357 | if (WasInserted) |
358 | LegalInstrNumber++; |
359 | |
360 | IntegerMappingForBB.push_back(x: INumber); |
361 | |
362 | // Make sure we don't overflow or use any integers reserved by the DenseMap. |
363 | assert(LegalInstrNumber < IllegalInstrNumber && |
364 | "Instruction mapping overflow!" ); |
365 | |
366 | assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && |
367 | "Tried to assign DenseMap tombstone or empty key to instruction." ); |
368 | assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && |
369 | "Tried to assign DenseMap tombstone or empty key to instruction." ); |
370 | |
371 | return INumber; |
372 | } |
373 | |
374 | IRInstructionData * |
375 | IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, |
376 | IRInstructionDataList &IDL) { |
377 | return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); |
378 | } |
379 | |
380 | IRInstructionData * |
381 | IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) { |
382 | return new (InstDataAllocator->Allocate()) IRInstructionData(IDL); |
383 | } |
384 | |
385 | IRInstructionDataList * |
386 | IRInstructionMapper::allocateIRInstructionDataList() { |
387 | return new (IDLAllocator->Allocate()) IRInstructionDataList(); |
388 | } |
389 | |
390 | // TODO: This is the same as the MachineOutliner, and should be consolidated |
391 | // into the same interface. |
392 | unsigned IRInstructionMapper::mapToIllegalUnsigned( |
393 | BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, |
394 | std::vector<IRInstructionData *> &InstrListForBB, bool End) { |
395 | // Can't combine an illegal instruction. Set the flag. |
396 | CanCombineWithPrevInstr = false; |
397 | |
398 | // Only add one illegal number per range of legal numbers. |
399 | if (AddedIllegalLastTime) |
400 | return IllegalInstrNumber; |
401 | |
402 | IRInstructionData *ID = nullptr; |
403 | if (!End) |
404 | ID = allocateIRInstructionData(I&: *It, Legality: false, IDL&: *IDL); |
405 | else |
406 | ID = allocateIRInstructionData(IDL&: *IDL); |
407 | InstrListForBB.push_back(x: ID); |
408 | |
409 | // Remember that we added an illegal number last time. |
410 | AddedIllegalLastTime = true; |
411 | unsigned INumber = IllegalInstrNumber; |
412 | IntegerMappingForBB.push_back(x: IllegalInstrNumber--); |
413 | |
414 | assert(LegalInstrNumber < IllegalInstrNumber && |
415 | "Instruction mapping overflow!" ); |
416 | |
417 | assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && |
418 | "IllegalInstrNumber cannot be DenseMap tombstone or empty key!" ); |
419 | |
420 | assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && |
421 | "IllegalInstrNumber cannot be DenseMap tombstone or empty key!" ); |
422 | |
423 | return INumber; |
424 | } |
425 | |
426 | IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, |
427 | IRInstructionData *FirstInstIt, |
428 | IRInstructionData *LastInstIt) |
429 | : StartIdx(StartIdx), Len(Len) { |
430 | |
431 | assert(FirstInstIt != nullptr && "Instruction is nullptr!" ); |
432 | assert(LastInstIt != nullptr && "Instruction is nullptr!" ); |
433 | assert(StartIdx + Len > StartIdx && |
434 | "Overflow for IRSimilarityCandidate range?" ); |
435 | assert(Len - 1 == static_cast<unsigned>(std::distance( |
436 | iterator(FirstInstIt), iterator(LastInstIt))) && |
437 | "Length of the first and last IRInstructionData do not match the " |
438 | "given length" ); |
439 | |
440 | // We iterate over the given instructions, and map each unique value |
441 | // to a unique number in the IRSimilarityCandidate ValueToNumber and |
442 | // NumberToValue maps. A constant get its own value globally, the individual |
443 | // uses of the constants are not considered to be unique. |
444 | // |
445 | // IR: Mapping Added: |
446 | // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 |
447 | // %add2 = add i32 %a, %1 %add2 -> 4 |
448 | // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 |
449 | // |
450 | // when replace with global values, starting from 1, would be |
451 | // |
452 | // 3 = add i32 1, 2 |
453 | // 4 = add i32 1, 3 |
454 | // 6 = add i32 5, 2 |
455 | unsigned LocalValNumber = 1; |
456 | IRInstructionDataList::iterator ID = iterator(*FirstInstIt); |
457 | for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { |
458 | // Map the operand values to an unsigned integer if it does not already |
459 | // have an unsigned integer assigned to it. |
460 | for (Value *Arg : ID->OperVals) |
461 | if (ValueToNumber.try_emplace(Key: Arg, Args&: LocalValNumber).second) { |
462 | NumberToValue.try_emplace(Key: LocalValNumber, Args&: Arg); |
463 | LocalValNumber++; |
464 | } |
465 | |
466 | // Mapping the instructions to an unsigned integer if it is not already |
467 | // exist in the mapping. |
468 | if (ValueToNumber.try_emplace(Key: ID->Inst, Args&: LocalValNumber).second) { |
469 | NumberToValue.try_emplace(Key: LocalValNumber, Args&: ID->Inst); |
470 | LocalValNumber++; |
471 | } |
472 | } |
473 | |
474 | // Setting the first and last instruction data pointers for the candidate. If |
475 | // we got through the entire for loop without hitting an assert, we know |
476 | // that both of these instructions are not nullptrs. |
477 | FirstInst = FirstInstIt; |
478 | LastInst = LastInstIt; |
479 | |
480 | // Add the basic blocks contained in the set into the global value numbering. |
481 | DenseSet<BasicBlock *> BBSet; |
482 | getBasicBlocks(BBSet); |
483 | for (BasicBlock *BB : BBSet) { |
484 | if (ValueToNumber.try_emplace(Key: BB, Args&: LocalValNumber).second) { |
485 | NumberToValue.try_emplace(Key: LocalValNumber, Args&: BB); |
486 | LocalValNumber++; |
487 | } |
488 | } |
489 | } |
490 | |
491 | bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, |
492 | const IRSimilarityCandidate &B) { |
493 | if (A.getLength() != B.getLength()) |
494 | return false; |
495 | |
496 | auto InstrDataForBoth = |
497 | zip(t: make_range(x: A.begin(), y: A.end()), u: make_range(x: B.begin(), y: B.end())); |
498 | |
499 | return all_of(Range&: InstrDataForBoth, |
500 | P: [](std::tuple<IRInstructionData &, IRInstructionData &> R) { |
501 | IRInstructionData &A = std::get<0>(t&: R); |
502 | IRInstructionData &B = std::get<1>(t&: R); |
503 | if (!A.Legal || !B.Legal) |
504 | return false; |
505 | return isClose(A, B); |
506 | }); |
507 | } |
508 | |
509 | /// Determine if one or more of the assigned global value numbers for the |
510 | /// operands in \p TargetValueNumbers is in the current mapping set for operand |
511 | /// numbers in \p SourceOperands. The set of possible corresponding global |
512 | /// value numbers are replaced with the most recent version of compatible |
513 | /// values. |
514 | /// |
515 | /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global |
516 | /// value number for the source IRInstructionCandidate. |
517 | /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source |
518 | /// IRSimilarityCandidate global value numbers to a set of possible numbers in |
519 | /// the target. |
520 | /// \param [in] SourceOperands - The operands in the original |
521 | /// IRSimilarityCandidate in the current instruction. |
522 | /// \param [in] TargetValueNumbers - The global value numbers of the operands in |
523 | /// the corresponding Instruction in the other IRSimilarityCandidate. |
524 | /// \returns true if there exists a possible mapping between the source |
525 | /// Instruction operands and the target Instruction operands, and false if not. |
526 | static bool checkNumberingAndReplaceCommutative( |
527 | const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, |
528 | DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, |
529 | ArrayRef<Value *> &SourceOperands, |
530 | DenseSet<unsigned> &TargetValueNumbers){ |
531 | |
532 | DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; |
533 | |
534 | unsigned ArgVal; |
535 | bool WasInserted; |
536 | |
537 | // Iterate over the operands in the source IRSimilarityCandidate to determine |
538 | // whether there exists an operand in the other IRSimilarityCandidate that |
539 | // creates a valid mapping of Value to Value between the |
540 | // IRSimilarityCaniddates. |
541 | for (Value *V : SourceOperands) { |
542 | ArgVal = SourceValueToNumberMapping.find(Val: V)->second; |
543 | |
544 | // Instead of finding a current mapping, we attempt to insert a set. |
545 | std::tie(args&: ValueMappingIt, args&: WasInserted) = CurrentSrcTgtNumberMapping.insert( |
546 | KV: std::make_pair(x&: ArgVal, y&: TargetValueNumbers)); |
547 | |
548 | // We need to iterate over the items in other IRSimilarityCandidate's |
549 | // Instruction to determine whether there is a valid mapping of |
550 | // Value to Value. |
551 | DenseSet<unsigned> NewSet; |
552 | for (unsigned &Curr : ValueMappingIt->second) |
553 | // If we can find the value in the mapping, we add it to the new set. |
554 | if (TargetValueNumbers.contains(V: Curr)) |
555 | NewSet.insert(V: Curr); |
556 | |
557 | // If we could not find a Value, return 0. |
558 | if (NewSet.empty()) |
559 | return false; |
560 | |
561 | // Otherwise replace the old mapping with the newly constructed one. |
562 | if (NewSet.size() != ValueMappingIt->second.size()) |
563 | ValueMappingIt->second.swap(RHS&: NewSet); |
564 | |
565 | // We have reached no conclusions about the mapping, and cannot remove |
566 | // any items from the other operands, so we move to check the next operand. |
567 | if (ValueMappingIt->second.size() != 1) |
568 | continue; |
569 | |
570 | unsigned ValToRemove = *ValueMappingIt->second.begin(); |
571 | // When there is only one item left in the mapping for and operand, remove |
572 | // the value from the other operands. If it results in there being no |
573 | // mapping, return false, it means the mapping is wrong |
574 | for (Value *InnerV : SourceOperands) { |
575 | if (V == InnerV) |
576 | continue; |
577 | |
578 | unsigned InnerVal = SourceValueToNumberMapping.find(Val: InnerV)->second; |
579 | ValueMappingIt = CurrentSrcTgtNumberMapping.find(Val: InnerVal); |
580 | if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) |
581 | continue; |
582 | |
583 | ValueMappingIt->second.erase(V: ValToRemove); |
584 | if (ValueMappingIt->second.empty()) |
585 | return false; |
586 | } |
587 | } |
588 | |
589 | return true; |
590 | } |
591 | |
592 | /// Determine if operand number \p TargetArgVal is in the current mapping set |
593 | /// for operand number \p SourceArgVal. |
594 | /// |
595 | /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global |
596 | /// value numbers from source IRSimilarityCandidate to target |
597 | /// IRSimilarityCandidate. |
598 | /// \param [in] SourceArgVal The global value number for an operand in the |
599 | /// in the original candidate. |
600 | /// \param [in] TargetArgVal The global value number for the corresponding |
601 | /// operand in the other candidate. |
602 | /// \returns True if there exists a mapping and false if not. |
603 | bool checkNumberingAndReplace( |
604 | DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, |
605 | unsigned SourceArgVal, unsigned TargetArgVal) { |
606 | // We are given two unsigned integers representing the global values of |
607 | // the operands in different IRSimilarityCandidates and a current mapping |
608 | // between the two. |
609 | // |
610 | // Source Operand GVN: 1 |
611 | // Target Operand GVN: 2 |
612 | // CurrentMapping: {1: {1, 2}} |
613 | // |
614 | // Since we have mapping, and the target operand is contained in the set, we |
615 | // update it to: |
616 | // CurrentMapping: {1: {2}} |
617 | // and can return true. But, if the mapping was |
618 | // CurrentMapping: {1: {3}} |
619 | // we would return false. |
620 | |
621 | bool WasInserted; |
622 | DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; |
623 | |
624 | std::tie(args&: Val, args&: WasInserted) = CurrentSrcTgtNumberMapping.insert( |
625 | KV: std::make_pair(x&: SourceArgVal, y: DenseSet<unsigned>({TargetArgVal}))); |
626 | |
627 | // If we created a new mapping, then we are done. |
628 | if (WasInserted) |
629 | return true; |
630 | |
631 | // If there is more than one option in the mapping set, and the target value |
632 | // is included in the mapping set replace that set with one that only includes |
633 | // the target value, as it is the only valid mapping via the non commutative |
634 | // instruction. |
635 | |
636 | DenseSet<unsigned> &TargetSet = Val->second; |
637 | if (TargetSet.size() > 1 && TargetSet.contains(V: TargetArgVal)) { |
638 | TargetSet.clear(); |
639 | TargetSet.insert(V: TargetArgVal); |
640 | return true; |
641 | } |
642 | |
643 | // Return true if we can find the value in the set. |
644 | return TargetSet.contains(V: TargetArgVal); |
645 | } |
646 | |
647 | bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( |
648 | OperandMapping A, OperandMapping B) { |
649 | // Iterators to keep track of where we are in the operands for each |
650 | // Instruction. |
651 | ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); |
652 | ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); |
653 | unsigned OperandLength = A.OperVals.size(); |
654 | |
655 | // For each operand, get the value numbering and ensure it is consistent. |
656 | for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { |
657 | unsigned OperValA = A.IRSC.ValueToNumber.find(Val: *VItA)->second; |
658 | unsigned OperValB = B.IRSC.ValueToNumber.find(Val: *VItB)->second; |
659 | |
660 | // Attempt to add a set with only the target value. If there is no mapping |
661 | // we can create it here. |
662 | // |
663 | // For an instruction like a subtraction: |
664 | // IRSimilarityCandidateA: IRSimilarityCandidateB: |
665 | // %resultA = sub %a, %b %resultB = sub %d, %e |
666 | // |
667 | // We map %a -> %d and %b -> %e. |
668 | // |
669 | // And check to see whether their mapping is consistent in |
670 | // checkNumberingAndReplace. |
671 | |
672 | if (!checkNumberingAndReplace(CurrentSrcTgtNumberMapping&: A.ValueNumberMapping, SourceArgVal: OperValA, TargetArgVal: OperValB)) |
673 | return false; |
674 | |
675 | if (!checkNumberingAndReplace(CurrentSrcTgtNumberMapping&: B.ValueNumberMapping, SourceArgVal: OperValB, TargetArgVal: OperValA)) |
676 | return false; |
677 | } |
678 | return true; |
679 | } |
680 | |
681 | bool IRSimilarityCandidate::compareCommutativeOperandMapping( |
682 | OperandMapping A, OperandMapping B) { |
683 | DenseSet<unsigned> ValueNumbersA; |
684 | DenseSet<unsigned> ValueNumbersB; |
685 | |
686 | ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); |
687 | ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); |
688 | unsigned OperandLength = A.OperVals.size(); |
689 | |
690 | // Find the value number sets for the operands. |
691 | for (unsigned Idx = 0; Idx < OperandLength; |
692 | Idx++, VItA++, VItB++) { |
693 | ValueNumbersA.insert(V: A.IRSC.ValueToNumber.find(Val: *VItA)->second); |
694 | ValueNumbersB.insert(V: B.IRSC.ValueToNumber.find(Val: *VItB)->second); |
695 | } |
696 | |
697 | // Iterate over the operands in the first IRSimilarityCandidate and make sure |
698 | // there exists a possible mapping with the operands in the second |
699 | // IRSimilarityCandidate. |
700 | if (!checkNumberingAndReplaceCommutative(SourceValueToNumberMapping: A.IRSC.ValueToNumber, |
701 | CurrentSrcTgtNumberMapping&: A.ValueNumberMapping, SourceOperands&: A.OperVals, |
702 | TargetValueNumbers&: ValueNumbersB)) |
703 | return false; |
704 | |
705 | // Iterate over the operands in the second IRSimilarityCandidate and make sure |
706 | // there exists a possible mapping with the operands in the first |
707 | // IRSimilarityCandidate. |
708 | if (!checkNumberingAndReplaceCommutative(SourceValueToNumberMapping: B.IRSC.ValueToNumber, |
709 | CurrentSrcTgtNumberMapping&: B.ValueNumberMapping, SourceOperands&: B.OperVals, |
710 | TargetValueNumbers&: ValueNumbersA)) |
711 | return false; |
712 | |
713 | return true; |
714 | } |
715 | |
716 | bool IRSimilarityCandidate::compareAssignmentMapping( |
717 | const unsigned InstValA, const unsigned &InstValB, |
718 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, |
719 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { |
720 | DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; |
721 | bool WasInserted; |
722 | std::tie(args&: ValueMappingIt, args&: WasInserted) = ValueNumberMappingA.insert( |
723 | KV: std::make_pair(x: InstValA, y: DenseSet<unsigned>({InstValB}))); |
724 | if (!WasInserted && !ValueMappingIt->second.contains(V: InstValB)) |
725 | return false; |
726 | else if (ValueMappingIt->second.size() != 1) { |
727 | for (unsigned OtherVal : ValueMappingIt->second) { |
728 | if (OtherVal == InstValB) |
729 | continue; |
730 | auto OtherValIt = ValueNumberMappingA.find(Val: OtherVal); |
731 | if (OtherValIt == ValueNumberMappingA.end()) |
732 | continue; |
733 | OtherValIt->second.erase(V: InstValA); |
734 | } |
735 | ValueNumberMappingA.erase(I: ValueMappingIt); |
736 | std::tie(args&: ValueMappingIt, args&: WasInserted) = ValueNumberMappingA.insert( |
737 | KV: std::make_pair(x: InstValA, y: DenseSet<unsigned>({InstValB}))); |
738 | } |
739 | |
740 | return true; |
741 | } |
742 | |
743 | bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A, |
744 | RelativeLocMapping B) { |
745 | // Get the basic blocks the label refers to. |
746 | BasicBlock *ABB = cast<BasicBlock>(Val: A.OperVal); |
747 | BasicBlock *BBB = cast<BasicBlock>(Val: B.OperVal); |
748 | |
749 | // Get the basic blocks contained in each region. |
750 | DenseSet<BasicBlock *> BasicBlockA; |
751 | DenseSet<BasicBlock *> BasicBlockB; |
752 | A.IRSC.getBasicBlocks(BBSet&: BasicBlockA); |
753 | B.IRSC.getBasicBlocks(BBSet&: BasicBlockB); |
754 | |
755 | // Determine if the block is contained in the region. |
756 | bool AContained = BasicBlockA.contains(V: ABB); |
757 | bool BContained = BasicBlockB.contains(V: BBB); |
758 | |
759 | // Both blocks need to be contained in the region, or both need to be outside |
760 | // the region. |
761 | if (AContained != BContained) |
762 | return false; |
763 | |
764 | // If both are contained, then we need to make sure that the relative |
765 | // distance to the target blocks are the same. |
766 | if (AContained) |
767 | return A.RelativeLocation == B.RelativeLocation; |
768 | return true; |
769 | } |
770 | |
771 | bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, |
772 | const IRSimilarityCandidate &B) { |
773 | DenseMap<unsigned, DenseSet<unsigned>> MappingA; |
774 | DenseMap<unsigned, DenseSet<unsigned>> MappingB; |
775 | return IRSimilarityCandidate::compareStructure(A, B, ValueNumberMappingA&: MappingA, ValueNumberMappingB&: MappingB); |
776 | } |
777 | |
778 | typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &, |
779 | SmallVector<int, 4> &, ArrayRef<Value *> &, |
780 | ArrayRef<Value *> &> |
781 | ZippedRelativeLocationsT; |
782 | |
783 | bool IRSimilarityCandidate::compareStructure( |
784 | const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, |
785 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, |
786 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { |
787 | if (A.getLength() != B.getLength()) |
788 | return false; |
789 | |
790 | if (A.ValueToNumber.size() != B.ValueToNumber.size()) |
791 | return false; |
792 | |
793 | iterator ItA = A.begin(); |
794 | iterator ItB = B.begin(); |
795 | |
796 | // These ValueNumber Mapping sets create a create a mapping between the values |
797 | // in one candidate to values in the other candidate. If we create a set with |
798 | // one element, and that same element maps to the original element in the |
799 | // candidate we have a good mapping. |
800 | |
801 | // Iterate over the instructions contained in each candidate |
802 | unsigned SectionLength = A.getStartIdx() + A.getLength(); |
803 | for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; |
804 | ItA++, ItB++, Loc++) { |
805 | // Make sure the instructions are similar to one another. |
806 | if (!isClose(A: *ItA, B: *ItB)) |
807 | return false; |
808 | |
809 | Instruction *IA = ItA->Inst; |
810 | Instruction *IB = ItB->Inst; |
811 | |
812 | if (!ItA->Legal || !ItB->Legal) |
813 | return false; |
814 | |
815 | // Get the operand sets for the instructions. |
816 | ArrayRef<Value *> OperValsA = ItA->OperVals; |
817 | ArrayRef<Value *> OperValsB = ItB->OperVals; |
818 | |
819 | unsigned InstValA = A.ValueToNumber.find(Val: IA)->second; |
820 | unsigned InstValB = B.ValueToNumber.find(Val: IB)->second; |
821 | |
822 | // Ensure that the mappings for the instructions exists. |
823 | if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA, |
824 | ValueNumberMappingB)) |
825 | return false; |
826 | |
827 | if (!compareAssignmentMapping(InstValA: InstValB, InstValB: InstValA, ValueNumberMappingA&: ValueNumberMappingB, |
828 | ValueNumberMappingB&: ValueNumberMappingA)) |
829 | return false; |
830 | |
831 | // We have different paths for commutative instructions and non-commutative |
832 | // instructions since commutative instructions could allow multiple mappings |
833 | // to certain values. |
834 | if (IA->isCommutative() && !isa<FPMathOperator>(Val: IA) && |
835 | !isa<IntrinsicInst>(Val: IA)) { |
836 | if (!compareCommutativeOperandMapping( |
837 | A: {.IRSC: A, .OperVals: OperValsA, .ValueNumberMapping: ValueNumberMappingA}, |
838 | B: {.IRSC: B, .OperVals: OperValsB, .ValueNumberMapping: ValueNumberMappingB})) |
839 | return false; |
840 | continue; |
841 | } |
842 | |
843 | // Handle the non-commutative cases. |
844 | if (!compareNonCommutativeOperandMapping( |
845 | A: {.IRSC: A, .OperVals: OperValsA, .ValueNumberMapping: ValueNumberMappingA}, |
846 | B: {.IRSC: B, .OperVals: OperValsB, .ValueNumberMapping: ValueNumberMappingB})) |
847 | return false; |
848 | |
849 | // Here we check that between two corresponding instructions, |
850 | // when referring to a basic block in the same region, the |
851 | // relative locations are the same. And, that the instructions refer to |
852 | // basic blocks outside the region in the same corresponding locations. |
853 | |
854 | // We are able to make the assumption about blocks outside of the region |
855 | // since the target block labels are considered values and will follow the |
856 | // same number matching that we defined for the other instructions in the |
857 | // region. So, at this point, in each location we target a specific block |
858 | // outside the region, we are targeting a corresponding block in each |
859 | // analagous location in the region we are comparing to. |
860 | if (!(isa<BranchInst>(Val: IA) && isa<BranchInst>(Val: IB)) && |
861 | !(isa<PHINode>(Val: IA) && isa<PHINode>(Val: IB))) |
862 | continue; |
863 | |
864 | SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations; |
865 | SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations; |
866 | ArrayRef<Value *> ABL = ItA->getBlockOperVals(); |
867 | ArrayRef<Value *> BBL = ItB->getBlockOperVals(); |
868 | |
869 | // Check to make sure that the number of operands, and branching locations |
870 | // between BranchInsts is the same. |
871 | if (RelBlockLocsA.size() != RelBlockLocsB.size() && |
872 | ABL.size() != BBL.size()) |
873 | return false; |
874 | |
875 | assert(RelBlockLocsA.size() == ABL.size() && |
876 | "Block information vectors not the same size." ); |
877 | assert(RelBlockLocsB.size() == BBL.size() && |
878 | "Block information vectors not the same size." ); |
879 | |
880 | ZippedRelativeLocationsT ZippedRelativeLocations = |
881 | zip(t&: RelBlockLocsA, u&: RelBlockLocsB, args&: ABL, args&: BBL); |
882 | if (any_of(Range&: ZippedRelativeLocations, |
883 | P: [&A, &B](std::tuple<int, int, Value *, Value *> R) { |
884 | return !checkRelativeLocations( |
885 | A: {.IRSC: A, .RelativeLocation: std::get<0>(t&: R), .OperVal: std::get<2>(t&: R)}, |
886 | B: {.IRSC: B, .RelativeLocation: std::get<1>(t&: R), .OperVal: std::get<3>(t&: R)}); |
887 | })) |
888 | return false; |
889 | } |
890 | return true; |
891 | } |
892 | |
893 | bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, |
894 | const IRSimilarityCandidate &B) { |
895 | auto DoesOverlap = [](const IRSimilarityCandidate &X, |
896 | const IRSimilarityCandidate &Y) { |
897 | // Check: |
898 | // XXXXXX X starts before Y ends |
899 | // YYYYYYY Y starts after X starts |
900 | return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; |
901 | }; |
902 | |
903 | return DoesOverlap(A, B) || DoesOverlap(B, A); |
904 | } |
905 | |
906 | void IRSimilarityIdentifier::populateMapper( |
907 | Module &M, std::vector<IRInstructionData *> &InstrList, |
908 | std::vector<unsigned> &IntegerMapping) { |
909 | |
910 | std::vector<IRInstructionData *> InstrListForModule; |
911 | std::vector<unsigned> IntegerMappingForModule; |
912 | // Iterate over the functions in the module to map each Instruction in each |
913 | // BasicBlock to an unsigned integer. |
914 | Mapper.initializeForBBs(M); |
915 | |
916 | for (Function &F : M) { |
917 | |
918 | if (F.empty()) |
919 | continue; |
920 | |
921 | for (BasicBlock &BB : F) { |
922 | |
923 | // BB has potential to have similarity since it has a size greater than 2 |
924 | // and can therefore match other regions greater than 2. Map it to a list |
925 | // of unsigned integers. |
926 | Mapper.convertToUnsignedVec(BB, InstrList&: InstrListForModule, |
927 | IntegerMapping&: IntegerMappingForModule); |
928 | } |
929 | |
930 | BasicBlock::iterator It = F.begin()->end(); |
931 | Mapper.mapToIllegalUnsigned(It, IntegerMappingForBB&: IntegerMappingForModule, InstrListForBB&: InstrListForModule, |
932 | End: true); |
933 | if (InstrListForModule.size() > 0) |
934 | Mapper.IDL->push_back(Node&: *InstrListForModule.back()); |
935 | } |
936 | |
937 | // Insert the InstrListForModule at the end of the overall InstrList so that |
938 | // we can have a long InstrList for the entire set of Modules being analyzed. |
939 | llvm::append_range(C&: InstrList, R&: InstrListForModule); |
940 | // Do the same as above, but for IntegerMapping. |
941 | llvm::append_range(C&: IntegerMapping, R&: IntegerMappingForModule); |
942 | } |
943 | |
944 | void IRSimilarityIdentifier::populateMapper( |
945 | ArrayRef<std::unique_ptr<Module>> &Modules, |
946 | std::vector<IRInstructionData *> &InstrList, |
947 | std::vector<unsigned> &IntegerMapping) { |
948 | |
949 | // Iterate over, and map the instructions in each module. |
950 | for (const std::unique_ptr<Module> &M : Modules) |
951 | populateMapper(M&: *M, InstrList, IntegerMapping); |
952 | } |
953 | |
954 | /// From a repeated subsequence, find all the different instances of the |
955 | /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from |
956 | /// the IRInstructionData in subsequence. |
957 | /// |
958 | /// \param [in] Mapper - The instruction mapper for basic correctness checks. |
959 | /// \param [in] InstrList - The vector that holds the instruction data. |
960 | /// \param [in] IntegerMapping - The vector that holds the mapped integers. |
961 | /// \param [out] CandsForRepSubstring - The vector to store the generated |
962 | /// IRSimilarityCandidates. |
963 | static void createCandidatesFromSuffixTree( |
964 | const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList, |
965 | std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, |
966 | std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { |
967 | |
968 | unsigned StringLen = RS.Length; |
969 | if (StringLen < 2) |
970 | return; |
971 | |
972 | // Create an IRSimilarityCandidate for instance of this subsequence \p RS. |
973 | for (const unsigned &StartIdx : RS.StartIndices) { |
974 | unsigned EndIdx = StartIdx + StringLen - 1; |
975 | |
976 | // Check that this subsequence does not contain an illegal instruction. |
977 | bool ContainsIllegal = false; |
978 | for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { |
979 | unsigned Key = IntegerMapping[CurrIdx]; |
980 | if (Key > Mapper.IllegalInstrNumber) { |
981 | ContainsIllegal = true; |
982 | break; |
983 | } |
984 | } |
985 | |
986 | // If we have an illegal instruction, we should not create an |
987 | // IRSimilarityCandidate for this region. |
988 | if (ContainsIllegal) |
989 | continue; |
990 | |
991 | // We are getting iterators to the instructions in this region of code |
992 | // by advancing the start and end indices from the start of the |
993 | // InstrList. |
994 | std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); |
995 | std::advance(i&: StartIt, n: StartIdx); |
996 | std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); |
997 | std::advance(i&: EndIt, n: EndIdx); |
998 | |
999 | CandsForRepSubstring.emplace_back(args: StartIdx, args&: StringLen, args&: *StartIt, args&: *EndIt); |
1000 | } |
1001 | } |
1002 | |
1003 | void IRSimilarityCandidate::createCanonicalRelationFrom( |
1004 | IRSimilarityCandidate &SourceCand, |
1005 | DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping, |
1006 | DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) { |
1007 | assert(SourceCand.CanonNumToNumber.size() != 0 && |
1008 | "Base canonical relationship is empty!" ); |
1009 | assert(SourceCand.NumberToCanonNum.size() != 0 && |
1010 | "Base canonical relationship is empty!" ); |
1011 | |
1012 | assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty" ); |
1013 | assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty" ); |
1014 | |
1015 | DenseSet<unsigned> UsedGVNs; |
1016 | // Iterate over the mappings provided from this candidate to SourceCand. We |
1017 | // are then able to map the GVN in this candidate to the same canonical number |
1018 | // given to the corresponding GVN in SourceCand. |
1019 | for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) { |
1020 | unsigned SourceGVN = GVNMapping.first; |
1021 | |
1022 | assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!" ); |
1023 | |
1024 | unsigned ResultGVN; |
1025 | // We need special handling if we have more than one potential value. This |
1026 | // means that there are at least two GVNs that could correspond to this GVN. |
1027 | // This could lead to potential swapping later on, so we make a decision |
1028 | // here to ensure a one-to-one mapping. |
1029 | if (GVNMapping.second.size() > 1) { |
1030 | bool Found = false; |
1031 | for (unsigned Val : GVNMapping.second) { |
1032 | // We make sure the target value number hasn't already been reserved. |
1033 | if (UsedGVNs.contains(V: Val)) |
1034 | continue; |
1035 | |
1036 | // We make sure that the opposite mapping is still consistent. |
1037 | DenseMap<unsigned, DenseSet<unsigned>>::iterator It = |
1038 | FromSourceMapping.find(Val); |
1039 | |
1040 | if (!It->second.contains(V: SourceGVN)) |
1041 | continue; |
1042 | |
1043 | // We pick the first item that satisfies these conditions. |
1044 | Found = true; |
1045 | ResultGVN = Val; |
1046 | break; |
1047 | } |
1048 | |
1049 | assert(Found && "Could not find matching value for source GVN" ); |
1050 | (void)Found; |
1051 | |
1052 | } else |
1053 | ResultGVN = *GVNMapping.second.begin(); |
1054 | |
1055 | // Whatever GVN is found, we mark it as used. |
1056 | UsedGVNs.insert(V: ResultGVN); |
1057 | |
1058 | unsigned CanonNum = *SourceCand.getCanonicalNum(N: ResultGVN); |
1059 | CanonNumToNumber.insert(KV: std::make_pair(x&: CanonNum, y&: SourceGVN)); |
1060 | NumberToCanonNum.insert(KV: std::make_pair(x&: SourceGVN, y&: CanonNum)); |
1061 | } |
1062 | |
1063 | DenseSet<BasicBlock *> BBSet; |
1064 | getBasicBlocks(BBSet); |
1065 | // Find canonical numbers for the BasicBlocks in the current candidate. |
1066 | // This is done by finding the corresponding value for the first instruction |
1067 | // in the block in the current candidate, finding the matching value in the |
1068 | // source candidate. Then by finding the parent of this value, use the |
1069 | // canonical number of the block in the source candidate for the canonical |
1070 | // number in the current candidate. |
1071 | for (BasicBlock *BB : BBSet) { |
1072 | unsigned BBGVNForCurrCand = ValueToNumber.find(Val: BB)->second; |
1073 | |
1074 | // We can skip the BasicBlock if the canonical numbering has already been |
1075 | // found in a separate instruction. |
1076 | if (NumberToCanonNum.contains(Val: BBGVNForCurrCand)) |
1077 | continue; |
1078 | |
1079 | // If the basic block is the starting block, then the shared instruction may |
1080 | // not be the first instruction in the block, it will be the first |
1081 | // instruction in the similarity region. |
1082 | Value *FirstOutlineInst = BB == getStartBB() |
1083 | ? frontInstruction() |
1084 | : &*BB->instructionsWithoutDebug().begin(); |
1085 | |
1086 | unsigned FirstInstGVN = *getGVN(V: FirstOutlineInst); |
1087 | unsigned FirstInstCanonNum = *getCanonicalNum(N: FirstInstGVN); |
1088 | unsigned SourceGVN = *SourceCand.fromCanonicalNum(N: FirstInstCanonNum); |
1089 | Value *SourceV = *SourceCand.fromGVN(Num: SourceGVN); |
1090 | BasicBlock *SourceBB = cast<Instruction>(Val: SourceV)->getParent(); |
1091 | unsigned SourceBBGVN = *SourceCand.getGVN(V: SourceBB); |
1092 | unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(N: SourceBBGVN); |
1093 | CanonNumToNumber.insert(KV: std::make_pair(x&: SourceCanonBBGVN, y&: BBGVNForCurrCand)); |
1094 | NumberToCanonNum.insert(KV: std::make_pair(x&: BBGVNForCurrCand, y&: SourceCanonBBGVN)); |
1095 | } |
1096 | } |
1097 | |
1098 | void IRSimilarityCandidate::createCanonicalRelationFrom( |
1099 | IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge, |
1100 | IRSimilarityCandidate &TargetCandLarge) { |
1101 | assert(!SourceCand.CanonNumToNumber.empty() && |
1102 | "Canonical Relationship is non-empty" ); |
1103 | assert(!SourceCand.NumberToCanonNum.empty() && |
1104 | "Canonical Relationship is non-empty" ); |
1105 | |
1106 | assert(!SourceCandLarge.CanonNumToNumber.empty() && |
1107 | "Canonical Relationship is non-empty" ); |
1108 | assert(!SourceCandLarge.NumberToCanonNum.empty() && |
1109 | "Canonical Relationship is non-empty" ); |
1110 | |
1111 | assert(!TargetCandLarge.CanonNumToNumber.empty() && |
1112 | "Canonical Relationship is non-empty" ); |
1113 | assert(!TargetCandLarge.NumberToCanonNum.empty() && |
1114 | "Canonical Relationship is non-empty" ); |
1115 | |
1116 | assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty" ); |
1117 | assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty" ); |
1118 | |
1119 | // We're going to use the larger candidates as a "bridge" to create the |
1120 | // canonical number for the target candidate since we have idetified two |
1121 | // candidates as subsequences of larger sequences, and therefore must be |
1122 | // structurally similar. |
1123 | for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) { |
1124 | Value *CurrVal = ValueNumPair.first; |
1125 | unsigned TargetCandGVN = ValueNumPair.second; |
1126 | |
1127 | // Find the numbering in the large candidate that surrounds the |
1128 | // current candidate. |
1129 | std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(V: CurrVal); |
1130 | assert(OLargeTargetGVN.has_value() && "GVN not found for Value" ); |
1131 | |
1132 | // Get the canonical numbering in the large target candidate. |
1133 | std::optional<unsigned> OTargetCandCanon = |
1134 | TargetCandLarge.getCanonicalNum(N: OLargeTargetGVN.value()); |
1135 | assert(OTargetCandCanon.has_value() && |
1136 | "Canononical Number not found for GVN" ); |
1137 | |
1138 | // Get the GVN in the large source candidate from the canonical numbering. |
1139 | std::optional<unsigned> OLargeSourceGVN = |
1140 | SourceCandLarge.fromCanonicalNum(N: OTargetCandCanon.value()); |
1141 | assert(OLargeSourceGVN.has_value() && |
1142 | "GVN Number not found for Canonical Number" ); |
1143 | |
1144 | // Get the Value from the GVN in the large source candidate. |
1145 | std::optional<Value *> OLargeSourceV = |
1146 | SourceCandLarge.fromGVN(Num: OLargeSourceGVN.value()); |
1147 | assert(OLargeSourceV.has_value() && "Value not found for GVN" ); |
1148 | |
1149 | // Get the GVN number for the Value in the source candidate. |
1150 | std::optional<unsigned> OSourceGVN = |
1151 | SourceCand.getGVN(V: OLargeSourceV.value()); |
1152 | assert(OSourceGVN.has_value() && "GVN Number not found for Value" ); |
1153 | |
1154 | // Get the canonical numbering from the GVN/ |
1155 | std::optional<unsigned> OSourceCanon = |
1156 | SourceCand.getCanonicalNum(N: OSourceGVN.value()); |
1157 | assert(OSourceCanon.has_value() && "Canon Number not found for GVN" ); |
1158 | |
1159 | // Insert the canonical numbering and GVN pair into their respective |
1160 | // mappings. |
1161 | CanonNumToNumber.insert( |
1162 | KV: std::make_pair(x&: OSourceCanon.value(), y&: TargetCandGVN)); |
1163 | NumberToCanonNum.insert( |
1164 | KV: std::make_pair(x&: TargetCandGVN, y&: OSourceCanon.value())); |
1165 | } |
1166 | } |
1167 | |
1168 | void IRSimilarityCandidate::createCanonicalMappingFor( |
1169 | IRSimilarityCandidate &CurrCand) { |
1170 | assert(CurrCand.CanonNumToNumber.size() == 0 && |
1171 | "Canonical Relationship is non-empty" ); |
1172 | assert(CurrCand.NumberToCanonNum.size() == 0 && |
1173 | "Canonical Relationship is non-empty" ); |
1174 | |
1175 | unsigned CanonNum = 0; |
1176 | // Iterate over the value numbers found, the order does not matter in this |
1177 | // case. |
1178 | for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) { |
1179 | CurrCand.NumberToCanonNum.insert(KV: std::make_pair(x&: NumToVal.first, y&: CanonNum)); |
1180 | CurrCand.CanonNumToNumber.insert(KV: std::make_pair(x&: CanonNum, y&: NumToVal.first)); |
1181 | CanonNum++; |
1182 | } |
1183 | } |
1184 | |
1185 | /// Look for larger IRSimilarityCandidates From the previously matched |
1186 | /// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is |
1187 | /// an overlap, return a pair of structurally similar, larger |
1188 | /// IRSimilarityCandidates. |
1189 | /// |
1190 | /// \param [in] CandA - The first candidate we are trying to determine the |
1191 | /// structure of. |
1192 | /// \param [in] CandB - The second candidate we are trying to determine the |
1193 | /// structure of. |
1194 | /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in |
1195 | /// a circuit to the IRSimilarityCandidates that include this instruction. |
1196 | /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a |
1197 | /// number representing the structural group assigned to it. |
1198 | static std::optional< |
1199 | std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> |
1200 | CheckLargerCands( |
1201 | IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, |
1202 | DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand, |
1203 | DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) { |
1204 | DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA; |
1205 | DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB; |
1206 | DenseSet<unsigned> IncludedGroupsA; |
1207 | DenseSet<unsigned> IncludedGroupsB; |
1208 | |
1209 | // Find the overall similarity group numbers that fully contain the candidate, |
1210 | // and record the larger candidate for each group. |
1211 | auto IdxToCandidateIt = IndexToIncludedCand.find(Val: CandA.getStartIdx()); |
1212 | std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> |
1213 | Result; |
1214 | |
1215 | unsigned CandAStart = CandA.getStartIdx(); |
1216 | unsigned CandAEnd = CandA.getEndIdx(); |
1217 | unsigned CandBStart = CandB.getStartIdx(); |
1218 | unsigned CandBEnd = CandB.getEndIdx(); |
1219 | if (IdxToCandidateIt == IndexToIncludedCand.end()) |
1220 | return Result; |
1221 | for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { |
1222 | if (MatchedCand->getStartIdx() > CandAStart || |
1223 | (MatchedCand->getEndIdx() < CandAEnd)) |
1224 | continue; |
1225 | unsigned GroupNum = CandToGroup.find(Val: MatchedCand)->second; |
1226 | IncludedGroupAndCandA.insert(KV: std::make_pair(x&: GroupNum, y&: MatchedCand)); |
1227 | IncludedGroupsA.insert(V: GroupNum); |
1228 | } |
1229 | |
1230 | // Find the overall similarity group numbers that fully contain the next |
1231 | // candidate, and record the larger candidate for each group. |
1232 | IdxToCandidateIt = IndexToIncludedCand.find(Val: CandBStart); |
1233 | if (IdxToCandidateIt == IndexToIncludedCand.end()) |
1234 | return Result; |
1235 | for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { |
1236 | if (MatchedCand->getStartIdx() > CandBStart || |
1237 | MatchedCand->getEndIdx() < CandBEnd) |
1238 | continue; |
1239 | unsigned GroupNum = CandToGroup.find(Val: MatchedCand)->second; |
1240 | IncludedGroupAndCandB.insert(KV: std::make_pair(x&: GroupNum, y&: MatchedCand)); |
1241 | IncludedGroupsB.insert(V: GroupNum); |
1242 | } |
1243 | |
1244 | // Find the intersection between the two groups, these are the groups where |
1245 | // the larger candidates exist. |
1246 | set_intersect(S1&: IncludedGroupsA, S2: IncludedGroupsB); |
1247 | |
1248 | // If there is no intersection between the sets, then we cannot determine |
1249 | // whether or not there is a match. |
1250 | if (IncludedGroupsA.empty()) |
1251 | return Result; |
1252 | |
1253 | // Create a pair that contains the larger candidates. |
1254 | auto ItA = IncludedGroupAndCandA.find(Val: *IncludedGroupsA.begin()); |
1255 | auto ItB = IncludedGroupAndCandB.find(Val: *IncludedGroupsA.begin()); |
1256 | Result = std::make_pair(x&: ItA->second, y&: ItB->second); |
1257 | return Result; |
1258 | } |
1259 | |
1260 | /// From the list of IRSimilarityCandidates, perform a comparison between each |
1261 | /// IRSimilarityCandidate to determine if there are overlapping |
1262 | /// IRInstructionData, or if they do not have the same structure. |
1263 | /// |
1264 | /// \param [in] CandsForRepSubstring - The vector containing the |
1265 | /// IRSimilarityCandidates. |
1266 | /// \param [out] StructuralGroups - the mapping of unsigned integers to vector |
1267 | /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the |
1268 | /// vector are structurally similar to one another. |
1269 | /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in |
1270 | /// a circuit to the IRSimilarityCandidates that include this instruction. |
1271 | /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a |
1272 | /// number representing the structural group assigned to it. |
1273 | static void findCandidateStructures( |
1274 | std::vector<IRSimilarityCandidate> &CandsForRepSubstring, |
1275 | DenseMap<unsigned, SimilarityGroup> &StructuralGroups, |
1276 | DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand, |
1277 | DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup |
1278 | ) { |
1279 | std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, |
1280 | InnerCandEndIt; |
1281 | |
1282 | // IRSimilarityCandidates each have a structure for operand use. It is |
1283 | // possible that two instances of the same subsequences have different |
1284 | // structure. Each type of structure found is assigned a number. This |
1285 | // DenseMap maps an IRSimilarityCandidate to which type of similarity |
1286 | // discovered it fits within. |
1287 | DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; |
1288 | |
1289 | // Find the compatibility from each candidate to the others to determine |
1290 | // which candidates overlap and which have the same structure by mapping |
1291 | // each structure to a different group. |
1292 | bool SameStructure; |
1293 | bool Inserted; |
1294 | unsigned CurrentGroupNum = 0; |
1295 | unsigned OuterGroupNum; |
1296 | DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; |
1297 | DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; |
1298 | DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; |
1299 | |
1300 | // Iterate over the candidates to determine its structural and overlapping |
1301 | // compatibility with other instructions |
1302 | DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; |
1303 | DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; |
1304 | for (CandIt = CandsForRepSubstring.begin(), |
1305 | CandEndIt = CandsForRepSubstring.end(); |
1306 | CandIt != CandEndIt; CandIt++) { |
1307 | |
1308 | // Determine if it has an assigned structural group already. |
1309 | // If not, we assign it one, and add it to our mapping. |
1310 | std::tie(args&: CandToGroupIt, args&: Inserted) = |
1311 | CandToGroup.try_emplace(Key: &*CandIt, Args&: CurrentGroupNum); |
1312 | if (Inserted) |
1313 | ++CurrentGroupNum; |
1314 | |
1315 | // Get the structural group number from the iterator. |
1316 | OuterGroupNum = CandToGroupIt->second; |
1317 | |
1318 | // Check if we already have a list of IRSimilarityCandidates for the current |
1319 | // structural group. Create one if one does not exist. |
1320 | CurrentGroupPair = StructuralGroups.find(Val: OuterGroupNum); |
1321 | if (CurrentGroupPair == StructuralGroups.end()) { |
1322 | IRSimilarityCandidate::createCanonicalMappingFor(CurrCand&: *CandIt); |
1323 | std::tie(args&: CurrentGroupPair, args&: Inserted) = StructuralGroups.insert( |
1324 | KV: std::make_pair(x&: OuterGroupNum, y: SimilarityGroup({*CandIt}))); |
1325 | } |
1326 | |
1327 | // Iterate over the IRSimilarityCandidates following the current |
1328 | // IRSimilarityCandidate in the list to determine whether the two |
1329 | // IRSimilarityCandidates are compatible. This is so we do not repeat pairs |
1330 | // of IRSimilarityCandidates. |
1331 | for (InnerCandIt = std::next(x: CandIt), |
1332 | InnerCandEndIt = CandsForRepSubstring.end(); |
1333 | InnerCandIt != InnerCandEndIt; InnerCandIt++) { |
1334 | |
1335 | // We check if the inner item has a group already, if it does, we skip it. |
1336 | CandToGroupItInner = CandToGroup.find(Val: &*InnerCandIt); |
1337 | if (CandToGroupItInner != CandToGroup.end()) |
1338 | continue; |
1339 | |
1340 | // Check if we have found structural similarity between two candidates |
1341 | // that fully contains the first and second candidates. |
1342 | std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> |
1343 | LargerPair = CheckLargerCands( |
1344 | CandA&: *CandIt, CandB&: *InnerCandIt, IndexToIncludedCand, CandToGroup&: CandToOverallGroup); |
1345 | |
1346 | // If a pair was found, it means that we can assume that these smaller |
1347 | // substrings are also structurally similar. Use the larger candidates to |
1348 | // determine the canonical mapping between the two sections. |
1349 | if (LargerPair.has_value()) { |
1350 | SameStructure = true; |
1351 | InnerCandIt->createCanonicalRelationFrom( |
1352 | SourceCand&: *CandIt, SourceCandLarge&: *LargerPair.value().first, TargetCandLarge&: *LargerPair.value().second); |
1353 | CandToGroup.insert(KV: std::make_pair(x: &*InnerCandIt, y&: OuterGroupNum)); |
1354 | CurrentGroupPair->second.push_back(x: *InnerCandIt); |
1355 | continue; |
1356 | } |
1357 | |
1358 | // Otherwise we determine if they have the same structure and add it to |
1359 | // vector if they match. |
1360 | ValueNumberMappingA.clear(); |
1361 | ValueNumberMappingB.clear(); |
1362 | SameStructure = IRSimilarityCandidate::compareStructure( |
1363 | A: *CandIt, B: *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB); |
1364 | if (!SameStructure) |
1365 | continue; |
1366 | |
1367 | InnerCandIt->createCanonicalRelationFrom(SourceCand&: *CandIt, ToSourceMapping&: ValueNumberMappingA, |
1368 | FromSourceMapping&: ValueNumberMappingB); |
1369 | CandToGroup.insert(KV: std::make_pair(x: &*InnerCandIt, y&: OuterGroupNum)); |
1370 | CurrentGroupPair->second.push_back(x: *InnerCandIt); |
1371 | } |
1372 | } |
1373 | } |
1374 | |
1375 | void IRSimilarityIdentifier::findCandidates( |
1376 | std::vector<IRInstructionData *> &InstrList, |
1377 | std::vector<unsigned> &IntegerMapping) { |
1378 | SuffixTree ST(IntegerMapping); |
1379 | |
1380 | std::vector<IRSimilarityCandidate> CandsForRepSubstring; |
1381 | std::vector<SimilarityGroup> NewCandidateGroups; |
1382 | |
1383 | DenseMap<unsigned, SimilarityGroup> StructuralGroups; |
1384 | DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand; |
1385 | DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; |
1386 | |
1387 | // Iterate over the subsequences found by the Suffix Tree to create |
1388 | // IRSimilarityCandidates for each repeated subsequence and determine which |
1389 | // instances are structurally similar to one another. |
1390 | |
1391 | // Sort the suffix tree from longest substring to shortest. |
1392 | std::vector<SuffixTree::RepeatedSubstring> RSes; |
1393 | for (SuffixTree::RepeatedSubstring &RS : ST) |
1394 | RSes.push_back(x: RS); |
1395 | |
1396 | llvm::stable_sort(Range&: RSes, C: [](const SuffixTree::RepeatedSubstring &LHS, |
1397 | const SuffixTree::RepeatedSubstring &RHS) { |
1398 | return LHS.Length > RHS.Length; |
1399 | }); |
1400 | for (SuffixTree::RepeatedSubstring &RS : RSes) { |
1401 | createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS, |
1402 | CandsForRepSubstring); |
1403 | |
1404 | if (CandsForRepSubstring.size() < 2) |
1405 | continue; |
1406 | |
1407 | findCandidateStructures(CandsForRepSubstring, StructuralGroups, |
1408 | IndexToIncludedCand, CandToOverallGroup&: CandToGroup); |
1409 | for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) { |
1410 | // We only add the group if it contains more than one |
1411 | // IRSimilarityCandidate. If there is only one, that means there is no |
1412 | // other repeated subsequence with the same structure. |
1413 | if (Group.second.size() > 1) { |
1414 | SimilarityCandidates->push_back(x: Group.second); |
1415 | // Iterate over each candidate in the group, and add an entry for each |
1416 | // instruction included with a mapping to a set of |
1417 | // IRSimilarityCandidates that include that instruction. |
1418 | for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) { |
1419 | for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx(); |
1420 | Idx <= Edx; ++Idx) |
1421 | IndexToIncludedCand[Idx].insert(V: &IRCand); |
1422 | // Add mapping of candidate to the overall similarity group number. |
1423 | CandToGroup.insert( |
1424 | KV: std::make_pair(x: &IRCand, y: SimilarityCandidates->size() - 1)); |
1425 | } |
1426 | } |
1427 | } |
1428 | |
1429 | CandsForRepSubstring.clear(); |
1430 | StructuralGroups.clear(); |
1431 | NewCandidateGroups.clear(); |
1432 | } |
1433 | } |
1434 | |
1435 | SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( |
1436 | ArrayRef<std::unique_ptr<Module>> Modules) { |
1437 | resetSimilarityCandidates(); |
1438 | |
1439 | std::vector<IRInstructionData *> InstrList; |
1440 | std::vector<unsigned> IntegerMapping; |
1441 | Mapper.InstClassifier.EnableBranches = this->EnableBranches; |
1442 | Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; |
1443 | Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; |
1444 | Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; |
1445 | Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; |
1446 | |
1447 | populateMapper(Modules, InstrList, IntegerMapping); |
1448 | findCandidates(InstrList, IntegerMapping); |
1449 | |
1450 | return *SimilarityCandidates; |
1451 | } |
1452 | |
1453 | SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { |
1454 | resetSimilarityCandidates(); |
1455 | Mapper.InstClassifier.EnableBranches = this->EnableBranches; |
1456 | Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; |
1457 | Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; |
1458 | Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; |
1459 | Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; |
1460 | |
1461 | std::vector<IRInstructionData *> InstrList; |
1462 | std::vector<unsigned> IntegerMapping; |
1463 | |
1464 | populateMapper(M, InstrList, IntegerMapping); |
1465 | findCandidates(InstrList, IntegerMapping); |
1466 | |
1467 | return *SimilarityCandidates; |
1468 | } |
1469 | |
1470 | INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier" , |
1471 | "ir-similarity-identifier" , false, true) |
1472 | |
1473 | IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() |
1474 | : ModulePass(ID) {} |
1475 | |
1476 | bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { |
1477 | IRSI.reset(p: new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, |
1478 | MatchCallsByName, !DisableIntrinsics, |
1479 | false)); |
1480 | return false; |
1481 | } |
1482 | |
1483 | bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { |
1484 | IRSI.reset(); |
1485 | return false; |
1486 | } |
1487 | |
1488 | bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { |
1489 | IRSI->findSimilarity(M); |
1490 | return false; |
1491 | } |
1492 | |
1493 | AnalysisKey IRSimilarityAnalysis::Key; |
1494 | IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, |
1495 | ModuleAnalysisManager &) { |
1496 | auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, |
1497 | MatchCallsByName, !DisableIntrinsics, |
1498 | false); |
1499 | IRSI.findSimilarity(M); |
1500 | return IRSI; |
1501 | } |
1502 | |
1503 | PreservedAnalyses |
1504 | IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { |
1505 | IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(IR&: M); |
1506 | std::optional<SimilarityGroupList> &SimilarityCandidatesOpt = |
1507 | IRSI.getSimilarity(); |
1508 | |
1509 | for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { |
1510 | OS << CandVec.size() << " candidates of length " |
1511 | << CandVec.begin()->getLength() << ". Found in: \n" ; |
1512 | for (IRSimilarityCandidate &Cand : CandVec) { |
1513 | OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() |
1514 | << ", Basic Block: " ; |
1515 | if (Cand.front()->Inst->getParent()->getName().str() == "" ) |
1516 | OS << "(unnamed)" ; |
1517 | else |
1518 | OS << Cand.front()->Inst->getParent()->getName().str(); |
1519 | OS << "\n Start Instruction: " ; |
1520 | Cand.frontInstruction()->print(O&: OS); |
1521 | OS << "\n End Instruction: " ; |
1522 | Cand.backInstruction()->print(O&: OS); |
1523 | OS << "\n" ; |
1524 | } |
1525 | } |
1526 | |
1527 | return PreservedAnalyses::all(); |
1528 | } |
1529 | |
1530 | char IRSimilarityIdentifierWrapperPass::ID = 0; |
1531 | |