1 | //===-- DifferenceEngine.cpp - Structural function/module comparison ------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This header defines the implementation of the LLVM difference |
10 | // engine, which structurally compares global values within a module. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "DifferenceEngine.h" |
15 | #include "llvm/ADT/DenseMap.h" |
16 | #include "llvm/ADT/DenseSet.h" |
17 | #include "llvm/ADT/SmallString.h" |
18 | #include "llvm/ADT/SmallVector.h" |
19 | #include "llvm/ADT/StringSet.h" |
20 | #include "llvm/IR/BasicBlock.h" |
21 | #include "llvm/IR/CFG.h" |
22 | #include "llvm/IR/Constants.h" |
23 | #include "llvm/IR/Function.h" |
24 | #include "llvm/IR/Instructions.h" |
25 | #include "llvm/IR/Module.h" |
26 | #include "llvm/Support/ErrorHandling.h" |
27 | #include "llvm/Support/raw_ostream.h" |
28 | #include "llvm/Support/type_traits.h" |
29 | #include <utility> |
30 | |
31 | using namespace llvm; |
32 | |
33 | namespace { |
34 | |
35 | /// A priority queue, implemented as a heap. |
36 | template <class T, class Sorter, unsigned InlineCapacity> |
37 | class PriorityQueue { |
38 | Sorter Precedes; |
39 | llvm::SmallVector<T, InlineCapacity> Storage; |
40 | |
41 | public: |
42 | PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {} |
43 | |
44 | /// Checks whether the heap is empty. |
45 | bool empty() const { return Storage.empty(); } |
46 | |
47 | /// Insert a new value on the heap. |
48 | void insert(const T &V) { |
49 | unsigned Index = Storage.size(); |
50 | Storage.push_back(V); |
51 | if (Index == 0) return; |
52 | |
53 | T *data = Storage.data(); |
54 | while (true) { |
55 | unsigned Target = (Index + 1) / 2 - 1; |
56 | if (!Precedes(data[Index], data[Target])) return; |
57 | std::swap(data[Index], data[Target]); |
58 | if (Target == 0) return; |
59 | Index = Target; |
60 | } |
61 | } |
62 | |
63 | /// Remove the minimum value in the heap. Only valid on a non-empty heap. |
64 | T remove_min() { |
65 | assert(!empty()); |
66 | T tmp = Storage[0]; |
67 | |
68 | unsigned NewSize = Storage.size() - 1; |
69 | if (NewSize) { |
70 | // Move the slot at the end to the beginning. |
71 | if (std::is_trivially_copyable<T>::value) |
72 | Storage[0] = Storage[NewSize]; |
73 | else |
74 | std::swap(Storage[0], Storage[NewSize]); |
75 | |
76 | // Bubble the root up as necessary. |
77 | unsigned Index = 0; |
78 | while (true) { |
79 | // With a 1-based index, the children would be Index*2 and Index*2+1. |
80 | unsigned R = (Index + 1) * 2; |
81 | unsigned L = R - 1; |
82 | |
83 | // If R is out of bounds, we're done after this in any case. |
84 | if (R >= NewSize) { |
85 | // If L is also out of bounds, we're done immediately. |
86 | if (L >= NewSize) break; |
87 | |
88 | // Otherwise, test whether we should swap L and Index. |
89 | if (Precedes(Storage[L], Storage[Index])) |
90 | std::swap(Storage[L], Storage[Index]); |
91 | break; |
92 | } |
93 | |
94 | // Otherwise, we need to compare with the smaller of L and R. |
95 | // Prefer R because it's closer to the end of the array. |
96 | unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R); |
97 | |
98 | // If Index is >= the min of L and R, then heap ordering is restored. |
99 | if (!Precedes(Storage[IndexToTest], Storage[Index])) |
100 | break; |
101 | |
102 | // Otherwise, keep bubbling up. |
103 | std::swap(Storage[IndexToTest], Storage[Index]); |
104 | Index = IndexToTest; |
105 | } |
106 | } |
107 | Storage.pop_back(); |
108 | |
109 | return tmp; |
110 | } |
111 | }; |
112 | |
113 | /// A function-scope difference engine. |
114 | class FunctionDifferenceEngine { |
115 | DifferenceEngine &Engine; |
116 | |
117 | // Some initializers may reference the variable we're currently checking. This |
118 | // can cause an infinite loop. The Saved[LR]HS ivars can be checked to prevent |
119 | // recursing. |
120 | const Value *SavedLHS; |
121 | const Value *SavedRHS; |
122 | |
123 | // The current mapping from old local values to new local values. |
124 | DenseMap<const Value *, const Value *> Values; |
125 | |
126 | // The current mapping from old blocks to new blocks. |
127 | DenseMap<const BasicBlock *, const BasicBlock *> Blocks; |
128 | |
129 | // The tentative mapping from old local values while comparing a pair of |
130 | // basic blocks. Once the pair has been processed, the tentative mapping is |
131 | // committed to the Values map. |
132 | DenseSet<std::pair<const Value *, const Value *>> TentativeValues; |
133 | |
134 | // Equivalence Assumptions |
135 | // |
136 | // For basic blocks in loops, some values in phi nodes may depend on |
137 | // values from not yet processed basic blocks in the loop. When encountering |
138 | // such values, we optimistically asssume their equivalence and store this |
139 | // assumption in a BlockDiffCandidate for the pair of compared BBs. |
140 | // |
141 | // Once we have diffed all BBs, for every BlockDiffCandidate, we check all |
142 | // stored assumptions using the Values map that stores proven equivalences |
143 | // between the old and new values, and report a diff if an assumption cannot |
144 | // be proven to be true. |
145 | // |
146 | // Note that after having made an assumption, all further determined |
147 | // equivalences implicitly depend on that assumption. These will not be |
148 | // reverted or reported if the assumption proves to be false, because these |
149 | // are considered indirect diffs caused by earlier direct diffs. |
150 | // |
151 | // We aim to avoid false negatives in llvm-diff, that is, ensure that |
152 | // whenever no diff is reported, the functions are indeed equal. If |
153 | // assumptions were made, this is not entirely clear, because in principle we |
154 | // could end up with a circular proof where the proof of equivalence of two |
155 | // nodes is depending on the assumption of their equivalence. |
156 | // |
157 | // To see that assumptions do not add false negatives, note that if we do not |
158 | // report a diff, this means that there is an equivalence mapping between old |
159 | // and new values that is consistent with all assumptions made. The circular |
160 | // dependency that exists on an IR value level does not exist at run time, |
161 | // because the values selected by the phi nodes must always already have been |
162 | // computed. Hence, we can prove equivalence of the old and new functions by |
163 | // considering step-wise parallel execution, and incrementally proving |
164 | // equivalence of every new computed value. Another way to think about it is |
165 | // to imagine cloning the loop BBs for every iteration, turning the loops |
166 | // into (possibly infinite) DAGs, and proving equivalence by induction on the |
167 | // iteration, using the computed value mapping. |
168 | |
169 | // The class BlockDiffCandidate stores pairs which either have already been |
170 | // proven to differ, or pairs whose equivalence depends on assumptions to be |
171 | // verified later. |
172 | struct BlockDiffCandidate { |
173 | const BasicBlock *LBB; |
174 | const BasicBlock *RBB; |
175 | // Maps old values to assumed-to-be-equivalent new values |
176 | SmallDenseMap<const Value *, const Value *> EquivalenceAssumptions; |
177 | // If set, we already know the blocks differ. |
178 | bool KnownToDiffer; |
179 | }; |
180 | |
181 | // List of block diff candidates in the order found by processing. |
182 | // We generate reports in this order. |
183 | // For every LBB, there may only be one corresponding RBB. |
184 | SmallVector<BlockDiffCandidate> BlockDiffCandidates; |
185 | // Maps LBB to the index of its BlockDiffCandidate, if existing. |
186 | DenseMap<const BasicBlock *, uint64_t> BlockDiffCandidateIndices; |
187 | |
188 | // Note: Every LBB must always be queried together with the same RBB. |
189 | // The returned reference is not permanently valid and should not be stored. |
190 | BlockDiffCandidate &getOrCreateBlockDiffCandidate(const BasicBlock *LBB, |
191 | const BasicBlock *RBB) { |
192 | auto It = BlockDiffCandidateIndices.find(Val: LBB); |
193 | // Check if LBB already has a diff candidate |
194 | if (It == BlockDiffCandidateIndices.end()) { |
195 | // Add new one |
196 | BlockDiffCandidateIndices[LBB] = BlockDiffCandidates.size(); |
197 | BlockDiffCandidates.push_back( |
198 | Elt: {.LBB: LBB, .RBB: RBB, .EquivalenceAssumptions: SmallDenseMap<const Value *, const Value *>(), .KnownToDiffer: false}); |
199 | return BlockDiffCandidates.back(); |
200 | } |
201 | // Use existing one |
202 | BlockDiffCandidate &Result = BlockDiffCandidates[It->second]; |
203 | assert(Result.RBB == RBB && "Inconsistent basic block pairing!" ); |
204 | return Result; |
205 | } |
206 | |
207 | // Optionally passed to equivalence checker functions, so these can add |
208 | // assumptions in BlockDiffCandidates. Its presence controls whether |
209 | // assumptions are generated. |
210 | struct AssumptionContext { |
211 | // The two basic blocks that need the two compared values to be equivalent. |
212 | const BasicBlock *LBB; |
213 | const BasicBlock *RBB; |
214 | }; |
215 | |
216 | unsigned getUnprocPredCount(const BasicBlock *Block) const { |
217 | return llvm::count_if(Range: predecessors(BB: Block), P: [&](const BasicBlock *Pred) { |
218 | return !Blocks.contains(Val: Pred); |
219 | }); |
220 | } |
221 | |
222 | typedef std::pair<const BasicBlock *, const BasicBlock *> BlockPair; |
223 | |
224 | /// A type which sorts a priority queue by the number of unprocessed |
225 | /// predecessor blocks it has remaining. |
226 | /// |
227 | /// This is actually really expensive to calculate. |
228 | struct QueueSorter { |
229 | const FunctionDifferenceEngine &fde; |
230 | explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {} |
231 | |
232 | bool operator()(BlockPair &Old, BlockPair &New) { |
233 | return fde.getUnprocPredCount(Block: Old.first) |
234 | < fde.getUnprocPredCount(Block: New.first); |
235 | } |
236 | }; |
237 | |
238 | /// A queue of unified blocks to process. |
239 | PriorityQueue<BlockPair, QueueSorter, 20> Queue; |
240 | |
241 | /// Try to unify the given two blocks. Enqueues them for processing |
242 | /// if they haven't already been processed. |
243 | /// |
244 | /// Returns true if there was a problem unifying them. |
245 | bool tryUnify(const BasicBlock *L, const BasicBlock *R) { |
246 | const BasicBlock *&Ref = Blocks[L]; |
247 | |
248 | if (Ref) { |
249 | if (Ref == R) return false; |
250 | |
251 | Engine.logf(text: "successor %l cannot be equivalent to %r; " |
252 | "it's already equivalent to %r" ) |
253 | << L << R << Ref; |
254 | return true; |
255 | } |
256 | |
257 | Ref = R; |
258 | Queue.insert(V: BlockPair(L, R)); |
259 | return false; |
260 | } |
261 | |
262 | /// Unifies two instructions, given that they're known not to have |
263 | /// structural differences. |
264 | void unify(const Instruction *L, const Instruction *R) { |
265 | DifferenceEngine::Context C(Engine, L, R); |
266 | |
267 | bool Result = diff(L, R, Complain: true, TryUnify: true, AllowAssumptions: true); |
268 | assert(!Result && "structural differences second time around?" ); |
269 | (void) Result; |
270 | if (!L->use_empty()) |
271 | Values[L] = R; |
272 | } |
273 | |
274 | void processQueue() { |
275 | while (!Queue.empty()) { |
276 | BlockPair Pair = Queue.remove_min(); |
277 | diff(L: Pair.first, R: Pair.second); |
278 | } |
279 | } |
280 | |
281 | void checkAndReportDiffCandidates() { |
282 | for (BlockDiffCandidate &BDC : BlockDiffCandidates) { |
283 | |
284 | // Check assumptions |
285 | for (const auto &[L, R] : BDC.EquivalenceAssumptions) { |
286 | auto It = Values.find(Val: L); |
287 | if (It == Values.end() || It->second != R) { |
288 | BDC.KnownToDiffer = true; |
289 | break; |
290 | } |
291 | } |
292 | |
293 | // Run block diff if the BBs differ |
294 | if (BDC.KnownToDiffer) { |
295 | DifferenceEngine::Context C(Engine, BDC.LBB, BDC.RBB); |
296 | runBlockDiff(LI: BDC.LBB->begin(), RI: BDC.RBB->begin()); |
297 | } |
298 | } |
299 | } |
300 | |
301 | void diff(const BasicBlock *L, const BasicBlock *R) { |
302 | DifferenceEngine::Context C(Engine, L, R); |
303 | |
304 | BasicBlock::const_iterator LI = L->begin(), LE = L->end(); |
305 | BasicBlock::const_iterator RI = R->begin(); |
306 | |
307 | do { |
308 | assert(LI != LE && RI != R->end()); |
309 | const Instruction *LeftI = &*LI, *RightI = &*RI; |
310 | |
311 | // If the instructions differ, start the more sophisticated diff |
312 | // algorithm at the start of the block. |
313 | if (diff(L: LeftI, R: RightI, Complain: false, TryUnify: false, AllowAssumptions: true)) { |
314 | TentativeValues.clear(); |
315 | // Register (L, R) as diffing pair. Note that we could directly emit a |
316 | // block diff here, but this way we ensure all diffs are emitted in one |
317 | // consistent order, independent of whether the diffs were detected |
318 | // immediately or via invalid assumptions. |
319 | getOrCreateBlockDiffCandidate(LBB: L, RBB: R).KnownToDiffer = true; |
320 | return; |
321 | } |
322 | |
323 | // Otherwise, tentatively unify them. |
324 | if (!LeftI->use_empty()) |
325 | TentativeValues.insert(V: std::make_pair(x&: LeftI, y&: RightI)); |
326 | |
327 | ++LI; |
328 | ++RI; |
329 | } while (LI != LE); // This is sufficient: we can't get equality of |
330 | // terminators if there are residual instructions. |
331 | |
332 | // Unify everything in the block, non-tentatively this time. |
333 | TentativeValues.clear(); |
334 | for (LI = L->begin(), RI = R->begin(); LI != LE; ++LI, ++RI) |
335 | unify(L: &*LI, R: &*RI); |
336 | } |
337 | |
338 | bool matchForBlockDiff(const Instruction *L, const Instruction *R); |
339 | void runBlockDiff(BasicBlock::const_iterator LI, |
340 | BasicBlock::const_iterator RI); |
341 | |
342 | bool diffCallSites(const CallBase &L, const CallBase &R, bool Complain) { |
343 | // FIXME: call attributes |
344 | AssumptionContext AC = {.LBB: L.getParent(), .RBB: R.getParent()}; |
345 | if (!equivalentAsOperands(L: L.getCalledOperand(), R: R.getCalledOperand(), |
346 | AC: &AC)) { |
347 | if (Complain) Engine.log(text: "called functions differ" ); |
348 | return true; |
349 | } |
350 | if (L.arg_size() != R.arg_size()) { |
351 | if (Complain) Engine.log(text: "argument counts differ" ); |
352 | return true; |
353 | } |
354 | for (unsigned I = 0, E = L.arg_size(); I != E; ++I) |
355 | if (!equivalentAsOperands(L: L.getArgOperand(i: I), R: R.getArgOperand(i: I), AC: &AC)) { |
356 | if (Complain) |
357 | Engine.logf(text: "arguments %l and %r differ" ) |
358 | << L.getArgOperand(i: I) << R.getArgOperand(i: I); |
359 | return true; |
360 | } |
361 | return false; |
362 | } |
363 | |
364 | // If AllowAssumptions is enabled, whenever we encounter a pair of values |
365 | // that we cannot prove to be equivalent, we assume equivalence and store that |
366 | // assumption to be checked later in BlockDiffCandidates. |
367 | bool diff(const Instruction *L, const Instruction *R, bool Complain, |
368 | bool TryUnify, bool AllowAssumptions) { |
369 | // FIXME: metadata (if Complain is set) |
370 | AssumptionContext ACValue = {.LBB: L->getParent(), .RBB: R->getParent()}; |
371 | // nullptr AssumptionContext disables assumption generation. |
372 | const AssumptionContext *AC = AllowAssumptions ? &ACValue : nullptr; |
373 | |
374 | // Different opcodes always imply different operations. |
375 | if (L->getOpcode() != R->getOpcode()) { |
376 | if (Complain) Engine.log(text: "different instruction types" ); |
377 | return true; |
378 | } |
379 | |
380 | if (isa<CmpInst>(Val: L)) { |
381 | if (cast<CmpInst>(Val: L)->getPredicate() |
382 | != cast<CmpInst>(Val: R)->getPredicate()) { |
383 | if (Complain) Engine.log(text: "different predicates" ); |
384 | return true; |
385 | } |
386 | } else if (isa<CallInst>(Val: L)) { |
387 | return diffCallSites(L: cast<CallInst>(Val: *L), R: cast<CallInst>(Val: *R), Complain); |
388 | } else if (isa<PHINode>(Val: L)) { |
389 | const PHINode &LI = cast<PHINode>(Val: *L); |
390 | const PHINode &RI = cast<PHINode>(Val: *R); |
391 | |
392 | // This is really weird; type uniquing is broken? |
393 | if (LI.getType() != RI.getType()) { |
394 | if (!LI.getType()->isPointerTy() || !RI.getType()->isPointerTy()) { |
395 | if (Complain) Engine.log(text: "different phi types" ); |
396 | return true; |
397 | } |
398 | } |
399 | |
400 | if (LI.getNumIncomingValues() != RI.getNumIncomingValues()) { |
401 | if (Complain) |
402 | Engine.log(text: "PHI node # of incoming values differ" ); |
403 | return true; |
404 | } |
405 | |
406 | for (unsigned I = 0; I < LI.getNumIncomingValues(); ++I) { |
407 | if (TryUnify) |
408 | tryUnify(L: LI.getIncomingBlock(i: I), R: RI.getIncomingBlock(i: I)); |
409 | |
410 | if (!equivalentAsOperands(L: LI.getIncomingValue(i: I), |
411 | R: RI.getIncomingValue(i: I), AC)) { |
412 | if (Complain) |
413 | Engine.log(text: "PHI node incoming values differ" ); |
414 | return true; |
415 | } |
416 | } |
417 | |
418 | return false; |
419 | |
420 | // Terminators. |
421 | } else if (isa<InvokeInst>(Val: L)) { |
422 | const InvokeInst &LI = cast<InvokeInst>(Val: *L); |
423 | const InvokeInst &RI = cast<InvokeInst>(Val: *R); |
424 | if (diffCallSites(L: LI, R: RI, Complain)) |
425 | return true; |
426 | |
427 | if (TryUnify) { |
428 | tryUnify(L: LI.getNormalDest(), R: RI.getNormalDest()); |
429 | tryUnify(L: LI.getUnwindDest(), R: RI.getUnwindDest()); |
430 | } |
431 | return false; |
432 | |
433 | } else if (isa<CallBrInst>(Val: L)) { |
434 | const CallBrInst &LI = cast<CallBrInst>(Val: *L); |
435 | const CallBrInst &RI = cast<CallBrInst>(Val: *R); |
436 | if (LI.getNumIndirectDests() != RI.getNumIndirectDests()) { |
437 | if (Complain) |
438 | Engine.log(text: "callbr # of indirect destinations differ" ); |
439 | return true; |
440 | } |
441 | |
442 | // Perform the "try unify" step so that we can equate the indirect |
443 | // destinations before checking the call site. |
444 | for (unsigned I = 0; I < LI.getNumIndirectDests(); I++) |
445 | tryUnify(L: LI.getIndirectDest(i: I), R: RI.getIndirectDest(i: I)); |
446 | |
447 | if (diffCallSites(L: LI, R: RI, Complain)) |
448 | return true; |
449 | |
450 | if (TryUnify) |
451 | tryUnify(L: LI.getDefaultDest(), R: RI.getDefaultDest()); |
452 | return false; |
453 | |
454 | } else if (isa<BranchInst>(Val: L)) { |
455 | const BranchInst *LI = cast<BranchInst>(Val: L); |
456 | const BranchInst *RI = cast<BranchInst>(Val: R); |
457 | if (LI->isConditional() != RI->isConditional()) { |
458 | if (Complain) Engine.log(text: "branch conditionality differs" ); |
459 | return true; |
460 | } |
461 | |
462 | if (LI->isConditional()) { |
463 | if (!equivalentAsOperands(L: LI->getCondition(), R: RI->getCondition(), AC)) { |
464 | if (Complain) Engine.log(text: "branch conditions differ" ); |
465 | return true; |
466 | } |
467 | if (TryUnify) tryUnify(L: LI->getSuccessor(i: 1), R: RI->getSuccessor(i: 1)); |
468 | } |
469 | if (TryUnify) tryUnify(L: LI->getSuccessor(i: 0), R: RI->getSuccessor(i: 0)); |
470 | return false; |
471 | |
472 | } else if (isa<IndirectBrInst>(Val: L)) { |
473 | const IndirectBrInst *LI = cast<IndirectBrInst>(Val: L); |
474 | const IndirectBrInst *RI = cast<IndirectBrInst>(Val: R); |
475 | if (LI->getNumDestinations() != RI->getNumDestinations()) { |
476 | if (Complain) Engine.log(text: "indirectbr # of destinations differ" ); |
477 | return true; |
478 | } |
479 | |
480 | if (!equivalentAsOperands(L: LI->getAddress(), R: RI->getAddress(), AC)) { |
481 | if (Complain) Engine.log(text: "indirectbr addresses differ" ); |
482 | return true; |
483 | } |
484 | |
485 | if (TryUnify) { |
486 | for (unsigned i = 0; i < LI->getNumDestinations(); i++) { |
487 | tryUnify(L: LI->getDestination(i), R: RI->getDestination(i)); |
488 | } |
489 | } |
490 | return false; |
491 | |
492 | } else if (isa<SwitchInst>(Val: L)) { |
493 | const SwitchInst *LI = cast<SwitchInst>(Val: L); |
494 | const SwitchInst *RI = cast<SwitchInst>(Val: R); |
495 | if (!equivalentAsOperands(L: LI->getCondition(), R: RI->getCondition(), AC)) { |
496 | if (Complain) Engine.log(text: "switch conditions differ" ); |
497 | return true; |
498 | } |
499 | if (TryUnify) tryUnify(L: LI->getDefaultDest(), R: RI->getDefaultDest()); |
500 | |
501 | bool Difference = false; |
502 | |
503 | DenseMap<const ConstantInt *, const BasicBlock *> LCases; |
504 | for (auto Case : LI->cases()) |
505 | LCases[Case.getCaseValue()] = Case.getCaseSuccessor(); |
506 | |
507 | for (auto Case : RI->cases()) { |
508 | const ConstantInt *CaseValue = Case.getCaseValue(); |
509 | const BasicBlock *LCase = LCases[CaseValue]; |
510 | if (LCase) { |
511 | if (TryUnify) |
512 | tryUnify(L: LCase, R: Case.getCaseSuccessor()); |
513 | LCases.erase(Val: CaseValue); |
514 | } else if (Complain || !Difference) { |
515 | if (Complain) |
516 | Engine.logf(text: "right switch has extra case %r" ) << CaseValue; |
517 | Difference = true; |
518 | } |
519 | } |
520 | if (!Difference) |
521 | for (DenseMap<const ConstantInt *, const BasicBlock *>::iterator |
522 | I = LCases.begin(), |
523 | E = LCases.end(); |
524 | I != E; ++I) { |
525 | if (Complain) |
526 | Engine.logf(text: "left switch has extra case %l" ) << I->first; |
527 | Difference = true; |
528 | } |
529 | return Difference; |
530 | } else if (isa<UnreachableInst>(Val: L)) { |
531 | return false; |
532 | } |
533 | |
534 | if (L->getNumOperands() != R->getNumOperands()) { |
535 | if (Complain) Engine.log(text: "instructions have different operand counts" ); |
536 | return true; |
537 | } |
538 | |
539 | for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) { |
540 | Value *LO = L->getOperand(i: I), *RO = R->getOperand(i: I); |
541 | if (!equivalentAsOperands(L: LO, R: RO, AC)) { |
542 | if (Complain) Engine.logf(text: "operands %l and %r differ" ) << LO << RO; |
543 | return true; |
544 | } |
545 | } |
546 | |
547 | return false; |
548 | } |
549 | |
550 | public: |
551 | bool equivalentAsOperands(const Constant *L, const Constant *R, |
552 | const AssumptionContext *AC) { |
553 | // Use equality as a preliminary filter. |
554 | if (L == R) |
555 | return true; |
556 | |
557 | if (L->getValueID() != R->getValueID()) |
558 | return false; |
559 | |
560 | // Ask the engine about global values. |
561 | if (isa<GlobalValue>(Val: L)) |
562 | return Engine.equivalentAsOperands(L: cast<GlobalValue>(Val: L), |
563 | R: cast<GlobalValue>(Val: R)); |
564 | |
565 | // Compare constant expressions structurally. |
566 | if (isa<ConstantExpr>(Val: L)) |
567 | return equivalentAsOperands(L: cast<ConstantExpr>(Val: L), R: cast<ConstantExpr>(Val: R), |
568 | AC); |
569 | |
570 | // Constants of the "same type" don't always actually have the same |
571 | // type; I don't know why. Just white-list them. |
572 | if (isa<ConstantPointerNull>(Val: L) || isa<UndefValue>(Val: L) || isa<ConstantAggregateZero>(Val: L)) |
573 | return true; |
574 | |
575 | // Block addresses only match if we've already encountered the |
576 | // block. FIXME: tentative matches? |
577 | if (isa<BlockAddress>(Val: L)) |
578 | return Blocks[cast<BlockAddress>(Val: L)->getBasicBlock()] |
579 | == cast<BlockAddress>(Val: R)->getBasicBlock(); |
580 | |
581 | // If L and R are ConstantVectors, compare each element |
582 | if (isa<ConstantVector>(Val: L)) { |
583 | const ConstantVector *CVL = cast<ConstantVector>(Val: L); |
584 | const ConstantVector *CVR = cast<ConstantVector>(Val: R); |
585 | if (CVL->getType()->getNumElements() != CVR->getType()->getNumElements()) |
586 | return false; |
587 | for (unsigned i = 0; i < CVL->getType()->getNumElements(); i++) { |
588 | if (!equivalentAsOperands(L: CVL->getOperand(i_nocapture: i), R: CVR->getOperand(i_nocapture: i), AC)) |
589 | return false; |
590 | } |
591 | return true; |
592 | } |
593 | |
594 | // If L and R are ConstantArrays, compare the element count and types. |
595 | if (isa<ConstantArray>(Val: L)) { |
596 | const ConstantArray *CAL = cast<ConstantArray>(Val: L); |
597 | const ConstantArray *CAR = cast<ConstantArray>(Val: R); |
598 | // Sometimes a type may be equivalent, but not uniquified---e.g. it may |
599 | // contain a GEP instruction. Do a deeper comparison of the types. |
600 | if (CAL->getType()->getNumElements() != CAR->getType()->getNumElements()) |
601 | return false; |
602 | |
603 | for (unsigned I = 0; I < CAL->getType()->getNumElements(); ++I) { |
604 | if (!equivalentAsOperands(L: CAL->getAggregateElement(Elt: I), |
605 | R: CAR->getAggregateElement(Elt: I), AC)) |
606 | return false; |
607 | } |
608 | |
609 | return true; |
610 | } |
611 | |
612 | // If L and R are ConstantStructs, compare each field and type. |
613 | if (isa<ConstantStruct>(Val: L)) { |
614 | const ConstantStruct *CSL = cast<ConstantStruct>(Val: L); |
615 | const ConstantStruct *CSR = cast<ConstantStruct>(Val: R); |
616 | |
617 | const StructType *LTy = cast<StructType>(Val: CSL->getType()); |
618 | const StructType *RTy = cast<StructType>(Val: CSR->getType()); |
619 | |
620 | // The StructTypes should have the same attributes. Don't use |
621 | // isLayoutIdentical(), because that just checks the element pointers, |
622 | // which may not work here. |
623 | if (LTy->getNumElements() != RTy->getNumElements() || |
624 | LTy->isPacked() != RTy->isPacked()) |
625 | return false; |
626 | |
627 | for (unsigned I = 0; I < LTy->getNumElements(); I++) { |
628 | const Value *LAgg = CSL->getAggregateElement(Elt: I); |
629 | const Value *RAgg = CSR->getAggregateElement(Elt: I); |
630 | |
631 | if (LAgg == SavedLHS || RAgg == SavedRHS) { |
632 | if (LAgg != SavedLHS || RAgg != SavedRHS) |
633 | // If the left and right operands aren't both re-analyzing the |
634 | // variable, then the initialiers don't match, so report "false". |
635 | // Otherwise, we skip these operands.. |
636 | return false; |
637 | |
638 | continue; |
639 | } |
640 | |
641 | if (!equivalentAsOperands(L: LAgg, R: RAgg, AC)) { |
642 | return false; |
643 | } |
644 | } |
645 | |
646 | return true; |
647 | } |
648 | |
649 | return false; |
650 | } |
651 | |
652 | bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R, |
653 | const AssumptionContext *AC) { |
654 | if (L == R) |
655 | return true; |
656 | |
657 | if (L->getOpcode() != R->getOpcode()) |
658 | return false; |
659 | |
660 | switch (L->getOpcode()) { |
661 | case Instruction::GetElementPtr: |
662 | // FIXME: inbounds? |
663 | break; |
664 | |
665 | default: |
666 | break; |
667 | } |
668 | |
669 | if (L->getNumOperands() != R->getNumOperands()) |
670 | return false; |
671 | |
672 | for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) { |
673 | const auto *LOp = L->getOperand(i_nocapture: I); |
674 | const auto *ROp = R->getOperand(i_nocapture: I); |
675 | |
676 | if (LOp == SavedLHS || ROp == SavedRHS) { |
677 | if (LOp != SavedLHS || ROp != SavedRHS) |
678 | // If the left and right operands aren't both re-analyzing the |
679 | // variable, then the initialiers don't match, so report "false". |
680 | // Otherwise, we skip these operands.. |
681 | return false; |
682 | |
683 | continue; |
684 | } |
685 | |
686 | if (!equivalentAsOperands(L: LOp, R: ROp, AC)) |
687 | return false; |
688 | } |
689 | |
690 | return true; |
691 | } |
692 | |
693 | // There are cases where we cannot determine whether two values are |
694 | // equivalent, because it depends on not yet processed basic blocks -- see the |
695 | // documentation on assumptions. |
696 | // |
697 | // AC is the context in which we are currently performing a diff. |
698 | // When we encounter a pair of values for which we can neither prove |
699 | // equivalence nor the opposite, we do the following: |
700 | // * If AC is nullptr, we treat the pair as non-equivalent. |
701 | // * If AC is set, we add an assumption for the basic blocks given by AC, |
702 | // and treat the pair as equivalent. The assumption is checked later. |
703 | bool equivalentAsOperands(const Value *L, const Value *R, |
704 | const AssumptionContext *AC) { |
705 | // Fall out if the values have different kind. |
706 | // This possibly shouldn't take priority over oracles. |
707 | if (L->getValueID() != R->getValueID()) |
708 | return false; |
709 | |
710 | // Value subtypes: Argument, Constant, Instruction, BasicBlock, |
711 | // InlineAsm, MDNode, MDString, PseudoSourceValue |
712 | |
713 | if (isa<Constant>(Val: L)) |
714 | return equivalentAsOperands(L: cast<Constant>(Val: L), R: cast<Constant>(Val: R), AC); |
715 | |
716 | if (isa<Instruction>(Val: L)) { |
717 | auto It = Values.find(Val: L); |
718 | if (It != Values.end()) |
719 | return It->second == R; |
720 | |
721 | if (TentativeValues.count(V: std::make_pair(x&: L, y&: R))) |
722 | return true; |
723 | |
724 | // L and R might be equivalent, this could depend on not yet processed |
725 | // basic blocks, so we cannot decide here. |
726 | if (AC) { |
727 | // Add an assumption, unless there is a conflict with an existing one |
728 | BlockDiffCandidate &BDC = |
729 | getOrCreateBlockDiffCandidate(LBB: AC->LBB, RBB: AC->RBB); |
730 | auto InsertionResult = BDC.EquivalenceAssumptions.insert(KV: {L, R}); |
731 | if (!InsertionResult.second && InsertionResult.first->second != R) { |
732 | // We already have a conflicting equivalence assumption for L, so at |
733 | // least one must be wrong, and we know that there is a diff. |
734 | BDC.KnownToDiffer = true; |
735 | BDC.EquivalenceAssumptions.clear(); |
736 | return false; |
737 | } |
738 | // Optimistically assume equivalence, and check later once all BBs |
739 | // have been processed. |
740 | return true; |
741 | } |
742 | |
743 | // Assumptions disabled, so pessimistically assume non-equivalence. |
744 | return false; |
745 | } |
746 | |
747 | if (isa<Argument>(Val: L)) |
748 | return Values[L] == R; |
749 | |
750 | if (isa<BasicBlock>(Val: L)) |
751 | return Blocks[cast<BasicBlock>(Val: L)] != R; |
752 | |
753 | // Pretend everything else is identical. |
754 | return true; |
755 | } |
756 | |
757 | // Avoid a gcc warning about accessing 'this' in an initializer. |
758 | FunctionDifferenceEngine *this_() { return this; } |
759 | |
760 | public: |
761 | FunctionDifferenceEngine(DifferenceEngine &Engine, |
762 | const Value *SavedLHS = nullptr, |
763 | const Value *SavedRHS = nullptr) |
764 | : Engine(Engine), SavedLHS(SavedLHS), SavedRHS(SavedRHS), |
765 | Queue(QueueSorter(*this_())) {} |
766 | |
767 | void diff(const Function *L, const Function *R) { |
768 | assert(Values.empty() && "Multiple diffs per engine are not supported!" ); |
769 | |
770 | if (L->arg_size() != R->arg_size()) |
771 | Engine.log(text: "different argument counts" ); |
772 | |
773 | // Map the arguments. |
774 | for (Function::const_arg_iterator LI = L->arg_begin(), LE = L->arg_end(), |
775 | RI = R->arg_begin(), RE = R->arg_end(); |
776 | LI != LE && RI != RE; ++LI, ++RI) |
777 | Values[&*LI] = &*RI; |
778 | |
779 | tryUnify(L: &*L->begin(), R: &*R->begin()); |
780 | processQueue(); |
781 | checkAndReportDiffCandidates(); |
782 | } |
783 | }; |
784 | |
785 | struct DiffEntry { |
786 | DiffEntry() = default; |
787 | |
788 | unsigned Cost = 0; |
789 | llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange |
790 | }; |
791 | |
792 | bool FunctionDifferenceEngine::matchForBlockDiff(const Instruction *L, |
793 | const Instruction *R) { |
794 | return !diff(L, R, Complain: false, TryUnify: false, AllowAssumptions: false); |
795 | } |
796 | |
797 | void FunctionDifferenceEngine::runBlockDiff(BasicBlock::const_iterator LStart, |
798 | BasicBlock::const_iterator RStart) { |
799 | BasicBlock::const_iterator LE = LStart->getParent()->end(); |
800 | BasicBlock::const_iterator RE = RStart->getParent()->end(); |
801 | |
802 | unsigned NL = std::distance(first: LStart, last: LE); |
803 | |
804 | SmallVector<DiffEntry, 20> Paths1(NL+1); |
805 | SmallVector<DiffEntry, 20> Paths2(NL+1); |
806 | |
807 | DiffEntry *Cur = Paths1.data(); |
808 | DiffEntry *Next = Paths2.data(); |
809 | |
810 | const unsigned LeftCost = 2; |
811 | const unsigned RightCost = 2; |
812 | const unsigned MatchCost = 0; |
813 | |
814 | assert(TentativeValues.empty()); |
815 | |
816 | // Initialize the first column. |
817 | for (unsigned I = 0; I != NL+1; ++I) { |
818 | Cur[I].Cost = I * LeftCost; |
819 | for (unsigned J = 0; J != I; ++J) |
820 | Cur[I].Path.push_back(Elt: DC_left); |
821 | } |
822 | |
823 | for (BasicBlock::const_iterator RI = RStart; RI != RE; ++RI) { |
824 | // Initialize the first row. |
825 | Next[0] = Cur[0]; |
826 | Next[0].Cost += RightCost; |
827 | Next[0].Path.push_back(Elt: DC_right); |
828 | |
829 | unsigned Index = 1; |
830 | for (BasicBlock::const_iterator LI = LStart; LI != LE; ++LI, ++Index) { |
831 | if (matchForBlockDiff(L: &*LI, R: &*RI)) { |
832 | Next[Index] = Cur[Index-1]; |
833 | Next[Index].Cost += MatchCost; |
834 | Next[Index].Path.push_back(Elt: DC_match); |
835 | TentativeValues.insert(V: std::make_pair(x: &*LI, y: &*RI)); |
836 | } else if (Next[Index-1].Cost <= Cur[Index].Cost) { |
837 | Next[Index] = Next[Index-1]; |
838 | Next[Index].Cost += LeftCost; |
839 | Next[Index].Path.push_back(Elt: DC_left); |
840 | } else { |
841 | Next[Index] = Cur[Index]; |
842 | Next[Index].Cost += RightCost; |
843 | Next[Index].Path.push_back(Elt: DC_right); |
844 | } |
845 | } |
846 | |
847 | std::swap(a&: Cur, b&: Next); |
848 | } |
849 | |
850 | // We don't need the tentative values anymore; everything from here |
851 | // on out should be non-tentative. |
852 | TentativeValues.clear(); |
853 | |
854 | SmallVectorImpl<char> &Path = Cur[NL].Path; |
855 | BasicBlock::const_iterator LI = LStart, RI = RStart; |
856 | |
857 | DiffLogBuilder Diff(Engine.getConsumer()); |
858 | |
859 | // Drop trailing matches. |
860 | while (Path.size() && Path.back() == DC_match) |
861 | Path.pop_back(); |
862 | |
863 | // Skip leading matches. |
864 | SmallVectorImpl<char>::iterator |
865 | PI = Path.begin(), PE = Path.end(); |
866 | while (PI != PE && *PI == DC_match) { |
867 | unify(L: &*LI, R: &*RI); |
868 | ++PI; |
869 | ++LI; |
870 | ++RI; |
871 | } |
872 | |
873 | for (; PI != PE; ++PI) { |
874 | switch (static_cast<DiffChange>(*PI)) { |
875 | case DC_match: |
876 | assert(LI != LE && RI != RE); |
877 | { |
878 | const Instruction *L = &*LI, *R = &*RI; |
879 | unify(L, R); |
880 | Diff.addMatch(L, R); |
881 | } |
882 | ++LI; ++RI; |
883 | break; |
884 | |
885 | case DC_left: |
886 | assert(LI != LE); |
887 | Diff.addLeft(L: &*LI); |
888 | ++LI; |
889 | break; |
890 | |
891 | case DC_right: |
892 | assert(RI != RE); |
893 | Diff.addRight(R: &*RI); |
894 | ++RI; |
895 | break; |
896 | } |
897 | } |
898 | |
899 | // Finishing unifying and complaining about the tails of the block, |
900 | // which should be matches all the way through. |
901 | while (LI != LE) { |
902 | assert(RI != RE); |
903 | unify(L: &*LI, R: &*RI); |
904 | ++LI; |
905 | ++RI; |
906 | } |
907 | |
908 | // If the terminators have different kinds, but one is an invoke and the |
909 | // other is an unconditional branch immediately following a call, unify |
910 | // the results and the destinations. |
911 | const Instruction *LTerm = LStart->getParent()->getTerminator(); |
912 | const Instruction *RTerm = RStart->getParent()->getTerminator(); |
913 | if (isa<BranchInst>(Val: LTerm) && isa<InvokeInst>(Val: RTerm)) { |
914 | if (cast<BranchInst>(Val: LTerm)->isConditional()) return; |
915 | BasicBlock::const_iterator I = LTerm->getIterator(); |
916 | if (I == LStart->getParent()->begin()) return; |
917 | --I; |
918 | if (!isa<CallInst>(Val: *I)) return; |
919 | const CallInst *LCall = cast<CallInst>(Val: &*I); |
920 | const InvokeInst *RInvoke = cast<InvokeInst>(Val: RTerm); |
921 | if (!equivalentAsOperands(L: LCall->getCalledOperand(), |
922 | R: RInvoke->getCalledOperand(), AC: nullptr)) |
923 | return; |
924 | if (!LCall->use_empty()) |
925 | Values[LCall] = RInvoke; |
926 | tryUnify(L: LTerm->getSuccessor(Idx: 0), R: RInvoke->getNormalDest()); |
927 | } else if (isa<InvokeInst>(Val: LTerm) && isa<BranchInst>(Val: RTerm)) { |
928 | if (cast<BranchInst>(Val: RTerm)->isConditional()) return; |
929 | BasicBlock::const_iterator I = RTerm->getIterator(); |
930 | if (I == RStart->getParent()->begin()) return; |
931 | --I; |
932 | if (!isa<CallInst>(Val: *I)) return; |
933 | const CallInst *RCall = cast<CallInst>(Val&: I); |
934 | const InvokeInst *LInvoke = cast<InvokeInst>(Val: LTerm); |
935 | if (!equivalentAsOperands(L: LInvoke->getCalledOperand(), |
936 | R: RCall->getCalledOperand(), AC: nullptr)) |
937 | return; |
938 | if (!LInvoke->use_empty()) |
939 | Values[LInvoke] = RCall; |
940 | tryUnify(L: LInvoke->getNormalDest(), R: RTerm->getSuccessor(Idx: 0)); |
941 | } |
942 | } |
943 | } |
944 | |
945 | void DifferenceEngine::Oracle::anchor() { } |
946 | |
947 | void DifferenceEngine::diff(const Function *L, const Function *R) { |
948 | Context C(*this, L, R); |
949 | |
950 | // FIXME: types |
951 | // FIXME: attributes and CC |
952 | // FIXME: parameter attributes |
953 | |
954 | // If both are declarations, we're done. |
955 | if (L->empty() && R->empty()) |
956 | return; |
957 | else if (L->empty()) |
958 | log(text: "left function is declaration, right function is definition" ); |
959 | else if (R->empty()) |
960 | log(text: "right function is declaration, left function is definition" ); |
961 | else |
962 | FunctionDifferenceEngine(*this).diff(L, R); |
963 | } |
964 | |
965 | void DifferenceEngine::diff(const Module *L, const Module *R) { |
966 | StringSet<> LNames; |
967 | SmallVector<std::pair<const Function *, const Function *>, 20> Queue; |
968 | |
969 | unsigned LeftAnonCount = 0; |
970 | unsigned RightAnonCount = 0; |
971 | |
972 | for (Module::const_iterator I = L->begin(), E = L->end(); I != E; ++I) { |
973 | const Function *LFn = &*I; |
974 | StringRef Name = LFn->getName(); |
975 | if (Name.empty()) { |
976 | ++LeftAnonCount; |
977 | continue; |
978 | } |
979 | |
980 | LNames.insert(key: Name); |
981 | |
982 | if (Function *RFn = R->getFunction(Name: LFn->getName())) |
983 | Queue.push_back(Elt: std::make_pair(x&: LFn, y&: RFn)); |
984 | else |
985 | logf(text: "function %l exists only in left module" ) << LFn; |
986 | } |
987 | |
988 | for (Module::const_iterator I = R->begin(), E = R->end(); I != E; ++I) { |
989 | const Function *RFn = &*I; |
990 | StringRef Name = RFn->getName(); |
991 | if (Name.empty()) { |
992 | ++RightAnonCount; |
993 | continue; |
994 | } |
995 | |
996 | if (!LNames.count(Key: Name)) |
997 | logf(text: "function %r exists only in right module" ) << RFn; |
998 | } |
999 | |
1000 | if (LeftAnonCount != 0 || RightAnonCount != 0) { |
1001 | SmallString<32> Tmp; |
1002 | logf(text: ("not comparing " + Twine(LeftAnonCount) + |
1003 | " anonymous functions in the left module and " + |
1004 | Twine(RightAnonCount) + " in the right module" ) |
1005 | .toStringRef(Out&: Tmp)); |
1006 | } |
1007 | |
1008 | for (SmallVectorImpl<std::pair<const Function *, const Function *>>::iterator |
1009 | I = Queue.begin(), |
1010 | E = Queue.end(); |
1011 | I != E; ++I) |
1012 | diff(L: I->first, R: I->second); |
1013 | } |
1014 | |
1015 | bool DifferenceEngine::equivalentAsOperands(const GlobalValue *L, |
1016 | const GlobalValue *R) { |
1017 | if (globalValueOracle) return (*globalValueOracle)(L, R); |
1018 | |
1019 | if (isa<GlobalVariable>(Val: L) && isa<GlobalVariable>(Val: R)) { |
1020 | const GlobalVariable *GVL = cast<GlobalVariable>(Val: L); |
1021 | const GlobalVariable *GVR = cast<GlobalVariable>(Val: R); |
1022 | if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() && |
1023 | GVR->hasLocalLinkage() && GVR->hasUniqueInitializer()) |
1024 | return FunctionDifferenceEngine(*this, GVL, GVR) |
1025 | .equivalentAsOperands(L: GVL->getInitializer(), R: GVR->getInitializer(), |
1026 | AC: nullptr); |
1027 | } |
1028 | |
1029 | return L->getName() == R->getName(); |
1030 | } |
1031 | |