1 | //===- DivRemPairs.cpp - Hoist/[dr]ecompose division and remainder --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass hoists and/or decomposes/recomposes integer division and remainder |
10 | // instructions to enable CFG improvements and better codegen. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Transforms/Scalar/DivRemPairs.h" |
15 | #include "llvm/ADT/DenseMap.h" |
16 | #include "llvm/ADT/MapVector.h" |
17 | #include "llvm/ADT/Statistic.h" |
18 | #include "llvm/Analysis/GlobalsModRef.h" |
19 | #include "llvm/Analysis/TargetTransformInfo.h" |
20 | #include "llvm/Analysis/ValueTracking.h" |
21 | #include "llvm/IR/Dominators.h" |
22 | #include "llvm/IR/Function.h" |
23 | #include "llvm/IR/PatternMatch.h" |
24 | #include "llvm/Support/DebugCounter.h" |
25 | #include "llvm/Transforms/Utils/BypassSlowDivision.h" |
26 | #include <optional> |
27 | |
28 | using namespace llvm; |
29 | using namespace llvm::PatternMatch; |
30 | |
31 | #define DEBUG_TYPE "div-rem-pairs" |
32 | STATISTIC(NumPairs, "Number of div/rem pairs" ); |
33 | STATISTIC(NumRecomposed, "Number of instructions recomposed" ); |
34 | STATISTIC(NumHoisted, "Number of instructions hoisted" ); |
35 | STATISTIC(NumDecomposed, "Number of instructions decomposed" ); |
36 | DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform" , |
37 | "Controls transformations in div-rem-pairs pass" ); |
38 | |
39 | namespace { |
40 | struct ExpandedMatch { |
41 | DivRemMapKey Key; |
42 | Instruction *Value; |
43 | }; |
44 | } // namespace |
45 | |
46 | /// See if we can match: (which is the form we expand into) |
47 | /// X - ((X ?/ Y) * Y) |
48 | /// which is equivalent to: |
49 | /// X ?% Y |
50 | static std::optional<ExpandedMatch> matchExpandedRem(Instruction &I) { |
51 | Value *Dividend, *XroundedDownToMultipleOfY; |
52 | if (!match(V: &I, P: m_Sub(L: m_Value(V&: Dividend), R: m_Value(V&: XroundedDownToMultipleOfY)))) |
53 | return std::nullopt; |
54 | |
55 | Value *Divisor; |
56 | Instruction *Div; |
57 | // Look for ((X / Y) * Y) |
58 | if (!match( |
59 | V: XroundedDownToMultipleOfY, |
60 | P: m_c_Mul(L: m_CombineAnd(L: m_IDiv(L: m_Specific(V: Dividend), R: m_Value(V&: Divisor)), |
61 | R: m_Instruction(I&: Div)), |
62 | R: m_Deferred(V: Divisor)))) |
63 | return std::nullopt; |
64 | |
65 | ExpandedMatch M; |
66 | M.Key.SignedOp = Div->getOpcode() == Instruction::SDiv; |
67 | M.Key.Dividend = Dividend; |
68 | M.Key.Divisor = Divisor; |
69 | M.Value = &I; |
70 | return M; |
71 | } |
72 | |
73 | namespace { |
74 | /// A thin wrapper to store two values that we matched as div-rem pair. |
75 | /// We want this extra indirection to avoid dealing with RAUW'ing the map keys. |
76 | struct DivRemPairWorklistEntry { |
77 | /// The actual udiv/sdiv instruction. Source of truth. |
78 | AssertingVH<Instruction> DivInst; |
79 | |
80 | /// The instruction that we have matched as a remainder instruction. |
81 | /// Should only be used as Value, don't introspect it. |
82 | AssertingVH<Instruction> RemInst; |
83 | |
84 | DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_) |
85 | : DivInst(DivInst_), RemInst(RemInst_) { |
86 | assert((DivInst->getOpcode() == Instruction::UDiv || |
87 | DivInst->getOpcode() == Instruction::SDiv) && |
88 | "Not a division." ); |
89 | assert(DivInst->getType() == RemInst->getType() && "Types should match." ); |
90 | // We can't check anything else about remainder instruction, |
91 | // it's not strictly required to be a urem/srem. |
92 | } |
93 | |
94 | /// The type for this pair, identical for both the div and rem. |
95 | Type *getType() const { return DivInst->getType(); } |
96 | |
97 | /// Is this pair signed or unsigned? |
98 | bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; } |
99 | |
100 | /// In this pair, what are the divident and divisor? |
101 | Value *getDividend() const { return DivInst->getOperand(i: 0); } |
102 | Value *getDivisor() const { return DivInst->getOperand(i: 1); } |
103 | |
104 | bool isRemExpanded() const { |
105 | switch (RemInst->getOpcode()) { |
106 | case Instruction::SRem: |
107 | case Instruction::URem: |
108 | return false; // single 'rem' instruction - unexpanded form. |
109 | default: |
110 | return true; // anything else means we have remainder in expanded form. |
111 | } |
112 | } |
113 | }; |
114 | } // namespace |
115 | using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>; |
116 | |
117 | /// Find matching pairs of integer div/rem ops (they have the same numerator, |
118 | /// denominator, and signedness). Place those pairs into a worklist for further |
119 | /// processing. This indirection is needed because we have to use TrackingVH<> |
120 | /// because we will be doing RAUW, and if one of the rem instructions we change |
121 | /// happens to be an input to another div/rem in the maps, we'd have problems. |
122 | static DivRemWorklistTy getWorklist(Function &F) { |
123 | // Insert all divide and remainder instructions into maps keyed by their |
124 | // operands and opcode (signed or unsigned). |
125 | DenseMap<DivRemMapKey, Instruction *> DivMap; |
126 | // Use a MapVector for RemMap so that instructions are moved/inserted in a |
127 | // deterministic order. |
128 | MapVector<DivRemMapKey, Instruction *> RemMap; |
129 | for (auto &BB : F) { |
130 | for (auto &I : BB) { |
131 | if (I.getOpcode() == Instruction::SDiv) |
132 | DivMap[DivRemMapKey(true, I.getOperand(i: 0), I.getOperand(i: 1))] = &I; |
133 | else if (I.getOpcode() == Instruction::UDiv) |
134 | DivMap[DivRemMapKey(false, I.getOperand(i: 0), I.getOperand(i: 1))] = &I; |
135 | else if (I.getOpcode() == Instruction::SRem) |
136 | RemMap[DivRemMapKey(true, I.getOperand(i: 0), I.getOperand(i: 1))] = &I; |
137 | else if (I.getOpcode() == Instruction::URem) |
138 | RemMap[DivRemMapKey(false, I.getOperand(i: 0), I.getOperand(i: 1))] = &I; |
139 | else if (auto Match = matchExpandedRem(I)) |
140 | RemMap[Match->Key] = Match->Value; |
141 | } |
142 | } |
143 | |
144 | // We'll accumulate the matching pairs of div-rem instructions here. |
145 | DivRemWorklistTy Worklist; |
146 | |
147 | // We can iterate over either map because we are only looking for matched |
148 | // pairs. Choose remainders for efficiency because they are usually even more |
149 | // rare than division. |
150 | for (auto &RemPair : RemMap) { |
151 | // Find the matching division instruction from the division map. |
152 | auto It = DivMap.find(Val: RemPair.first); |
153 | if (It == DivMap.end()) |
154 | continue; |
155 | |
156 | // We have a matching pair of div/rem instructions. |
157 | NumPairs++; |
158 | Instruction *RemInst = RemPair.second; |
159 | |
160 | // Place it in the worklist. |
161 | Worklist.emplace_back(Args&: It->second, Args&: RemInst); |
162 | } |
163 | |
164 | return Worklist; |
165 | } |
166 | |
167 | /// Find matching pairs of integer div/rem ops (they have the same numerator, |
168 | /// denominator, and signedness). If they exist in different basic blocks, bring |
169 | /// them together by hoisting or replace the common division operation that is |
170 | /// implicit in the remainder: |
171 | /// X % Y <--> X - ((X / Y) * Y). |
172 | /// |
173 | /// We can largely ignore the normal safety and cost constraints on speculation |
174 | /// of these ops when we find a matching pair. This is because we are already |
175 | /// guaranteed that any exceptions and most cost are already incurred by the |
176 | /// first member of the pair. |
177 | /// |
178 | /// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or |
179 | /// SimplifyCFG, but it's split off on its own because it's different enough |
180 | /// that it doesn't quite match the stated objectives of those passes. |
181 | static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, |
182 | const DominatorTree &DT) { |
183 | bool Changed = false; |
184 | |
185 | // Get the matching pairs of div-rem instructions. We want this extra |
186 | // indirection to avoid dealing with having to RAUW the keys of the maps. |
187 | DivRemWorklistTy Worklist = getWorklist(F); |
188 | |
189 | // Process each entry in the worklist. |
190 | for (DivRemPairWorklistEntry &E : Worklist) { |
191 | if (!DebugCounter::shouldExecute(CounterName: DRPCounter)) |
192 | continue; |
193 | |
194 | bool HasDivRemOp = TTI.hasDivRemOp(DataType: E.getType(), IsSigned: E.isSigned()); |
195 | |
196 | auto &DivInst = E.DivInst; |
197 | auto &RemInst = E.RemInst; |
198 | |
199 | const bool RemOriginallyWasInExpandedForm = E.isRemExpanded(); |
200 | (void)RemOriginallyWasInExpandedForm; // suppress unused variable warning |
201 | |
202 | if (HasDivRemOp && E.isRemExpanded()) { |
203 | // The target supports div+rem but the rem is expanded. |
204 | // We should recompose it first. |
205 | Value *X = E.getDividend(); |
206 | Value *Y = E.getDivisor(); |
207 | Instruction *RealRem = E.isSigned() ? BinaryOperator::CreateSRem(V1: X, V2: Y) |
208 | : BinaryOperator::CreateURem(V1: X, V2: Y); |
209 | // Note that we place it right next to the original expanded instruction, |
210 | // and letting further handling to move it if needed. |
211 | RealRem->setName(RemInst->getName() + ".recomposed" ); |
212 | RealRem->insertAfter(InsertPos: RemInst); |
213 | Instruction *OrigRemInst = RemInst; |
214 | // Update AssertingVH<> with new instruction so it doesn't assert. |
215 | RemInst = RealRem; |
216 | // And replace the original instruction with the new one. |
217 | OrigRemInst->replaceAllUsesWith(V: RealRem); |
218 | RealRem->setDebugLoc(OrigRemInst->getDebugLoc()); |
219 | OrigRemInst->eraseFromParent(); |
220 | NumRecomposed++; |
221 | // Note that we have left ((X / Y) * Y) around. |
222 | // If it had other uses we could rewrite it as X - X % Y |
223 | Changed = true; |
224 | } |
225 | |
226 | assert((!E.isRemExpanded() || !HasDivRemOp) && |
227 | "*If* the target supports div-rem, then by now the RemInst *is* " |
228 | "Instruction::[US]Rem." ); |
229 | |
230 | // If the target supports div+rem and the instructions are in the same block |
231 | // already, there's nothing to do. The backend should handle this. If the |
232 | // target does not support div+rem, then we will decompose the rem. |
233 | if (HasDivRemOp && RemInst->getParent() == DivInst->getParent()) |
234 | continue; |
235 | |
236 | bool DivDominates = DT.dominates(Def: DivInst, User: RemInst); |
237 | if (!DivDominates && !DT.dominates(Def: RemInst, User: DivInst)) { |
238 | // We have matching div-rem pair, but they are in two different blocks, |
239 | // neither of which dominates one another. |
240 | |
241 | BasicBlock *PredBB = nullptr; |
242 | BasicBlock *DivBB = DivInst->getParent(); |
243 | BasicBlock *RemBB = RemInst->getParent(); |
244 | |
245 | // It's only safe to hoist if every instruction before the Div/Rem in the |
246 | // basic block is guaranteed to transfer execution. |
247 | auto IsSafeToHoist = [](Instruction *DivOrRem, BasicBlock *ParentBB) { |
248 | for (auto I = ParentBB->begin(), E = DivOrRem->getIterator(); I != E; |
249 | ++I) |
250 | if (!isGuaranteedToTransferExecutionToSuccessor(I: &*I)) |
251 | return false; |
252 | |
253 | return true; |
254 | }; |
255 | |
256 | // Look for something like this |
257 | // PredBB |
258 | // | \ |
259 | // | Rem |
260 | // | / |
261 | // Div |
262 | // |
263 | // If the Rem block has a single predecessor and successor, and all paths |
264 | // from PredBB go to either RemBB or DivBB, and execution of RemBB and |
265 | // DivBB will always reach the Div/Rem, we can hoist Div to PredBB. If |
266 | // we have a DivRem operation we can also hoist Rem. Otherwise we'll leave |
267 | // Rem where it is and rewrite it to mul/sub. |
268 | if (RemBB->getSingleSuccessor() == DivBB) { |
269 | PredBB = RemBB->getUniquePredecessor(); |
270 | |
271 | // Look for something like this |
272 | // PredBB |
273 | // / \ |
274 | // Div Rem |
275 | // |
276 | // If the Rem and Din blocks share a unique predecessor, and all |
277 | // paths from PredBB go to either RemBB or DivBB, and execution of RemBB |
278 | // and DivBB will always reach the Div/Rem, we can hoist Div to PredBB. |
279 | // If we have a DivRem operation we can also hoist Rem. By hoisting both |
280 | // ops to the same block, we reduce code size and allow the DivRem to |
281 | // issue sooner. Without a DivRem op, this transformation is |
282 | // unprofitable because we would end up performing an extra Mul+Sub on |
283 | // the Rem path. |
284 | } else if (BasicBlock *RemPredBB = RemBB->getUniquePredecessor()) { |
285 | // This hoist is only profitable when the target has a DivRem op. |
286 | if (HasDivRemOp && RemPredBB == DivBB->getUniquePredecessor()) |
287 | PredBB = RemPredBB; |
288 | } |
289 | // FIXME: We could handle more hoisting cases. |
290 | |
291 | if (PredBB && !isa<CatchSwitchInst>(Val: PredBB->getTerminator()) && |
292 | isGuaranteedToTransferExecutionToSuccessor(I: PredBB->getTerminator()) && |
293 | IsSafeToHoist(RemInst, RemBB) && IsSafeToHoist(DivInst, DivBB) && |
294 | all_of(Range: successors(BB: PredBB), |
295 | P: [&](BasicBlock *BB) { return BB == DivBB || BB == RemBB; }) && |
296 | all_of(Range: predecessors(BB: DivBB), |
297 | P: [&](BasicBlock *BB) { return BB == RemBB || BB == PredBB; })) { |
298 | DivDominates = true; |
299 | DivInst->moveBefore(MovePos: PredBB->getTerminator()); |
300 | Changed = true; |
301 | if (HasDivRemOp) { |
302 | RemInst->moveBefore(MovePos: PredBB->getTerminator()); |
303 | continue; |
304 | } |
305 | } else |
306 | continue; |
307 | } |
308 | |
309 | // The target does not have a single div/rem operation, |
310 | // and the rem is already in expanded form. Nothing to do. |
311 | if (!HasDivRemOp && E.isRemExpanded()) |
312 | continue; |
313 | |
314 | if (HasDivRemOp) { |
315 | // The target has a single div/rem operation. Hoist the lower instruction |
316 | // to make the matched pair visible to the backend. |
317 | if (DivDominates) |
318 | RemInst->moveAfter(MovePos: DivInst); |
319 | else |
320 | DivInst->moveAfter(MovePos: RemInst); |
321 | NumHoisted++; |
322 | } else { |
323 | // The target does not have a single div/rem operation, |
324 | // and the rem is *not* in a already-expanded form. |
325 | // Decompose the remainder calculation as: |
326 | // X % Y --> X - ((X / Y) * Y). |
327 | |
328 | assert(!RemOriginallyWasInExpandedForm && |
329 | "We should not be expanding if the rem was in expanded form to " |
330 | "begin with." ); |
331 | |
332 | Value *X = E.getDividend(); |
333 | Value *Y = E.getDivisor(); |
334 | Instruction *Mul = BinaryOperator::CreateMul(V1: DivInst, V2: Y); |
335 | Instruction *Sub = BinaryOperator::CreateSub(V1: X, V2: Mul); |
336 | |
337 | // If the remainder dominates, then hoist the division up to that block: |
338 | // |
339 | // bb1: |
340 | // %rem = srem %x, %y |
341 | // bb2: |
342 | // %div = sdiv %x, %y |
343 | // --> |
344 | // bb1: |
345 | // %div = sdiv %x, %y |
346 | // %mul = mul %div, %y |
347 | // %rem = sub %x, %mul |
348 | // |
349 | // If the division dominates, it's already in the right place. The mul+sub |
350 | // will be in a different block because we don't assume that they are |
351 | // cheap to speculatively execute: |
352 | // |
353 | // bb1: |
354 | // %div = sdiv %x, %y |
355 | // bb2: |
356 | // %rem = srem %x, %y |
357 | // --> |
358 | // bb1: |
359 | // %div = sdiv %x, %y |
360 | // bb2: |
361 | // %mul = mul %div, %y |
362 | // %rem = sub %x, %mul |
363 | // |
364 | // If the div and rem are in the same block, we do the same transform, |
365 | // but any code movement would be within the same block. |
366 | |
367 | if (!DivDominates) |
368 | DivInst->moveBefore(MovePos: RemInst); |
369 | Mul->insertAfter(InsertPos: RemInst); |
370 | Mul->setDebugLoc(RemInst->getDebugLoc()); |
371 | Sub->insertAfter(InsertPos: Mul); |
372 | Sub->setDebugLoc(RemInst->getDebugLoc()); |
373 | |
374 | // If DivInst has the exact flag, remove it. Otherwise this optimization |
375 | // may replace a well-defined value 'X % Y' with poison. |
376 | DivInst->dropPoisonGeneratingFlags(); |
377 | |
378 | // If X can be undef, X should be frozen first. |
379 | // For example, let's assume that Y = 1 & X = undef: |
380 | // %div = sdiv undef, 1 // %div = undef |
381 | // %rem = srem undef, 1 // %rem = 0 |
382 | // => |
383 | // %div = sdiv undef, 1 // %div = undef |
384 | // %mul = mul %div, 1 // %mul = undef |
385 | // %rem = sub %x, %mul // %rem = undef - undef = undef |
386 | // If X is not frozen, %rem becomes undef after transformation. |
387 | if (!isGuaranteedNotToBeUndef(V: X, AC: nullptr, CtxI: DivInst, DT: &DT)) { |
388 | auto *FrX = |
389 | new FreezeInst(X, X->getName() + ".frozen" , DivInst->getIterator()); |
390 | FrX->setDebugLoc(DivInst->getDebugLoc()); |
391 | DivInst->setOperand(i: 0, Val: FrX); |
392 | Sub->setOperand(i: 0, Val: FrX); |
393 | } |
394 | // Same for Y. If X = 1 and Y = (undef | 1), %rem in src is either 1 or 0, |
395 | // but %rem in tgt can be one of many integer values. |
396 | if (!isGuaranteedNotToBeUndef(V: Y, AC: nullptr, CtxI: DivInst, DT: &DT)) { |
397 | auto *FrY = |
398 | new FreezeInst(Y, Y->getName() + ".frozen" , DivInst->getIterator()); |
399 | FrY->setDebugLoc(DivInst->getDebugLoc()); |
400 | DivInst->setOperand(i: 1, Val: FrY); |
401 | Mul->setOperand(i: 1, Val: FrY); |
402 | } |
403 | |
404 | // Now kill the explicit remainder. We have replaced it with: |
405 | // (sub X, (mul (div X, Y), Y) |
406 | Sub->setName(RemInst->getName() + ".decomposed" ); |
407 | Instruction *OrigRemInst = RemInst; |
408 | // Update AssertingVH<> with new instruction so it doesn't assert. |
409 | RemInst = Sub; |
410 | // And replace the original instruction with the new one. |
411 | OrigRemInst->replaceAllUsesWith(V: Sub); |
412 | OrigRemInst->eraseFromParent(); |
413 | NumDecomposed++; |
414 | } |
415 | Changed = true; |
416 | } |
417 | |
418 | return Changed; |
419 | } |
420 | |
421 | // Pass manager boilerplate below here. |
422 | |
423 | PreservedAnalyses DivRemPairsPass::run(Function &F, |
424 | FunctionAnalysisManager &FAM) { |
425 | TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(IR&: F); |
426 | DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(IR&: F); |
427 | if (!optimizeDivRem(F, TTI, DT)) |
428 | return PreservedAnalyses::all(); |
429 | // TODO: This pass just hoists/replaces math ops - all analyses are preserved? |
430 | PreservedAnalyses PA; |
431 | PA.preserveSet<CFGAnalyses>(); |
432 | return PA; |
433 | } |
434 | |