| 1 | //===- DivRemPairs.cpp - Hoist/[dr]ecompose division and remainder --------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This pass hoists and/or decomposes/recomposes integer division and remainder |
| 10 | // instructions to enable CFG improvements and better codegen. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "llvm/Transforms/Scalar/DivRemPairs.h" |
| 15 | #include "llvm/ADT/DenseMap.h" |
| 16 | #include "llvm/ADT/MapVector.h" |
| 17 | #include "llvm/ADT/Statistic.h" |
| 18 | #include "llvm/Analysis/GlobalsModRef.h" |
| 19 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 20 | #include "llvm/Analysis/ValueTracking.h" |
| 21 | #include "llvm/IR/Dominators.h" |
| 22 | #include "llvm/IR/Function.h" |
| 23 | #include "llvm/IR/PatternMatch.h" |
| 24 | #include "llvm/Support/DebugCounter.h" |
| 25 | #include "llvm/Transforms/Utils/BypassSlowDivision.h" |
| 26 | #include <optional> |
| 27 | |
| 28 | using namespace llvm; |
| 29 | using namespace llvm::PatternMatch; |
| 30 | |
| 31 | #define DEBUG_TYPE "div-rem-pairs" |
| 32 | STATISTIC(NumPairs, "Number of div/rem pairs" ); |
| 33 | STATISTIC(NumRecomposed, "Number of instructions recomposed" ); |
| 34 | STATISTIC(NumHoisted, "Number of instructions hoisted" ); |
| 35 | STATISTIC(NumDecomposed, "Number of instructions decomposed" ); |
| 36 | DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform" , |
| 37 | "Controls transformations in div-rem-pairs pass" ); |
| 38 | |
| 39 | namespace { |
| 40 | struct ExpandedMatch { |
| 41 | DivRemMapKey Key; |
| 42 | Instruction *Value; |
| 43 | }; |
| 44 | } // namespace |
| 45 | |
| 46 | /// See if we can match: (which is the form we expand into) |
| 47 | /// X - ((X ?/ Y) * Y) |
| 48 | /// which is equivalent to: |
| 49 | /// X ?% Y |
| 50 | static std::optional<ExpandedMatch> matchExpandedRem(Instruction &I) { |
| 51 | Value *Dividend, *XroundedDownToMultipleOfY; |
| 52 | if (!match(V: &I, P: m_Sub(L: m_Value(V&: Dividend), R: m_Value(V&: XroundedDownToMultipleOfY)))) |
| 53 | return std::nullopt; |
| 54 | |
| 55 | Value *Divisor; |
| 56 | Instruction *Div; |
| 57 | // Look for ((X / Y) * Y) |
| 58 | if (!match( |
| 59 | V: XroundedDownToMultipleOfY, |
| 60 | P: m_c_Mul(L: m_CombineAnd(L: m_IDiv(L: m_Specific(V: Dividend), R: m_Value(V&: Divisor)), |
| 61 | R: m_Instruction(I&: Div)), |
| 62 | R: m_Deferred(V: Divisor)))) |
| 63 | return std::nullopt; |
| 64 | |
| 65 | ExpandedMatch M; |
| 66 | M.Key.SignedOp = Div->getOpcode() == Instruction::SDiv; |
| 67 | M.Key.Dividend = Dividend; |
| 68 | M.Key.Divisor = Divisor; |
| 69 | M.Value = &I; |
| 70 | return M; |
| 71 | } |
| 72 | |
| 73 | namespace { |
| 74 | /// A thin wrapper to store two values that we matched as div-rem pair. |
| 75 | /// We want this extra indirection to avoid dealing with RAUW'ing the map keys. |
| 76 | struct DivRemPairWorklistEntry { |
| 77 | /// The actual udiv/sdiv instruction. Source of truth. |
| 78 | AssertingVH<Instruction> DivInst; |
| 79 | |
| 80 | /// The instruction that we have matched as a remainder instruction. |
| 81 | /// Should only be used as Value, don't introspect it. |
| 82 | AssertingVH<Instruction> RemInst; |
| 83 | |
| 84 | DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_) |
| 85 | : DivInst(DivInst_), RemInst(RemInst_) { |
| 86 | assert((DivInst->getOpcode() == Instruction::UDiv || |
| 87 | DivInst->getOpcode() == Instruction::SDiv) && |
| 88 | "Not a division." ); |
| 89 | assert(DivInst->getType() == RemInst->getType() && "Types should match." ); |
| 90 | // We can't check anything else about remainder instruction, |
| 91 | // it's not strictly required to be a urem/srem. |
| 92 | } |
| 93 | |
| 94 | /// The type for this pair, identical for both the div and rem. |
| 95 | Type *getType() const { return DivInst->getType(); } |
| 96 | |
| 97 | /// Is this pair signed or unsigned? |
| 98 | bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; } |
| 99 | |
| 100 | /// In this pair, what are the divident and divisor? |
| 101 | Value *getDividend() const { return DivInst->getOperand(i: 0); } |
| 102 | Value *getDivisor() const { return DivInst->getOperand(i: 1); } |
| 103 | |
| 104 | bool isRemExpanded() const { |
| 105 | switch (RemInst->getOpcode()) { |
| 106 | case Instruction::SRem: |
| 107 | case Instruction::URem: |
| 108 | return false; // single 'rem' instruction - unexpanded form. |
| 109 | default: |
| 110 | return true; // anything else means we have remainder in expanded form. |
| 111 | } |
| 112 | } |
| 113 | }; |
| 114 | } // namespace |
| 115 | using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>; |
| 116 | |
| 117 | /// Find matching pairs of integer div/rem ops (they have the same numerator, |
| 118 | /// denominator, and signedness). Place those pairs into a worklist for further |
| 119 | /// processing. This indirection is needed because we have to use TrackingVH<> |
| 120 | /// because we will be doing RAUW, and if one of the rem instructions we change |
| 121 | /// happens to be an input to another div/rem in the maps, we'd have problems. |
| 122 | static DivRemWorklistTy getWorklist(Function &F) { |
| 123 | // Insert all divide and remainder instructions into maps keyed by their |
| 124 | // operands and opcode (signed or unsigned). |
| 125 | DenseMap<DivRemMapKey, Instruction *> DivMap; |
| 126 | // Use a MapVector for RemMap so that instructions are moved/inserted in a |
| 127 | // deterministic order. |
| 128 | MapVector<DivRemMapKey, Instruction *> RemMap; |
| 129 | for (auto &BB : F) { |
| 130 | for (auto &I : BB) { |
| 131 | if (I.getOpcode() == Instruction::SDiv) |
| 132 | DivMap[DivRemMapKey(true, I.getOperand(i: 0), I.getOperand(i: 1))] = &I; |
| 133 | else if (I.getOpcode() == Instruction::UDiv) |
| 134 | DivMap[DivRemMapKey(false, I.getOperand(i: 0), I.getOperand(i: 1))] = &I; |
| 135 | else if (I.getOpcode() == Instruction::SRem) |
| 136 | RemMap[DivRemMapKey(true, I.getOperand(i: 0), I.getOperand(i: 1))] = &I; |
| 137 | else if (I.getOpcode() == Instruction::URem) |
| 138 | RemMap[DivRemMapKey(false, I.getOperand(i: 0), I.getOperand(i: 1))] = &I; |
| 139 | else if (auto Match = matchExpandedRem(I)) |
| 140 | RemMap[Match->Key] = Match->Value; |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | // We'll accumulate the matching pairs of div-rem instructions here. |
| 145 | DivRemWorklistTy Worklist; |
| 146 | |
| 147 | // We can iterate over either map because we are only looking for matched |
| 148 | // pairs. Choose remainders for efficiency because they are usually even more |
| 149 | // rare than division. |
| 150 | for (auto &RemPair : RemMap) { |
| 151 | // Find the matching division instruction from the division map. |
| 152 | auto It = DivMap.find(Val: RemPair.first); |
| 153 | if (It == DivMap.end()) |
| 154 | continue; |
| 155 | |
| 156 | // We have a matching pair of div/rem instructions. |
| 157 | NumPairs++; |
| 158 | Instruction *RemInst = RemPair.second; |
| 159 | |
| 160 | // Place it in the worklist. |
| 161 | Worklist.emplace_back(Args&: It->second, Args&: RemInst); |
| 162 | } |
| 163 | |
| 164 | return Worklist; |
| 165 | } |
| 166 | |
| 167 | /// Find matching pairs of integer div/rem ops (they have the same numerator, |
| 168 | /// denominator, and signedness). If they exist in different basic blocks, bring |
| 169 | /// them together by hoisting or replace the common division operation that is |
| 170 | /// implicit in the remainder: |
| 171 | /// X % Y <--> X - ((X / Y) * Y). |
| 172 | /// |
| 173 | /// We can largely ignore the normal safety and cost constraints on speculation |
| 174 | /// of these ops when we find a matching pair. This is because we are already |
| 175 | /// guaranteed that any exceptions and most cost are already incurred by the |
| 176 | /// first member of the pair. |
| 177 | /// |
| 178 | /// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or |
| 179 | /// SimplifyCFG, but it's split off on its own because it's different enough |
| 180 | /// that it doesn't quite match the stated objectives of those passes. |
| 181 | static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, |
| 182 | const DominatorTree &DT) { |
| 183 | bool Changed = false; |
| 184 | |
| 185 | // Get the matching pairs of div-rem instructions. We want this extra |
| 186 | // indirection to avoid dealing with having to RAUW the keys of the maps. |
| 187 | DivRemWorklistTy Worklist = getWorklist(F); |
| 188 | |
| 189 | // Process each entry in the worklist. |
| 190 | for (DivRemPairWorklistEntry &E : Worklist) { |
| 191 | if (!DebugCounter::shouldExecute(CounterName: DRPCounter)) |
| 192 | continue; |
| 193 | |
| 194 | bool HasDivRemOp = TTI.hasDivRemOp(DataType: E.getType(), IsSigned: E.isSigned()); |
| 195 | |
| 196 | auto &DivInst = E.DivInst; |
| 197 | auto &RemInst = E.RemInst; |
| 198 | |
| 199 | const bool RemOriginallyWasInExpandedForm = E.isRemExpanded(); |
| 200 | (void)RemOriginallyWasInExpandedForm; // suppress unused variable warning |
| 201 | |
| 202 | if (HasDivRemOp && E.isRemExpanded()) { |
| 203 | // The target supports div+rem but the rem is expanded. |
| 204 | // We should recompose it first. |
| 205 | Value *X = E.getDividend(); |
| 206 | Value *Y = E.getDivisor(); |
| 207 | Instruction *RealRem = E.isSigned() ? BinaryOperator::CreateSRem(V1: X, V2: Y) |
| 208 | : BinaryOperator::CreateURem(V1: X, V2: Y); |
| 209 | // Note that we place it right next to the original expanded instruction, |
| 210 | // and letting further handling to move it if needed. |
| 211 | RealRem->setName(RemInst->getName() + ".recomposed" ); |
| 212 | RealRem->insertAfter(InsertPos: RemInst->getIterator()); |
| 213 | Instruction *OrigRemInst = RemInst; |
| 214 | // Update AssertingVH<> with new instruction so it doesn't assert. |
| 215 | RemInst = RealRem; |
| 216 | // And replace the original instruction with the new one. |
| 217 | OrigRemInst->replaceAllUsesWith(V: RealRem); |
| 218 | RealRem->setDebugLoc(OrigRemInst->getDebugLoc()); |
| 219 | OrigRemInst->eraseFromParent(); |
| 220 | NumRecomposed++; |
| 221 | // Note that we have left ((X / Y) * Y) around. |
| 222 | // If it had other uses we could rewrite it as X - X % Y |
| 223 | Changed = true; |
| 224 | } |
| 225 | |
| 226 | assert((!E.isRemExpanded() || !HasDivRemOp) && |
| 227 | "*If* the target supports div-rem, then by now the RemInst *is* " |
| 228 | "Instruction::[US]Rem." ); |
| 229 | |
| 230 | // If the target supports div+rem and the instructions are in the same block |
| 231 | // already, there's nothing to do. The backend should handle this. If the |
| 232 | // target does not support div+rem, then we will decompose the rem. |
| 233 | if (HasDivRemOp && RemInst->getParent() == DivInst->getParent()) |
| 234 | continue; |
| 235 | |
| 236 | bool DivDominates = DT.dominates(Def: DivInst, User: RemInst); |
| 237 | if (!DivDominates && !DT.dominates(Def: RemInst, User: DivInst)) { |
| 238 | // We have matching div-rem pair, but they are in two different blocks, |
| 239 | // neither of which dominates one another. |
| 240 | |
| 241 | BasicBlock *PredBB = nullptr; |
| 242 | BasicBlock *DivBB = DivInst->getParent(); |
| 243 | BasicBlock *RemBB = RemInst->getParent(); |
| 244 | |
| 245 | // It's only safe to hoist if every instruction before the Div/Rem in the |
| 246 | // basic block is guaranteed to transfer execution. |
| 247 | auto IsSafeToHoist = [](Instruction *DivOrRem, BasicBlock *ParentBB) { |
| 248 | for (auto I = ParentBB->begin(), E = DivOrRem->getIterator(); I != E; |
| 249 | ++I) |
| 250 | if (!isGuaranteedToTransferExecutionToSuccessor(I: &*I)) |
| 251 | return false; |
| 252 | |
| 253 | return true; |
| 254 | }; |
| 255 | |
| 256 | // Look for something like this |
| 257 | // PredBB |
| 258 | // | \ |
| 259 | // | Rem |
| 260 | // | / |
| 261 | // Div |
| 262 | // |
| 263 | // If the Rem block has a single predecessor and successor, and all paths |
| 264 | // from PredBB go to either RemBB or DivBB, and execution of RemBB and |
| 265 | // DivBB will always reach the Div/Rem, we can hoist Div to PredBB. If |
| 266 | // we have a DivRem operation we can also hoist Rem. Otherwise we'll leave |
| 267 | // Rem where it is and rewrite it to mul/sub. |
| 268 | if (RemBB->getSingleSuccessor() == DivBB) { |
| 269 | PredBB = RemBB->getUniquePredecessor(); |
| 270 | |
| 271 | // Look for something like this |
| 272 | // PredBB |
| 273 | // / \ |
| 274 | // Div Rem |
| 275 | // |
| 276 | // If the Rem and Din blocks share a unique predecessor, and all |
| 277 | // paths from PredBB go to either RemBB or DivBB, and execution of RemBB |
| 278 | // and DivBB will always reach the Div/Rem, we can hoist Div to PredBB. |
| 279 | // If we have a DivRem operation we can also hoist Rem. By hoisting both |
| 280 | // ops to the same block, we reduce code size and allow the DivRem to |
| 281 | // issue sooner. Without a DivRem op, this transformation is |
| 282 | // unprofitable because we would end up performing an extra Mul+Sub on |
| 283 | // the Rem path. |
| 284 | } else if (BasicBlock *RemPredBB = RemBB->getUniquePredecessor()) { |
| 285 | // This hoist is only profitable when the target has a DivRem op. |
| 286 | if (HasDivRemOp && RemPredBB == DivBB->getUniquePredecessor()) |
| 287 | PredBB = RemPredBB; |
| 288 | } |
| 289 | // FIXME: We could handle more hoisting cases. |
| 290 | |
| 291 | if (PredBB && !isa<CatchSwitchInst>(Val: PredBB->getTerminator()) && |
| 292 | isGuaranteedToTransferExecutionToSuccessor(I: PredBB->getTerminator()) && |
| 293 | IsSafeToHoist(RemInst, RemBB) && IsSafeToHoist(DivInst, DivBB) && |
| 294 | all_of(Range: successors(BB: PredBB), |
| 295 | P: [&](BasicBlock *BB) { return BB == DivBB || BB == RemBB; }) && |
| 296 | all_of(Range: predecessors(BB: DivBB), |
| 297 | P: [&](BasicBlock *BB) { return BB == RemBB || BB == PredBB; })) { |
| 298 | DivDominates = true; |
| 299 | DivInst->moveBefore(InsertPos: PredBB->getTerminator()->getIterator()); |
| 300 | Changed = true; |
| 301 | if (HasDivRemOp) { |
| 302 | RemInst->moveBefore(InsertPos: PredBB->getTerminator()->getIterator()); |
| 303 | continue; |
| 304 | } |
| 305 | } else |
| 306 | continue; |
| 307 | } |
| 308 | |
| 309 | // The target does not have a single div/rem operation, |
| 310 | // and the rem is already in expanded form. Nothing to do. |
| 311 | if (!HasDivRemOp && E.isRemExpanded()) |
| 312 | continue; |
| 313 | |
| 314 | if (HasDivRemOp) { |
| 315 | // The target has a single div/rem operation. Hoist the lower instruction |
| 316 | // to make the matched pair visible to the backend. |
| 317 | if (DivDominates) |
| 318 | RemInst->moveAfter(MovePos: DivInst); |
| 319 | else |
| 320 | DivInst->moveAfter(MovePos: RemInst); |
| 321 | NumHoisted++; |
| 322 | } else { |
| 323 | // The target does not have a single div/rem operation, |
| 324 | // and the rem is *not* in a already-expanded form. |
| 325 | // Decompose the remainder calculation as: |
| 326 | // X % Y --> X - ((X / Y) * Y). |
| 327 | |
| 328 | assert(!RemOriginallyWasInExpandedForm && |
| 329 | "We should not be expanding if the rem was in expanded form to " |
| 330 | "begin with." ); |
| 331 | |
| 332 | Value *X = E.getDividend(); |
| 333 | Value *Y = E.getDivisor(); |
| 334 | Instruction *Mul = BinaryOperator::CreateMul(V1: DivInst, V2: Y); |
| 335 | Instruction *Sub = BinaryOperator::CreateSub(V1: X, V2: Mul); |
| 336 | |
| 337 | // If the remainder dominates, then hoist the division up to that block: |
| 338 | // |
| 339 | // bb1: |
| 340 | // %rem = srem %x, %y |
| 341 | // bb2: |
| 342 | // %div = sdiv %x, %y |
| 343 | // --> |
| 344 | // bb1: |
| 345 | // %div = sdiv %x, %y |
| 346 | // %mul = mul %div, %y |
| 347 | // %rem = sub %x, %mul |
| 348 | // |
| 349 | // If the division dominates, it's already in the right place. The mul+sub |
| 350 | // will be in a different block because we don't assume that they are |
| 351 | // cheap to speculatively execute: |
| 352 | // |
| 353 | // bb1: |
| 354 | // %div = sdiv %x, %y |
| 355 | // bb2: |
| 356 | // %rem = srem %x, %y |
| 357 | // --> |
| 358 | // bb1: |
| 359 | // %div = sdiv %x, %y |
| 360 | // bb2: |
| 361 | // %mul = mul %div, %y |
| 362 | // %rem = sub %x, %mul |
| 363 | // |
| 364 | // If the div and rem are in the same block, we do the same transform, |
| 365 | // but any code movement would be within the same block. |
| 366 | |
| 367 | if (!DivDominates) |
| 368 | DivInst->moveBefore(InsertPos: RemInst->getIterator()); |
| 369 | Mul->insertAfter(InsertPos: RemInst->getIterator()); |
| 370 | Mul->setDebugLoc(RemInst->getDebugLoc()); |
| 371 | Sub->insertAfter(InsertPos: Mul->getIterator()); |
| 372 | Sub->setDebugLoc(RemInst->getDebugLoc()); |
| 373 | |
| 374 | // If DivInst has the exact flag, remove it. Otherwise this optimization |
| 375 | // may replace a well-defined value 'X % Y' with poison. |
| 376 | DivInst->dropPoisonGeneratingFlags(); |
| 377 | |
| 378 | // If X can be undef, X should be frozen first. |
| 379 | // For example, let's assume that Y = 1 & X = undef: |
| 380 | // %div = sdiv undef, 1 // %div = undef |
| 381 | // %rem = srem undef, 1 // %rem = 0 |
| 382 | // => |
| 383 | // %div = sdiv undef, 1 // %div = undef |
| 384 | // %mul = mul %div, 1 // %mul = undef |
| 385 | // %rem = sub %x, %mul // %rem = undef - undef = undef |
| 386 | // If X is not frozen, %rem becomes undef after transformation. |
| 387 | if (!isGuaranteedNotToBeUndef(V: X, AC: nullptr, CtxI: DivInst, DT: &DT)) { |
| 388 | auto *FrX = |
| 389 | new FreezeInst(X, X->getName() + ".frozen" , DivInst->getIterator()); |
| 390 | FrX->setDebugLoc(DivInst->getDebugLoc()); |
| 391 | DivInst->setOperand(i: 0, Val: FrX); |
| 392 | Sub->setOperand(i: 0, Val: FrX); |
| 393 | } |
| 394 | // Same for Y. If X = 1 and Y = (undef | 1), %rem in src is either 1 or 0, |
| 395 | // but %rem in tgt can be one of many integer values. |
| 396 | if (!isGuaranteedNotToBeUndef(V: Y, AC: nullptr, CtxI: DivInst, DT: &DT)) { |
| 397 | auto *FrY = |
| 398 | new FreezeInst(Y, Y->getName() + ".frozen" , DivInst->getIterator()); |
| 399 | FrY->setDebugLoc(DivInst->getDebugLoc()); |
| 400 | DivInst->setOperand(i: 1, Val: FrY); |
| 401 | Mul->setOperand(i: 1, Val: FrY); |
| 402 | } |
| 403 | |
| 404 | // Now kill the explicit remainder. We have replaced it with: |
| 405 | // (sub X, (mul (div X, Y), Y) |
| 406 | Sub->setName(RemInst->getName() + ".decomposed" ); |
| 407 | Instruction *OrigRemInst = RemInst; |
| 408 | // Update AssertingVH<> with new instruction so it doesn't assert. |
| 409 | RemInst = Sub; |
| 410 | // And replace the original instruction with the new one. |
| 411 | OrigRemInst->replaceAllUsesWith(V: Sub); |
| 412 | OrigRemInst->eraseFromParent(); |
| 413 | NumDecomposed++; |
| 414 | } |
| 415 | Changed = true; |
| 416 | } |
| 417 | |
| 418 | return Changed; |
| 419 | } |
| 420 | |
| 421 | // Pass manager boilerplate below here. |
| 422 | |
| 423 | PreservedAnalyses DivRemPairsPass::run(Function &F, |
| 424 | FunctionAnalysisManager &FAM) { |
| 425 | TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(IR&: F); |
| 426 | DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(IR&: F); |
| 427 | if (!optimizeDivRem(F, TTI, DT)) |
| 428 | return PreservedAnalyses::all(); |
| 429 | // TODO: This pass just hoists/replaces math ops - all analyses are preserved? |
| 430 | PreservedAnalyses PA; |
| 431 | PA.preserveSet<CFGAnalyses>(); |
| 432 | return PA; |
| 433 | } |
| 434 | |