| 1 | //====- X86CmovConversion.cpp - Convert Cmov to Branch --------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
/// \file
/// This file implements a pass that converts X86 cmov instructions into
/// branches when profitable. This pass is conservative. It transforms if and
/// only if it can guarantee a gain with high confidence.
///
/// Thus, the optimization applies under the following conditions:
///   1. Consider as candidates only CMOVs in innermost loops (assume that
///      most hotspots are represented by these loops).
///   2. Given a group of CMOV instructions that are using the same EFLAGS def
///      instruction:
///      a. Consider them as candidates only if all have the same condition
///         code or the opposite one, to prevent generating more than one
///         conditional jump per EFLAGS def instruction.
///      b. Consider them as candidates only if all are profitable to be
///         converted (assume that one bad conversion may cause a degradation).
///   3. Apply conversion only for loops that are found profitable and only for
///      CMOV candidates that were found profitable.
///      a. A loop is considered profitable only if conversion will reduce its
///         depth cost by some threshold.
///      b. A CMOV is considered profitable if the cost of its condition is
///         higher than the average cost of its true-value and false-value by
///         25% of the branch-misprediction penalty; this ensures no
///         degradation even with 25% branch misprediction (see the worked
///         example below).
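///
///      For illustration only (all numbers are hypothetical, not taken from a
///      real schedule model): with a misprediction penalty of 20 cycles, a
///      condition depth of 12 cycles, and a combined true/false value depth
///      of 6 cycles, the gain is 12 - 6 = 6 cycles, which covers the required
///      25% of the penalty (5 cycles), so the CMOV is converted.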
///
/// Note: This pass is assumed to run on SSA machine code.
| 34 | // |
| 35 | //===----------------------------------------------------------------------===// |
| 36 | // |
| 37 | // External interfaces: |
| 38 | // FunctionPass *llvm::createX86CmovConverterPass(); |
| 39 | // bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF); |
| 40 | // |
| 41 | //===----------------------------------------------------------------------===// |
| 42 | |
| 43 | #include "X86.h" |
| 44 | #include "X86InstrInfo.h" |
| 45 | #include "llvm/ADT/ArrayRef.h" |
| 46 | #include "llvm/ADT/DenseMap.h" |
| 47 | #include "llvm/ADT/STLExtras.h" |
| 48 | #include "llvm/ADT/SmallPtrSet.h" |
| 49 | #include "llvm/ADT/SmallVector.h" |
| 50 | #include "llvm/ADT/Statistic.h" |
| 51 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 52 | #include "llvm/CodeGen/MachineFunction.h" |
| 53 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 54 | #include "llvm/CodeGen/MachineInstr.h" |
| 55 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 56 | #include "llvm/CodeGen/MachineLoopInfo.h" |
| 57 | #include "llvm/CodeGen/MachineOperand.h" |
| 58 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 59 | #include "llvm/CodeGen/TargetInstrInfo.h" |
| 60 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
| 61 | #include "llvm/CodeGen/TargetSchedule.h" |
| 62 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
| 63 | #include "llvm/IR/DebugLoc.h" |
| 64 | #include "llvm/InitializePasses.h" |
| 65 | #include "llvm/MC/MCSchedule.h" |
| 66 | #include "llvm/Pass.h" |
| 67 | #include "llvm/Support/CommandLine.h" |
| 68 | #include "llvm/Support/Debug.h" |
| 69 | #include "llvm/Support/raw_ostream.h" |
| 70 | #include "llvm/Target/CGPassBuilderOption.h" |
| 71 | #include <algorithm> |
| 72 | #include <cassert> |
| 73 | #include <iterator> |
| 74 | #include <utility> |
| 75 | |
| 76 | using namespace llvm; |
| 77 | |
| 78 | #define DEBUG_TYPE "x86-cmov-conversion" |
| 79 | |
STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups");
STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");

// This internal switch can be used to turn off the cmov/branch optimization.
static cl::opt<bool>
    EnableCmovConverter("x86-cmov-converter",
                        cl::desc("Enable the X86 cmov-to-branch optimization."),
                        cl::init(true), cl::Hidden);

static cl::opt<unsigned>
    GainCycleThreshold("x86-cmov-converter-threshold",
                       cl::desc("Minimum gain per loop (in cycles) threshold."),
                       cl::init(4), cl::Hidden);

static cl::opt<bool> ForceMemOperand(
    "x86-cmov-converter-force-mem-operand",
    cl::desc("Convert cmovs to branches whenever they have memory operands."),
    cl::init(true), cl::Hidden);

static cl::opt<bool> ForceAll(
    "x86-cmov-converter-force-all",
    cl::desc("Convert all cmovs to branches."),
    cl::init(false), cl::Hidden);
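
// Usage note (illustrative invocation, not part of this file's interface):
// since these are cl::opt flags, they can be toggled from any tool that runs
// the X86 backend, e.g.:
//   llc -x86-cmov-converter-threshold=8 \
//       -x86-cmov-converter-force-mem-operand=false test.ll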
| 105 | |
| 106 | namespace { |
| 107 | |
| 108 | /// Converts X86 cmov instructions into branches when profitable. |
| 109 | class X86CmovConverterPass : public MachineFunctionPass { |
| 110 | public: |
| 111 | X86CmovConverterPass() : MachineFunctionPass(ID) { } |
| 112 | |
| 113 | StringRef getPassName() const override { return "X86 cmov Conversion" ; } |
| 114 | bool runOnMachineFunction(MachineFunction &MF) override; |
| 115 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
| 116 | |
| 117 | /// Pass identification, replacement for typeid. |
| 118 | static char ID; |
| 119 | |
| 120 | private: |
| 121 | MachineRegisterInfo *MRI = nullptr; |
| 122 | const TargetInstrInfo *TII = nullptr; |
| 123 | const TargetRegisterInfo *TRI = nullptr; |
| 124 | MachineLoopInfo *MLI = nullptr; |
| 125 | TargetSchedModel TSchedModel; |
| 126 | |
| 127 | /// List of consecutive CMOV instructions. |
| 128 | using CmovGroup = SmallVector<MachineInstr *, 2>; |
| 129 | using CmovGroups = SmallVector<CmovGroup, 2>; |
| 130 | |
| 131 | /// Collect all CMOV-group-candidates in \p CurrLoop and update \p |
| 132 | /// CmovInstGroups accordingly. |
| 133 | /// |
| 134 | /// \param Blocks List of blocks to process. |
| 135 | /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop. |
| 136 | /// \returns true iff it found any CMOV-group-candidate. |
| 137 | bool collectCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks, |
| 138 | CmovGroups &CmovInstGroups, |
| 139 | bool IncludeLoads = false); |
| 140 | |
  /// Check if it is profitable to transform each CMOV-group-candidate into a
  /// branch. Remove all groups that are not profitable from \p CmovInstGroups.
  ///
  /// \param Blocks List of blocks to process.
  /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop.
  /// \returns true iff any CMOV-group-candidate remains.
| 147 | bool checkForProfitableCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks, |
| 148 | CmovGroups &CmovInstGroups); |
| 149 | |
| 150 | /// Convert the given list of consecutive CMOV instructions into a branch. |
| 151 | /// |
| 152 | /// \param Group Consecutive CMOV instructions to be converted into branch. |
| 153 | void convertCmovInstsToBranches(SmallVectorImpl<MachineInstr *> &Group) const; |
| 154 | }; |
| 155 | |
| 156 | } // end anonymous namespace |
| 157 | |
| 158 | char X86CmovConverterPass::ID = 0; |
| 159 | |
| 160 | void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const { |
| 161 | MachineFunctionPass::getAnalysisUsage(AU); |
| 162 | AU.addRequired<MachineLoopInfoWrapperPass>(); |
| 163 | } |
| 164 | |
| 165 | bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) { |
| 166 | if (skipFunction(F: MF.getFunction())) |
| 167 | return false; |
| 168 | if (!EnableCmovConverter) |
| 169 | return false; |
| 170 | |
| 171 | // If the SelectOptimize pass is enabled, cmovs have already been optimized. |
| 172 | if (!getCGPassBuilderOption().DisableSelectOptimize) |
| 173 | return false; |
| 174 | |
| 175 | LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName() |
| 176 | << "**********\n" ); |
| 177 | |
| 178 | bool Changed = false; |
| 179 | MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); |
| 180 | const TargetSubtargetInfo &STI = MF.getSubtarget(); |
| 181 | MRI = &MF.getRegInfo(); |
| 182 | TII = STI.getInstrInfo(); |
| 183 | TRI = STI.getRegisterInfo(); |
| 184 | TSchedModel.init(TSInfo: &STI); |
| 185 | |
  // Before we handle the more subtle cases of register-register CMOVs inside
  // potentially hot loops, we want to quickly remove all CMOVs (ForceAll) or
  // the ones with a memory operand (ForceMemOperand option). The latter risk
  // stalling while waiting for the load to complete, a situation that
  // speculative execution behind a branch is better suited to handle on
  // modern x86 chips.
| 191 | if (ForceMemOperand || ForceAll) { |
| 192 | CmovGroups AllCmovGroups; |
| 193 | SmallVector<MachineBasicBlock *, 4> Blocks(llvm::make_pointer_range(Range&: MF)); |
| 194 | if (collectCmovCandidates(Blocks, CmovInstGroups&: AllCmovGroups, /*IncludeLoads*/ true)) { |
| 195 | for (auto &Group : AllCmovGroups) { |
| 196 | // Skip any group that doesn't do at least one memory operand cmov. |
| 197 | if (ForceMemOperand && !ForceAll && |
| 198 | llvm::none_of(Range&: Group, P: [&](MachineInstr *I) { return I->mayLoad(); })) |
| 199 | continue; |
| 200 | |
| 201 | // For CMOV groups which we can rewrite and which contain a memory load, |
| 202 | // always rewrite them. On x86, a CMOV will dramatically amplify any |
| 203 | // memory latency by blocking speculative execution. |
| 204 | Changed = true; |
| 205 | convertCmovInstsToBranches(Group); |
| 206 | } |
| 207 | } |
| 208 | // Early return as ForceAll converts all CmovGroups. |
| 209 | if (ForceAll) |
| 210 | return Changed; |
| 211 | } |
| 212 | |
| 213 | //===--------------------------------------------------------------------===// |
| 214 | // Register-operand Conversion Algorithm |
| 215 | // --------- |
| 216 | // For each innermost loop |
| 217 | // collectCmovCandidates() { |
| 218 | // Find all CMOV-group-candidates. |
| 219 | // } |
| 220 | // |
| 221 | // checkForProfitableCmovCandidates() { |
| 222 | // * Calculate both loop-depth and optimized-loop-depth. |
  //     * Use these depths to check for loop transformation profitability.
| 224 | // * Check for CMOV-group-candidate transformation profitability. |
| 225 | // } |
| 226 | // |
| 227 | // For each profitable CMOV-group-candidate |
| 228 | // convertCmovInstsToBranches() { |
| 229 | // * Create FalseBB, SinkBB, Conditional branch to SinkBB. |
| 230 | // * Replace each CMOV instruction with a PHI instruction in SinkBB. |
| 231 | // } |
| 232 | // |
| 233 | // Note: For more details, see each function description. |
| 234 | //===--------------------------------------------------------------------===// |
| 235 | |
| 236 | // Build up the loops in pre-order. |
| 237 | SmallVector<MachineLoop *, 4> Loops(MLI->begin(), MLI->end()); |
| 238 | // Note that we need to check size on each iteration as we accumulate child |
| 239 | // loops. |
| 240 | for (int i = 0; i < (int)Loops.size(); ++i) |
| 241 | llvm::append_range(C&: Loops, R: Loops[i]->getSubLoops()); |
| 242 | |
| 243 | for (MachineLoop *CurrLoop : Loops) { |
| 244 | // Optimize only innermost loops. |
| 245 | if (!CurrLoop->getSubLoops().empty()) |
| 246 | continue; |
| 247 | |
| 248 | // List of consecutive CMOV instructions to be processed. |
| 249 | CmovGroups CmovInstGroups; |
| 250 | |
| 251 | if (!collectCmovCandidates(Blocks: CurrLoop->getBlocks(), CmovInstGroups)) |
| 252 | continue; |
| 253 | |
| 254 | if (!checkForProfitableCmovCandidates(Blocks: CurrLoop->getBlocks(), |
| 255 | CmovInstGroups)) |
| 256 | continue; |
| 257 | |
| 258 | Changed = true; |
| 259 | for (auto &Group : CmovInstGroups) |
| 260 | convertCmovInstsToBranches(Group); |
| 261 | } |
| 262 | |
| 263 | return Changed; |
| 264 | } |
| 265 | |
| 266 | bool X86CmovConverterPass::collectCmovCandidates( |
| 267 | ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups, |
| 268 | bool IncludeLoads) { |
  //===--------------------------------------------------------------------===//
  // Collect all CMOV-group-candidates and add them into CmovInstGroups.
  //
  // CMOV-group:
  //   CMOV instructions in the same MBB that use the same EFLAGS def
  //   instruction.
  //
  // CMOV-group-candidate:
  //   CMOV-group where all the CMOV instructions
  //     1. are consecutive,
  //     2. have the same condition code or the opposite one, and
  //     3. use only register operands (X86::CMOVrr).
  //===--------------------------------------------------------------------===//
  // List of possible improvements (TODOs):
  // --------------------------------------
  // TODO: Add support for X86::CMOVrm instructions.
  // TODO: Add support for X86::SETcc instructions.
  // TODO: Add support for CMOV-groups with non-consecutive CMOV instructions.
  //===--------------------------------------------------------------------===//
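  // For illustration, a typical candidate looks like this (pseudo machine IR;
  // the register names are invented for the example):
  //   CMP32rr %a, %b, implicit-def $eflags
  //   %v1 = CMOV32rr %t1, %f1, <COND_GE>, implicit $eflags
  //   %v2 = CMOV32rr %t2, %f2, <COND_L>,  implicit $eflags
  // Both CMOVs are consecutive, register-only, and tied to the same EFLAGS
  // def; opposite condition codes are allowed, so this forms one
  // CMOV-group-candidate.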
| 287 | |
  // Currently processed CMOV group.
  CmovGroup Group;
  for (auto *MBB : Blocks) {
    Group.clear();
    // Condition code of the first CMOV instruction in the currently processed
    // range, and its opposite condition code.
    X86::CondCode FirstCC = X86::COND_INVALID, FirstOppCC = X86::COND_INVALID,
                  MemOpCC = X86::COND_INVALID;
    // Indicates that a non-CMOVrr instruction was found in the currently
    // processed range.
    bool FoundNonCMOVInst = false;
    // Indicates whether the currently processed CMOV group should be skipped.
    bool SkipGroup = false;
| 300 | |
| 301 | for (auto &I : *MBB) { |
| 302 | // Skip debug instructions. |
| 303 | if (I.isDebugInstr()) |
| 304 | continue; |
| 305 | |
      X86::CondCode CC = X86::getCondFromCMov(I);
      // Check if we found an X86::CMOVrr instruction. If it is marked as
      // unpredictable, skip it and do not convert it to a branch.
      if (CC != X86::COND_INVALID &&
          !I.getFlag(MachineInstr::MIFlag::Unpredictable) &&
          (IncludeLoads || !I.mayLoad())) {
        if (Group.empty()) {
          // We found the first CMOV in the range, reset flags.
| 314 | FirstCC = CC; |
| 315 | FirstOppCC = X86::GetOppositeBranchCondition(CC); |
| 316 | // Clear out the prior group's memory operand CC. |
| 317 | MemOpCC = X86::COND_INVALID; |
| 318 | FoundNonCMOVInst = false; |
| 319 | SkipGroup = false; |
| 320 | } |
| 321 | Group.push_back(Elt: &I); |
        // Check if it is a non-consecutive CMOV instruction or its condition
        // code differs from both FirstCC and FirstOppCC.
        if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC))
          // Mark the SkipGroup indicator to skip the currently processed
          // CMOV group.
          SkipGroup = true;
| 327 | if (I.mayLoad()) { |
| 328 | if (MemOpCC == X86::COND_INVALID) |
| 329 | // The first memory operand CMOV. |
| 330 | MemOpCC = CC; |
| 331 | else if (CC != MemOpCC) |
| 332 | // Can't handle mixed conditions with memory operands. |
| 333 | SkipGroup = true; |
| 334 | } |
| 335 | // Check if we were relying on zero-extending behavior of the CMOV. |
| 336 | if (!SkipGroup && |
| 337 | llvm::any_of( |
| 338 | Range: MRI->use_nodbg_instructions(Reg: I.defs().begin()->getReg()), |
| 339 | P: [&](MachineInstr &UseI) { |
| 340 | return UseI.getOpcode() == X86::SUBREG_TO_REG; |
| 341 | })) |
| 342 | // FIXME: We should model the cost of using an explicit MOV to handle |
| 343 | // the zero-extension rather than just refusing to handle this. |
| 344 | SkipGroup = true; |
| 345 | continue; |
| 346 | } |
| 347 | // If Group is empty, keep looking for first CMOV in the range. |
| 348 | if (Group.empty()) |
| 349 | continue; |
| 350 | |
| 351 | // We found a non X86::CMOVrr instruction. |
| 352 | FoundNonCMOVInst = true; |
      // Check if this instruction defines EFLAGS, to determine the end of the
      // processed range, as there would be no more instructions using the
      // current EFLAGS def.
| 355 | if (I.definesRegister(Reg: X86::EFLAGS, /*TRI=*/nullptr)) { |
| 356 | // Check if current processed CMOV-group should not be skipped and add |
| 357 | // it as a CMOV-group-candidate. |
| 358 | if (!SkipGroup) |
| 359 | CmovInstGroups.push_back(Elt: Group); |
| 360 | else |
| 361 | ++NumOfSkippedCmovGroups; |
| 362 | Group.clear(); |
| 363 | } |
| 364 | } |
    // The end of the basic block is considered the end of the range; check
    // whether the currently processed CMOV group should be skipped, and if
    // not, add it as a CMOV-group-candidate.
| 367 | if (Group.empty()) |
| 368 | continue; |
| 369 | if (!SkipGroup) |
| 370 | CmovInstGroups.push_back(Elt: Group); |
| 371 | else |
| 372 | ++NumOfSkippedCmovGroups; |
| 373 | } |
| 374 | |
| 375 | NumOfCmovGroupCandidate += CmovInstGroups.size(); |
| 376 | return !CmovInstGroups.empty(); |
| 377 | } |
| 378 | |
| 379 | /// \returns Depth of CMOV instruction as if it was converted into branch. |
| 380 | /// \param TrueOpDepth depth cost of CMOV true value operand. |
| 381 | /// \param FalseOpDepth depth cost of CMOV false value operand. |
| 382 | static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) { |
| 383 | // The depth of the result after branch conversion is |
| 384 | // TrueOpDepth * TrueOpProbability + FalseOpDepth * FalseOpProbability. |
| 385 | // As we have no info about branch weight, we assume 75% for one and 25% for |
| 386 | // the other, and pick the result with the largest resulting depth. |
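  // For example, with hypothetical depths TrueOpDepth = 8 and FalseOpDepth = 2
  // this returns max(ceil((8 * 3 + 2) / 4), ceil((2 * 3 + 8) / 4))
  //            = max(7, 4) = 7.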
| 387 | return std::max( |
| 388 | a: divideCeil(Numerator: TrueOpDepth * 3 + FalseOpDepth, Denominator: 4), |
| 389 | b: divideCeil(Numerator: FalseOpDepth * 3 + TrueOpDepth, Denominator: 4)); |
| 390 | } |
| 391 | |
| 392 | bool X86CmovConverterPass::checkForProfitableCmovCandidates( |
| 393 | ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups) { |
| 394 | struct DepthInfo { |
| 395 | /// Depth of original loop. |
| 396 | unsigned Depth; |
| 397 | /// Depth of optimized loop. |
| 398 | unsigned OptDepth; |
| 399 | }; |
  /// Number of loop iterations for which to calculate depth (two are enough).
| 401 | static const unsigned LoopIterations = 2; |
| 402 | DenseMap<MachineInstr *, DepthInfo> DepthMap; |
| 403 | DepthInfo LoopDepth[LoopIterations] = {{.Depth: 0, .OptDepth: 0}, {.Depth: 0, .OptDepth: 0}}; |
| 404 | enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 }; |
| 405 | /// For each register type maps the register to its last def instruction. |
| 406 | DenseMap<Register, MachineInstr *> RegDefMaps[RegTypeNum]; |
| 407 | /// Maps register operand to its def instruction, which can be nullptr if it |
| 408 | /// is unknown (e.g., operand is defined outside the loop). |
| 409 | DenseMap<MachineOperand *, MachineInstr *> OperandToDefMap; |
| 410 | |
| 411 | // Set depth of unknown instruction (i.e., nullptr) to zero. |
| 412 | DepthMap[nullptr] = {.Depth: 0, .OptDepth: 0}; |
| 413 | |
| 414 | SmallPtrSet<MachineInstr *, 4> CmovInstructions; |
| 415 | for (auto &Group : CmovInstGroups) |
| 416 | CmovInstructions.insert_range(R&: Group); |
| 417 | |
| 418 | //===--------------------------------------------------------------------===// |
| 419 | // Step 1: Calculate instruction depth and loop depth. |
| 420 | // Optimized-Loop: |
| 421 | // loop with CMOV-group-candidates converted into branches. |
| 422 | // |
| 423 | // Instruction-Depth: |
| 424 | // instruction latency + max operand depth. |
| 425 | // * For CMOV instruction in optimized loop the depth is calculated as: |
| 426 | // CMOV latency + getDepthOfOptCmov(True-Op-Depth, False-Op-depth) |
| 427 | // TODO: Find a better way to estimate the latency of the branch instruction |
| 428 | // rather than using the CMOV latency. |
| 429 | // |
| 430 | // Loop-Depth: |
| 431 | // max instruction depth of all instructions in the loop. |
| 432 | // Note: instruction with max depth represents the critical-path in the loop. |
| 433 | // |
| 434 | // Loop-Depth[i]: |
| 435 | // Loop-Depth calculated for first `i` iterations. |
| 436 | // Note: it is enough to calculate depth for up to two iterations. |
| 437 | // |
  // Depth-Diff[i]:
  //   Number of cycles saved in the first `i` iterations by optimizing the
  //   loop.
| 440 | //===--------------------------------------------------------------------===// |
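  // For illustration (all latencies hypothetical): if %x is produced by a load
  // of depth 4, then "%y = ADD %x, %z" with latency 1 gets depth 4 + 1 = 5,
  // and a CMOV consuming %y with latency 1 gets depth 5 + 1 = 6; the loop
  // depth is the maximum such value over all instructions in the loop.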
| 441 | for (DepthInfo &MaxDepth : LoopDepth) { |
| 442 | for (auto *MBB : Blocks) { |
| 443 | // Clear physical registers Def map. |
| 444 | RegDefMaps[PhyRegType].clear(); |
| 445 | for (MachineInstr &MI : *MBB) { |
| 446 | // Skip debug instructions. |
| 447 | if (MI.isDebugInstr()) |
| 448 | continue; |
| 449 | unsigned MIDepth = 0; |
| 450 | unsigned MIDepthOpt = 0; |
| 451 | bool IsCMOV = CmovInstructions.count(Ptr: &MI); |
| 452 | for (auto &MO : MI.uses()) { |
          // Check for isUse() because uses() also returns implicit definitions.
| 454 | if (!MO.isReg() || !MO.isUse()) |
| 455 | continue; |
| 456 | Register Reg = MO.getReg(); |
| 457 | auto &RDM = RegDefMaps[Reg.isVirtual()]; |
| 458 | if (MachineInstr *DefMI = RDM.lookup(Val: Reg)) { |
| 459 | OperandToDefMap[&MO] = DefMI; |
| 460 | DepthInfo Info = DepthMap.lookup(Val: DefMI); |
| 461 | MIDepth = std::max(a: MIDepth, b: Info.Depth); |
| 462 | if (!IsCMOV) |
| 463 | MIDepthOpt = std::max(a: MIDepthOpt, b: Info.OptDepth); |
| 464 | } |
| 465 | } |
| 466 | |
| 467 | if (IsCMOV) |
| 468 | MIDepthOpt = getDepthOfOptCmov( |
| 469 | TrueOpDepth: DepthMap[OperandToDefMap.lookup(Val: &MI.getOperand(i: 1))].OptDepth, |
| 470 | FalseOpDepth: DepthMap[OperandToDefMap.lookup(Val: &MI.getOperand(i: 2))].OptDepth); |
| 471 | |
| 472 | // Iterates over all operands to handle implicit definitions as well. |
| 473 | for (auto &MO : MI.operands()) { |
| 474 | if (!MO.isReg() || !MO.isDef()) |
| 475 | continue; |
| 476 | Register Reg = MO.getReg(); |
| 477 | RegDefMaps[Reg.isVirtual()][Reg] = &MI; |
| 478 | } |
| 479 | |
| 480 | unsigned Latency = TSchedModel.computeInstrLatency(MI: &MI); |
| 481 | DepthMap[&MI] = {.Depth: MIDepth += Latency, .OptDepth: MIDepthOpt += Latency}; |
| 482 | MaxDepth.Depth = std::max(a: MaxDepth.Depth, b: MIDepth); |
| 483 | MaxDepth.OptDepth = std::max(a: MaxDepth.OptDepth, b: MIDepthOpt); |
| 484 | } |
| 485 | } |
| 486 | } |
| 487 | |
| 488 | unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth, |
| 489 | LoopDepth[1].Depth - LoopDepth[1].OptDepth}; |
| 490 | |
  //===--------------------------------------------------------------------===//
  // Step 2: Check if the loop is worth optimizing.
  // Worth-Optimize-Loop:
  //   case 1: Diff[1] == Diff[0]
  //           The critical path is iteration independent - there is no
  //           dependency of critical-path instructions on critical-path
  //           instructions of the previous iteration.
  //           Thus, it is enough to check the gain percent of the 1st
  //           iteration - to be conservative, the optimized loop needs to have
  //           a depth at least 12.5% smaller than the original loop, per
  //           iteration.
  //
  //   case 2: Diff[1] > Diff[0]
  //           The critical path is iteration dependent - there is a dependency
  //           of critical-path instructions on critical-path instructions of
  //           the previous iteration.
  //           Thus, check the gain percent of the 2nd iteration (similar to
  //           the previous case), but also check the gradient of the gain -
  //           the change in Depth-Diff compared to the change in Loop-Depth
  //           between the 1st and 2nd iterations.
  //           To be conservative, the gradient needs to be at least 50%.
  //
  //   In addition, in order not to optimize loops with a very small gain, the
  //   gain (in cycles) after the 2nd iteration must not be less than a given
  //   threshold. Thus, the check (Diff[1] >= GainCycleThreshold) must hold.
  //
  // If the loop is not worth optimizing, remove all CMOV-group-candidates.
  //===--------------------------------------------------------------------===//
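  // For illustration (hypothetical cycle counts): LoopDepth[0] = {20, 14} and
  // LoopDepth[1] = {32, 20} give Diff[0] = 6 and Diff[1] = 12. The threshold
  // check holds (12 >= 4), the gradient check (12 - 6) * 2 >= (32 - 20) holds,
  // and 12 * 8 >= 32 holds, so this loop would be considered worth optimizing.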
| 518 | if (Diff[1] < GainCycleThreshold) |
| 519 | return false; |
| 520 | |
| 521 | bool WorthOptLoop = false; |
| 522 | if (Diff[1] == Diff[0]) |
| 523 | WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth; |
| 524 | else if (Diff[1] > Diff[0]) |
| 525 | WorthOptLoop = |
| 526 | (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth) && |
| 527 | (Diff[1] * 8 >= LoopDepth[1].Depth); |
| 528 | |
| 529 | if (!WorthOptLoop) |
| 530 | return false; |
| 531 | |
| 532 | ++NumOfLoopCandidate; |
| 533 | |
  //===--------------------------------------------------------------------===//
  // Step 3: Check for each CMOV-group-candidate whether it is worth optimizing.
  // Worth-Optimize-Group:
  //   A group is worth optimizing iff every CMOV instruction in it is worth
  //   optimizing.
  //
  // Worth-Optimize-CMOV:
  //   The predicted branch is faster than the CMOV by the difference between
  //   the depth of the condition operand and the depth of the taken
  //   (predicted) value operand.
  //   To be conservative, the gain of such a CMOV transformation should cover
  //   at least 25% of the branch-misprediction penalty.
  //===--------------------------------------------------------------------===//
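  // For illustration (hypothetical numbers): with MispredictPenalty = 20, a
  // condition depth (CondCost) of 11, and a combined value depth (ValCost) of
  // 5, the gain is 11 - 5 = 6 and 6 * 4 >= 20, so the CMOV is kept as a
  // candidate; with CondCost = 6 the gain would be 1 and the group would be
  // rejected.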
| 545 | unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty; |
| 546 | CmovGroups TempGroups; |
| 547 | std::swap(LHS&: TempGroups, RHS&: CmovInstGroups); |
| 548 | for (auto &Group : TempGroups) { |
| 549 | bool WorthOpGroup = true; |
| 550 | for (auto *MI : Group) { |
      // Avoid CMOV instructions whose value is used as a pointer to load from.
      // This is another conservative check, to avoid converting CMOV
      // instructions used in tree-search-like algorithms, where the branch is
      // hard to predict.
| 554 | auto UIs = MRI->use_instructions(Reg: MI->defs().begin()->getReg()); |
| 555 | if (hasSingleElement(C&: UIs)) { |
| 556 | unsigned Op = UIs.begin()->getOpcode(); |
| 557 | if (Op == X86::MOV64rm || Op == X86::MOV32rm) { |
| 558 | WorthOpGroup = false; |
| 559 | break; |
| 560 | } |
| 561 | } |
| 562 | |
| 563 | unsigned CondCost = |
| 564 | DepthMap[OperandToDefMap.lookup(Val: &MI->getOperand(i: 4))].Depth; |
| 565 | unsigned ValCost = getDepthOfOptCmov( |
| 566 | TrueOpDepth: DepthMap[OperandToDefMap.lookup(Val: &MI->getOperand(i: 1))].Depth, |
| 567 | FalseOpDepth: DepthMap[OperandToDefMap.lookup(Val: &MI->getOperand(i: 2))].Depth); |
| 568 | if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) { |
| 569 | WorthOpGroup = false; |
| 570 | break; |
| 571 | } |
| 572 | } |
| 573 | |
| 574 | if (WorthOpGroup) |
| 575 | CmovInstGroups.push_back(Elt: Group); |
| 576 | } |
| 577 | |
| 578 | return !CmovInstGroups.empty(); |
| 579 | } |
| 580 | |
| 581 | static bool checkEFLAGSLive(MachineInstr *MI) { |
| 582 | if (MI->killsRegister(Reg: X86::EFLAGS, /*TRI=*/nullptr)) |
| 583 | return false; |
| 584 | |
  // The EFLAGS operand of MI might be missing a kill marker.
  // Figure out whether EFLAGS should be live after the MI instruction.
| 587 | MachineBasicBlock *BB = MI->getParent(); |
| 588 | MachineBasicBlock::iterator ItrMI = MI; |
| 589 | |
| 590 | // Scan forward through BB for a use/def of EFLAGS. |
| 591 | for (auto I = std::next(x: ItrMI), E = BB->end(); I != E; ++I) { |
| 592 | if (I->readsRegister(Reg: X86::EFLAGS, /*TRI=*/nullptr)) |
| 593 | return true; |
| 594 | if (I->definesRegister(Reg: X86::EFLAGS, /*TRI=*/nullptr)) |
| 595 | return false; |
| 596 | } |
| 597 | |
| 598 | // We hit the end of the block, check whether EFLAGS is live into a successor. |
| 599 | for (MachineBasicBlock *Succ : BB->successors()) |
| 600 | if (Succ->isLiveIn(Reg: X86::EFLAGS)) |
| 601 | return true; |
| 602 | |
| 603 | return false; |
| 604 | } |
| 605 | |
/// Given \p First and \p Last CMOV instructions representing a group of CMOV
/// instructions, which may contain debug instructions in between, move all
/// debug instructions to after the last CMOV instruction, making the CMOV
/// group consecutive.
| 610 | static void packCmovGroup(MachineInstr *First, MachineInstr *Last) { |
| 611 | assert(X86::getCondFromCMov(*Last) != X86::COND_INVALID && |
| 612 | "Last instruction in a CMOV group must be a CMOV instruction" ); |
| 613 | |
| 614 | SmallVector<MachineInstr *, 2> DBGInstructions; |
| 615 | for (auto I = First->getIterator(), E = Last->getIterator(); I != E; I++) { |
| 616 | if (I->isDebugInstr()) |
| 617 | DBGInstructions.push_back(Elt: &*I); |
| 618 | } |
| 619 | |
| 620 | // Splice the debug instruction after the cmov group. |
| 621 | MachineBasicBlock *MBB = First->getParent(); |
| 622 | for (auto *MI : DBGInstructions) |
| 623 | MBB->insertAfter(I: Last, MI: MI->removeFromParent()); |
| 624 | } |
| 625 | |
| 626 | void X86CmovConverterPass::convertCmovInstsToBranches( |
| 627 | SmallVectorImpl<MachineInstr *> &Group) const { |
| 628 | assert(!Group.empty() && "No CMOV instructions to convert" ); |
| 629 | ++NumOfOptimizedCmovGroups; |
| 630 | |
  // If the CMOV group is not packed, e.g., there are debug instructions between
  // the first and last CMOV, then pack the group and make the CMOV instructions
  // consecutive by moving the debug instructions to after the last CMOV.
| 634 | packCmovGroup(First: Group.front(), Last: Group.back()); |
| 635 | |
| 636 | // To convert a CMOVcc instruction, we actually have to insert the diamond |
| 637 | // control-flow pattern. The incoming instruction knows the destination vreg |
| 638 | // to set, the condition code register to branch on, the true/false values to |
| 639 | // select between, and a branch opcode to use. |
| 640 | |
| 641 | // Before |
| 642 | // ----- |
| 643 | // MBB: |
| 644 | // cond = cmp ... |
| 645 | // v1 = CMOVge t1, f1, cond |
| 646 | // v2 = CMOVlt t2, f2, cond |
| 647 | // v3 = CMOVge v1, f3, cond |
| 648 | // |
| 649 | // After |
| 650 | // ----- |
| 651 | // MBB: |
| 652 | // cond = cmp ... |
| 653 | // jge %SinkMBB |
| 654 | // |
| 655 | // FalseMBB: |
| 656 | // jmp %SinkMBB |
| 657 | // |
| 658 | // SinkMBB: |
| 659 | // %v1 = phi[%f1, %FalseMBB], [%t1, %MBB] |
| 660 | // %v2 = phi[%t2, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch |
| 661 | // ; true-value with false-value |
| 662 | // %v3 = phi[%f3, %FalseMBB], [%t1, %MBB] ; Phi instruction cannot use |
| 663 | // ; previous Phi instruction result |
| 664 | |
| 665 | MachineInstr &MI = *Group.front(); |
| 666 | MachineInstr *LastCMOV = Group.back(); |
| 667 | DebugLoc DL = MI.getDebugLoc(); |
| 668 | |
| 669 | X86::CondCode CC = X86::CondCode(X86::getCondFromCMov(MI)); |
| 670 | X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC); |
| 671 | // Potentially swap the condition codes so that any memory operand to a CMOV |
| 672 | // is in the *false* position instead of the *true* position. We can invert |
| 673 | // any non-memory operand CMOV instructions to cope with this and we ensure |
| 674 | // memory operand CMOVs are only included with a single condition code. |
| 675 | if (llvm::any_of(Range&: Group, P: [&](MachineInstr *I) { |
| 676 | return I->mayLoad() && X86::getCondFromCMov(MI: *I) == CC; |
| 677 | })) |
| 678 | std::swap(a&: CC, b&: OppCC); |
| 679 | |
| 680 | MachineBasicBlock *MBB = MI.getParent(); |
| 681 | MachineFunction::iterator It = ++MBB->getIterator(); |
| 682 | MachineFunction *F = MBB->getParent(); |
| 683 | const BasicBlock *BB = MBB->getBasicBlock(); |
| 684 | |
| 685 | MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB); |
| 686 | MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB); |
| 687 | F->insert(MBBI: It, MBB: FalseMBB); |
| 688 | F->insert(MBBI: It, MBB: SinkMBB); |
| 689 | |
| 690 | // If the EFLAGS register isn't dead in the terminator, then claim that it's |
| 691 | // live into the sink and copy blocks. |
| 692 | if (checkEFLAGSLive(MI: LastCMOV)) { |
| 693 | FalseMBB->addLiveIn(PhysReg: X86::EFLAGS); |
| 694 | SinkMBB->addLiveIn(PhysReg: X86::EFLAGS); |
| 695 | } |
| 696 | |
| 697 | // Transfer the remainder of BB and its successor edges to SinkMBB. |
| 698 | SinkMBB->splice(Where: SinkMBB->begin(), Other: MBB, |
| 699 | From: std::next(x: MachineBasicBlock::iterator(LastCMOV)), To: MBB->end()); |
| 700 | SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB); |
| 701 | |
| 702 | // Add the false and sink blocks as its successors. |
| 703 | MBB->addSuccessor(Succ: FalseMBB); |
| 704 | MBB->addSuccessor(Succ: SinkMBB); |
| 705 | |
| 706 | // Create the conditional branch instruction. |
| 707 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: X86::JCC_1)).addMBB(MBB: SinkMBB).addImm(Val: CC); |
| 708 | |
| 709 | // Add the sink block to the false block successors. |
| 710 | FalseMBB->addSuccessor(Succ: SinkMBB); |
| 711 | |
| 712 | MachineInstrBuilder MIB; |
| 713 | MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI); |
| 714 | MachineBasicBlock::iterator MIItEnd = |
| 715 | std::next(x: MachineBasicBlock::iterator(LastCMOV)); |
| 716 | MachineBasicBlock::iterator FalseInsertionPoint = FalseMBB->begin(); |
| 717 | MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); |
| 718 | |
| 719 | // First we need to insert an explicit load on the false path for any memory |
| 720 | // operand. We also need to potentially do register rewriting here, but it is |
| 721 | // simpler as the memory operands are always on the false path so we can |
| 722 | // simply take that input, whatever it is. |
| 723 | DenseMap<Register, Register> FalseBBRegRewriteTable; |
| 724 | for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;) { |
| 725 | auto &MI = *MIIt++; |
| 726 | // Skip any CMOVs in this group which don't load from memory. |
| 727 | if (!MI.mayLoad()) { |
| 728 | // Remember the false-side register input. |
| 729 | Register FalseReg = |
| 730 | MI.getOperand(i: X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg(); |
| 731 | // Walk back through any intermediate cmovs referenced. |
| 732 | while (true) { |
| 733 | auto FRIt = FalseBBRegRewriteTable.find(Val: FalseReg); |
| 734 | if (FRIt == FalseBBRegRewriteTable.end()) |
| 735 | break; |
| 736 | FalseReg = FRIt->second; |
| 737 | } |
| 738 | FalseBBRegRewriteTable[MI.getOperand(i: 0).getReg()] = FalseReg; |
| 739 | continue; |
| 740 | } |
| 741 | |
| 742 | // The condition must be the *opposite* of the one we've decided to branch |
| 743 | // on as the branch will go *around* the load and the load should happen |
| 744 | // when the CMOV condition is false. |
| 745 | assert(X86::getCondFromCMov(MI) == OppCC && |
| 746 | "Can only handle memory-operand cmov instructions with a condition " |
| 747 | "opposite to the selected branch direction." ); |
| 748 | |
| 749 | // The goal is to rewrite the cmov from: |
| 750 | // |
| 751 | // MBB: |
| 752 | // %A = CMOVcc %B (tied), (mem) |
| 753 | // |
| 754 | // to |
| 755 | // |
| 756 | // MBB: |
| 757 | // %A = CMOVcc %B (tied), %C |
| 758 | // FalseMBB: |
| 759 | // %C = MOV (mem) |
| 760 | // |
| 761 | // Which will allow the next loop to rewrite the CMOV in terms of a PHI: |
| 762 | // |
| 763 | // MBB: |
| 764 | // JMP!cc SinkMBB |
| 765 | // FalseMBB: |
| 766 | // %C = MOV (mem) |
| 767 | // SinkMBB: |
| 768 | // %A = PHI [ %C, FalseMBB ], [ %B, MBB] |
| 769 | |
| 770 | // Get a fresh register to use as the destination of the MOV. |
| 771 | const TargetRegisterClass *RC = MRI->getRegClass(Reg: MI.getOperand(i: 0).getReg()); |
| 772 | Register TmpReg = MRI->createVirtualRegister(RegClass: RC); |
| 773 | |
| 774 | // Retain debug instr number when unfolded. |
| 775 | unsigned OldDebugInstrNum = MI.peekDebugInstrNum(); |
| 776 | SmallVector<MachineInstr *, 4> NewMIs; |
| 777 | bool Unfolded = TII->unfoldMemoryOperand(MF&: *MBB->getParent(), MI, Reg: TmpReg, |
| 778 | /*UnfoldLoad*/ true, |
| 779 | /*UnfoldStore*/ false, NewMIs); |
| 780 | (void)Unfolded; |
| 781 | assert(Unfolded && "Should never fail to unfold a loading cmov!" ); |
| 782 | |
| 783 | // Move the new CMOV to just before the old one and reset any impacted |
| 784 | // iterator. |
| 785 | auto *NewCMOV = NewMIs.pop_back_val(); |
| 786 | assert(X86::getCondFromCMov(*NewCMOV) == OppCC && |
| 787 | "Last new instruction isn't the expected CMOV!" ); |
| 788 | LLVM_DEBUG(dbgs() << "\tRewritten cmov: " ; NewCMOV->dump()); |
| 789 | MBB->insert(I: MachineBasicBlock::iterator(MI), MI: NewCMOV); |
| 790 | if (&*MIItBegin == &MI) |
| 791 | MIItBegin = MachineBasicBlock::iterator(NewCMOV); |
| 792 | |
| 793 | if (OldDebugInstrNum) |
| 794 | NewCMOV->setDebugInstrNum(OldDebugInstrNum); |
| 795 | |
| 796 | // Sink whatever instructions were needed to produce the unfolded operand |
| 797 | // into the false block. |
| 798 | for (auto *NewMI : NewMIs) { |
| 799 | LLVM_DEBUG(dbgs() << "\tRewritten load instr: " ; NewMI->dump()); |
| 800 | FalseMBB->insert(I: FalseInsertionPoint, MI: NewMI); |
| 801 | // Re-map any operands that are from other cmovs to the inputs for this block. |
| 802 | for (auto &MOp : NewMI->uses()) { |
| 803 | if (!MOp.isReg()) |
| 804 | continue; |
| 805 | auto It = FalseBBRegRewriteTable.find(Val: MOp.getReg()); |
| 806 | if (It == FalseBBRegRewriteTable.end()) |
| 807 | continue; |
| 808 | |
| 809 | MOp.setReg(It->second); |
| 810 | // This might have been a kill when it referenced the cmov result, but |
| 811 | // it won't necessarily be once rewritten. |
| 812 | // FIXME: We could potentially improve this by tracking whether the |
| 813 | // operand to the cmov was also a kill, and then skipping the PHI node |
| 814 | // construction below. |
| 815 | MOp.setIsKill(false); |
| 816 | } |
| 817 | } |
| 818 | MBB->erase(I: &MI); |
| 819 | |
| 820 | // Add this PHI to the rewrite table. |
| 821 | FalseBBRegRewriteTable[NewCMOV->getOperand(i: 0).getReg()] = TmpReg; |
| 822 | } |
| 823 | |
  // As we are creating the PHIs, we have to be careful if there is more than
  // one. Later CMOVs may reference the results of earlier CMOVs, but later
  // PHIs have to reference the individual true/false inputs from earlier PHIs.
  // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from each earlier PHI's
  // destination register to the registers that went into that PHI.
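  // For illustration, using the Before/After example at the top of this
  // function: after emitting %v1 = phi [%f1, %FalseMBB], [%t1, %MBB], the
  // table maps %v1 -> (%f1, %t1); when the CMOV defining %v3 names %v1 as its
  // %MBB-side input, the PHI for %v3 is built with %t1 instead of %v1.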
| 830 | DenseMap<Register, std::pair<Register, Register>> RegRewriteTable; |
| 831 | |
| 832 | for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) { |
| 833 | Register DestReg = MIIt->getOperand(i: 0).getReg(); |
| 834 | Register Op1Reg = MIIt->getOperand(i: 1).getReg(); |
| 835 | Register Op2Reg = MIIt->getOperand(i: 2).getReg(); |
| 836 | |
    // If the CMOV we are processing has the opposite condition from the jump
    // we generated, then we have to swap the operands for the PHI that is
    // going to be generated.
| 840 | if (X86::getCondFromCMov(MI: *MIIt) == OppCC) |
| 841 | std::swap(a&: Op1Reg, b&: Op2Reg); |
| 842 | |
| 843 | auto Op1Itr = RegRewriteTable.find(Val: Op1Reg); |
| 844 | if (Op1Itr != RegRewriteTable.end()) |
| 845 | Op1Reg = Op1Itr->second.first; |
| 846 | |
| 847 | auto Op2Itr = RegRewriteTable.find(Val: Op2Reg); |
| 848 | if (Op2Itr != RegRewriteTable.end()) |
| 849 | Op2Reg = Op2Itr->second.second; |
| 850 | |
| 851 | // SinkMBB: |
| 852 | // %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ] |
| 853 | // ... |
| 854 | MIB = BuildMI(BB&: *SinkMBB, I: SinkInsertionPoint, MIMD: DL, MCID: TII->get(Opcode: X86::PHI), DestReg) |
| 855 | .addReg(RegNo: Op1Reg) |
| 856 | .addMBB(MBB: FalseMBB) |
| 857 | .addReg(RegNo: Op2Reg) |
| 858 | .addMBB(MBB); |
| 859 | (void)MIB; |
| 860 | LLVM_DEBUG(dbgs() << "\tFrom: " ; MIIt->dump()); |
| 861 | LLVM_DEBUG(dbgs() << "\tTo: " ; MIB->dump()); |
| 862 | |
| 863 | // debug-info: we can just copy the instr-ref number from one instruction |
| 864 | // to the other, seeing how it's a one-for-one substitution. |
| 865 | if (unsigned InstrNum = MIIt->peekDebugInstrNum()) |
| 866 | MIB->setDebugInstrNum(InstrNum); |
| 867 | |
| 868 | // Add this PHI to the rewrite table. |
| 869 | RegRewriteTable[DestReg] = std::make_pair(x&: Op1Reg, y&: Op2Reg); |
| 870 | } |
| 871 | |
| 872 | // Reset the NoPHIs property if a PHI was inserted to prevent a conflict with |
| 873 | // the MachineVerifier during testing. |
| 874 | if (MIItBegin != MIItEnd) |
| 875 | F->getProperties().resetNoPHIs(); |
| 876 | |
| 877 | // Now remove the CMOV(s). |
| 878 | MBB->erase(I: MIItBegin, E: MIItEnd); |
| 879 | |
| 880 | // Add new basic blocks to MachineLoopInfo. |
| 881 | if (MachineLoop *L = MLI->getLoopFor(BB: MBB)) { |
| 882 | L->addBasicBlockToLoop(NewBB: FalseMBB, LI&: *MLI); |
| 883 | L->addBasicBlockToLoop(NewBB: SinkMBB, LI&: *MLI); |
| 884 | } |
| 885 | } |
| 886 | |
| 887 | INITIALIZE_PASS_BEGIN(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion" , |
| 888 | false, false) |
| 889 | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) |
| 890 | INITIALIZE_PASS_END(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion" , |
| 891 | false, false) |
| 892 | |
| 893 | FunctionPass *llvm::createX86CmovConverterPass() { |
| 894 | return new X86CmovConverterPass(); |
| 895 | } |
| 896 | |