| 1 | //===-- SystemZElimCompare.cpp - Eliminate comparison instructions --------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This pass: |
| 10 | // (1) tries to remove compares if CC already contains the required information |
| 11 | // (2) fuses compares and branches into COMPARE AND BRANCH instructions |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "SystemZ.h" |
| 16 | #include "SystemZInstrInfo.h" |
| 17 | #include "SystemZTargetMachine.h" |
| 18 | #include "llvm/ADT/SmallVector.h" |
| 19 | #include "llvm/ADT/Statistic.h" |
| 20 | #include "llvm/ADT/StringRef.h" |
| 21 | #include "llvm/CodeGen/LiveRegUnits.h" |
| 22 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 23 | #include "llvm/CodeGen/MachineFunction.h" |
| 24 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 25 | #include "llvm/CodeGen/MachineInstr.h" |
| 26 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 27 | #include "llvm/CodeGen/MachineOperand.h" |
| 28 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
| 29 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
| 30 | #include "llvm/MC/MCInstrDesc.h" |
| 31 | #include <cassert> |
| 32 | #include <cstdint> |
| 33 | |
| 34 | using namespace llvm; |
| 35 | |
| 36 | #define DEBUG_TYPE "systemz-elim-compare" |
| 37 | |
| 38 | STATISTIC(BranchOnCounts, "Number of branch-on-count instructions" ); |
| 39 | STATISTIC(LoadAndTraps, "Number of load-and-trap instructions" ); |
| 40 | STATISTIC(EliminatedComparisons, "Number of eliminated comparisons" ); |
| 41 | STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions" ); |
| 42 | |
| 43 | namespace { |
| 44 | |
// Summary of how a particular register is referenced by one or more
// instructions.  A default-constructed Reference records no accesses.
struct Reference {
  Reference() = default;

  // Fold the accesses recorded in Other into this summary.
  Reference &operator|=(const Reference &Other) {
    Def = Def || Other.Def;
    Use = Use || Other.Use;
    return *this;
  }

  // True if at least one access (a use or a def) has been recorded.
  explicit operator bool() const { return Use || Def; }

  // Set when the register is defined (Def) or used (Use) in some form,
  // either directly or via a sub- or super-register.
  bool Def = false;
  bool Use = false;
};
| 63 | |
| 64 | class SystemZElimCompare : public MachineFunctionPass { |
| 65 | public: |
| 66 | static char ID; |
| 67 | |
| 68 | SystemZElimCompare() : MachineFunctionPass(ID) {} |
| 69 | |
| 70 | bool processBlock(MachineBasicBlock &MBB); |
| 71 | bool runOnMachineFunction(MachineFunction &F) override; |
| 72 | |
| 73 | MachineFunctionProperties getRequiredProperties() const override { |
| 74 | return MachineFunctionProperties().setNoVRegs(); |
| 75 | } |
| 76 | |
| 77 | private: |
| 78 | Reference getRegReferences(MachineInstr &MI, unsigned Reg); |
| 79 | bool convertToBRCT(MachineInstr &MI, MachineInstr &Compare, |
| 80 | SmallVectorImpl<MachineInstr *> &CCUsers); |
| 81 | bool convertToLoadAndTrap(MachineInstr &MI, MachineInstr &Compare, |
| 82 | SmallVectorImpl<MachineInstr *> &CCUsers); |
| 83 | bool convertToLoadAndTest(MachineInstr &MI, MachineInstr &Compare, |
| 84 | SmallVectorImpl<MachineInstr *> &CCUsers); |
| 85 | bool convertToLogical(MachineInstr &MI, MachineInstr &Compare, |
| 86 | SmallVectorImpl<MachineInstr *> &CCUsers); |
| 87 | bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare, |
| 88 | SmallVectorImpl<MachineInstr *> &CCUsers, |
| 89 | unsigned ConvOpc = 0); |
| 90 | bool optimizeCompareZero(MachineInstr &Compare, |
| 91 | SmallVectorImpl<MachineInstr *> &CCUsers); |
| 92 | bool fuseCompareOperations(MachineInstr &Compare, |
| 93 | SmallVectorImpl<MachineInstr *> &CCUsers); |
| 94 | |
| 95 | const SystemZInstrInfo *TII = nullptr; |
| 96 | const TargetRegisterInfo *TRI = nullptr; |
| 97 | }; |
| 98 | |
| 99 | char SystemZElimCompare::ID = 0; |
| 100 | |
| 101 | } // end anonymous namespace |
| 102 | |
| 103 | INITIALIZE_PASS(SystemZElimCompare, DEBUG_TYPE, |
| 104 | "SystemZ Comparison Elimination" , false, false) |
| 105 | |
| 106 | // Returns true if MI is an instruction whose output equals the value in Reg. |
| 107 | static bool preservesValueOf(MachineInstr &MI, unsigned Reg) { |
| 108 | switch (MI.getOpcode()) { |
| 109 | case SystemZ::LR: |
| 110 | case SystemZ::LGR: |
| 111 | case SystemZ::LGFR: |
| 112 | case SystemZ::LTR: |
| 113 | case SystemZ::LTGR: |
| 114 | case SystemZ::LTGFR: |
| 115 | if (MI.getOperand(i: 1).getReg() == Reg) |
| 116 | return true; |
| 117 | } |
| 118 | |
| 119 | return false; |
| 120 | } |
| 121 | |
| 122 | // Return true if any CC result of MI would (perhaps after conversion) |
| 123 | // reflect the value of Reg. |
| 124 | static bool resultTests(MachineInstr &MI, unsigned Reg) { |
| 125 | if (MI.getNumOperands() > 0 && MI.getOperand(i: 0).isReg() && |
| 126 | MI.getOperand(i: 0).isDef() && MI.getOperand(i: 0).getReg() == Reg) |
| 127 | return true; |
| 128 | |
| 129 | return (preservesValueOf(MI, Reg)); |
| 130 | } |
| 131 | |
| 132 | // Describe the references to Reg or any of its aliases in MI. |
| 133 | Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) { |
| 134 | Reference Ref; |
| 135 | if (MI.isDebugInstr()) |
| 136 | return Ref; |
| 137 | |
| 138 | for (const MachineOperand &MO : MI.operands()) { |
| 139 | if (MO.isReg()) { |
| 140 | if (Register MOReg = MO.getReg()) { |
| 141 | if (TRI->regsOverlap(RegA: MOReg, RegB: Reg)) { |
| 142 | if (MO.isUse()) |
| 143 | Ref.Use = true; |
| 144 | else if (MO.isDef()) |
| 145 | Ref.Def = true; |
| 146 | } |
| 147 | } |
| 148 | } |
| 149 | } |
| 150 | return Ref; |
| 151 | } |
| 152 | |
| 153 | // Return true if this is a load and test which can be optimized the |
| 154 | // same way as compare instruction. |
| 155 | static bool isLoadAndTestAsCmp(MachineInstr &MI) { |
| 156 | // If we during isel used a load-and-test as a compare with 0, the |
| 157 | // def operand is dead. |
| 158 | return (MI.getOpcode() == SystemZ::LTEBR || |
| 159 | MI.getOpcode() == SystemZ::LTDBR || |
| 160 | MI.getOpcode() == SystemZ::LTXBR) && |
| 161 | MI.getOperand(i: 0).isDead(); |
| 162 | } |
| 163 | |
| 164 | // Return the source register of Compare, which is the unknown value |
| 165 | // being tested. |
| 166 | static unsigned getCompareSourceReg(MachineInstr &Compare) { |
| 167 | unsigned reg = 0; |
| 168 | if (Compare.isCompare()) |
| 169 | reg = Compare.getOperand(i: 0).getReg(); |
| 170 | else if (isLoadAndTestAsCmp(MI&: Compare)) |
| 171 | reg = Compare.getOperand(i: 1).getReg(); |
| 172 | assert(reg); |
| 173 | |
| 174 | return reg; |
| 175 | } |
| 176 | |
| 177 | // Compare compares the result of MI against zero. If MI is an addition |
| 178 | // of -1 and if CCUsers is a single branch on nonzero, eliminate the addition |
| 179 | // and convert the branch to a BRCT(G) or BRCTH. Return true on success. |
| 180 | bool SystemZElimCompare::convertToBRCT( |
| 181 | MachineInstr &MI, MachineInstr &Compare, |
| 182 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
| 183 | // Check whether we have an addition of -1. |
| 184 | unsigned Opcode = MI.getOpcode(); |
| 185 | unsigned BRCT; |
| 186 | if (Opcode == SystemZ::AHI) |
| 187 | BRCT = SystemZ::BRCT; |
| 188 | else if (Opcode == SystemZ::AGHI) |
| 189 | BRCT = SystemZ::BRCTG; |
| 190 | else if (Opcode == SystemZ::AIH) |
| 191 | BRCT = SystemZ::BRCTH; |
| 192 | else |
| 193 | return false; |
| 194 | if (MI.getOperand(i: 2).getImm() != -1) |
| 195 | return false; |
| 196 | |
| 197 | // Check whether we have a single JLH. |
| 198 | if (CCUsers.size() != 1) |
| 199 | return false; |
| 200 | MachineInstr *Branch = CCUsers[0]; |
| 201 | if (Branch->getOpcode() != SystemZ::BRC || |
| 202 | Branch->getOperand(i: 0).getImm() != SystemZ::CCMASK_ICMP || |
| 203 | Branch->getOperand(i: 1).getImm() != SystemZ::CCMASK_CMP_NE) |
| 204 | return false; |
| 205 | |
| 206 | // We already know that there are no references to the register between |
| 207 | // MI and Compare. Make sure that there are also no references between |
| 208 | // Compare and Branch. |
| 209 | unsigned SrcReg = getCompareSourceReg(Compare); |
| 210 | MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; |
| 211 | for (++MBBI; MBBI != MBBE; ++MBBI) |
| 212 | if (getRegReferences(MI&: *MBBI, Reg: SrcReg)) |
| 213 | return false; |
| 214 | |
| 215 | // The transformation is OK. Rebuild Branch as a BRCT(G) or BRCTH. |
| 216 | MachineOperand Target(Branch->getOperand(i: 2)); |
| 217 | while (Branch->getNumOperands()) |
| 218 | Branch->removeOperand(OpNo: 0); |
| 219 | Branch->setDesc(TII->get(Opcode: BRCT)); |
| 220 | MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); |
| 221 | MIB.add(MO: MI.getOperand(i: 0)).add(MO: MI.getOperand(i: 1)).add(MO: Target); |
| 222 | // Add a CC def to BRCT(G), since we may have to split them again if the |
| 223 | // branch displacement overflows. BRCTH has a 32-bit displacement, so |
| 224 | // this is not necessary there. |
| 225 | if (BRCT != SystemZ::BRCTH) |
| 226 | MIB.addReg(RegNo: SystemZ::CC, flags: RegState::ImplicitDefine | RegState::Dead); |
| 227 | // The debug instr tracking for the counter now used by BRCT needs to be |
| 228 | // updated. |
| 229 | MI.getParent()->getParent()->substituteDebugValuesForInst(Old: MI, New&: *MIB); |
| 230 | MI.eraseFromParent(); |
| 231 | return true; |
| 232 | } |
| 233 | |
| 234 | // Compare compares the result of MI against zero. If MI is a suitable load |
| 235 | // instruction and if CCUsers is a single conditional trap on zero, eliminate |
| 236 | // the load and convert the branch to a load-and-trap. Return true on success. |
| 237 | bool SystemZElimCompare::convertToLoadAndTrap( |
| 238 | MachineInstr &MI, MachineInstr &Compare, |
| 239 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
| 240 | unsigned LATOpcode = TII->getLoadAndTrap(Opcode: MI.getOpcode()); |
| 241 | if (!LATOpcode) |
| 242 | return false; |
| 243 | |
| 244 | // Check whether we have a single CondTrap that traps on zero. |
| 245 | if (CCUsers.size() != 1) |
| 246 | return false; |
| 247 | MachineInstr *Branch = CCUsers[0]; |
| 248 | if (Branch->getOpcode() != SystemZ::CondTrap || |
| 249 | Branch->getOperand(i: 0).getImm() != SystemZ::CCMASK_ICMP || |
| 250 | Branch->getOperand(i: 1).getImm() != SystemZ::CCMASK_CMP_EQ) |
| 251 | return false; |
| 252 | |
| 253 | // We already know that there are no references to the register between |
| 254 | // MI and Compare. Make sure that there are also no references between |
| 255 | // Compare and Branch. |
| 256 | unsigned SrcReg = getCompareSourceReg(Compare); |
| 257 | MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; |
| 258 | for (++MBBI; MBBI != MBBE; ++MBBI) |
| 259 | if (getRegReferences(MI&: *MBBI, Reg: SrcReg)) |
| 260 | return false; |
| 261 | |
| 262 | // The transformation is OK. Rebuild Branch as a load-and-trap. |
| 263 | while (Branch->getNumOperands()) |
| 264 | Branch->removeOperand(OpNo: 0); |
| 265 | Branch->setDesc(TII->get(Opcode: LATOpcode)); |
| 266 | MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) |
| 267 | .add(MO: MI.getOperand(i: 0)) |
| 268 | .add(MO: MI.getOperand(i: 1)) |
| 269 | .add(MO: MI.getOperand(i: 2)) |
| 270 | .add(MO: MI.getOperand(i: 3)); |
| 271 | // The debug instr tracking for the load target now used by the load-and-trap |
| 272 | // needs to be updated. |
| 273 | MI.getParent()->getParent()->substituteDebugValuesForInst(Old: MI, New&: *Branch); |
| 274 | MI.eraseFromParent(); |
| 275 | return true; |
| 276 | } |
| 277 | |
| 278 | // If MI is a load instruction, try to convert it into a LOAD AND TEST. |
| 279 | // Return true on success. |
| 280 | bool SystemZElimCompare::convertToLoadAndTest( |
| 281 | MachineInstr &MI, MachineInstr &Compare, |
| 282 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
| 283 | |
| 284 | // Try to adjust CC masks for the LOAD AND TEST opcode that could replace MI. |
| 285 | unsigned Opcode = TII->getLoadAndTest(Opcode: MI.getOpcode()); |
| 286 | if (!Opcode || !adjustCCMasksForInstr(MI, Compare, CCUsers, ConvOpc: Opcode)) |
| 287 | return false; |
| 288 | |
| 289 | // Rebuild to get the CC operand in the right place. |
| 290 | auto MIB = BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode)); |
| 291 | for (const auto &MO : MI.operands()) |
| 292 | MIB.add(MO); |
| 293 | MIB.setMemRefs(MI.memoperands()); |
| 294 | // The debug instr tracking for the load target now needs to be updated |
| 295 | // because the load has moved to a new instruction |
| 296 | MI.getParent()->getParent()->substituteDebugValuesForInst(Old: MI, New&: *MIB); |
| 297 | MI.eraseFromParent(); |
| 298 | |
| 299 | // Mark instruction as not raising an FP exception if applicable. We already |
| 300 | // verified earlier that this move is valid. |
| 301 | if (!Compare.mayRaiseFPException()) |
| 302 | MIB.setMIFlag(MachineInstr::MIFlag::NoFPExcept); |
| 303 | |
| 304 | return true; |
| 305 | } |
| 306 | |
| 307 | // See if MI is an instruction with an equivalent "logical" opcode that can |
| 308 | // be used and replace MI. This is useful for EQ/NE comparisons where the |
| 309 | // "nsw" flag is missing since the "logical" opcode always sets CC to reflect |
| 310 | // the result being zero or non-zero. |
| 311 | bool SystemZElimCompare::convertToLogical( |
| 312 | MachineInstr &MI, MachineInstr &Compare, |
| 313 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
| 314 | |
| 315 | unsigned ConvOpc = 0; |
| 316 | switch (MI.getOpcode()) { |
| 317 | case SystemZ::AR: ConvOpc = SystemZ::ALR; break; |
| 318 | case SystemZ::ARK: ConvOpc = SystemZ::ALRK; break; |
| 319 | case SystemZ::AGR: ConvOpc = SystemZ::ALGR; break; |
| 320 | case SystemZ::AGRK: ConvOpc = SystemZ::ALGRK; break; |
| 321 | case SystemZ::A: ConvOpc = SystemZ::AL; break; |
| 322 | case SystemZ::AY: ConvOpc = SystemZ::ALY; break; |
| 323 | case SystemZ::AG: ConvOpc = SystemZ::ALG; break; |
| 324 | default: break; |
| 325 | } |
| 326 | if (!ConvOpc || !adjustCCMasksForInstr(MI, Compare, CCUsers, ConvOpc)) |
| 327 | return false; |
| 328 | |
| 329 | // Operands should be identical, so just change the opcode and remove the |
| 330 | // dead flag on CC. |
| 331 | MI.setDesc(TII->get(Opcode: ConvOpc)); |
| 332 | MI.clearRegisterDeads(Reg: SystemZ::CC); |
| 333 | return true; |
| 334 | } |
| 335 | |
| 336 | #ifndef NDEBUG |
| 337 | static bool isAddWithImmediate(unsigned Opcode) { |
| 338 | switch(Opcode) { |
| 339 | case SystemZ::AHI: |
| 340 | case SystemZ::AHIK: |
| 341 | case SystemZ::AGHI: |
| 342 | case SystemZ::AGHIK: |
| 343 | case SystemZ::AFI: |
| 344 | case SystemZ::AIH: |
| 345 | case SystemZ::AGFI: |
| 346 | return true; |
| 347 | default: break; |
| 348 | } |
| 349 | return false; |
| 350 | } |
| 351 | #endif |
| 352 | |
| 353 | // The CC users in CCUsers are testing the result of a comparison of some |
| 354 | // value X against zero and we know that any CC value produced by MI would |
| 355 | // also reflect the value of X. ConvOpc may be used to pass the transfomed |
| 356 | // opcode MI will have if this succeeds. Try to adjust CCUsers so that they |
| 357 | // test the result of MI directly, returning true on success. Leave |
| 358 | // everything unchanged on failure. |
| 359 | bool SystemZElimCompare::adjustCCMasksForInstr( |
| 360 | MachineInstr &MI, MachineInstr &Compare, |
| 361 | SmallVectorImpl<MachineInstr *> &CCUsers, |
| 362 | unsigned ConvOpc) { |
| 363 | unsigned CompareFlags = Compare.getDesc().TSFlags; |
| 364 | unsigned CompareCCValues = SystemZII::getCCValues(Flags: CompareFlags); |
| 365 | int Opcode = (ConvOpc ? ConvOpc : MI.getOpcode()); |
| 366 | const MCInstrDesc &Desc = TII->get(Opcode); |
| 367 | unsigned MIFlags = Desc.TSFlags; |
| 368 | |
| 369 | // If Compare may raise an FP exception, we can only eliminate it |
| 370 | // if MI itself would have already raised the exception. |
| 371 | if (Compare.mayRaiseFPException()) { |
| 372 | // If the caller will change MI to use ConvOpc, only test whether |
| 373 | // ConvOpc is suitable; it is on the caller to set the MI flag. |
| 374 | if (ConvOpc && !Desc.mayRaiseFPException()) |
| 375 | return false; |
| 376 | // If the caller will not change MI, we test the MI flag here. |
| 377 | if (!ConvOpc && !MI.mayRaiseFPException()) |
| 378 | return false; |
| 379 | } |
| 380 | |
| 381 | // See which compare-style condition codes are available. |
| 382 | unsigned CCValues = SystemZII::getCCValues(Flags: MIFlags); |
| 383 | unsigned ReusableCCMask = CCValues; |
| 384 | // For unsigned comparisons with zero, only equality makes sense. |
| 385 | if (CompareFlags & SystemZII::IsLogical) |
| 386 | ReusableCCMask &= SystemZ::CCMASK_CMP_EQ; |
| 387 | unsigned OFImplies = 0; |
| 388 | bool LogicalMI = false; |
| 389 | bool MIEquivalentToCmp = false; |
| 390 | if (MI.getFlag(Flag: MachineInstr::NoSWrap) && |
| 391 | (MIFlags & SystemZII::CCIfNoSignedWrap)) { |
| 392 | // If MI has the NSW flag set in combination with the |
| 393 | // SystemZII::CCIfNoSignedWrap flag, all CCValues are valid. |
| 394 | } |
| 395 | else if ((MIFlags & SystemZII::CCIfNoSignedWrap) && |
| 396 | MI.getOperand(i: 2).isImm()) { |
| 397 | // Signed addition of immediate. If adding a positive immediate |
| 398 | // overflows, the result must be less than zero. If adding a negative |
| 399 | // immediate overflows, the result must be larger than zero (except in |
| 400 | // the special case of adding the minimum value of the result range, in |
| 401 | // which case we cannot predict whether the result is larger than or |
| 402 | // equal to zero). |
| 403 | assert(isAddWithImmediate(Opcode) && "Expected an add with immediate." ); |
| 404 | assert(!MI.mayLoadOrStore() && "Expected an immediate term." ); |
| 405 | int64_t RHS = MI.getOperand(i: 2).getImm(); |
| 406 | if (SystemZ::GRX32BitRegClass.contains(Reg: MI.getOperand(i: 0).getReg()) && |
| 407 | RHS == INT32_MIN) |
| 408 | return false; |
| 409 | OFImplies = (RHS > 0 ? SystemZ::CCMASK_CMP_LT : SystemZ::CCMASK_CMP_GT); |
| 410 | } |
| 411 | else if ((MIFlags & SystemZII::IsLogical) && CCValues) { |
| 412 | // Use CCMASK_CMP_EQ to match with CCUsers. On success CCMask:s will be |
| 413 | // converted to CCMASK_LOGICAL_ZERO or CCMASK_LOGICAL_NONZERO. |
| 414 | LogicalMI = true; |
| 415 | ReusableCCMask = SystemZ::CCMASK_CMP_EQ; |
| 416 | } |
| 417 | else { |
| 418 | ReusableCCMask &= SystemZII::getCompareZeroCCMask(Flags: MIFlags); |
| 419 | assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues" ); |
| 420 | MIEquivalentToCmp = |
| 421 | ReusableCCMask == CCValues && CCValues == CompareCCValues; |
| 422 | } |
| 423 | if (ReusableCCMask == 0) |
| 424 | return false; |
| 425 | |
| 426 | if (!MIEquivalentToCmp) { |
| 427 | // Now check whether these flags are enough for all users. |
| 428 | SmallVector<MachineOperand *, 4> AlterMasks; |
| 429 | for (MachineInstr *CCUserMI : CCUsers) { |
| 430 | // Fail if this isn't a use of CC that we understand. |
| 431 | unsigned Flags = CCUserMI->getDesc().TSFlags; |
| 432 | unsigned FirstOpNum; |
| 433 | if (Flags & SystemZII::CCMaskFirst) |
| 434 | FirstOpNum = 0; |
| 435 | else if (Flags & SystemZII::CCMaskLast) |
| 436 | FirstOpNum = CCUserMI->getNumExplicitOperands() - 2; |
| 437 | else |
| 438 | return false; |
| 439 | |
| 440 | // Check whether the instruction predicate treats all CC values |
| 441 | // outside of ReusableCCMask in the same way. In that case it |
| 442 | // doesn't matter what those CC values mean. |
| 443 | unsigned CCValid = CCUserMI->getOperand(i: FirstOpNum).getImm(); |
| 444 | unsigned CCMask = CCUserMI->getOperand(i: FirstOpNum + 1).getImm(); |
| 445 | assert(CCValid == CompareCCValues && (CCMask & ~CCValid) == 0 && |
| 446 | "Corrupt CC operands of CCUser." ); |
| 447 | unsigned OutValid = ~ReusableCCMask & CCValid; |
| 448 | unsigned OutMask = ~ReusableCCMask & CCMask; |
| 449 | if (OutMask != 0 && OutMask != OutValid) |
| 450 | return false; |
| 451 | |
| 452 | AlterMasks.push_back(Elt: &CCUserMI->getOperand(i: FirstOpNum)); |
| 453 | AlterMasks.push_back(Elt: &CCUserMI->getOperand(i: FirstOpNum + 1)); |
| 454 | } |
| 455 | |
| 456 | // All users are OK. Adjust the masks for MI. |
| 457 | for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { |
| 458 | AlterMasks[I]->setImm(CCValues); |
| 459 | unsigned CCMask = AlterMasks[I + 1]->getImm(); |
| 460 | if (LogicalMI) { |
| 461 | // Translate the CCMask into its "logical" value. |
| 462 | CCMask = (CCMask == SystemZ::CCMASK_CMP_EQ ? |
| 463 | SystemZ::CCMASK_LOGICAL_ZERO : SystemZ::CCMASK_LOGICAL_NONZERO); |
| 464 | CCMask &= CCValues; // Logical subtracts never set CC=0. |
| 465 | } else { |
| 466 | if (CCMask & ~ReusableCCMask) |
| 467 | CCMask = (CCMask & ReusableCCMask) | (CCValues & ~ReusableCCMask); |
| 468 | CCMask |= (CCMask & OFImplies) ? SystemZ::CCMASK_ARITH_OVERFLOW : 0; |
| 469 | } |
| 470 | AlterMasks[I + 1]->setImm(CCMask); |
| 471 | } |
| 472 | } |
| 473 | |
| 474 | // CC is now live after MI. |
| 475 | if (!ConvOpc) |
| 476 | MI.clearRegisterDeads(Reg: SystemZ::CC); |
| 477 | |
| 478 | // Check if MI lies before Compare. |
| 479 | bool BeforeCmp = false; |
| 480 | MachineBasicBlock::iterator MBBI = MI, MBBE = MI.getParent()->end(); |
| 481 | for (++MBBI; MBBI != MBBE; ++MBBI) |
| 482 | if (MBBI == Compare) { |
| 483 | BeforeCmp = true; |
| 484 | break; |
| 485 | } |
| 486 | |
| 487 | // Clear any intervening kills of CC. |
| 488 | if (BeforeCmp) { |
| 489 | MachineBasicBlock::iterator MBBI = MI, MBBE = Compare; |
| 490 | for (++MBBI; MBBI != MBBE; ++MBBI) |
| 491 | MBBI->clearRegisterKills(Reg: SystemZ::CC, RegInfo: TRI); |
| 492 | } |
| 493 | |
| 494 | return true; |
| 495 | } |
| 496 | |
| 497 | // Return true if Compare is a comparison against zero. |
| 498 | static bool isCompareZero(MachineInstr &Compare) { |
| 499 | if (isLoadAndTestAsCmp(MI&: Compare)) |
| 500 | return true; |
| 501 | return Compare.getNumExplicitOperands() == 2 && |
| 502 | Compare.getOperand(i: 1).isImm() && Compare.getOperand(i: 1).getImm() == 0; |
| 503 | } |
| 504 | |
| 505 | // Try to optimize cases where comparison instruction Compare is testing |
| 506 | // a value against zero. Return true on success and if Compare should be |
| 507 | // deleted as dead. CCUsers is the list of instructions that use the CC |
| 508 | // value produced by Compare. |
| 509 | bool SystemZElimCompare::optimizeCompareZero( |
| 510 | MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { |
| 511 | if (!isCompareZero(Compare)) |
| 512 | return false; |
| 513 | |
| 514 | // Search back for CC results that are based on the first operand. |
| 515 | unsigned SrcReg = getCompareSourceReg(Compare); |
| 516 | MachineBasicBlock &MBB = *Compare.getParent(); |
| 517 | Reference CCRefs; |
| 518 | Reference SrcRefs; |
| 519 | for (MachineBasicBlock::reverse_iterator MBBI = |
| 520 | std::next(x: MachineBasicBlock::reverse_iterator(&Compare)), |
| 521 | MBBE = MBB.rend(); MBBI != MBBE;) { |
| 522 | MachineInstr &MI = *MBBI++; |
| 523 | if (resultTests(MI, Reg: SrcReg)) { |
| 524 | // Try to remove both MI and Compare by converting a branch to BRCT(G). |
| 525 | // or a load-and-trap instruction. We don't care in this case whether |
| 526 | // CC is modified between MI and Compare. |
| 527 | if (!CCRefs.Use && !SrcRefs) { |
| 528 | if (convertToBRCT(MI, Compare, CCUsers)) { |
| 529 | BranchOnCounts += 1; |
| 530 | return true; |
| 531 | } |
| 532 | if (convertToLoadAndTrap(MI, Compare, CCUsers)) { |
| 533 | LoadAndTraps += 1; |
| 534 | return true; |
| 535 | } |
| 536 | } |
| 537 | // Try to eliminate Compare by reusing a CC result from MI. |
| 538 | if ((!CCRefs && convertToLoadAndTest(MI, Compare, CCUsers)) || |
| 539 | (!CCRefs.Def && |
| 540 | (adjustCCMasksForInstr(MI, Compare, CCUsers) || |
| 541 | convertToLogical(MI, Compare, CCUsers)))) { |
| 542 | EliminatedComparisons += 1; |
| 543 | return true; |
| 544 | } |
| 545 | } |
| 546 | SrcRefs |= getRegReferences(MI, Reg: SrcReg); |
| 547 | if (SrcRefs.Def) |
| 548 | break; |
| 549 | CCRefs |= getRegReferences(MI, Reg: SystemZ::CC); |
| 550 | if (CCRefs.Use && CCRefs.Def) |
| 551 | break; |
| 552 | // Eliminating a Compare that may raise an FP exception will move |
| 553 | // raising the exception to some earlier MI. We cannot do this if |
| 554 | // there is anything in between that might change exception flags. |
| 555 | if (Compare.mayRaiseFPException() && |
| 556 | (MI.isCall() || MI.hasUnmodeledSideEffects())) |
| 557 | break; |
| 558 | } |
| 559 | |
| 560 | // Also do a forward search to handle cases where an instruction after the |
| 561 | // compare can be converted, like |
| 562 | // CGHI %r0d, 0; %r1d = LGR %r0d => LTGR %r1d, %r0d |
| 563 | auto MIRange = llvm::make_range( |
| 564 | x: std::next(x: MachineBasicBlock::iterator(&Compare)), y: MBB.end()); |
| 565 | for (MachineInstr &MI : llvm::make_early_inc_range(Range&: MIRange)) { |
| 566 | if (preservesValueOf(MI, Reg: SrcReg)) { |
| 567 | // Try to eliminate Compare by reusing a CC result from MI. |
| 568 | if (convertToLoadAndTest(MI, Compare, CCUsers)) { |
| 569 | EliminatedComparisons += 1; |
| 570 | return true; |
| 571 | } |
| 572 | } |
| 573 | if (getRegReferences(MI, Reg: SrcReg).Def) |
| 574 | return false; |
| 575 | if (getRegReferences(MI, Reg: SystemZ::CC)) |
| 576 | return false; |
| 577 | } |
| 578 | |
| 579 | return false; |
| 580 | } |
| 581 | |
| 582 | // Try to fuse comparison instruction Compare into a later branch. |
| 583 | // Return true on success and if Compare is therefore redundant. |
| 584 | bool SystemZElimCompare::fuseCompareOperations( |
| 585 | MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { |
| 586 | // See whether we have a single branch with which to fuse. |
| 587 | if (CCUsers.size() != 1) |
| 588 | return false; |
| 589 | MachineInstr *Branch = CCUsers[0]; |
| 590 | SystemZII::FusedCompareType Type; |
| 591 | switch (Branch->getOpcode()) { |
| 592 | case SystemZ::BRC: |
| 593 | Type = SystemZII::CompareAndBranch; |
| 594 | break; |
| 595 | case SystemZ::CondReturn: |
| 596 | Type = SystemZII::CompareAndReturn; |
| 597 | break; |
| 598 | case SystemZ::CallBCR: |
| 599 | Type = SystemZII::CompareAndSibcall; |
| 600 | break; |
| 601 | case SystemZ::CondTrap: |
| 602 | Type = SystemZII::CompareAndTrap; |
| 603 | break; |
| 604 | default: |
| 605 | return false; |
| 606 | } |
| 607 | |
| 608 | // See whether we have a comparison that can be fused. |
| 609 | unsigned FusedOpcode = |
| 610 | TII->getFusedCompare(Opcode: Compare.getOpcode(), Type, MI: &Compare); |
| 611 | if (!FusedOpcode) |
| 612 | return false; |
| 613 | |
| 614 | // Make sure that the operands are available at the branch. |
| 615 | // SrcReg2 is the register if the source operand is a register, |
| 616 | // 0 if the source operand is immediate, and the base register |
| 617 | // if the source operand is memory (index is not supported). |
| 618 | Register SrcReg = Compare.getOperand(i: 0).getReg(); |
| 619 | Register SrcReg2 = |
| 620 | Compare.getOperand(i: 1).isReg() ? Compare.getOperand(i: 1).getReg() : Register(); |
| 621 | MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; |
| 622 | for (++MBBI; MBBI != MBBE; ++MBBI) |
| 623 | if (MBBI->modifiesRegister(Reg: SrcReg, TRI) || |
| 624 | (SrcReg2 && MBBI->modifiesRegister(Reg: SrcReg2, TRI))) |
| 625 | return false; |
| 626 | |
| 627 | // Read the branch mask, target (if applicable), regmask (if applicable). |
| 628 | MachineOperand CCMask(MBBI->getOperand(i: 1)); |
| 629 | assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 && |
| 630 | "Invalid condition-code mask for integer comparison" ); |
| 631 | // This is only valid for CompareAndBranch and CompareAndSibcall. |
| 632 | MachineOperand Target(MBBI->getOperand( |
| 633 | i: (Type == SystemZII::CompareAndBranch || |
| 634 | Type == SystemZII::CompareAndSibcall) ? 2 : 0)); |
| 635 | const uint32_t *RegMask; |
| 636 | if (Type == SystemZII::CompareAndSibcall) |
| 637 | RegMask = MBBI->getOperand(i: 3).getRegMask(); |
| 638 | |
| 639 | // Clear out all current operands. |
| 640 | int CCUse = MBBI->findRegisterUseOperandIdx(Reg: SystemZ::CC, TRI, isKill: false); |
| 641 | assert(CCUse >= 0 && "BRC/BCR must use CC" ); |
| 642 | Branch->removeOperand(OpNo: CCUse); |
| 643 | // Remove regmask (sibcall). |
| 644 | if (Type == SystemZII::CompareAndSibcall) |
| 645 | Branch->removeOperand(OpNo: 3); |
| 646 | // Remove target (branch or sibcall). |
| 647 | if (Type == SystemZII::CompareAndBranch || |
| 648 | Type == SystemZII::CompareAndSibcall) |
| 649 | Branch->removeOperand(OpNo: 2); |
| 650 | Branch->removeOperand(OpNo: 1); |
| 651 | Branch->removeOperand(OpNo: 0); |
| 652 | |
| 653 | // Rebuild Branch as a fused compare and branch. |
| 654 | // SrcNOps is the number of MI operands of the compare instruction |
| 655 | // that we need to copy over. |
| 656 | unsigned SrcNOps = 2; |
| 657 | if (FusedOpcode == SystemZ::CLT || FusedOpcode == SystemZ::CLGT) |
| 658 | SrcNOps = 3; |
| 659 | Branch->setDesc(TII->get(Opcode: FusedOpcode)); |
| 660 | MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); |
| 661 | for (unsigned I = 0; I < SrcNOps; I++) |
| 662 | MIB.add(MO: Compare.getOperand(i: I)); |
| 663 | MIB.add(MO: CCMask); |
| 664 | |
| 665 | if (Type == SystemZII::CompareAndBranch) { |
| 666 | // Only conditional branches define CC, as they may be converted back |
| 667 | // to a non-fused branch because of a long displacement. Conditional |
| 668 | // returns don't have that problem. |
| 669 | MIB.add(MO: Target).addReg(RegNo: SystemZ::CC, |
| 670 | flags: RegState::ImplicitDefine | RegState::Dead); |
| 671 | } |
| 672 | |
| 673 | if (Type == SystemZII::CompareAndSibcall) { |
| 674 | MIB.add(MO: Target); |
| 675 | MIB.addRegMask(Mask: RegMask); |
| 676 | } |
| 677 | |
| 678 | // Clear any intervening kills of SrcReg and SrcReg2. |
| 679 | MBBI = Compare; |
| 680 | for (++MBBI; MBBI != MBBE; ++MBBI) { |
| 681 | MBBI->clearRegisterKills(Reg: SrcReg, RegInfo: TRI); |
| 682 | if (SrcReg2) |
| 683 | MBBI->clearRegisterKills(Reg: SrcReg2, RegInfo: TRI); |
| 684 | } |
| 685 | FusedComparisons += 1; |
| 686 | return true; |
| 687 | } |
| 688 | |
| 689 | // Process all comparison instructions in MBB. Return true if something |
| 690 | // changed. |
| 691 | bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { |
| 692 | bool Changed = false; |
| 693 | |
| 694 | // Walk backwards through the block looking for comparisons, recording |
| 695 | // all CC users as we go. The subroutines can delete Compare and |
| 696 | // instructions before it. |
| 697 | LiveRegUnits LiveRegs(*TRI); |
| 698 | LiveRegs.addLiveOuts(MBB); |
| 699 | bool CompleteCCUsers = LiveRegs.available(Reg: SystemZ::CC); |
| 700 | SmallVector<MachineInstr *, 4> CCUsers; |
| 701 | MachineBasicBlock::iterator MBBI = MBB.end(); |
| 702 | while (MBBI != MBB.begin()) { |
| 703 | MachineInstr &MI = *--MBBI; |
| 704 | if (CompleteCCUsers && (MI.isCompare() || isLoadAndTestAsCmp(MI)) && |
| 705 | (optimizeCompareZero(Compare&: MI, CCUsers) || |
| 706 | fuseCompareOperations(Compare&: MI, CCUsers))) { |
| 707 | ++MBBI; |
| 708 | MI.eraseFromParent(); |
| 709 | Changed = true; |
| 710 | CCUsers.clear(); |
| 711 | continue; |
| 712 | } |
| 713 | |
| 714 | if (MI.definesRegister(Reg: SystemZ::CC, /*TRI=*/nullptr)) { |
| 715 | CCUsers.clear(); |
| 716 | CompleteCCUsers = true; |
| 717 | } |
| 718 | if (MI.readsRegister(Reg: SystemZ::CC, /*TRI=*/nullptr) && CompleteCCUsers) |
| 719 | CCUsers.push_back(Elt: &MI); |
| 720 | } |
| 721 | return Changed; |
| 722 | } |
| 723 | |
| 724 | bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) { |
| 725 | if (skipFunction(F: F.getFunction())) |
| 726 | return false; |
| 727 | |
| 728 | TII = F.getSubtarget<SystemZSubtarget>().getInstrInfo(); |
| 729 | TRI = &TII->getRegisterInfo(); |
| 730 | |
| 731 | bool Changed = false; |
| 732 | for (auto &MBB : F) |
| 733 | Changed |= processBlock(MBB); |
| 734 | |
| 735 | return Changed; |
| 736 | } |
| 737 | |
| 738 | FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) { |
| 739 | return new SystemZElimCompare(); |
| 740 | } |
| 741 | |