//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass lowers all occurrences of i1 values (with a vreg_1 register class)
// to lane masks (32 / 64-bit scalar registers). The pass assumes machine SSA
// form and a wave-level control flow graph.
//
// Before this pass, values that are semantically i1 and are defined and used
// within the same basic block are already represented as lane masks in scalar
// registers. However, values that cross basic blocks are always transferred
// between basic blocks in vreg_1 virtual registers and are lowered by this
// pass.
//
// The only instructions that use or define vreg_1 virtual registers are COPY,
// PHI, and IMPLICIT_DEF.
//
//===----------------------------------------------------------------------===//
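//
// For illustration only, a rough sketch of the lowering (wave64 shown; the
// exact registers, insertion points, and constant folding are decided by the
// code in this file). An i1 phi that merges values across basic blocks, e.g.
//
//   %dst:vreg_1 = PHI %prev:vreg_1, %bb.0, %cur:sreg_64, %bb.1
//
// ends up as lane-mask arithmetic along the lines of
//
//   %t0:sreg_64 = S_ANDN2_B64 %prev, $exec  ; inactive lanes keep old value
//   %t1:sreg_64 = S_AND_B64 %cur, $exec     ; active lanes take new value
//   %dst:sreg_64 = S_OR_B64 %t0, %t1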

#include "SILowerI1Copies.h"
#include "AMDGPU.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "si-i1-copies"

using namespace llvm;

static Register
insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
                    MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs);

namespace {

class Vreg1LoweringHelper : public PhiLoweringHelper {
public:
  Vreg1LoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
                      MachinePostDominatorTree *PDT);

private:
  DenseSet<Register> ConstrainRegs;

public:
  void markAsLaneMask(Register DstReg) const override;
  void getCandidatesForLowering(
      SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
  void collectIncomingValuesFromPhi(
      const MachineInstr *MI,
      SmallVectorImpl<Incoming> &Incomings) const override;
  void replaceDstReg(Register NewReg, Register OldReg,
                     MachineBasicBlock *MBB) override;
  void buildMergeLaneMasks(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator I, const DebugLoc &DL,
                           Register DstReg, Register PrevReg,
                           Register CurReg) override;
  void constrainAsLaneMask(Incoming &In) override;

  bool lowerCopiesFromI1();
  bool lowerCopiesToI1();
  bool cleanConstrainRegs(bool Changed);
  bool isVreg1(Register Reg) const {
    return Reg.isVirtual() && MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
  }
};

Vreg1LoweringHelper::Vreg1LoweringHelper(MachineFunction *MF,
                                         MachineDominatorTree *DT,
                                         MachinePostDominatorTree *PDT)
    : PhiLoweringHelper(MF, DT, PDT) {}

bool Vreg1LoweringHelper::cleanConstrainRegs(bool Changed) {
  assert(Changed || ConstrainRegs.empty());
  for (Register Reg : ConstrainRegs)
    MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
  ConstrainRegs.clear();

  return Changed;
}

/// Helper class that analyzes the relationship between the incoming values of
/// a phi in the control flow graph to determine where an incoming value can
/// simply be taken as a scalar lane mask as-is, and where it needs to be
/// merged with another, previously defined lane mask.
///
/// The approach is as follows (see the worked example below):
///  - Determine all basic blocks which, starting from the incoming blocks,
///    a wave may reach before entering the def block (the block containing
///    the phi).
///  - If an incoming block has no predecessors in this set, we can take the
///    incoming value as a scalar lane mask as-is.
///  -- A special case of this is when the def block has a self-loop.
///  - Otherwise, the incoming value needs to be merged with a previously
///    defined lane mask.
///  - If there is a path into the set of reachable blocks that does _not_ go
///    through an incoming block where we can take the scalar lane mask as-is,
///    we need to invent an available value for the SSAUpdater. Choices are
///    0 and undef, with differing consequences for how to merge values etc.
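///
/// As a small worked example (on a hypothetical CFG, not taken from any
/// particular input): suppose A branches divergently to B and C, B branches
/// divergently to C and D, C branches only to D, and the phi lives in D with
/// incomings from B and C. The blocks a wave may touch before entering D are
/// then {B, C}. B has no predecessor in this set (A is outside it), so B's
/// incoming value can be used as-is. C does have one (B), so C's incoming
/// value must be merged with the lane mask flowing out of B, and an undef
/// value is invented at A for the SSAUpdater.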
///
/// TODO: We could use region analysis to quickly skip over SESE regions during
/// the traversal.
///
class PhiIncomingAnalysis {
  MachinePostDominatorTree &PDT;
  const SIInstrInfo *TII;

  // For each reachable basic block, whether it is a source in the induced
  // subgraph of the CFG.
  MapVector<MachineBasicBlock *, bool> ReachableMap;
  SmallVector<MachineBasicBlock *, 4> Stack;
  SmallVector<MachineBasicBlock *, 4> Predecessors;

public:
  PhiIncomingAnalysis(MachinePostDominatorTree &PDT, const SIInstrInfo *TII)
      : PDT(PDT), TII(TII) {}

  /// Returns whether \p MBB is a source in the induced subgraph of reachable
  /// blocks.
  bool isSource(MachineBasicBlock &MBB) const {
    return ReachableMap.find(&MBB)->second;
  }

  ArrayRef<MachineBasicBlock *> predecessors() const { return Predecessors; }

  void analyze(MachineBasicBlock &DefBlock, ArrayRef<Incoming> Incomings) {
    assert(Stack.empty());
    ReachableMap.clear();
    Predecessors.clear();

    // Insert the def block first, so that it acts as an end point for the
    // traversal.
    ReachableMap.try_emplace(&DefBlock, false);

    for (auto Incoming : Incomings) {
      MachineBasicBlock *MBB = Incoming.Block;
      if (MBB == &DefBlock) {
        ReachableMap[&DefBlock] = true; // self-loop on DefBlock
        continue;
      }

      ReachableMap.try_emplace(MBB, false);

      // If this block has a divergent terminator and the def block is its
      // post-dominator, the wave may first visit the other successors.
      if (TII->hasDivergentBranch(MBB) && PDT.dominates(&DefBlock, MBB))
        append_range(Stack, MBB->successors());
    }

    while (!Stack.empty()) {
      MachineBasicBlock *MBB = Stack.pop_back_val();
      if (ReachableMap.try_emplace(MBB, false).second)
        append_range(Stack, MBB->successors());
    }

    for (auto &[MBB, Reachable] : ReachableMap) {
      bool HaveReachablePred = false;
      for (MachineBasicBlock *Pred : MBB->predecessors()) {
        if (ReachableMap.count(Pred)) {
          HaveReachablePred = true;
        } else {
          Stack.push_back(Pred);
        }
      }
      if (!HaveReachablePred)
        Reachable = true;
      if (HaveReachablePred) {
        for (MachineBasicBlock *UnreachablePred : Stack) {
          if (!llvm::is_contained(Predecessors, UnreachablePred))
            Predecessors.push_back(UnreachablePred);
        }
      }
      Stack.clear();
    }
  }
};

/// Helper class that detects loops which require us to lower an i1 COPY into
/// bitwise manipulation.
///
/// Unfortunately, we cannot use LoopInfo because LoopInfo does not distinguish
/// between loops with the same header. Consider this example:
///
///  A-+-+
///  | | |
///  B-+ |
///  |   |
///  C---+
///
/// A is the header of a loop containing A, B, and C as far as LoopInfo is
/// concerned. However, an i1 COPY in B that is used in C must be lowered to
/// bitwise operations to combine results from different loop iterations when
/// B has a divergent branch (since by default we will compile this code such
/// that threads in a wave are merged at the entry of C).
///
/// The following rule is implemented to determine whether bitwise operations
/// are required: use the bitwise lowering for a def in block B if a backward
/// edge to B is reachable without going through the nearest common
/// post-dominator of B and all uses of the def.
///
/// TODO: This rule is conservative because it does not check whether the
/// relevant branches are actually divergent.
///
/// The class is designed to cache the CFG traversal so that it can be re-used
/// for multiple defs within the same basic block.
///
/// TODO: We could use region analysis to quickly skip over SESE regions during
/// the traversal.
///
class LoopFinder {
  MachineDominatorTree &DT;
  MachinePostDominatorTree &PDT;

  // All visited / reachable blocks, tagged by level (level 0 is the def block,
  // level 1 are all blocks reachable including but not going through the def
  // block's IPDOM, etc.).
  DenseMap<MachineBasicBlock *, unsigned> Visited;

  // Nearest common dominator of all visited blocks by level (level 0 is the
  // def block). Used for seeding the SSAUpdater.
  SmallVector<MachineBasicBlock *, 4> CommonDominators;

  // Post-dominator of all visited blocks.
  MachineBasicBlock *VisitedPostDom = nullptr;

  // Level at which a loop was found: 0 is not possible; 1 = a backward edge is
  // reachable without going through the IPDOM of the def block (if the IPDOM
  // itself has an edge to the def block, the loop level is 2), etc.
  unsigned FoundLoopLevel = ~0u;

  MachineBasicBlock *DefBlock = nullptr;
  SmallVector<MachineBasicBlock *, 4> Stack;
  SmallVector<MachineBasicBlock *, 4> NextLevel;

public:
  LoopFinder(MachineDominatorTree &DT, MachinePostDominatorTree &PDT)
      : DT(DT), PDT(PDT) {}

  void initialize(MachineBasicBlock &MBB) {
    Visited.clear();
    CommonDominators.clear();
    Stack.clear();
    NextLevel.clear();
    VisitedPostDom = nullptr;
    FoundLoopLevel = ~0u;

    DefBlock = &MBB;
  }

  /// Check whether a backward edge can be reached without going through the
  /// given \p PostDom of the def block.
  ///
  /// Return the level of \p PostDom if a loop was found, or 0 otherwise.
  unsigned findLoop(MachineBasicBlock *PostDom) {
    MachineDomTreeNode *PDNode = PDT.getNode(DefBlock);

    if (!VisitedPostDom)
      advanceLevel();

    unsigned Level = 0;
    while (PDNode->getBlock() != PostDom) {
      if (PDNode->getBlock() == VisitedPostDom)
        advanceLevel();
      PDNode = PDNode->getIDom();
      Level++;
      if (FoundLoopLevel == Level)
        return Level;
    }

    return 0;
  }

  /// Add undef values dominating the loop and the optionally given additional
  /// blocks, so that the SSA updater doesn't have to search all the way to the
  /// function entry.
  void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
                      MachineRegisterInfo &MRI,
                      MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs,
                      ArrayRef<Incoming> Incomings = {}) {
    assert(LoopLevel < CommonDominators.size());

    MachineBasicBlock *Dom = CommonDominators[LoopLevel];
    for (auto &Incoming : Incomings)
      Dom = DT.findNearestCommonDominator(Dom, Incoming.Block);

    if (!inLoopLevel(*Dom, LoopLevel, Incomings)) {
      SSAUpdater.AddAvailableValue(
          Dom, insertUndefLaneMask(Dom, &MRI, LaneMaskRegAttrs));
    } else {
      // The dominator is part of the loop or the given blocks, so add the
      // undef value to unreachable predecessors instead.
      for (MachineBasicBlock *Pred : Dom->predecessors()) {
        if (!inLoopLevel(*Pred, LoopLevel, Incomings))
          SSAUpdater.AddAvailableValue(
              Pred, insertUndefLaneMask(Pred, &MRI, LaneMaskRegAttrs));
      }
    }
  }

private:
  bool inLoopLevel(MachineBasicBlock &MBB, unsigned LoopLevel,
                   ArrayRef<Incoming> Incomings) const {
    auto DomIt = Visited.find(&MBB);
    if (DomIt != Visited.end() && DomIt->second <= LoopLevel)
      return true;

    for (auto &Incoming : Incomings)
      if (Incoming.Block == &MBB)
        return true;

    return false;
  }

  void advanceLevel() {
    MachineBasicBlock *VisitedDom;

    if (!VisitedPostDom) {
      VisitedPostDom = DefBlock;
      VisitedDom = DefBlock;
      Stack.push_back(DefBlock);
    } else {
      VisitedPostDom = PDT.getNode(VisitedPostDom)->getIDom()->getBlock();
      VisitedDom = CommonDominators.back();

      for (unsigned i = 0; i < NextLevel.size();) {
        if (PDT.dominates(VisitedPostDom, NextLevel[i])) {
          Stack.push_back(NextLevel[i]);

          NextLevel[i] = NextLevel.back();
          NextLevel.pop_back();
        } else {
          i++;
        }
      }
    }

    unsigned Level = CommonDominators.size();
    while (!Stack.empty()) {
      MachineBasicBlock *MBB = Stack.pop_back_val();
      if (!PDT.dominates(VisitedPostDom, MBB))
        NextLevel.push_back(MBB);

      Visited[MBB] = Level;
      VisitedDom = DT.findNearestCommonDominator(VisitedDom, MBB);

      for (MachineBasicBlock *Succ : MBB->successors()) {
        if (Succ == DefBlock) {
          if (MBB == VisitedPostDom)
            FoundLoopLevel = std::min(FoundLoopLevel, Level + 1);
          else
            FoundLoopLevel = std::min(FoundLoopLevel, Level);
          continue;
        }

        if (Visited.try_emplace(Succ, ~0u).second) {
          if (MBB == VisitedPostDom)
            NextLevel.push_back(Succ);
          else
            Stack.push_back(Succ);
        }
      }
    }

    CommonDominators.push_back(VisitedDom);
  }
};

} // End anonymous namespace.

Register
llvm::createLaneMaskReg(MachineRegisterInfo *MRI,
                        MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs) {
  return MRI->createVirtualRegister(LaneMaskRegAttrs);
}

static Register
insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
                    MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs) {
  MachineFunction &MF = *MBB->getParent();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  Register UndefReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
  BuildMI(*MBB, MBB->getFirstTerminator(), {}, TII->get(AMDGPU::IMPLICIT_DEF),
          UndefReg);
  return UndefReg;
}

#ifndef NDEBUG
static bool isVRegCompatibleReg(const SIRegisterInfo &TRI,
                                const MachineRegisterInfo &MRI,
                                Register Reg) {
  unsigned Size = TRI.getRegSizeInBits(Reg, MRI);
  return Size == 1 || Size == 32;
}
#endif

bool Vreg1LoweringHelper::lowerCopiesFromI1() {
  bool Changed = false;
  SmallVector<MachineInstr *, 4> DeadCopies;

  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() != AMDGPU::COPY)
        continue;

      Register DstReg = MI.getOperand(0).getReg();
      Register SrcReg = MI.getOperand(1).getReg();
      if (!isVreg1(SrcReg))
        continue;

      if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
        continue;

      Changed = true;

      // Copy into a 32-bit vector register: each lane receives -1 if its bit
      // is set in the lane mask and 0 otherwise.
      LLVM_DEBUG(dbgs() << "Lower copy from i1: " << MI);
      DebugLoc DL = MI.getDebugLoc();

      assert(isVRegCompatibleReg(TII->getRegisterInfo(), *MRI, DstReg));
      assert(!MI.getOperand(0).getSubReg());

      ConstrainRegs.insert(SrcReg);
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .addImm(0)
          .addImm(0)
          .addImm(0)
          .addImm(-1)
          .addReg(SrcReg);
      DeadCopies.push_back(&MI);
    }

    for (MachineInstr *MI : DeadCopies)
      MI->eraseFromParent();
    DeadCopies.clear();
  }
  return Changed;
}

PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
                                     MachineDominatorTree *DT,
                                     MachinePostDominatorTree *PDT)
    : MF(MF), DT(DT), PDT(PDT) {
  MRI = &MF->getRegInfo();

  ST = &MF->getSubtarget<GCNSubtarget>();
  TII = ST->getInstrInfo();
  IsWave32 = ST->isWave32();

  if (IsWave32) {
    ExecReg = AMDGPU::EXEC_LO;
    MovOp = AMDGPU::S_MOV_B32;
    AndOp = AMDGPU::S_AND_B32;
    OrOp = AMDGPU::S_OR_B32;
    XorOp = AMDGPU::S_XOR_B32;
    AndN2Op = AMDGPU::S_ANDN2_B32;
    OrN2Op = AMDGPU::S_ORN2_B32;
  } else {
    ExecReg = AMDGPU::EXEC;
    MovOp = AMDGPU::S_MOV_B64;
    AndOp = AMDGPU::S_AND_B64;
    OrOp = AMDGPU::S_OR_B64;
    XorOp = AMDGPU::S_XOR_B64;
    AndN2Op = AMDGPU::S_ANDN2_B64;
    OrN2Op = AMDGPU::S_ORN2_B64;
  }
}

bool PhiLoweringHelper::lowerPhis() {
  MachineSSAUpdater SSAUpdater(*MF);
  LoopFinder LF(*DT, *PDT);
  PhiIncomingAnalysis PIA(*PDT, TII);
  SmallVector<MachineInstr *, 4> Vreg1Phis;
  SmallVector<Incoming, 4> Incomings;

  getCandidatesForLowering(Vreg1Phis);
  if (Vreg1Phis.empty())
    return false;

  DT->updateDFSNumbers();
  MachineBasicBlock *PrevMBB = nullptr;
  for (MachineInstr *MI : Vreg1Phis) {
    MachineBasicBlock &MBB = *MI->getParent();
    if (&MBB != PrevMBB) {
      LF.initialize(MBB);
      PrevMBB = &MBB;
    }

    LLVM_DEBUG(dbgs() << "Lower PHI: " << *MI);

    Register DstReg = MI->getOperand(0).getReg();
    markAsLaneMask(DstReg);
    initializeLaneMaskRegisterAttributes(DstReg);

    collectIncomingValuesFromPhi(MI, Incomings);

    // Sort the incomings such that incoming values that dominate other
    // incoming values are sorted earlier. This allows us to do some amount of
    // on-the-fly constant folding. An incoming with a smaller DFSNumIn goes
    // first; DFSNumIn is 0 for the entry block.
    llvm::sort(Incomings, [this](Incoming LHS, Incoming RHS) {
      return DT->getNode(LHS.Block)->getDFSNumIn() <
             DT->getNode(RHS.Block)->getDFSNumIn();
    });

#ifndef NDEBUG
    PhiRegisters.insert(DstReg);
#endif

    // Phis in a loop that are observed outside the loop receive a simple but
    // conservatively correct treatment.
    std::vector<MachineBasicBlock *> DomBlocks = {&MBB};
    for (MachineInstr &Use : MRI->use_instructions(DstReg))
      DomBlocks.push_back(Use.getParent());

    MachineBasicBlock *PostDomBound =
        PDT->findNearestCommonDominator(DomBlocks);

    // FIXME: This fails to find irreducible cycles. If we have a def (other
    // than a constant) in a pair of blocks that end up looping back to each
    // other, it will be mishandled. Due to structurization this shouldn't
    // occur in practice.
    unsigned FoundLoopLevel = LF.findLoop(PostDomBound);

    SSAUpdater.Initialize(DstReg);

    if (FoundLoopLevel) {
      LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs,
                        Incomings);

      for (auto &Incoming : Incomings) {
        Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
        SSAUpdater.AddAvailableValue(Incoming.Block, Incoming.UpdatedReg);
      }

      for (auto &Incoming : Incomings) {
        MachineBasicBlock &IMBB = *Incoming.Block;
        buildMergeLaneMasks(
            IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg,
            SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg);
      }
    } else {
      // The phi is not observed from outside a loop. Use a more accurate
      // lowering.
      PIA.analyze(MBB, Incomings);

      for (MachineBasicBlock *MBB : PIA.predecessors())
        SSAUpdater.AddAvailableValue(
            MBB, insertUndefLaneMask(MBB, MRI, LaneMaskRegAttrs));

      for (auto &Incoming : Incomings) {
        MachineBasicBlock &IMBB = *Incoming.Block;
        if (PIA.isSource(IMBB)) {
          constrainAsLaneMask(Incoming);
          SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg);
        } else {
          Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
          SSAUpdater.AddAvailableValue(&IMBB, Incoming.UpdatedReg);
        }
      }

      for (auto &Incoming : Incomings) {
        if (!Incoming.UpdatedReg.isValid())
          continue;

        MachineBasicBlock &IMBB = *Incoming.Block;
        buildMergeLaneMasks(
            IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg,
            SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg);
      }
    }

    Register NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB);
    if (NewReg != DstReg) {
      replaceDstReg(NewReg, DstReg, &MBB);
      MI->eraseFromParent();
    }

    Incomings.clear();
  }
  return true;
}

bool Vreg1LoweringHelper::lowerCopiesToI1() {
  bool Changed = false;
  MachineSSAUpdater SSAUpdater(*MF);
  LoopFinder LF(*DT, *PDT);
  SmallVector<MachineInstr *, 4> DeadCopies;

  for (MachineBasicBlock &MBB : *MF) {
    LF.initialize(MBB);

    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() != AMDGPU::IMPLICIT_DEF &&
          MI.getOpcode() != AMDGPU::COPY)
        continue;

      Register DstReg = MI.getOperand(0).getReg();
      if (!isVreg1(DstReg))
        continue;

      Changed = true;

      if (MRI->use_empty(DstReg)) {
        DeadCopies.push_back(&MI);
        continue;
      }

      LLVM_DEBUG(dbgs() << "Lower Other: " << MI);

      markAsLaneMask(DstReg);
      initializeLaneMaskRegisterAttributes(DstReg);

      if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
        continue;

      DebugLoc DL = MI.getDebugLoc();
      Register SrcReg = MI.getOperand(1).getReg();
      assert(!MI.getOperand(1).getSubReg());

      if (!SrcReg.isVirtual() ||
          (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
        assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
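        // The source is not already a lane mask: materialize one by testing
        // each lane's 32-bit value for being non-zero.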
        Register TmpReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
            .addReg(SrcReg)
            .addImm(0);
        MI.getOperand(1).setReg(TmpReg);
        SrcReg = TmpReg;
      } else {
        // SrcReg needs to be live beyond the copy.
        MI.getOperand(1).setIsKill(false);
      }

      // Defs in a loop that are observed outside the loop must be transformed
      // into appropriate bit manipulation.
      std::vector<MachineBasicBlock *> DomBlocks = {&MBB};
      for (MachineInstr &Use : MRI->use_instructions(DstReg))
        DomBlocks.push_back(Use.getParent());

      MachineBasicBlock *PostDomBound =
          PDT->findNearestCommonDominator(DomBlocks);
      unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
      if (FoundLoopLevel) {
        SSAUpdater.Initialize(DstReg);
        SSAUpdater.AddAvailableValue(&MBB, DstReg);
        LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs);

        buildMergeLaneMasks(MBB, MI, DL, DstReg,
                            SSAUpdater.GetValueInMiddleOfBlock(&MBB), SrcReg);
        DeadCopies.push_back(&MI);
      }
    }

    for (MachineInstr *MI : DeadCopies)
      MI->eraseFromParent();
    DeadCopies.clear();
  }
  return Changed;
}
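
/// Returns true if \p Reg is (looking through lane-mask copies) ultimately
/// defined as a constant lane mask or as IMPLICIT_DEF. For a move of 0 or -1,
/// \p Val receives the constant's truth value; for IMPLICIT_DEF (undef),
/// \p Val is left unchanged since any value is acceptable.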
bool PhiLoweringHelper::isConstantLaneMask(Register Reg, bool &Val) const {
  const MachineInstr *MI;
  for (;;) {
    MI = MRI->getUniqueVRegDef(Reg);
    if (MI->getOpcode() == AMDGPU::IMPLICIT_DEF)
      return true;

    if (MI->getOpcode() != AMDGPU::COPY)
      break;

    Reg = MI->getOperand(1).getReg();
    if (!Reg.isVirtual())
      return false;
    if (!isLaneMaskReg(Reg))
      return false;
  }

  if (MI->getOpcode() != MovOp)
    return false;

  if (!MI->getOperand(1).isImm())
    return false;

  int64_t Imm = MI->getOperand(1).getImm();
  if (Imm == 0) {
    Val = false;
    return true;
  }
  if (Imm == -1) {
    Val = true;
    return true;
  }

  return false;
}

static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
  Def = false;
  Use = false;

  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
      if (MO.isUse())
        Use = true;
      else
        Def = true;
    }
  }
}

/// Return a point at the end of the given \p MBB to insert SALU instructions
/// for lane mask calculation. Take terminators and SCC into account.
MachineBasicBlock::iterator
PhiLoweringHelper::getSaluInsertionAtEnd(MachineBasicBlock &MBB) const {
  auto InsertionPt = MBB.getFirstTerminator();
  bool TerminatorsUseSCC = false;
  for (auto I = InsertionPt, E = MBB.end(); I != E; ++I) {
    bool DefsSCC;
    instrDefsUsesSCC(*I, DefsSCC, TerminatorsUseSCC);
    if (TerminatorsUseSCC || DefsSCC)
      break;
  }

  if (!TerminatorsUseSCC)
    return InsertionPt;

  while (InsertionPt != MBB.begin()) {
    InsertionPt--;

    bool DefSCC, UseSCC;
    instrDefsUsesSCC(*InsertionPt, DefSCC, UseSCC);
    if (DefSCC)
      return InsertionPt;
  }

  // We should have at least seen an IMPLICIT_DEF or COPY.
  llvm_unreachable("SCC used by terminator but no def in block");
}

// VReg_1 -> SReg_32 or SReg_64
void Vreg1LoweringHelper::markAsLaneMask(Register DstReg) const {
  MRI->setRegClass(DstReg, ST->getBoolRC());
}

void Vreg1LoweringHelper::getCandidatesForLowering(
    SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB.phis()) {
      if (isVreg1(MI.getOperand(0).getReg()))
        Vreg1Phis.push_back(&MI);
    }
  }
}

void Vreg1LoweringHelper::collectIncomingValuesFromPhi(
    const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
  for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
    assert(i + 1 < MI->getNumOperands());
    Register IncomingReg = MI->getOperand(i).getReg();
    MachineBasicBlock *IncomingMBB = MI->getOperand(i + 1).getMBB();
    MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);

    if (IncomingDef->getOpcode() == AMDGPU::COPY) {
      IncomingReg = IncomingDef->getOperand(1).getReg();
      assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
      assert(!IncomingDef->getOperand(1).getSubReg());
    } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
      continue;
    } else {
      assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg));
    }

    Incomings.emplace_back(IncomingReg, IncomingMBB, Register());
  }
}

void Vreg1LoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
                                        MachineBasicBlock *MBB) {
  MRI->replaceRegWith(NewReg, OldReg);
}
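
// Merge the lane mask produced on the current path (CurReg) into the
// previously accumulated mask (PrevReg); conceptually
//   DstReg = (PrevReg & ~EXEC) | (CurReg & EXEC)
// with the special cases below folding away work when either input is a
// known constant or undef.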
void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
                                              MachineBasicBlock::iterator I,
                                              const DebugLoc &DL,
                                              Register DstReg, Register PrevReg,
                                              Register CurReg) {
  bool PrevVal = false;
  bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
  bool CurVal = false;
  bool CurConstant = isConstantLaneMask(CurReg, CurVal);

  if (PrevConstant && CurConstant) {
    if (PrevVal == CurVal) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurReg);
    } else if (CurVal) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(ExecReg);
    } else {
      BuildMI(MBB, I, DL, TII->get(XorOp), DstReg)
          .addReg(ExecReg)
          .addImm(-1);
    }
    return;
  }

  Register PrevMaskedReg;
  Register CurMaskedReg;
  if (!PrevConstant) {
    if (CurConstant && CurVal) {
      PrevMaskedReg = PrevReg;
    } else {
      PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
      BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
          .addReg(PrevReg)
          .addReg(ExecReg);
    }
  }
  if (!CurConstant) {
    // TODO: check whether CurReg is already masked by EXEC
    if (PrevConstant && PrevVal) {
      CurMaskedReg = CurReg;
    } else {
      CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
      BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
          .addReg(CurReg)
          .addReg(ExecReg);
    }
  }

  if (PrevConstant && !PrevVal) {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
        .addReg(CurMaskedReg);
  } else if (CurConstant && !CurVal) {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
        .addReg(PrevMaskedReg);
  } else if (PrevConstant && PrevVal) {
    BuildMI(MBB, I, DL, TII->get(OrN2Op), DstReg)
        .addReg(CurMaskedReg)
        .addReg(ExecReg);
  } else {
    BuildMI(MBB, I, DL, TII->get(OrOp), DstReg)
        .addReg(PrevMaskedReg)
        .addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
  }
}
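
// Incoming values that can be used as-is require no extra constraining in the
// vreg_1 lowering, so this override is intentionally empty.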
void Vreg1LoweringHelper::constrainAsLaneMask(Incoming &In) {}

/// Lower all instructions that def or use vreg_1 registers.
///
/// In a first pass, we lower COPYs from vreg_1 to vector registers, as can
/// occur around inline assembly. We do this first, before vreg_1 registers
/// are changed to scalar mask registers.
///
/// Then we lower all defs of vreg_1 registers. Phi nodes are lowered before
/// all others, because phi lowering looks through copies and can therefore
/// often make copy lowering unnecessary.
static bool runFixI1Copies(MachineFunction &MF, MachineDominatorTree &MDT,
                           MachinePostDominatorTree &MPDT) {
  // Only need to run this in the SelectionDAG path.
  if (MF.getProperties().hasSelected())
    return false;

  Vreg1LoweringHelper Helper(&MF, &MDT, &MPDT);
  bool Changed = false;
  Changed |= Helper.lowerCopiesFromI1();
  Changed |= Helper.lowerPhis();
  Changed |= Helper.lowerCopiesToI1();
  return Helper.cleanConstrainRegs(Changed);
}

PreservedAnalyses
SILowerI1CopiesPass::run(MachineFunction &MF,
                         MachineFunctionAnalysisManager &MFAM) {
  MachineDominatorTree &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
  MachinePostDominatorTree &MPDT =
      MFAM.getResult<MachinePostDominatorTreeAnalysis>(MF);
  bool Changed = runFixI1Copies(MF, MDT, MPDT);
  if (!Changed)
    return PreservedAnalyses::all();

  // TODO: Probably preserves most.
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

class SILowerI1CopiesLegacy : public MachineFunctionPass {
public:
  static char ID;

  SILowerI1CopiesLegacy() : MachineFunctionPass(ID) {
    initializeSILowerI1CopiesLegacyPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Lower i1 Copies"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addRequired<MachinePostDominatorTreeWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

bool SILowerI1CopiesLegacy::runOnMachineFunction(MachineFunction &MF) {
  MachineDominatorTree &MDT =
      getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  MachinePostDominatorTree &MPDT =
      getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
  return runFixI1Copies(MF, MDT, MPDT);
}

INITIALIZE_PASS_BEGIN(SILowerI1CopiesLegacy, DEBUG_TYPE, "SI Lower i1 Copies",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(SILowerI1CopiesLegacy, DEBUG_TYPE, "SI Lower i1 Copies",
                    false, false)

char SILowerI1CopiesLegacy::ID = 0;

char &llvm::SILowerI1CopiesLegacyID = SILowerI1CopiesLegacy::ID;

FunctionPass *llvm::createSILowerI1CopiesLegacyPass() {
  return new SILowerI1CopiesLegacy();
}