1 | //===-- SILowerWWMCopies.cpp - Lower Copies after regalloc ---===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// Lowering the WWM_COPY instructions for various register classes. |
11 | /// AMDGPU target generates WWM_COPY instruction to differentiate WWM |
12 | /// copy from COPY. This pass generates the necessary exec mask manipulation |
13 | /// instructions to replicate 'Whole Wave Mode' and lowers WWM_COPY back to |
14 | /// COPY. |
15 | // |
16 | //===----------------------------------------------------------------------===// |
17 | |
18 | #include "AMDGPU.h" |
19 | #include "GCNSubtarget.h" |
20 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
21 | #include "SIMachineFunctionInfo.h" |
22 | #include "llvm/CodeGen/LiveIntervals.h" |
23 | #include "llvm/CodeGen/MachineFunctionPass.h" |
24 | #include "llvm/CodeGen/VirtRegMap.h" |
25 | #include "llvm/InitializePasses.h" |
26 | |
27 | using namespace llvm; |
28 | |
29 | #define DEBUG_TYPE "si-lower-wwm-copies" |
30 | |
31 | namespace { |
32 | |
33 | class SILowerWWMCopies : public MachineFunctionPass { |
34 | public: |
35 | static char ID; |
36 | |
37 | SILowerWWMCopies() : MachineFunctionPass(ID) { |
38 | initializeSILowerWWMCopiesPass(*PassRegistry::getPassRegistry()); |
39 | } |
40 | |
41 | bool runOnMachineFunction(MachineFunction &MF) override; |
42 | |
43 | StringRef getPassName() const override { return "SI Lower WWM Copies" ; } |
44 | |
45 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
46 | AU.setPreservesAll(); |
47 | MachineFunctionPass::getAnalysisUsage(AU); |
48 | } |
49 | |
50 | private: |
51 | bool isSCCLiveAtMI(const MachineInstr &MI); |
52 | void addToWWMSpills(MachineFunction &MF, Register Reg); |
53 | |
54 | LiveIntervals *LIS; |
55 | SlotIndexes *Indexes; |
56 | VirtRegMap *VRM; |
57 | const SIRegisterInfo *TRI; |
58 | const MachineRegisterInfo *MRI; |
59 | SIMachineFunctionInfo *MFI; |
60 | }; |
61 | |
62 | } // End anonymous namespace. |
63 | |
64 | INITIALIZE_PASS_BEGIN(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies" , |
65 | false, false) |
66 | INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) |
67 | INITIALIZE_PASS_DEPENDENCY(VirtRegMap) |
68 | INITIALIZE_PASS_END(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies" , false, |
69 | false) |
70 | |
71 | char SILowerWWMCopies::ID = 0; |
72 | |
73 | char &llvm::SILowerWWMCopiesID = SILowerWWMCopies::ID; |
74 | |
75 | bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) { |
76 | // We can't determine the liveness info if LIS isn't available. Early return |
77 | // in that case and always assume SCC is live. |
78 | if (!LIS) |
79 | return true; |
80 | |
81 | LiveRange &LR = |
82 | LIS->getRegUnit(Unit: *MCRegUnitIterator(MCRegister::from(Val: AMDGPU::SCC), TRI)); |
83 | SlotIndex Idx = LIS->getInstructionIndex(Instr: MI); |
84 | return LR.liveAt(index: Idx); |
85 | } |
86 | |
87 | // If \p Reg is assigned with a physical VGPR, add the latter into wwm-spills |
88 | // for preserving its entire lanes at function prolog/epilog. |
89 | void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) { |
90 | if (Reg.isPhysical()) |
91 | return; |
92 | |
93 | Register PhysReg = VRM->getPhys(virtReg: Reg); |
94 | assert(PhysReg != VirtRegMap::NO_PHYS_REG && |
95 | "should have allocated a physical register" ); |
96 | |
97 | MFI->allocateWWMSpill(MF, VGPR: PhysReg); |
98 | } |
99 | |
100 | bool SILowerWWMCopies::runOnMachineFunction(MachineFunction &MF) { |
101 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
102 | const SIInstrInfo *TII = ST.getInstrInfo(); |
103 | |
104 | MFI = MF.getInfo<SIMachineFunctionInfo>(); |
105 | auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); |
106 | LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; |
107 | auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>(); |
108 | Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr; |
109 | VRM = getAnalysisIfAvailable<VirtRegMap>(); |
110 | TRI = ST.getRegisterInfo(); |
111 | MRI = &MF.getRegInfo(); |
112 | |
113 | if (!MFI->hasVRegFlags()) |
114 | return false; |
115 | |
116 | bool Changed = false; |
117 | for (MachineBasicBlock &MBB : MF) { |
118 | for (MachineInstr &MI : MBB) { |
119 | if (MI.getOpcode() != AMDGPU::WWM_COPY) |
120 | continue; |
121 | |
122 | // TODO: Club adjacent WWM ops between same exec save/restore |
123 | assert(TII->isVGPRCopy(MI)); |
124 | |
125 | // For WWM vector copies, manipulate the exec mask around the copy |
126 | // instruction. |
127 | const DebugLoc &DL = MI.getDebugLoc(); |
128 | MachineBasicBlock::iterator InsertPt = MI.getIterator(); |
129 | Register RegForExecCopy = MFI->getSGPRForEXECCopy(); |
130 | TII->insertScratchExecCopy(MF, MBB, MBBI: InsertPt, DL, Reg: RegForExecCopy, |
131 | IsSCCLive: isSCCLiveAtMI(MI), Indexes); |
132 | TII->restoreExec(MF, MBB, MBBI: ++InsertPt, DL, Reg: RegForExecCopy, Indexes); |
133 | addToWWMSpills(MF, Reg: MI.getOperand(i: 0).getReg()); |
134 | LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI); |
135 | |
136 | // Lower WWM_COPY back to COPY |
137 | MI.setDesc(TII->get(Opcode: AMDGPU::COPY)); |
138 | Changed |= true; |
139 | } |
140 | } |
141 | |
142 | return Changed; |
143 | } |
144 | |