//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// In NVPTX, NVPTXFrameLowering emits the following instructions at the
// beginning of a MachineFunction.
11 | // |
12 | // mov %SPL, %depot |
13 | // cvta.local %SP, %SPL |
14 | // |
// Because a frame index is a generic address and alloca can only return a
// generic pointer, without this pass the instructions producing alloca'ed
// addresses will be based on %SP. NVPTXLowerAlloca tends to help replace
// stores and loads on these addresses with their .local versions, but this may
// introduce a lot of cvta.to.local instructions. Performance can be improved
// if we avoid casting addresses back and forth and directly calculate local
// addresses based on %SPL. This peephole pass optimizes these cases.
//
// For example, it will transform the following pattern
24 | // %0 = LEA_ADDRi64 %VRFrame64, 4 |
25 | // %1 = cvta_to_local_64 %0 |
26 | // |
27 | // into |
28 | // %1 = LEA_ADDRi64 %VRFrameLocal64, 4 |
29 | // |
30 | // %VRFrameLocal64 is the virtual register name of %SPL |
31 | // |
32 | //===----------------------------------------------------------------------===// |
33 | |
34 | #include "NVPTX.h" |
35 | #include "NVPTXRegisterInfo.h" |
36 | #include "NVPTXSubtarget.h" |
37 | #include "llvm/CodeGen/MachineFunctionPass.h" |
38 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
39 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
40 | #include "llvm/CodeGen/TargetInstrInfo.h" |
41 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
42 | |
43 | using namespace llvm; |
44 | |
45 | #define DEBUG_TYPE "nvptx-peephole" |
46 | |
47 | namespace llvm { |
48 | void initializeNVPTXPeepholePass(PassRegistry &); |
49 | } |
50 | |
51 | namespace { |
52 | struct NVPTXPeephole : public MachineFunctionPass { |
53 | public: |
54 | static char ID; |
55 | NVPTXPeephole() : MachineFunctionPass(ID) { |
56 | initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry()); |
57 | } |
58 | |
59 | bool runOnMachineFunction(MachineFunction &MF) override; |
60 | |
61 | StringRef getPassName() const override { |
62 | return "NVPTX optimize redundant cvta.to.local instruction" ; |
63 | } |
64 | |
65 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
66 | MachineFunctionPass::getAnalysisUsage(AU); |
67 | } |
68 | }; |
69 | } |
70 | |
71 | char NVPTXPeephole::ID = 0; |
72 | |
73 | INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole" , "NVPTX Peephole" , false, false) |
74 | |
75 | static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) { |
76 | auto &MBB = *Root.getParent(); |
77 | auto &MF = *MBB.getParent(); |
78 | // Check current instruction is cvta.to.local |
79 | if (Root.getOpcode() != NVPTX::cvta_to_local_64 && |
80 | Root.getOpcode() != NVPTX::cvta_to_local) |
81 | return false; |
82 | |
83 | auto &Op = Root.getOperand(i: 1); |
84 | const auto &MRI = MF.getRegInfo(); |
85 | MachineInstr *GenericAddrDef = nullptr; |
86 | if (Op.isReg() && Op.getReg().isVirtual()) { |
87 | GenericAddrDef = MRI.getUniqueVRegDef(Reg: Op.getReg()); |
88 | } |
89 | |
90 | // Check the register operand is uniquely defined by LEA_ADDRi instruction |
91 | if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB || |
92 | (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 && |
93 | GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) { |
94 | return false; |
95 | } |
96 | |
97 | const NVPTXRegisterInfo *NRI = |
98 | MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo(); |
99 | |
100 | // Check the LEA_ADDRi operand is Frame index |
101 | auto &BaseAddrOp = GenericAddrDef->getOperand(i: 1); |
102 | if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NRI->getFrameRegister(MF)) { |
103 | return true; |
104 | } |
105 | |
106 | return false; |
107 | } |
108 | |
109 | static void CombineCVTAToLocal(MachineInstr &Root) { |
110 | auto &MBB = *Root.getParent(); |
111 | auto &MF = *MBB.getParent(); |
112 | const auto &MRI = MF.getRegInfo(); |
113 | const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); |
114 | auto &Prev = *MRI.getUniqueVRegDef(Reg: Root.getOperand(i: 1).getReg()); |
115 | |
116 | const NVPTXRegisterInfo *NRI = |
117 | MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo(); |
118 | |
119 | MachineInstrBuilder MIB = |
120 | BuildMI(MF, MIMD: Root.getDebugLoc(), MCID: TII->get(Opcode: Prev.getOpcode()), |
121 | DestReg: Root.getOperand(i: 0).getReg()) |
122 | .addReg(RegNo: NRI->getFrameLocalRegister(MF)) |
123 | .add(MO: Prev.getOperand(i: 2)); |
124 | |
125 | MBB.insert(I: (MachineBasicBlock::iterator)&Root, MI: MIB); |
126 | |
127 | // Check if MRI has only one non dbg use, which is Root |
128 | if (MRI.hasOneNonDBGUse(RegNo: Prev.getOperand(i: 0).getReg())) { |
129 | Prev.eraseFromParent(); |
130 | } |
131 | Root.eraseFromParent(); |
132 | } |
133 | |
134 | bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) { |
135 | if (skipFunction(F: MF.getFunction())) |
136 | return false; |
137 | |
138 | bool Changed = false; |
139 | // Loop over all of the basic blocks. |
140 | for (auto &MBB : MF) { |
141 | // Traverse the basic block. |
142 | auto BlockIter = MBB.begin(); |
143 | |
144 | while (BlockIter != MBB.end()) { |
145 | auto &MI = *BlockIter++; |
146 | if (isCVTAToLocalCombinationCandidate(Root&: MI)) { |
147 | CombineCVTAToLocal(Root&: MI); |
148 | Changed = true; |
149 | } |
150 | } // Instruction |
151 | } // Basic Block |
152 | |
153 | const NVPTXRegisterInfo *NRI = |
154 | MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo(); |
155 | |
156 | // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal |
157 | const auto &MRI = MF.getRegInfo(); |
158 | if (MRI.use_empty(RegNo: NRI->getFrameRegister(MF))) { |
159 | if (auto MI = MRI.getUniqueVRegDef(Reg: NRI->getFrameRegister(MF))) { |
160 | MI->eraseFromParent(); |
161 | } |
162 | } |
163 | |
164 | return Changed; |
165 | } |
166 | |
167 | MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); } |
168 | |