1 | //===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "ARM.h" |
10 | #include "ARMMachineFunctionInfo.h" |
11 | #include "ARMSubtarget.h" |
12 | #include "Thumb2InstrInfo.h" |
13 | #include "llvm/ADT/SmallVector.h" |
14 | #include "llvm/ADT/Statistic.h" |
15 | #include "llvm/ADT/StringRef.h" |
16 | #include "llvm/CodeGen/MachineBasicBlock.h" |
17 | #include "llvm/CodeGen/MachineFunction.h" |
18 | #include "llvm/CodeGen/MachineFunctionPass.h" |
19 | #include "llvm/CodeGen/MachineInstr.h" |
20 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
21 | #include "llvm/CodeGen/MachineInstrBundle.h" |
22 | #include "llvm/CodeGen/MachineOperand.h" |
23 | #include "llvm/IR/DebugLoc.h" |
24 | #include "llvm/Support/Debug.h" |
25 | #include <cassert> |
26 | #include <new> |
27 | |
28 | using namespace llvm; |
29 | |
30 | #define DEBUG_TYPE "arm-mve-vpt" |
31 | |
32 | namespace { |
33 | class MVEVPTBlock : public MachineFunctionPass { |
34 | public: |
35 | static char ID; |
36 | const Thumb2InstrInfo *TII; |
37 | const TargetRegisterInfo *TRI; |
38 | |
39 | MVEVPTBlock() : MachineFunctionPass(ID) {} |
40 | |
41 | bool runOnMachineFunction(MachineFunction &Fn) override; |
42 | |
43 | MachineFunctionProperties getRequiredProperties() const override { |
44 | return MachineFunctionProperties().setNoVRegs(); |
45 | } |
46 | |
47 | StringRef getPassName() const override { |
48 | return "MVE VPT block insertion pass" ; |
49 | } |
50 | |
51 | private: |
52 | bool InsertVPTBlocks(MachineBasicBlock &MBB); |
53 | }; |
54 | |
55 | char MVEVPTBlock::ID = 0; |
56 | |
57 | } // end anonymous namespace |
58 | |
59 | INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass" , false, false) |
60 | |
61 | static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, |
62 | const TargetRegisterInfo *TRI, |
63 | unsigned &NewOpcode) { |
64 | // Search backwards to the instruction that defines VPR. This may or not |
65 | // be a VCMP, we check that after this loop. If we find another instruction |
66 | // that reads cpsr, we return nullptr. |
67 | MachineBasicBlock::iterator CmpMI = MI; |
68 | while (CmpMI != MI->getParent()->begin()) { |
69 | --CmpMI; |
70 | if (CmpMI->modifiesRegister(Reg: ARM::VPR, TRI)) |
71 | break; |
72 | if (CmpMI->readsRegister(Reg: ARM::VPR, TRI)) |
73 | break; |
74 | } |
75 | |
76 | if (CmpMI == MI) |
77 | return nullptr; |
78 | NewOpcode = VCMPOpcodeToVPT(Opcode: CmpMI->getOpcode()); |
79 | if (NewOpcode == 0) |
80 | return nullptr; |
81 | |
82 | // Search forward from CmpMI to MI, checking if either register was def'd |
83 | if (registerDefinedBetween(Reg: CmpMI->getOperand(i: 1).getReg(), From: std::next(x: CmpMI), |
84 | To: MI, TRI)) |
85 | return nullptr; |
86 | if (registerDefinedBetween(Reg: CmpMI->getOperand(i: 2).getReg(), From: std::next(x: CmpMI), |
87 | To: MI, TRI)) |
88 | return nullptr; |
89 | return &*CmpMI; |
90 | } |
91 | |
92 | // Advances Iter past a block of predicated instructions. |
93 | // Returns true if it successfully skipped the whole block of predicated |
94 | // instructions. Returns false when it stopped early (due to MaxSteps), or if |
95 | // Iter didn't point to a predicated instruction. |
96 | static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter, |
97 | MachineBasicBlock::instr_iterator EndIter, |
98 | unsigned MaxSteps, |
99 | unsigned &) { |
100 | ARMVCC::VPTCodes NextPred = ARMVCC::None; |
101 | Register PredReg; |
102 | NumInstrsSteppedOver = 0; |
103 | |
104 | while (Iter != EndIter) { |
105 | if (Iter->isDebugInstr()) { |
106 | // Skip debug instructions |
107 | ++Iter; |
108 | continue; |
109 | } |
110 | |
111 | NextPred = getVPTInstrPredicate(MI: *Iter, PredReg); |
112 | assert(NextPred != ARMVCC::Else && |
113 | "VPT block pass does not expect Else preds" ); |
114 | if (NextPred == ARMVCC::None || MaxSteps == 0) |
115 | break; |
116 | --MaxSteps; |
117 | ++Iter; |
118 | ++NumInstrsSteppedOver; |
119 | }; |
120 | |
121 | return NumInstrsSteppedOver != 0 && |
122 | (NextPred == ARMVCC::None || Iter == EndIter); |
123 | } |
124 | |
125 | // Returns true if at least one instruction in the range [Iter, End) defines |
126 | // or kills VPR. |
127 | static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter, |
128 | MachineBasicBlock::iterator End) { |
129 | for (; Iter != End; ++Iter) |
130 | if (Iter->definesRegister(Reg: ARM::VPR, /*TRI=*/nullptr) || |
131 | Iter->killsRegister(Reg: ARM::VPR, /*TRI=*/nullptr)) |
132 | return true; |
133 | return false; |
134 | } |
135 | |
136 | // Creates a T, TT, TTT or TTTT BlockMask depending on BlockSize. |
137 | static ARM::PredBlockMask GetInitialBlockMask(unsigned BlockSize) { |
138 | switch (BlockSize) { |
139 | case 1: |
140 | return ARM::PredBlockMask::T; |
141 | case 2: |
142 | return ARM::PredBlockMask::TT; |
143 | case 3: |
144 | return ARM::PredBlockMask::TTT; |
145 | case 4: |
146 | return ARM::PredBlockMask::TTTT; |
147 | default: |
148 | llvm_unreachable("Invalid BlockSize!" ); |
149 | } |
150 | } |
151 | |
152 | // Given an iterator (Iter) that points at an instruction with a "Then" |
153 | // predicate, tries to create the largest block of continuous predicated |
154 | // instructions possible, and returns the VPT Block Mask of that block. |
155 | // |
156 | // This will try to perform some minor optimization in order to maximize the |
157 | // size of the block. |
158 | static ARM::PredBlockMask |
159 | CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, |
160 | MachineBasicBlock::instr_iterator EndIter, |
161 | SmallVectorImpl<MachineInstr *> &DeadInstructions) { |
162 | MachineBasicBlock::instr_iterator BlockBeg = Iter; |
163 | (void)BlockBeg; |
164 | assert(getVPTInstrPredicate(*Iter) == ARMVCC::Then && |
165 | "Expected a Predicated Instruction" ); |
166 | |
167 | LLVM_DEBUG(dbgs() << "VPT block created for: " ; Iter->dump()); |
168 | |
169 | unsigned BlockSize; |
170 | StepOverPredicatedInstrs(Iter, EndIter, MaxSteps: 4, NumInstrsSteppedOver&: BlockSize); |
171 | |
172 | LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter = |
173 | std::next(BlockBeg); |
174 | AddedInstIter != Iter; ++AddedInstIter) { |
175 | if (AddedInstIter->isDebugInstr()) |
176 | continue; |
177 | dbgs() << " adding: " ; |
178 | AddedInstIter->dump(); |
179 | }); |
180 | |
181 | // Generate the initial BlockMask |
182 | ARM::PredBlockMask BlockMask = GetInitialBlockMask(BlockSize); |
183 | |
184 | // Remove VPNOTs while there's still room in the block, so we can make the |
185 | // largest block possible. |
186 | ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else; |
187 | while (BlockSize < 4 && Iter != EndIter && |
188 | Iter->getOpcode() == ARM::MVE_VPNOT) { |
189 | |
190 | // Try to skip all of the predicated instructions after the VPNOT, stopping |
191 | // after (4 - BlockSize). If we can't skip them all, stop. |
192 | unsigned ElseInstCnt = 0; |
193 | MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(x: Iter); |
194 | if (!StepOverPredicatedInstrs(Iter&: VPNOTBlockEndIter, EndIter, MaxSteps: (4 - BlockSize), |
195 | NumInstrsSteppedOver&: ElseInstCnt)) |
196 | break; |
197 | |
198 | // Check if this VPNOT can be removed or not: It can only be removed if at |
199 | // least one of the predicated instruction that follows it kills or sets |
200 | // VPR. |
201 | if (!IsVPRDefinedOrKilledByBlock(Iter, End: VPNOTBlockEndIter)) |
202 | break; |
203 | |
204 | LLVM_DEBUG(dbgs() << " removing VPNOT: " ; Iter->dump()); |
205 | |
206 | // Record the new size of the block |
207 | BlockSize += ElseInstCnt; |
208 | assert(BlockSize <= 4 && "Block is too large!" ); |
209 | |
210 | // Record the VPNot to remove it later. |
211 | DeadInstructions.push_back(Elt: &*Iter); |
212 | ++Iter; |
213 | |
214 | // Replace the predicates of the instructions we're adding. |
215 | // Note that we are using "Iter" to iterate over the block so we can update |
216 | // it at the same time. |
217 | for (; Iter != VPNOTBlockEndIter; ++Iter) { |
218 | if (Iter->isDebugInstr()) |
219 | continue; |
220 | |
221 | // Find the register in which the predicate is |
222 | int OpIdx = findFirstVPTPredOperandIdx(MI: *Iter); |
223 | assert(OpIdx != -1); |
224 | |
225 | // Change the predicate and update the mask |
226 | Iter->getOperand(i: OpIdx).setImm(CurrentPredicate); |
227 | BlockMask = expandPredBlockMask(BlockMask, Kind: CurrentPredicate); |
228 | |
229 | LLVM_DEBUG(dbgs() << " adding : " ; Iter->dump()); |
230 | } |
231 | |
232 | CurrentPredicate = |
233 | (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then); |
234 | } |
235 | return BlockMask; |
236 | } |
237 | |
238 | bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { |
239 | bool Modified = false; |
240 | MachineBasicBlock::instr_iterator MBIter = Block.instr_begin(); |
241 | MachineBasicBlock::instr_iterator EndIter = Block.instr_end(); |
242 | |
243 | SmallVector<MachineInstr *, 4> DeadInstructions; |
244 | |
245 | while (MBIter != EndIter) { |
246 | MachineInstr *MI = &*MBIter; |
247 | Register PredReg; |
248 | DebugLoc DL = MI->getDebugLoc(); |
249 | |
250 | ARMVCC::VPTCodes Pred = getVPTInstrPredicate(MI: *MI, PredReg); |
251 | |
252 | // The idea of the predicate is that None, Then and Else are for use when |
253 | // handling assembly language: they correspond to the three possible |
254 | // suffixes "", "t" and "e" on the mnemonic. So when instructions are read |
255 | // from assembly source or disassembled from object code, you expect to |
256 | // see a mixture whenever there's a long VPT block. But in code |
257 | // generation, we hope we'll never generate an Else as input to this pass. |
258 | assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds" ); |
259 | |
260 | if (Pred == ARMVCC::None) { |
261 | ++MBIter; |
262 | continue; |
263 | } |
264 | |
265 | ARM::PredBlockMask BlockMask = |
266 | CreateVPTBlock(Iter&: MBIter, EndIter, DeadInstructions); |
267 | |
268 | // Search back for a VCMP that can be folded to create a VPT, or else |
269 | // create a VPST directly |
270 | MachineInstrBuilder MIBuilder; |
271 | unsigned NewOpcode; |
272 | LLVM_DEBUG(dbgs() << " final block mask: " << (unsigned)BlockMask << "\n" ); |
273 | if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) { |
274 | LLVM_DEBUG(dbgs() << " folding VCMP into VPST: " ; VCMP->dump()); |
275 | MIBuilder = BuildMI(BB&: Block, I: MI, MIMD: DL, MCID: TII->get(Opcode: NewOpcode)); |
276 | MIBuilder.addImm(Val: (uint64_t)BlockMask); |
277 | MIBuilder.add(MO: VCMP->getOperand(i: 1)); |
278 | MIBuilder.add(MO: VCMP->getOperand(i: 2)); |
279 | MIBuilder.add(MO: VCMP->getOperand(i: 3)); |
280 | |
281 | // We need to remove any kill flags between the original VCMP and the new |
282 | // insertion point. |
283 | for (MachineInstr &MII : |
284 | make_range(x: VCMP->getIterator(), y: MI->getIterator())) { |
285 | MII.clearRegisterKills(Reg: VCMP->getOperand(i: 1).getReg(), RegInfo: TRI); |
286 | MII.clearRegisterKills(Reg: VCMP->getOperand(i: 2).getReg(), RegInfo: TRI); |
287 | } |
288 | |
289 | VCMP->eraseFromParent(); |
290 | } else { |
291 | MIBuilder = BuildMI(BB&: Block, I: MI, MIMD: DL, MCID: TII->get(Opcode: ARM::MVE_VPST)); |
292 | MIBuilder.addImm(Val: (uint64_t)BlockMask); |
293 | } |
294 | |
295 | // Erase all dead instructions (VPNOT's). Do that now so that they do not |
296 | // mess with the bundle creation. |
297 | for (MachineInstr *DeadMI : DeadInstructions) |
298 | DeadMI->eraseFromParent(); |
299 | DeadInstructions.clear(); |
300 | |
301 | finalizeBundle( |
302 | MBB&: Block, FirstMI: MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), LastMI: MBIter); |
303 | |
304 | Modified = true; |
305 | } |
306 | |
307 | return Modified; |
308 | } |
309 | |
310 | bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { |
311 | const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); |
312 | |
313 | if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) |
314 | return false; |
315 | |
316 | TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo()); |
317 | TRI = STI.getRegisterInfo(); |
318 | |
319 | LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n" |
320 | << "********** Function: " << Fn.getName() << '\n'); |
321 | |
322 | bool Modified = false; |
323 | for (MachineBasicBlock &MBB : Fn) |
324 | Modified |= InsertVPTBlocks(Block&: MBB); |
325 | |
326 | LLVM_DEBUG(dbgs() << "**************************************\n" ); |
327 | return Modified; |
328 | } |
329 | |
330 | /// createMVEVPTBlock - Returns an instance of the MVE VPT block |
331 | /// insertion pass. |
332 | FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); } |
333 | |