1 | //===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "ARM.h" |
10 | #include "ARMMachineFunctionInfo.h" |
11 | #include "ARMSubtarget.h" |
12 | #include "MCTargetDesc/ARMBaseInfo.h" |
13 | #include "Thumb2InstrInfo.h" |
14 | #include "llvm/ADT/SmallVector.h" |
15 | #include "llvm/ADT/Statistic.h" |
16 | #include "llvm/ADT/StringRef.h" |
17 | #include "llvm/CodeGen/MachineBasicBlock.h" |
18 | #include "llvm/CodeGen/MachineFunction.h" |
19 | #include "llvm/CodeGen/MachineFunctionPass.h" |
20 | #include "llvm/CodeGen/MachineInstr.h" |
21 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
22 | #include "llvm/CodeGen/MachineInstrBundle.h" |
23 | #include "llvm/CodeGen/MachineOperand.h" |
24 | #include "llvm/IR/DebugLoc.h" |
25 | #include "llvm/MC/MCInstrDesc.h" |
26 | #include "llvm/MC/MCRegisterInfo.h" |
27 | #include "llvm/Support/Debug.h" |
28 | #include <cassert> |
29 | #include <new> |
30 | |
31 | using namespace llvm; |
32 | |
33 | #define DEBUG_TYPE "arm-mve-vpt" |
34 | |
35 | namespace { |
36 | class MVEVPTBlock : public MachineFunctionPass { |
37 | public: |
38 | static char ID; |
39 | const Thumb2InstrInfo *TII; |
40 | const TargetRegisterInfo *TRI; |
41 | |
42 | MVEVPTBlock() : MachineFunctionPass(ID) {} |
43 | |
44 | bool runOnMachineFunction(MachineFunction &Fn) override; |
45 | |
46 | MachineFunctionProperties getRequiredProperties() const override { |
47 | return MachineFunctionProperties().set( |
48 | MachineFunctionProperties::Property::NoVRegs); |
49 | } |
50 | |
51 | StringRef getPassName() const override { |
52 | return "MVE VPT block insertion pass" ; |
53 | } |
54 | |
55 | private: |
56 | bool InsertVPTBlocks(MachineBasicBlock &MBB); |
57 | }; |
58 | |
59 | char MVEVPTBlock::ID = 0; |
60 | |
61 | } // end anonymous namespace |
62 | |
63 | INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass" , false, false) |
64 | |
65 | static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, |
66 | const TargetRegisterInfo *TRI, |
67 | unsigned &NewOpcode) { |
68 | // Search backwards to the instruction that defines VPR. This may or not |
69 | // be a VCMP, we check that after this loop. If we find another instruction |
70 | // that reads cpsr, we return nullptr. |
71 | MachineBasicBlock::iterator CmpMI = MI; |
72 | while (CmpMI != MI->getParent()->begin()) { |
73 | --CmpMI; |
74 | if (CmpMI->modifiesRegister(Reg: ARM::VPR, TRI)) |
75 | break; |
76 | if (CmpMI->readsRegister(Reg: ARM::VPR, TRI)) |
77 | break; |
78 | } |
79 | |
80 | if (CmpMI == MI) |
81 | return nullptr; |
82 | NewOpcode = VCMPOpcodeToVPT(Opcode: CmpMI->getOpcode()); |
83 | if (NewOpcode == 0) |
84 | return nullptr; |
85 | |
86 | // Search forward from CmpMI to MI, checking if either register was def'd |
87 | if (registerDefinedBetween(Reg: CmpMI->getOperand(i: 1).getReg(), From: std::next(x: CmpMI), |
88 | To: MI, TRI)) |
89 | return nullptr; |
90 | if (registerDefinedBetween(Reg: CmpMI->getOperand(i: 2).getReg(), From: std::next(x: CmpMI), |
91 | To: MI, TRI)) |
92 | return nullptr; |
93 | return &*CmpMI; |
94 | } |
95 | |
96 | // Advances Iter past a block of predicated instructions. |
97 | // Returns true if it successfully skipped the whole block of predicated |
98 | // instructions. Returns false when it stopped early (due to MaxSteps), or if |
99 | // Iter didn't point to a predicated instruction. |
100 | static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter, |
101 | MachineBasicBlock::instr_iterator EndIter, |
102 | unsigned MaxSteps, |
103 | unsigned &) { |
104 | ARMVCC::VPTCodes NextPred = ARMVCC::None; |
105 | Register PredReg; |
106 | NumInstrsSteppedOver = 0; |
107 | |
108 | while (Iter != EndIter) { |
109 | if (Iter->isDebugInstr()) { |
110 | // Skip debug instructions |
111 | ++Iter; |
112 | continue; |
113 | } |
114 | |
115 | NextPred = getVPTInstrPredicate(MI: *Iter, PredReg); |
116 | assert(NextPred != ARMVCC::Else && |
117 | "VPT block pass does not expect Else preds" ); |
118 | if (NextPred == ARMVCC::None || MaxSteps == 0) |
119 | break; |
120 | --MaxSteps; |
121 | ++Iter; |
122 | ++NumInstrsSteppedOver; |
123 | }; |
124 | |
125 | return NumInstrsSteppedOver != 0 && |
126 | (NextPred == ARMVCC::None || Iter == EndIter); |
127 | } |
128 | |
129 | // Returns true if at least one instruction in the range [Iter, End) defines |
130 | // or kills VPR. |
131 | static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter, |
132 | MachineBasicBlock::iterator End) { |
133 | for (; Iter != End; ++Iter) |
134 | if (Iter->definesRegister(Reg: ARM::VPR, /*TRI=*/nullptr) || |
135 | Iter->killsRegister(Reg: ARM::VPR, /*TRI=*/nullptr)) |
136 | return true; |
137 | return false; |
138 | } |
139 | |
140 | // Creates a T, TT, TTT or TTTT BlockMask depending on BlockSize. |
141 | static ARM::PredBlockMask GetInitialBlockMask(unsigned BlockSize) { |
142 | switch (BlockSize) { |
143 | case 1: |
144 | return ARM::PredBlockMask::T; |
145 | case 2: |
146 | return ARM::PredBlockMask::TT; |
147 | case 3: |
148 | return ARM::PredBlockMask::TTT; |
149 | case 4: |
150 | return ARM::PredBlockMask::TTTT; |
151 | default: |
152 | llvm_unreachable("Invalid BlockSize!" ); |
153 | } |
154 | } |
155 | |
156 | // Given an iterator (Iter) that points at an instruction with a "Then" |
157 | // predicate, tries to create the largest block of continuous predicated |
158 | // instructions possible, and returns the VPT Block Mask of that block. |
159 | // |
160 | // This will try to perform some minor optimization in order to maximize the |
161 | // size of the block. |
162 | static ARM::PredBlockMask |
163 | CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, |
164 | MachineBasicBlock::instr_iterator EndIter, |
165 | SmallVectorImpl<MachineInstr *> &DeadInstructions) { |
166 | MachineBasicBlock::instr_iterator BlockBeg = Iter; |
167 | (void)BlockBeg; |
168 | assert(getVPTInstrPredicate(*Iter) == ARMVCC::Then && |
169 | "Expected a Predicated Instruction" ); |
170 | |
171 | LLVM_DEBUG(dbgs() << "VPT block created for: " ; Iter->dump()); |
172 | |
173 | unsigned BlockSize; |
174 | StepOverPredicatedInstrs(Iter, EndIter, MaxSteps: 4, NumInstrsSteppedOver&: BlockSize); |
175 | |
176 | LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter = |
177 | std::next(BlockBeg); |
178 | AddedInstIter != Iter; ++AddedInstIter) { |
179 | if (AddedInstIter->isDebugInstr()) |
180 | continue; |
181 | dbgs() << " adding: " ; |
182 | AddedInstIter->dump(); |
183 | }); |
184 | |
185 | // Generate the initial BlockMask |
186 | ARM::PredBlockMask BlockMask = GetInitialBlockMask(BlockSize); |
187 | |
188 | // Remove VPNOTs while there's still room in the block, so we can make the |
189 | // largest block possible. |
190 | ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else; |
191 | while (BlockSize < 4 && Iter != EndIter && |
192 | Iter->getOpcode() == ARM::MVE_VPNOT) { |
193 | |
194 | // Try to skip all of the predicated instructions after the VPNOT, stopping |
195 | // after (4 - BlockSize). If we can't skip them all, stop. |
196 | unsigned ElseInstCnt = 0; |
197 | MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(x: Iter); |
198 | if (!StepOverPredicatedInstrs(Iter&: VPNOTBlockEndIter, EndIter, MaxSteps: (4 - BlockSize), |
199 | NumInstrsSteppedOver&: ElseInstCnt)) |
200 | break; |
201 | |
202 | // Check if this VPNOT can be removed or not: It can only be removed if at |
203 | // least one of the predicated instruction that follows it kills or sets |
204 | // VPR. |
205 | if (!IsVPRDefinedOrKilledByBlock(Iter, End: VPNOTBlockEndIter)) |
206 | break; |
207 | |
208 | LLVM_DEBUG(dbgs() << " removing VPNOT: " ; Iter->dump()); |
209 | |
210 | // Record the new size of the block |
211 | BlockSize += ElseInstCnt; |
212 | assert(BlockSize <= 4 && "Block is too large!" ); |
213 | |
214 | // Record the VPNot to remove it later. |
215 | DeadInstructions.push_back(Elt: &*Iter); |
216 | ++Iter; |
217 | |
218 | // Replace the predicates of the instructions we're adding. |
219 | // Note that we are using "Iter" to iterate over the block so we can update |
220 | // it at the same time. |
221 | for (; Iter != VPNOTBlockEndIter; ++Iter) { |
222 | if (Iter->isDebugInstr()) |
223 | continue; |
224 | |
225 | // Find the register in which the predicate is |
226 | int OpIdx = findFirstVPTPredOperandIdx(MI: *Iter); |
227 | assert(OpIdx != -1); |
228 | |
229 | // Change the predicate and update the mask |
230 | Iter->getOperand(i: OpIdx).setImm(CurrentPredicate); |
231 | BlockMask = expandPredBlockMask(BlockMask, Kind: CurrentPredicate); |
232 | |
233 | LLVM_DEBUG(dbgs() << " adding : " ; Iter->dump()); |
234 | } |
235 | |
236 | CurrentPredicate = |
237 | (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then); |
238 | } |
239 | return BlockMask; |
240 | } |
241 | |
242 | bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { |
243 | bool Modified = false; |
244 | MachineBasicBlock::instr_iterator MBIter = Block.instr_begin(); |
245 | MachineBasicBlock::instr_iterator EndIter = Block.instr_end(); |
246 | |
247 | SmallVector<MachineInstr *, 4> DeadInstructions; |
248 | |
249 | while (MBIter != EndIter) { |
250 | MachineInstr *MI = &*MBIter; |
251 | Register PredReg; |
252 | DebugLoc DL = MI->getDebugLoc(); |
253 | |
254 | ARMVCC::VPTCodes Pred = getVPTInstrPredicate(MI: *MI, PredReg); |
255 | |
256 | // The idea of the predicate is that None, Then and Else are for use when |
257 | // handling assembly language: they correspond to the three possible |
258 | // suffixes "", "t" and "e" on the mnemonic. So when instructions are read |
259 | // from assembly source or disassembled from object code, you expect to |
260 | // see a mixture whenever there's a long VPT block. But in code |
261 | // generation, we hope we'll never generate an Else as input to this pass. |
262 | assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds" ); |
263 | |
264 | if (Pred == ARMVCC::None) { |
265 | ++MBIter; |
266 | continue; |
267 | } |
268 | |
269 | ARM::PredBlockMask BlockMask = |
270 | CreateVPTBlock(Iter&: MBIter, EndIter, DeadInstructions); |
271 | |
272 | // Search back for a VCMP that can be folded to create a VPT, or else |
273 | // create a VPST directly |
274 | MachineInstrBuilder MIBuilder; |
275 | unsigned NewOpcode; |
276 | LLVM_DEBUG(dbgs() << " final block mask: " << (unsigned)BlockMask << "\n" ); |
277 | if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) { |
278 | LLVM_DEBUG(dbgs() << " folding VCMP into VPST: " ; VCMP->dump()); |
279 | MIBuilder = BuildMI(BB&: Block, I: MI, MIMD: DL, MCID: TII->get(Opcode: NewOpcode)); |
280 | MIBuilder.addImm(Val: (uint64_t)BlockMask); |
281 | MIBuilder.add(MO: VCMP->getOperand(i: 1)); |
282 | MIBuilder.add(MO: VCMP->getOperand(i: 2)); |
283 | MIBuilder.add(MO: VCMP->getOperand(i: 3)); |
284 | |
285 | // We need to remove any kill flags between the original VCMP and the new |
286 | // insertion point. |
287 | for (MachineInstr &MII : |
288 | make_range(x: VCMP->getIterator(), y: MI->getIterator())) { |
289 | MII.clearRegisterKills(Reg: VCMP->getOperand(i: 1).getReg(), RegInfo: TRI); |
290 | MII.clearRegisterKills(Reg: VCMP->getOperand(i: 2).getReg(), RegInfo: TRI); |
291 | } |
292 | |
293 | VCMP->eraseFromParent(); |
294 | } else { |
295 | MIBuilder = BuildMI(BB&: Block, I: MI, MIMD: DL, MCID: TII->get(Opcode: ARM::MVE_VPST)); |
296 | MIBuilder.addImm(Val: (uint64_t)BlockMask); |
297 | } |
298 | |
299 | // Erase all dead instructions (VPNOT's). Do that now so that they do not |
300 | // mess with the bundle creation. |
301 | for (MachineInstr *DeadMI : DeadInstructions) |
302 | DeadMI->eraseFromParent(); |
303 | DeadInstructions.clear(); |
304 | |
305 | finalizeBundle( |
306 | MBB&: Block, FirstMI: MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), LastMI: MBIter); |
307 | |
308 | Modified = true; |
309 | } |
310 | |
311 | return Modified; |
312 | } |
313 | |
314 | bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { |
315 | const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); |
316 | |
317 | if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) |
318 | return false; |
319 | |
320 | TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo()); |
321 | TRI = STI.getRegisterInfo(); |
322 | |
323 | LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n" |
324 | << "********** Function: " << Fn.getName() << '\n'); |
325 | |
326 | bool Modified = false; |
327 | for (MachineBasicBlock &MBB : Fn) |
328 | Modified |= InsertVPTBlocks(Block&: MBB); |
329 | |
330 | LLVM_DEBUG(dbgs() << "**************************************\n" ); |
331 | return Modified; |
332 | } |
333 | |
334 | /// createMVEVPTBlock - Returns an instance of the MVE VPT block |
335 | /// insertion pass. |
336 | FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); } |
337 | |