1 | //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the Base ARM implementation of the TargetInstrInfo class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "ARMBaseInstrInfo.h" |
14 | #include "ARMBaseRegisterInfo.h" |
15 | #include "ARMConstantPoolValue.h" |
16 | #include "ARMFeatures.h" |
17 | #include "ARMHazardRecognizer.h" |
18 | #include "ARMMachineFunctionInfo.h" |
19 | #include "ARMSubtarget.h" |
20 | #include "MCTargetDesc/ARMAddressingModes.h" |
21 | #include "MCTargetDesc/ARMBaseInfo.h" |
22 | #include "MVETailPredUtils.h" |
23 | #include "llvm/ADT/DenseMap.h" |
24 | #include "llvm/ADT/STLExtras.h" |
25 | #include "llvm/ADT/SmallSet.h" |
26 | #include "llvm/ADT/SmallVector.h" |
27 | #include "llvm/CodeGen/CFIInstBuilder.h" |
28 | #include "llvm/CodeGen/DFAPacketizer.h" |
29 | #include "llvm/CodeGen/LiveVariables.h" |
30 | #include "llvm/CodeGen/MachineBasicBlock.h" |
31 | #include "llvm/CodeGen/MachineConstantPool.h" |
32 | #include "llvm/CodeGen/MachineFrameInfo.h" |
33 | #include "llvm/CodeGen/MachineFunction.h" |
34 | #include "llvm/CodeGen/MachineInstr.h" |
35 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
36 | #include "llvm/CodeGen/MachineMemOperand.h" |
37 | #include "llvm/CodeGen/MachineModuleInfo.h" |
38 | #include "llvm/CodeGen/MachineOperand.h" |
39 | #include "llvm/CodeGen/MachinePipeliner.h" |
40 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
41 | #include "llvm/CodeGen/MachineScheduler.h" |
42 | #include "llvm/CodeGen/MultiHazardRecognizer.h" |
43 | #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" |
44 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
45 | #include "llvm/CodeGen/TargetInstrInfo.h" |
46 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
47 | #include "llvm/CodeGen/TargetSchedule.h" |
48 | #include "llvm/IR/Attributes.h" |
49 | #include "llvm/IR/DebugLoc.h" |
50 | #include "llvm/IR/Function.h" |
51 | #include "llvm/IR/GlobalValue.h" |
52 | #include "llvm/IR/Module.h" |
53 | #include "llvm/MC/MCAsmInfo.h" |
54 | #include "llvm/MC/MCInstrDesc.h" |
55 | #include "llvm/MC/MCInstrItineraries.h" |
56 | #include "llvm/Support/BranchProbability.h" |
57 | #include "llvm/Support/Casting.h" |
58 | #include "llvm/Support/Compiler.h" |
59 | #include "llvm/Support/Debug.h" |
60 | #include "llvm/Support/ErrorHandling.h" |
61 | #include "llvm/Support/raw_ostream.h" |
62 | #include "llvm/Target/TargetMachine.h" |
63 | #include <algorithm> |
64 | #include <cassert> |
65 | #include <cstdint> |
66 | #include <iterator> |
67 | #include <new> |
68 | #include <utility> |
69 | #include <vector> |
70 | |
71 | using namespace llvm; |
72 | |
73 | #define DEBUG_TYPE "arm-instrinfo" |
74 | |
75 | #define GET_INSTRINFO_CTOR_DTOR |
76 | #include "ARMGenInstrInfo.inc" |
77 | |
78 | /// ARM_MLxEntry - Record information about MLA / MLS instructions. |
79 | struct ARM_MLxEntry { |
80 | uint16_t MLxOpc; // MLA / MLS opcode |
81 | uint16_t MulOpc; // Expanded multiplication opcode |
82 | uint16_t AddSubOpc; // Expanded add / sub opcode |
83 | bool NegAcc; // True if the acc is negated before the add / sub. |
84 | bool HasLane; // True if instruction has an extra "lane" operand. |
85 | }; |
86 | |
87 | static const ARM_MLxEntry ARM_MLxTable[] = { |
88 | // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane |
89 | // fp scalar ops |
  { ARM::VMLAS,    ARM::VMULS,    ARM::VADDS,  false, false },
  { ARM::VMLSS,    ARM::VMULS,    ARM::VSUBS,  false, false },
  { ARM::VMLAD,    ARM::VMULD,    ARM::VADDD,  false, false },
  { ARM::VMLSD,    ARM::VMULD,    ARM::VSUBD,  false, false },
  { ARM::VNMLAS,   ARM::VNMULS,   ARM::VSUBS,  true,  false },
  { ARM::VNMLSS,   ARM::VMULS,    ARM::VSUBS,  true,  false },
  { ARM::VNMLAD,   ARM::VNMULD,   ARM::VSUBD,  true,  false },
  { ARM::VNMLSD,   ARM::VMULD,    ARM::VSUBD,  true,  false },

  // fp SIMD ops
  { ARM::VMLAfd,   ARM::VMULfd,   ARM::VADDfd, false, false },
  { ARM::VMLSfd,   ARM::VMULfd,   ARM::VSUBfd, false, false },
  { ARM::VMLAfq,   ARM::VMULfq,   ARM::VADDfq, false, false },
  { ARM::VMLSfq,   ARM::VMULfq,   ARM::VSUBfq, false, false },
  { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true  },
  { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true  },
  { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true  },
  { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true  },
108 | }; |
109 | |
110 | ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) |
111 | : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), |
112 | Subtarget(STI) { |
113 | for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) { |
    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
      llvm_unreachable("Duplicated entries?");
    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
118 | } |
119 | } |
120 | |
121 | // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl |
122 | // currently defaults to no prepass hazard recognizer. |
123 | ScheduleHazardRecognizer * |
124 | ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, |
125 | const ScheduleDAG *DAG) const { |
126 | if (usePreRAHazardRecognizer()) { |
127 | const InstrItineraryData *II = |
128 | static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); |
    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
130 | } |
131 | return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); |
132 | } |
133 | |
134 | // Called during: |
135 | // - pre-RA scheduling |
136 | // - post-RA scheduling when FeatureUseMISched is set |
137 | ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer( |
138 | const InstrItineraryData *II, const ScheduleDAGMI *DAG) const { |
139 | MultiHazardRecognizer *MHR = new MultiHazardRecognizer(); |
140 | |
141 | // We would like to restrict this hazard recognizer to only |
142 | // post-RA scheduling; we can tell that we're post-RA because we don't |
143 | // track VRegLiveness. |
144 | // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM |
145 | // banks banked on bit 2. Assume that TCMs are in use. |
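  // (The 0x4 mask passed below selects address bit 2, matching that bank
  // split.)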
146 | if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness()) |
147 | MHR->AddHazardRecognizer( |
        std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
149 | |
150 | // Not inserting ARMHazardRecognizerFPMLx because that would change |
151 | // legacy behavior |
152 | |
153 | auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG); |
154 | MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR)); |
155 | return MHR; |
156 | } |
157 | |
158 | // Called during post-RA scheduling when FeatureUseMISched is not set |
159 | ScheduleHazardRecognizer *ARMBaseInstrInfo:: |
160 | CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, |
161 | const ScheduleDAG *DAG) const { |
162 | MultiHazardRecognizer *MHR = new MultiHazardRecognizer(); |
163 | |
164 | if (Subtarget.isThumb2() || Subtarget.hasVFP2Base()) |
165 | MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>()); |
166 | |
167 | auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); |
168 | if (BHR) |
169 | MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR)); |
170 | return MHR; |
171 | } |
172 | |
173 | // Branch analysis. |
174 | // Cond vector output format: |
175 | // 0 elements indicates an unconditional branch |
176 | // 2 elements indicates a conditional branch; the elements are |
177 | // the condition to check and the CPSR. |
178 | // 3 elements indicates a hardware loop end; the elements |
179 | // are the opcode, the operand value to test, and a dummy |
180 | // operand used to pad out to 3 operands. |
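// For example, a Bcc taken when EQ yields TBB set to the branch target and
// Cond == { ARMCC::EQ (as an immediate), CPSR }.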
181 | bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, |
182 | MachineBasicBlock *&TBB, |
183 | MachineBasicBlock *&FBB, |
184 | SmallVectorImpl<MachineOperand> &Cond, |
185 | bool AllowModify) const { |
186 | TBB = nullptr; |
187 | FBB = nullptr; |
188 | |
189 | MachineBasicBlock::instr_iterator I = MBB.instr_end(); |
190 | if (I == MBB.instr_begin()) |
191 | return false; // Empty blocks are easy. |
192 | --I; |
193 | |
194 | // Walk backwards from the end of the basic block until the branch is |
195 | // analyzed or we give up. |
  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
197 | // Flag to be raised on unanalyzeable instructions. This is useful in cases |
198 | // where we want to clean up on the end of the basic block before we bail |
199 | // out. |
200 | bool CantAnalyze = false; |
201 | |
202 | // Skip over DEBUG values, predicated nonterminators and speculation |
203 | // barrier terminators. |
204 | while (I->isDebugInstr() || !I->isTerminator() || |
           isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
           I->getOpcode() == ARM::t2DoLoopStartTP) {
207 | if (I == MBB.instr_begin()) |
208 | return false; |
209 | --I; |
210 | } |
211 | |
    if (isIndirectBranchOpcode(I->getOpcode()) ||
        isJumpTableBranchOpcode(I->getOpcode())) {
214 | // Indirect branches and jump tables can't be analyzed, but we still want |
215 | // to clean up any instructions at the tail of the basic block. |
216 | CantAnalyze = true; |
    } else if (isUncondBranchOpcode(I->getOpcode())) {
      TBB = I->getOperand(0).getMBB();
    } else if (isCondBranchOpcode(I->getOpcode())) {
220 | // Bail out if we encounter multiple conditional branches. |
221 | if (!Cond.empty()) |
222 | return true; |
223 | |
      assert(!FBB && "FBB should have been null.");
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(I->getOperand(1));
      Cond.push_back(I->getOperand(2));
229 | } else if (I->isReturn()) { |
230 | // Returns can't be analyzed, but we should run cleanup. |
231 | CantAnalyze = true; |
232 | } else if (I->getOpcode() == ARM::t2LoopEnd && |
233 | MBB.getParent() |
234 | ->getSubtarget<ARMSubtarget>() |
235 | .enableMachinePipeliner()) { |
236 | if (!Cond.empty()) |
237 | return true; |
238 | FBB = TBB; |
      TBB = I->getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
      Cond.push_back(I->getOperand(0));
      Cond.push_back(MachineOperand::CreateImm(0));
243 | } else { |
      // We encountered some other unrecognized terminator. Bail out
      // immediately.
245 | return true; |
246 | } |
247 | |
248 | // Cleanup code - to be run for unpredicated unconditional branches and |
249 | // returns. |
    if (!isPredicated(*I) &&
        (isUncondBranchOpcode(I->getOpcode()) ||
         isIndirectBranchOpcode(I->getOpcode()) ||
         isJumpTableBranchOpcode(I->getOpcode()) ||
254 | I->isReturn())) { |
      // Forget any previous conditional branch information - it no longer
      // applies.
256 | Cond.clear(); |
257 | FBB = nullptr; |
258 | |
259 | // If we can modify the function, delete everything below this |
260 | // unconditional branch. |
261 | if (AllowModify) { |
        MachineBasicBlock::iterator DI = std::next(I);
263 | while (DI != MBB.instr_end()) { |
264 | MachineInstr &InstToDelete = *DI; |
265 | ++DI; |
266 | // Speculation barriers must not be deleted. |
          if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
268 | continue; |
269 | InstToDelete.eraseFromParent(); |
270 | } |
271 | } |
272 | } |
273 | |
274 | if (CantAnalyze) { |
275 | // We may not be able to analyze the block, but we could still have |
276 | // an unconditional branch as the last instruction in the block, which |
277 | // just branches to layout successor. If this is the case, then just |
278 | // remove it if we're allowed to make modifications. |
      if (AllowModify && !isPredicated(MBB.back()) &&
          isUncondBranchOpcode(MBB.back().getOpcode()) &&
          TBB && MBB.isLayoutSuccessor(TBB))
282 | removeBranch(MBB); |
283 | return true; |
284 | } |
285 | |
286 | if (I == MBB.instr_begin()) |
287 | return false; |
288 | |
289 | --I; |
290 | } |
291 | |
292 | // We made it past the terminators without bailing out - we must have |
293 | // analyzed this branch successfully. |
294 | return false; |
295 | } |
296 | |
297 | unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, |
298 | int *BytesRemoved) const { |
  assert(!BytesRemoved && "code size not handled");
300 | |
301 | MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); |
302 | if (I == MBB.end()) |
303 | return 0; |
304 | |
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
307 | return 0; |
308 | |
309 | // Remove the branch. |
310 | I->eraseFromParent(); |
311 | |
312 | I = MBB.end(); |
313 | |
314 | if (I == MBB.begin()) return 1; |
315 | --I; |
  if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
317 | return 1; |
318 | |
319 | // Remove the branch. |
320 | I->eraseFromParent(); |
321 | return 2; |
322 | } |
323 | |
324 | unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, |
325 | MachineBasicBlock *TBB, |
326 | MachineBasicBlock *FBB, |
327 | ArrayRef<MachineOperand> Cond, |
328 | const DebugLoc &DL, |
329 | int *BytesAdded) const { |
  assert(!BytesAdded && "code size not handled");
331 | ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); |
332 | int BOpc = !AFI->isThumbFunction() |
333 | ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); |
334 | int BccOpc = !AFI->isThumbFunction() |
335 | ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); |
336 | bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function(); |
337 | |
338 | // Shouldn't be a fall through. |
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
         "ARM branch conditions have two or three components!");
342 | |
343 | // For conditional branches, we use addOperand to preserve CPSR flags. |
344 | |
345 | if (!FBB) { |
346 | if (Cond.empty()) { // Unconditional branch? |
347 | if (isThumb) |
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
      else
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    } else if (Cond.size() == 2) {
      BuildMI(&MBB, DL, get(BccOpc))
          .addMBB(TBB)
          .addImm(Cond[0].getImm())
          .add(Cond[1]);
    } else
      BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
358 | return 1; |
359 | } |
360 | |
361 | // Two-way conditional branch. |
362 | if (Cond.size() == 2) |
    BuildMI(&MBB, DL, get(BccOpc))
        .addMBB(TBB)
        .addImm(Cond[0].getImm())
        .add(Cond[1]);
  else if (Cond.size() == 3)
    BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
  if (isThumb)
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
  else
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
373 | return 2; |
374 | } |
375 | |
376 | bool ARMBaseInstrInfo:: |
377 | reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { |
378 | if (Cond.size() == 2) { |
379 | ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); |
380 | Cond[0].setImm(ARMCC::getOppositeCondition(CC)); |
381 | return false; |
382 | } |
383 | return true; |
384 | } |
385 | |
386 | bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const { |
387 | if (MI.isBundle()) { |
388 | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
389 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
390 | while (++I != E && I->isInsideBundle()) { |
391 | int PIdx = I->findFirstPredOperandIdx(); |
      if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
393 | return true; |
394 | } |
395 | return false; |
396 | } |
397 | |
398 | int PIdx = MI.findFirstPredOperandIdx(); |
  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
400 | } |
401 | |
402 | std::string ARMBaseInstrInfo::createMIROperandComment( |
403 | const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, |
404 | const TargetRegisterInfo *TRI) const { |
405 | |
406 | // First, let's see if there is a generic comment for this operand |
  std::string GenericComment =
408 | TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI); |
409 | if (!GenericComment.empty()) |
410 | return GenericComment; |
411 | |
412 | // If not, check if we have an immediate operand. |
413 | if (!Op.isImm()) |
414 | return std::string(); |
415 | |
416 | // And print its corresponding condition code if the immediate is a |
417 | // predicate. |
418 | int FirstPredOp = MI.findFirstPredOperandIdx(); |
419 | if (FirstPredOp != (int) OpIdx) |
420 | return std::string(); |
421 | |
  std::string CC = "CC::";
  CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
424 | return CC; |
425 | } |
426 | |
427 | bool ARMBaseInstrInfo::PredicateInstruction( |
428 | MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { |
429 | unsigned Opc = MI.getOpcode(); |
430 | if (isUncondBranchOpcode(Opc)) { |
    MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Pred[0].getImm())
        .addReg(Pred[1].getReg());
435 | return true; |
436 | } |
437 | |
438 | int PIdx = MI.findFirstPredOperandIdx(); |
439 | if (PIdx != -1) { |
    MachineOperand &PMO = MI.getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
443 | |
444 | // Thumb 1 arithmetic instructions do not set CPSR when executed inside an |
445 | // IT block. This affects how they are printed. |
446 | const MCInstrDesc &MCID = MI.getDesc(); |
447 | if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) { |
      assert(MCID.operands()[1].isOptionalDef() &&
             "CPSR def isn't expected operand");
      assert((MI.getOperand(1).isDead() ||
              MI.getOperand(1).getReg() != ARM::CPSR) &&
             "if conversion tried to stop defining used CPSR");
      MI.getOperand(1).setReg(ARM::NoRegister);
454 | } |
455 | |
456 | return true; |
457 | } |
458 | return false; |
459 | } |
460 | |
461 | bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, |
462 | ArrayRef<MachineOperand> Pred2) const { |
463 | if (Pred1.size() > 2 || Pred2.size() > 2) |
464 | return false; |
465 | |
466 | ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); |
467 | ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); |
468 | if (CC1 == CC2) |
469 | return true; |
470 | |
471 | switch (CC1) { |
472 | default: |
473 | return false; |
474 | case ARMCC::AL: |
475 | return true; |
476 | case ARMCC::HS: |
477 | return CC2 == ARMCC::HI; |
478 | case ARMCC::LS: |
479 | return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; |
480 | case ARMCC::GE: |
481 | return CC2 == ARMCC::GT; |
482 | case ARMCC::LE: |
483 | return CC2 == ARMCC::LT; |
484 | } |
485 | } |
486 | |
487 | bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI, |
488 | std::vector<MachineOperand> &Pred, |
489 | bool SkipDead) const { |
490 | bool Found = false; |
491 | for (const MachineOperand &MO : MI.operands()) { |
    bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
493 | bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR; |
494 | if (ClobbersCPSR || IsCPSR) { |
495 | |
496 | // Filter out T1 instructions that have a dead CPSR, |
497 | // allowing IT blocks to be generated containing T1 instructions |
498 | const MCInstrDesc &MCID = MI.getDesc(); |
499 | if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() && |
500 | SkipDead) |
501 | continue; |
502 | |
      Pred.push_back(MO);
504 | Found = true; |
505 | } |
506 | } |
507 | |
508 | return Found; |
509 | } |
510 | |
511 | bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) { |
512 | for (const auto &MO : MI.operands()) |
513 | if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead()) |
514 | return true; |
515 | return false; |
516 | } |
517 | |
518 | static bool isEligibleForITBlock(const MachineInstr *MI) { |
519 | switch (MI->getOpcode()) { |
520 | default: return true; |
521 | case ARM::tADC: // ADC (register) T1 |
522 | case ARM::tADDi3: // ADD (immediate) T1 |
523 | case ARM::tADDi8: // ADD (immediate) T2 |
524 | case ARM::tADDrr: // ADD (register) T1 |
525 | case ARM::tAND: // AND (register) T1 |
526 | case ARM::tASRri: // ASR (immediate) T1 |
527 | case ARM::tASRrr: // ASR (register) T1 |
528 | case ARM::tBIC: // BIC (register) T1 |
529 | case ARM::tEOR: // EOR (register) T1 |
530 | case ARM::tLSLri: // LSL (immediate) T1 |
531 | case ARM::tLSLrr: // LSL (register) T1 |
532 | case ARM::tLSRri: // LSR (immediate) T1 |
533 | case ARM::tLSRrr: // LSR (register) T1 |
534 | case ARM::tMUL: // MUL T1 |
535 | case ARM::tMVN: // MVN (register) T1 |
536 | case ARM::tORR: // ORR (register) T1 |
537 | case ARM::tROR: // ROR (register) T1 |
538 | case ARM::tRSB: // RSB (immediate) T1 |
539 | case ARM::tSBC: // SBC (register) T1 |
540 | case ARM::tSUBi3: // SUB (immediate) T1 |
541 | case ARM::tSUBi8: // SUB (immediate) T2 |
542 | case ARM::tSUBrr: // SUB (register) T1 |
    return !ARMBaseInstrInfo::isCPSRDefined(*MI);
544 | } |
545 | } |
546 | |
547 | /// isPredicable - Return true if the specified instruction can be predicated. |
548 | /// By default, this returns true for every instruction with a |
549 | /// PredicateOperand. |
550 | bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const { |
551 | if (!MI.isPredicable()) |
552 | return false; |
553 | |
554 | if (MI.isBundle()) |
555 | return false; |
556 | |
  if (!isEligibleForITBlock(&MI))
558 | return false; |
559 | |
560 | const MachineFunction *MF = MI.getParent()->getParent(); |
561 | const ARMFunctionInfo *AFI = |
562 | MF->getInfo<ARMFunctionInfo>(); |
563 | |
564 | // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM. |
565 | // In their ARM encoding, they can't be encoded in a conditional form. |
566 | if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) |
567 | return false; |
568 | |
569 | // Make indirect control flow changes unpredicable when SLS mitigation is |
570 | // enabled. |
571 | const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>(); |
572 | if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI)) |
573 | return false; |
574 | if (ST.hardenSlsBlr() && isIndirectCall(MI)) |
575 | return false; |
576 | |
577 | if (AFI->isThumb2Function()) { |
578 | if (getSubtarget().restrictIT()) |
      return isV8EligibleForIT(&MI);
580 | } |
581 | |
582 | return true; |
583 | } |
584 | |
585 | namespace llvm { |
586 | |
587 | template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) { |
588 | for (const MachineOperand &MO : MI->operands()) { |
589 | if (!MO.isReg() || MO.isUndef() || MO.isUse()) |
590 | continue; |
591 | if (MO.getReg() != ARM::CPSR) |
592 | continue; |
593 | if (!MO.isDead()) |
594 | return false; |
595 | } |
596 | // all definitions of CPSR are dead |
597 | return true; |
598 | } |
599 | |
600 | } // end namespace llvm |
601 | |
602 | /// GetInstSize - Return the size of the specified MachineInstr. |
603 | /// |
604 | unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { |
605 | const MachineBasicBlock &MBB = *MI.getParent(); |
606 | const MachineFunction *MF = MBB.getParent(); |
607 | const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); |
608 | |
609 | const MCInstrDesc &MCID = MI.getDesc(); |
610 | |
611 | switch (MI.getOpcode()) { |
612 | default: |
613 | // Return the size specified in .td file. If there's none, return 0, as we |
614 | // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2 |
615 | // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in |
616 | // contrast to AArch64 instructions which have a default size of 4 bytes for |
617 | // example. |
618 | return MCID.getSize(); |
619 | case TargetOpcode::BUNDLE: |
620 | return getInstBundleLength(MI); |
621 | case ARM::CONSTPOOL_ENTRY: |
622 | case ARM::JUMPTABLE_INSTS: |
623 | case ARM::JUMPTABLE_ADDRS: |
624 | case ARM::JUMPTABLE_TBB: |
625 | case ARM::JUMPTABLE_TBH: |
626 | // If this machine instr is a constant pool entry, its size is recorded as |
627 | // operand #2. |
    return MI.getOperand(2).getImm();
  case ARM::SPACE:
    return MI.getOperand(1).getImm();
631 | case ARM::INLINEASM: |
632 | case ARM::INLINEASM_BR: { |
633 | // If this machine instr is an inline asm, measure it. |
    unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
    if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
      Size = alignTo(Size, 4);
637 | return Size; |
638 | } |
639 | } |
640 | } |
641 | |
642 | unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const { |
643 | unsigned Size = 0; |
644 | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
645 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
646 | while (++I != E && I->isInsideBundle()) { |
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
649 | } |
650 | return Size; |
651 | } |
652 | |
653 | void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB, |
654 | MachineBasicBlock::iterator I, |
655 | MCRegister DestReg, bool KillSrc, |
656 | const ARMSubtarget &Subtarget) const { |
657 | unsigned Opc = Subtarget.isThumb() |
658 | ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR) |
659 | : ARM::MRS; |
660 | |
661 | MachineInstrBuilder MIB = |
      BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
663 | |
664 | // There is only 1 A/R class MRS instruction, and it always refers to |
665 | // APSR. However, there are lots of other possibilities on M-class cores. |
666 | if (Subtarget.isMClass()) |
    MIB.addImm(0x800);
668 | |
  MIB.add(predOps(ARMCC::AL))
      .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
671 | } |
672 | |
673 | void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, |
674 | MachineBasicBlock::iterator I, |
675 | MCRegister SrcReg, bool KillSrc, |
676 | const ARMSubtarget &Subtarget) const { |
677 | unsigned Opc = Subtarget.isThumb() |
678 | ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR) |
679 | : ARM::MSR; |
680 | |
  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));

  if (Subtarget.isMClass())
    MIB.addImm(0x800);
  else
    MIB.addImm(8);

  MIB.addReg(SrcReg, getKillRegState(KillSrc))
      .add(predOps(ARMCC::AL))
      .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
691 | } |
692 | |
693 | void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) { |
  MIB.addImm(ARMVCC::None);
  MIB.addReg(0);
  MIB.addReg(0); // tp_reg
697 | } |
698 | |
699 | void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, |
700 | Register DestReg) { |
701 | addUnpredicatedMveVpredNOp(MIB); |
  MIB.addReg(DestReg, RegState::Undef);
703 | } |
704 | |
705 | void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) { |
  MIB.addImm(Cond);
  MIB.addReg(ARM::VPR, RegState::Implicit);
  MIB.addReg(0); // tp_reg
709 | } |
710 | |
711 | void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB, |
712 | unsigned Cond, unsigned Inactive) { |
713 | addPredicatedMveVpredNOp(MIB, Cond); |
  MIB.addReg(Inactive);
715 | } |
716 | |
717 | void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, |
718 | MachineBasicBlock::iterator I, |
719 | const DebugLoc &DL, Register DestReg, |
720 | Register SrcReg, bool KillSrc, |
721 | bool RenamableDest, |
722 | bool RenamableSrc) const { |
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
725 | |
726 | if (GPRDest && GPRSrc) { |
    BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
731 | return; |
732 | } |
733 | |
  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
736 | |
737 | unsigned Opc = 0; |
738 | if (SPRDest && SPRSrc) |
739 | Opc = ARM::VMOVS; |
740 | else if (GPRDest && SPRSrc) |
741 | Opc = ARM::VMOVRS; |
742 | else if (SPRDest && GPRSrc) |
743 | Opc = ARM::VMOVSR; |
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
745 | Opc = ARM::VMOVD; |
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
747 | Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy; |
748 | |
749 | if (Opc) { |
    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::MVE_VORR)
      addUnpredicatedMveVpredROp(MIB, DestReg);
    else if (Opc != ARM::MQPRCopy)
      MIB.add(predOps(ARMCC::AL));
758 | return; |
759 | } |
760 | |
761 | // Handle register classes that require multiple instructions. |
762 | unsigned BeginIdx = 0; |
763 | unsigned SubRegs = 0; |
764 | int Spacing = 1; |
765 | |
766 | // Use VORRq when possible. |
  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
768 | Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR; |
769 | BeginIdx = ARM::qsub_0; |
770 | SubRegs = 2; |
  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
772 | Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR; |
773 | BeginIdx = ARM::qsub_0; |
774 | SubRegs = 4; |
775 | // Fall back to VMOVD. |
  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
777 | Opc = ARM::VMOVD; |
778 | BeginIdx = ARM::dsub_0; |
779 | SubRegs = 2; |
  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
781 | Opc = ARM::VMOVD; |
782 | BeginIdx = ARM::dsub_0; |
783 | SubRegs = 3; |
  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
785 | Opc = ARM::VMOVD; |
786 | BeginIdx = ARM::dsub_0; |
787 | SubRegs = 4; |
  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
789 | Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr; |
790 | BeginIdx = ARM::gsub_0; |
791 | SubRegs = 2; |
  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
793 | Opc = ARM::VMOVD; |
794 | BeginIdx = ARM::dsub_0; |
795 | SubRegs = 2; |
796 | Spacing = 2; |
  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
798 | Opc = ARM::VMOVD; |
799 | BeginIdx = ARM::dsub_0; |
800 | SubRegs = 3; |
801 | Spacing = 2; |
  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
803 | Opc = ARM::VMOVD; |
804 | BeginIdx = ARM::dsub_0; |
805 | SubRegs = 4; |
806 | Spacing = 2; |
  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
808 | !Subtarget.hasFP64()) { |
809 | Opc = ARM::VMOVS; |
810 | BeginIdx = ARM::ssub_0; |
811 | SubRegs = 2; |
812 | } else if (SrcReg == ARM::CPSR) { |
813 | copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget); |
814 | return; |
815 | } else if (DestReg == ARM::CPSR) { |
816 | copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget); |
817 | return; |
818 | } else if (DestReg == ARM::VPR) { |
819 | assert(ARM::GPRRegClass.contains(SrcReg)); |
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
823 | return; |
824 | } else if (SrcReg == ARM::VPR) { |
825 | assert(ARM::GPRRegClass.contains(DestReg)); |
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
829 | return; |
830 | } else if (DestReg == ARM::FPSCR_NZCV) { |
831 | assert(ARM::GPRRegClass.contains(SrcReg)); |
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
835 | return; |
836 | } else if (SrcReg == ARM::FPSCR_NZCV) { |
837 | assert(ARM::GPRRegClass.contains(DestReg)); |
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
841 | return; |
842 | } |
843 | |
  assert(Opc && "Impossible reg-to-reg copy");
845 | |
846 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
847 | MachineInstrBuilder Mov; |
848 | |
849 | // Copy register tuples backward when the first Dest reg overlaps with SrcReg. |
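  // For example, copying d0_d1 into d1_d2 must start from the last D register;
  // a forward copy would overwrite d1 before it is read.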
  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
851 | BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing); |
852 | Spacing = -Spacing; |
853 | } |
854 | #ifndef NDEBUG |
855 | SmallSet<unsigned, 4> DstRegs; |
856 | #endif |
857 | for (unsigned i = 0; i != SubRegs; ++i) { |
    Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
    Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
    assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
    assert(!DstRegs.count(Src) && "destructive vector copy");
    DstRegs.insert(Dst);
#endif
    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
866 | // VORR (NEON or MVE) takes two source operands. |
867 | if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) { |
      Mov.addReg(Src);
869 | } |
870 | // MVE VORR takes predicate operands in place of an ordinary condition. |
871 | if (Opc == ARM::MVE_VORR) |
      addUnpredicatedMveVpredROp(Mov, Dst);
873 | else |
      Mov = Mov.add(predOps(ARMCC::AL));
875 | // MOVr can set CC. |
876 | if (Opc == ARM::MOVr) |
      Mov = Mov.add(condCodeOp());
878 | } |
879 | // Add implicit super-register defs and kills to the last instruction. |
  Mov->addRegisterDefined(DestReg, TRI);
  if (KillSrc)
    Mov->addRegisterKilled(SrcReg, TRI);
883 | } |
884 | |
885 | std::optional<DestSourcePair> |
886 | ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { |
  // VMOVRRD is also a copy instruction, but it requires special handling:
  // it is a more complex form of copy, so we do not consider it here. The
  // isExtractSubregLike MI interface function could be used to recognize
  // such instructions.
892 | // VORRq is considered as a move only if two inputs are |
893 | // the same register. |
894 | if (!MI.isMoveReg() || |
895 | (MI.getOpcode() == ARM::VORRq && |
       MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
897 | return std::nullopt; |
  return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
899 | } |
900 | |
901 | std::optional<ParamLoadedValue> |
902 | ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI, |
903 | Register Reg) const { |
904 | if (auto DstSrcPair = isCopyInstrImpl(MI)) { |
905 | Register DstReg = DstSrcPair->Destination->getReg(); |
906 | |
907 | // TODO: We don't handle cases where the forwarding reg is narrower/wider |
908 | // than the copy registers. Consider for example: |
909 | // |
910 | // s16 = VMOVS s0 |
911 | // s17 = VMOVS s1 |
912 | // call @callee(d0) |
913 | // |
914 | // We'd like to describe the call site value of d0 as d8, but this requires |
915 | // gathering and merging the descriptions for the two VMOVS instructions. |
916 | // |
917 | // We also don't handle the reverse situation, where the forwarding reg is |
918 | // narrower than the copy destination: |
919 | // |
920 | // d8 = VMOVD d0 |
921 | // call @callee(s1) |
922 | // |
923 | // We need to produce a fragment description (the call site value of s1 is |
924 | // /not/ just d8). |
925 | if (DstReg != Reg) |
926 | return std::nullopt; |
927 | } |
928 | return TargetInstrInfo::describeLoadedValue(MI, Reg); |
929 | } |
930 | |
931 | const MachineInstrBuilder & |
932 | ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, |
933 | unsigned SubIdx, unsigned State, |
934 | const TargetRegisterInfo *TRI) const { |
935 | if (!SubIdx) |
    return MIB.addReg(Reg, State);
937 | |
938 | if (Register::isPhysicalRegister(Reg)) |
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
941 | } |
942 | |
943 | void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, |
944 | MachineBasicBlock::iterator I, |
945 | Register SrcReg, bool isKill, int FI, |
946 | const TargetRegisterClass *RC, |
947 | const TargetRegisterInfo *TRI, |
948 | Register VReg, |
949 | MachineInstr::MIFlag Flags) const { |
950 | MachineFunction &MF = *MBB.getParent(); |
951 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
  Align Alignment = MFI.getObjectAlign(FI);

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
      MFI.getObjectSize(FI), Alignment);
957 | |
  switch (TRI->getSpillSize(*RC)) {
  case 2:
    if (ARM::HPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
969 | break; |
970 | case 4: |
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_FPSCR_NZCVQC_off))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
1001 | break; |
1002 | case 8: |
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      if (Subtarget.hasV5TEOps()) {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
      } else {
        // Fallback to STM instruction, which has existed since the dawn of
        // time.
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
                                      .addFrameIndex(FI)
                                      .addMemOperand(MMO)
                                      .add(predOps(ARMCC::AL));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
1029 | break; |
1030 | case 16: |
    if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
      // Use aligned spills if the stack can be realigned.
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
            .addReg(SrcReg, getKillRegState(isKill))
            .addFrameIndex(FI)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      }
    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
               Subtarget.hasMVEIntegerOps()) {
      auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
      MIB.addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO);
      addUnpredicatedMveVpredNOp(MIB);
    } else
      llvm_unreachable("Unknown reg class!");
1057 | break; |
1058 | case 24: |
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      // Use aligned spills if the stack can be realigned.
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
                                          get(ARM::VSTMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
1081 | break; |
1082 | case 32: |
    if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
        ARM::MQQPRRegClass.hasSubClassEq(RC) ||
        ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        // FIXME: It's possible to only store part of the QQ register if the
        // spilled def has a sub-register index.
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else if (Subtarget.hasMVEIntegerOps()) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
            .addReg(SrcReg, getKillRegState(isKill))
            .addFrameIndex(FI)
            .addMemOperand(MMO);
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
                                          get(ARM::VSTMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
1114 | break; |
1115 | case 64: |
    if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
        Subtarget.hasMVEIntegerOps()) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addMemOperand(MMO);
    } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
                                    .addFrameIndex(FI)
                                    .add(predOps(ARMCC::AL))
                                    .addMemOperand(MMO);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
      AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
    } else
      llvm_unreachable("Unknown reg class!");
1137 | break; |
1138 | default: |
    llvm_unreachable("Unknown reg class!");
1140 | } |
1141 | } |
1142 | |
1143 | Register ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI, |
1144 | int &FrameIndex) const { |
1145 | switch (MI.getOpcode()) { |
1146 | default: break; |
1147 | case ARM::STRrs: |
1148 | case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. |
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
        MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
        MI.getOperand(3).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
1154 | } |
1155 | break; |
1156 | case ARM::STRi12: |
1157 | case ARM::t2STRi12: |
1158 | case ARM::tSTRspi: |
1159 | case ARM::VSTRD: |
1160 | case ARM::VSTRS: |
1161 | case ARM::VSTRH: |
1162 | case ARM::VSTR_P0_off: |
1163 | case ARM::VSTR_FPSCR_NZCVQC_off: |
1164 | case ARM::MVE_VSTRWU32: |
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
1169 | } |
1170 | break; |
1171 | case ARM::VST1q64: |
1172 | case ARM::VST1d64TPseudo: |
1173 | case ARM::VST1d64QPseudo: |
    if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
      FrameIndex = MI.getOperand(0).getIndex();
      return MI.getOperand(2).getReg();
1177 | } |
1178 | break; |
1179 | case ARM::VSTMQIA: |
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
1183 | } |
1184 | break; |
1185 | case ARM::MQQPRStore: |
1186 | case ARM::MQQQQPRStore: |
    if (MI.getOperand(1).isFI()) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
1190 | } |
1191 | break; |
1192 | } |
1193 | |
1194 | return 0; |
1195 | } |
1196 | |
1197 | Register ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, |
1198 | int &FrameIndex) const { |
1199 | SmallVector<const MachineMemOperand *, 1> Accesses; |
1200 | if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) && |
1201 | Accesses.size() == 1) { |
1202 | FrameIndex = |
        cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
1204 | ->getFrameIndex(); |
1205 | return true; |
1206 | } |
1207 | return false; |
1208 | } |
1209 | |
1210 | void ARMBaseInstrInfo::loadRegFromStackSlot( |
1211 | MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, |
1212 | int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, |
1213 | Register VReg, MachineInstr::MIFlag Flags) const { |
1214 | DebugLoc DL; |
1215 | if (I != MBB.end()) DL = I->getDebugLoc(); |
1216 | MachineFunction &MF = *MBB.getParent(); |
1217 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
  const Align Alignment = MFI.getObjectAlign(FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
      MFI.getObjectSize(FI), Alignment);

  switch (TRI->getSpillSize(*RC)) {
1224 | case 2: |
1225 | if (ARM::HPRRegClass.hasSubClassEq(RC)) { |
      BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
1233 | break; |
1234 | case 4: |
1235 | if (ARM::GPRRegClass.hasSubClassEq(RC)) { |
      BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDR_FPSCR_NZCVQC_off), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
1261 | break; |
1262 | case 8: |
1263 | if (ARM::DPRRegClass.hasSubClassEq(RC)) { |
      BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB;

      if (Subtarget.hasV5TEOps()) {
        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
      } else {
        // Fallback to LDM instruction, which has existed since the dawn of
        // time.
        MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
                  .addFrameIndex(FI)
                  .addMemOperand(MMO)
                  .add(predOps(ARMCC::AL));
        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
      }

      if (DestReg.isPhysical())
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
1293 | break; |
1294 | case 16: |
1295 | if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) { |
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
            .addFrameIndex(FI)
            .addImm(16)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
            .addFrameIndex(FI)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      }
    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
               Subtarget.hasMVEIntegerOps()) {
      auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
      MIB.addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO);
      addUnpredicatedMveVpredNOp(MIB);
    } else
      llvm_unreachable("Unknown reg class!");
1317 | break; |
1318 | case 24: |
1319 | if (ARM::DTripleRegClass.hasSubClassEq(RC)) { |
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
            .addFrameIndex(FI)
            .addImm(16)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                                      .addFrameIndex(FI)
                                      .addMemOperand(MMO)
                                      .add(predOps(ARMCC::AL));
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        if (DestReg.isPhysical())
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
1340 | break; |
1341 | case 32: |
1342 | if (ARM::QQPRRegClass.hasSubClassEq(RC) || |
1343 | ARM::MQQPRRegClass.hasSubClassEq(RC) || |
1344 | ARM::DQuadRegClass.hasSubClassEq(RC)) { |
1345 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && |
1346 | Subtarget.hasNEON()) { |
1347 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLD1d64QPseudo), DestReg) |
1348 | .addFrameIndex(Idx: FI) |
1349 | .addImm(Val: 16) |
1350 | .addMemOperand(MMO) |
1351 | .add(MOs: predOps(Pred: ARMCC::AL)); |
1352 | } else if (Subtarget.hasMVEIntegerOps()) { |
1353 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::MQQPRLoad), DestReg) |
1354 | .addFrameIndex(Idx: FI) |
1355 | .addMemOperand(MMO); |
1356 | } else { |
1357 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDMDIA)) |
1358 | .addFrameIndex(Idx: FI) |
1359 | .add(MOs: predOps(Pred: ARMCC::AL)) |
1360 | .addMemOperand(MMO); |
1361 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_0, State: RegState::DefineNoRead, TRI); |
1362 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_1, State: RegState::DefineNoRead, TRI); |
1363 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_2, State: RegState::DefineNoRead, TRI); |
1364 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_3, State: RegState::DefineNoRead, TRI); |
1365 | if (DestReg.isPhysical()) |
1366 | MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine); |
1367 | } |
1368 | } else |
1369 | llvm_unreachable("Unknown reg class!" ); |
1370 | break; |
1371 | case 64: |
1372 | if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) && |
1373 | Subtarget.hasMVEIntegerOps()) { |
1374 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::MQQQQPRLoad), DestReg) |
1375 | .addFrameIndex(Idx: FI) |
1376 | .addMemOperand(MMO); |
1377 | } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { |
1378 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDMDIA)) |
1379 | .addFrameIndex(Idx: FI) |
1380 | .add(MOs: predOps(Pred: ARMCC::AL)) |
1381 | .addMemOperand(MMO); |
1382 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_0, State: RegState::DefineNoRead, TRI); |
1383 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_1, State: RegState::DefineNoRead, TRI); |
1384 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_2, State: RegState::DefineNoRead, TRI); |
1385 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_3, State: RegState::DefineNoRead, TRI); |
1386 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_4, State: RegState::DefineNoRead, TRI); |
1387 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_5, State: RegState::DefineNoRead, TRI); |
1388 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_6, State: RegState::DefineNoRead, TRI); |
1389 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_7, State: RegState::DefineNoRead, TRI); |
1390 | if (DestReg.isPhysical()) |
1391 | MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine); |
1392 | } else |
1393 | llvm_unreachable("Unknown reg class!" ); |
1394 | break; |
1395 | default: |
1396 | llvm_unreachable("Unknown regclass!" ); |
1397 | } |
1398 | } |
1399 | |
1400 | Register ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, |
1401 | int &FrameIndex) const { |
1402 | switch (MI.getOpcode()) { |
1403 | default: break; |
1404 | case ARM::LDRrs: |
1405 | case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame. |
1406 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isReg() && |
1407 | MI.getOperand(i: 3).isImm() && MI.getOperand(i: 2).getReg() == 0 && |
1408 | MI.getOperand(i: 3).getImm() == 0) { |
1409 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1410 | return MI.getOperand(i: 0).getReg(); |
1411 | } |
1412 | break; |
1413 | case ARM::LDRi12: |
1414 | case ARM::t2LDRi12: |
1415 | case ARM::tLDRspi: |
1416 | case ARM::VLDRD: |
1417 | case ARM::VLDRS: |
1418 | case ARM::VLDRH: |
1419 | case ARM::VLDR_P0_off: |
1420 | case ARM::VLDR_FPSCR_NZCVQC_off: |
1421 | case ARM::MVE_VLDRWU32: |
1422 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() && |
1423 | MI.getOperand(i: 2).getImm() == 0) { |
1424 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1425 | return MI.getOperand(i: 0).getReg(); |
1426 | } |
1427 | break; |
1428 | case ARM::VLD1q64: |
1429 | case ARM::VLD1d8TPseudo: |
1430 | case ARM::VLD1d16TPseudo: |
1431 | case ARM::VLD1d32TPseudo: |
1432 | case ARM::VLD1d64TPseudo: |
1433 | case ARM::VLD1d8QPseudo: |
1434 | case ARM::VLD1d16QPseudo: |
1435 | case ARM::VLD1d32QPseudo: |
1436 | case ARM::VLD1d64QPseudo: |
1437 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 0).getSubReg() == 0) { |
1438 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1439 | return MI.getOperand(i: 0).getReg(); |
1440 | } |
1441 | break; |
1442 | case ARM::VLDMQIA: |
1443 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 0).getSubReg() == 0) { |
1444 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1445 | return MI.getOperand(i: 0).getReg(); |
1446 | } |
1447 | break; |
1448 | case ARM::MQQPRLoad: |
1449 | case ARM::MQQQQPRLoad: |
1450 | if (MI.getOperand(i: 1).isFI()) { |
1451 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1452 | return MI.getOperand(i: 0).getReg(); |
1453 | } |
1454 | break; |
1455 | } |
1456 | |
1457 | return 0; |
1458 | } |
1459 | |
1460 | Register ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI, |
1461 | int &FrameIndex) const { |
1462 | SmallVector<const MachineMemOperand *, 1> Accesses; |
1463 | if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) && |
1464 | Accesses.size() == 1) { |
1465 | FrameIndex = |
1466 | cast<FixedStackPseudoSourceValue>(Val: Accesses.front()->getPseudoValue()) |
1467 | ->getFrameIndex(); |
1468 | return true; |
1469 | } |
1470 | return false; |
1471 | } |
1472 | |
1473 | /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD |
1474 | /// depending on whether the result is used. |
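     | /// For instance (an illustrative sketch, not a literal MIR dump): a Thumb2 |
     | /// MEMCPY whose writeback results are live may become |
     | ///   %src.wb = t2LDMIA_UPD %src, <pred>, def %r4, def %r5 |
     | ///   %dst.wb = t2STMIA_UPD %dst, <pred>, killed %r4, killed %r5 |
     | /// while dead writebacks use the plain t2LDMIA / t2STMIA forms instead. |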
1475 | void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { |
1476 | bool isThumb1 = Subtarget.isThumb1Only(); |
1477 | bool isThumb2 = Subtarget.isThumb2(); |
1478 | const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo(); |
1479 | |
1480 | DebugLoc dl = MI->getDebugLoc(); |
1481 | MachineBasicBlock *BB = MI->getParent(); |
1482 | |
1483 | MachineInstrBuilder LDM, STM; |
1484 | if (isThumb1 || !MI->getOperand(i: 1).isDead()) { |
1485 | MachineOperand LDWb(MI->getOperand(i: 1)); |
1486 | LDM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2LDMIA_UPD |
1487 | : isThumb1 ? ARM::tLDMIA_UPD |
1488 | : ARM::LDMIA_UPD)) |
1489 | .add(MO: LDWb); |
1490 | } else { |
1491 | LDM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2LDMIA : ARM::LDMIA)); |
1492 | } |
1493 | |
1494 | if (isThumb1 || !MI->getOperand(i: 0).isDead()) { |
1495 | MachineOperand STWb(MI->getOperand(i: 0)); |
1496 | STM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2STMIA_UPD |
1497 | : isThumb1 ? ARM::tSTMIA_UPD |
1498 | : ARM::STMIA_UPD)) |
1499 | .add(MO: STWb); |
1500 | } else { |
1501 | STM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2STMIA : ARM::STMIA)); |
1502 | } |
1503 | |
1504 | MachineOperand LDBase(MI->getOperand(i: 3)); |
1505 | LDM.add(MO: LDBase).add(MOs: predOps(Pred: ARMCC::AL)); |
1506 | |
1507 | MachineOperand STBase(MI->getOperand(i: 2)); |
1508 | STM.add(MO: STBase).add(MOs: predOps(Pred: ARMCC::AL)); |
1509 | |
1510 | // Sort the scratch registers into ascending order. |
1511 | const TargetRegisterInfo &TRI = getRegisterInfo(); |
1512 | SmallVector<unsigned, 6> ScratchRegs; |
1513 | for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands(), N: 5)) |
1514 | ScratchRegs.push_back(Elt: MO.getReg()); |
1515 | llvm::sort(C&: ScratchRegs, |
1516 | Comp: [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool { |
1517 | return TRI.getEncodingValue(Reg: Reg1) < |
1518 | TRI.getEncodingValue(Reg: Reg2); |
1519 | }); |
1520 | |
1521 | for (const auto &Reg : ScratchRegs) { |
1522 | LDM.addReg(RegNo: Reg, flags: RegState::Define); |
1523 | STM.addReg(RegNo: Reg, flags: RegState::Kill); |
1524 | } |
1525 | |
1526 | BB->erase(I: MI); |
1527 | } |
1528 | |
1529 | bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { |
1530 | if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { |
1531 | expandLoadStackGuard(MI); |
1532 | MI.getParent()->erase(I: MI); |
1533 | return true; |
1534 | } |
1535 | |
1536 | if (MI.getOpcode() == ARM::MEMCPY) { |
1537 | expandMEMCPY(MI); |
1538 | return true; |
1539 | } |
1540 | |
1541 | // This hook gets to expand COPY instructions before they become |
1542 | // copyPhysReg() calls. Look for VMOVS instructions that can legally be |
1543 | // widened to VMOVD. We prefer the VMOVD when possible because it may be |
1544 | // changed into a VORR that can go down the NEON pipeline. |
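     |   // For example (sketch): "%s0 = COPY %s2" may be rewritten below into |
     |   // "%d0 = VMOVD undef %d1, implicit %s2", plus the usual AL predicate. |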
1545 | if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64()) |
1546 | return false; |
1547 | |
1548 | // Look for a copy between even S-registers. That is where we keep floats |
1549 | // when using NEON v2f32 instructions for f32 arithmetic. |
1550 | Register DstRegS = MI.getOperand(i: 0).getReg(); |
1551 | Register SrcRegS = MI.getOperand(i: 1).getReg(); |
1552 | if (!ARM::SPRRegClass.contains(Reg1: DstRegS, Reg2: SrcRegS)) |
1553 | return false; |
1554 | |
1555 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
1556 | MCRegister DstRegD = |
1557 | TRI->getMatchingSuperReg(Reg: DstRegS, SubIdx: ARM::ssub_0, RC: &ARM::DPRRegClass); |
1558 | MCRegister SrcRegD = |
1559 | TRI->getMatchingSuperReg(Reg: SrcRegS, SubIdx: ARM::ssub_0, RC: &ARM::DPRRegClass); |
1560 | if (!DstRegD || !SrcRegD) |
1561 | return false; |
1562 | |
1563 | // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only |
1564 | // legal if the COPY already defines the full DstRegD, and it isn't a |
1565 | // sub-register insertion. |
1566 | if (!MI.definesRegister(Reg: DstRegD, TRI) || MI.readsRegister(Reg: DstRegD, TRI)) |
1567 | return false; |
1568 | |
1569 | // A dead copy shouldn't show up here, but reject it just in case. |
1570 | if (MI.getOperand(i: 0).isDead()) |
1571 | return false; |
1572 | |
1573 | // All clear, widen the COPY. |
1574 | LLVM_DEBUG(dbgs() << "widening: " << MI); |
1575 | MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); |
1576 | |
1577 | // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg |
1578 | // or some other super-register. |
1579 | int ImpDefIdx = MI.findRegisterDefOperandIdx(Reg: DstRegD, /*TRI=*/nullptr); |
1580 | if (ImpDefIdx != -1) |
1581 | MI.removeOperand(OpNo: ImpDefIdx); |
1582 | |
1583 | // Change the opcode and operands. |
1584 | MI.setDesc(get(Opcode: ARM::VMOVD)); |
1585 | MI.getOperand(i: 0).setReg(DstRegD); |
1586 | MI.getOperand(i: 1).setReg(SrcRegD); |
1587 | MIB.add(MOs: predOps(Pred: ARMCC::AL)); |
1588 | |
1589 | // We are now reading SrcRegD instead of SrcRegS. This may upset the |
1590 | // register scavenger and machine verifier, so we need to indicate that we |
1591 | // are reading an undefined value from SrcRegD, but a proper value from |
1592 | // SrcRegS. |
1593 | MI.getOperand(i: 1).setIsUndef(); |
1594 | MIB.addReg(RegNo: SrcRegS, flags: RegState::Implicit); |
1595 | |
1596 | // SrcRegD may actually contain an unrelated value in the ssub_1 |
1597 | // sub-register. Don't kill it. Only kill the ssub_0 sub-register. |
1598 | if (MI.getOperand(i: 1).isKill()) { |
1599 | MI.getOperand(i: 1).setIsKill(false); |
1600 | MI.addRegisterKilled(IncomingReg: SrcRegS, RegInfo: TRI, AddIfNotFound: true); |
1601 | } |
1602 | |
1603 | LLVM_DEBUG(dbgs() << "replaced by: " << MI); |
1604 | return true; |
1605 | } |
1606 | |
1607 | /// Create a copy of a const pool value. Update CPI to the new index and return |
1608 | /// the label UID. |
1609 | static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { |
1610 | MachineConstantPool *MCP = MF.getConstantPool(); |
1611 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
1612 | |
1613 | const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; |
1614 | assert(MCPE.isMachineConstantPoolEntry() && |
1615 | "Expecting a machine constantpool entry!" ); |
1616 | ARMConstantPoolValue *ACPV = |
1617 | static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); |
1618 | |
1619 | unsigned PCLabelId = AFI->createPICLabelUId(); |
1620 | ARMConstantPoolValue *NewCPV = nullptr; |
1621 | |
1622 | // FIXME: The below assumes PIC relocation model and that the function |
1623 | // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and |
1624 |   // zero for non-PIC in ARM or Thumb. The callers are all Thumb LDR |
1625 |   // instructions, so that's probably OK, but is PIC always correct when we |
1626 |   // get here? |
1627 | if (ACPV->isGlobalValue()) |
1628 | NewCPV = ARMConstantPoolConstant::Create( |
1629 | C: cast<ARMConstantPoolConstant>(Val: ACPV)->getGV(), ID: PCLabelId, Kind: ARMCP::CPValue, |
1630 | PCAdj: 4, Modifier: ACPV->getModifier(), AddCurrentAddress: ACPV->mustAddCurrentAddress()); |
1631 | else if (ACPV->isExtSymbol()) |
1632 | NewCPV = ARMConstantPoolSymbol:: |
1633 | Create(C&: MF.getFunction().getContext(), |
1634 | s: cast<ARMConstantPoolSymbol>(Val: ACPV)->getSymbol(), ID: PCLabelId, PCAdj: 4); |
1635 | else if (ACPV->isBlockAddress()) |
1636 | NewCPV = ARMConstantPoolConstant:: |
1637 | Create(C: cast<ARMConstantPoolConstant>(Val: ACPV)->getBlockAddress(), ID: PCLabelId, |
1638 | Kind: ARMCP::CPBlockAddress, PCAdj: 4); |
1639 | else if (ACPV->isLSDA()) |
1640 | NewCPV = ARMConstantPoolConstant::Create(C: &MF.getFunction(), ID: PCLabelId, |
1641 | Kind: ARMCP::CPLSDA, PCAdj: 4); |
1642 | else if (ACPV->isMachineBasicBlock()) |
1643 | NewCPV = ARMConstantPoolMBB:: |
1644 | Create(C&: MF.getFunction().getContext(), |
1645 | mbb: cast<ARMConstantPoolMBB>(Val: ACPV)->getMBB(), ID: PCLabelId, PCAdj: 4); |
1646 | else |
1647 | llvm_unreachable("Unexpected ARM constantpool value type!!" ); |
1648 | CPI = MCP->getConstantPoolIndex(V: NewCPV, Alignment: MCPE.getAlign()); |
1649 | return PCLabelId; |
1650 | } |
1651 | |
1652 | void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB, |
1653 | MachineBasicBlock::iterator I, |
1654 | Register DestReg, unsigned SubIdx, |
1655 | const MachineInstr &Orig, |
1656 | const TargetRegisterInfo &TRI) const { |
1657 | unsigned Opcode = Orig.getOpcode(); |
1658 | switch (Opcode) { |
1659 | default: { |
1660 | MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig: &Orig); |
1661 | MI->substituteRegister(FromReg: Orig.getOperand(i: 0).getReg(), ToReg: DestReg, SubIdx, RegInfo: TRI); |
1662 | MBB.insert(I, MI); |
1663 | break; |
1664 | } |
1665 | case ARM::tLDRpci_pic: |
1666 | case ARM::t2LDRpci_pic: { |
1667 | MachineFunction &MF = *MBB.getParent(); |
1668 | unsigned CPI = Orig.getOperand(i: 1).getIndex(); |
1669 | unsigned PCLabelId = duplicateCPV(MF, CPI); |
1670 | BuildMI(BB&: MBB, I, MIMD: Orig.getDebugLoc(), MCID: get(Opcode), DestReg) |
1671 | .addConstantPoolIndex(Idx: CPI) |
1672 | .addImm(Val: PCLabelId) |
1673 | .cloneMemRefs(OtherMI: Orig); |
1674 | break; |
1675 | } |
1676 | } |
1677 | } |
1678 | |
1679 | MachineInstr & |
1680 | ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB, |
1681 | MachineBasicBlock::iterator InsertBefore, |
1682 | const MachineInstr &Orig) const { |
1683 | MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig); |
1684 | MachineBasicBlock::instr_iterator I = Cloned.getIterator(); |
1685 | for (;;) { |
1686 | switch (I->getOpcode()) { |
1687 | case ARM::tLDRpci_pic: |
1688 | case ARM::t2LDRpci_pic: { |
1689 | MachineFunction &MF = *MBB.getParent(); |
1690 | unsigned CPI = I->getOperand(i: 1).getIndex(); |
1691 | unsigned PCLabelId = duplicateCPV(MF, CPI); |
1692 | I->getOperand(i: 1).setIndex(CPI); |
1693 | I->getOperand(i: 2).setImm(PCLabelId); |
1694 | break; |
1695 | } |
1696 | } |
1697 | if (!I->isBundledWithSucc()) |
1698 | break; |
1699 | ++I; |
1700 | } |
1701 | return Cloned; |
1702 | } |
1703 | |
1704 | bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, |
1705 | const MachineInstr &MI1, |
1706 | const MachineRegisterInfo *MRI) const { |
1707 | unsigned Opcode = MI0.getOpcode(); |
1708 | if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic || |
1709 | Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic || |
1710 | Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr || |
1711 | Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel || |
1712 | Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || |
1713 | Opcode == ARM::t2MOV_ga_pcrel) { |
1714 | if (MI1.getOpcode() != Opcode) |
1715 | return false; |
1716 | if (MI0.getNumOperands() != MI1.getNumOperands()) |
1717 | return false; |
1718 | |
1719 | const MachineOperand &MO0 = MI0.getOperand(i: 1); |
1720 | const MachineOperand &MO1 = MI1.getOperand(i: 1); |
1721 | if (MO0.getOffset() != MO1.getOffset()) |
1722 | return false; |
1723 | |
1724 | if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr || |
1725 | Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel || |
1726 | Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || |
1727 | Opcode == ARM::t2MOV_ga_pcrel) |
1728 | // Ignore the PC labels. |
1729 | return MO0.getGlobal() == MO1.getGlobal(); |
1730 | |
1731 | const MachineFunction *MF = MI0.getParent()->getParent(); |
1732 | const MachineConstantPool *MCP = MF->getConstantPool(); |
1733 | int CPI0 = MO0.getIndex(); |
1734 | int CPI1 = MO1.getIndex(); |
1735 | const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0]; |
1736 | const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1]; |
1737 | bool isARMCP0 = MCPE0.isMachineConstantPoolEntry(); |
1738 | bool isARMCP1 = MCPE1.isMachineConstantPoolEntry(); |
1739 | if (isARMCP0 && isARMCP1) { |
1740 | ARMConstantPoolValue *ACPV0 = |
1741 | static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); |
1742 | ARMConstantPoolValue *ACPV1 = |
1743 | static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); |
1744 | return ACPV0->hasSameValue(ACPV: ACPV1); |
1745 | } else if (!isARMCP0 && !isARMCP1) { |
1746 | return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal; |
1747 | } |
1748 | return false; |
1749 | } else if (Opcode == ARM::PICLDR) { |
1750 | if (MI1.getOpcode() != Opcode) |
1751 | return false; |
1752 | if (MI0.getNumOperands() != MI1.getNumOperands()) |
1753 | return false; |
1754 | |
1755 | Register Addr0 = MI0.getOperand(i: 1).getReg(); |
1756 | Register Addr1 = MI1.getOperand(i: 1).getReg(); |
1757 | if (Addr0 != Addr1) { |
1758 | if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual()) |
1759 | return false; |
1760 | |
1761 | // This assumes SSA form. |
1762 | MachineInstr *Def0 = MRI->getVRegDef(Reg: Addr0); |
1763 | MachineInstr *Def1 = MRI->getVRegDef(Reg: Addr1); |
1764 |       // Check if the loaded values, e.g. a constant pool entry or a global |
1765 |       // address, are the same. |
1766 | if (!produceSameValue(MI0: *Def0, MI1: *Def1, MRI)) |
1767 | return false; |
1768 | } |
1769 | |
1770 | for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) { |
1771 | // %12 = PICLDR %11, 0, 14, %noreg |
1772 | const MachineOperand &MO0 = MI0.getOperand(i); |
1773 | const MachineOperand &MO1 = MI1.getOperand(i); |
1774 | if (!MO0.isIdenticalTo(Other: MO1)) |
1775 | return false; |
1776 | } |
1777 | return true; |
1778 | } |
1779 | |
1780 | return MI0.isIdenticalTo(Other: MI1, Check: MachineInstr::IgnoreVRegDefs); |
1781 | } |
1782 | |
1783 | /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to |
1784 | /// determine if two loads are loading from the same base address. It should |
1785 | /// only return true if the base pointers are the same and the only differences |
1786 | /// between the two addresses is the offset. It also returns the offsets by |
1787 | /// reference. |
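     | /// For example (sketch), two LDRi12 nodes sharing the same base and chain |
     | /// whose immediate operands are 8 and 12 return Offset1 = 8 and Offset2 = 12. |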
1788 | /// |
1789 | /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched |
1790 | /// is permanently disabled. |
1791 | bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, |
1792 | int64_t &Offset1, |
1793 | int64_t &Offset2) const { |
1794 | // Don't worry about Thumb: just ARM and Thumb2. |
1795 | if (Subtarget.isThumb1Only()) return false; |
1796 | |
1797 | if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) |
1798 | return false; |
1799 | |
1800 | auto IsLoadOpcode = [&](unsigned Opcode) { |
1801 | switch (Opcode) { |
1802 | default: |
1803 | return false; |
1804 | case ARM::LDRi12: |
1805 | case ARM::LDRBi12: |
1806 | case ARM::LDRD: |
1807 | case ARM::LDRH: |
1808 | case ARM::LDRSB: |
1809 | case ARM::LDRSH: |
1810 | case ARM::VLDRD: |
1811 | case ARM::VLDRS: |
1812 | case ARM::t2LDRi8: |
1813 | case ARM::t2LDRBi8: |
1814 | case ARM::t2LDRDi8: |
1815 | case ARM::t2LDRSHi8: |
1816 | case ARM::t2LDRi12: |
1817 | case ARM::t2LDRBi12: |
1818 | case ARM::t2LDRSHi12: |
1819 | return true; |
1820 | } |
1821 | }; |
1822 | |
1823 | if (!IsLoadOpcode(Load1->getMachineOpcode()) || |
1824 | !IsLoadOpcode(Load2->getMachineOpcode())) |
1825 | return false; |
1826 | |
1827 | // Check if base addresses and chain operands match. |
1828 | if (Load1->getOperand(Num: 0) != Load2->getOperand(Num: 0) || |
1829 | Load1->getOperand(Num: 4) != Load2->getOperand(Num: 4)) |
1830 | return false; |
1831 | |
1832 | // Index should be Reg0. |
1833 | if (Load1->getOperand(Num: 3) != Load2->getOperand(Num: 3)) |
1834 | return false; |
1835 | |
1836 | // Determine the offsets. |
1837 | if (isa<ConstantSDNode>(Val: Load1->getOperand(Num: 1)) && |
1838 | isa<ConstantSDNode>(Val: Load2->getOperand(Num: 1))) { |
1839 | Offset1 = cast<ConstantSDNode>(Val: Load1->getOperand(Num: 1))->getSExtValue(); |
1840 | Offset2 = cast<ConstantSDNode>(Val: Load2->getOperand(Num: 1))->getSExtValue(); |
1841 | return true; |
1842 | } |
1843 | |
1844 | return false; |
1845 | } |
1846 | |
1847 | /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to |
1848 | /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should |
1849 | /// be scheduled together. On some targets, if two loads are loading from |
1850 | /// addresses in the same cache line, it's better if they are scheduled |
1851 | /// together. This function takes two integers that represent the load offsets |
1852 | /// from the common base address. It returns true if it decides it's desirable |
1853 | /// to schedule the two loads together. "NumLoads" is the number of loads that |
1854 | /// have already been scheduled after Load1. |
1855 | /// |
1856 | /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched |
1857 | /// is permanently disabled. |
1858 | bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, |
1859 | int64_t Offset1, int64_t Offset2, |
1860 | unsigned NumLoads) const { |
1861 | // Don't worry about Thumb: just ARM and Thumb2. |
1862 | if (Subtarget.isThumb1Only()) return false; |
1863 | |
1864 | assert(Offset2 > Offset1); |
1865 | |
1866 | if ((Offset2 - Offset1) / 8 > 64) |
1867 | return false; |
1868 | |
1869 |   // Check if the machine opcodes are different. If they are different then |
1870 |   // we consider them to not be of the same base address, EXCEPT in the case |
1871 |   // of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12. In this |
1872 |   // case, they are considered to be the same because they are different |
1873 |   // encoding forms of the same basic instruction. |
1874 | if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) && |
1875 | !((Load1->getMachineOpcode() == ARM::t2LDRBi8 && |
1876 | Load2->getMachineOpcode() == ARM::t2LDRBi12) || |
1877 | (Load1->getMachineOpcode() == ARM::t2LDRBi12 && |
1878 | Load2->getMachineOpcode() == ARM::t2LDRBi8))) |
1879 | return false; // FIXME: overly conservative? |
1880 | |
1881 | // Four loads in a row should be sufficient. |
1882 | if (NumLoads >= 3) |
1883 | return false; |
1884 | |
1885 | return true; |
1886 | } |
1887 | |
1888 | bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI, |
1889 | const MachineBasicBlock *MBB, |
1890 | const MachineFunction &MF) const { |
1891 | // Debug info is never a scheduling boundary. It's necessary to be explicit |
1892 | // due to the special treatment of IT instructions below, otherwise a |
1893 | // dbg_value followed by an IT will result in the IT instruction being |
1894 | // considered a scheduling hazard, which is wrong. It should be the actual |
1895 | // instruction preceding the dbg_value instruction(s), just like it is |
1896 | // when debug info is not present. |
1897 | if (MI.isDebugInstr()) |
1898 | return false; |
1899 | |
1900 | // Terminators and labels can't be scheduled around. |
1901 | if (MI.isTerminator() || MI.isPosition()) |
1902 | return true; |
1903 | |
1904 | // INLINEASM_BR can jump to another block |
1905 | if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) |
1906 | return true; |
1907 | |
1908 | if (isSEHInstruction(MI)) |
1909 | return true; |
1910 | |
1911 | // Treat the start of the IT block as a scheduling boundary, but schedule |
1912 | // t2IT along with all instructions following it. |
1913 | // FIXME: This is a big hammer. But the alternative is to add all potential |
1914 | // true and anti dependencies to IT block instructions as implicit operands |
1915 | // to the t2IT instruction. The added compile time and complexity does not |
1916 | // seem worth it. |
1917 | MachineBasicBlock::const_iterator I = MI; |
1918 | // Make sure to skip any debug instructions |
1919 | while (++I != MBB->end() && I->isDebugInstr()) |
1920 | ; |
1921 | if (I != MBB->end() && I->getOpcode() == ARM::t2IT) |
1922 | return true; |
1923 | |
1924 | // Don't attempt to schedule around any instruction that defines |
1925 | // a stack-oriented pointer, as it's unlikely to be profitable. This |
1926 | // saves compile time, because it doesn't require every single |
1927 | // stack slot reference to depend on the instruction that does the |
1928 | // modification. |
1929 | // Calls don't actually change the stack pointer, even if they have imp-defs. |
1930 | // No ARM calling conventions change the stack pointer. (X86 calling |
1931 | // conventions sometimes do). |
1932 | if (!MI.isCall() && MI.definesRegister(Reg: ARM::SP, /*TRI=*/nullptr)) |
1933 | return true; |
1934 | |
1935 | return false; |
1936 | } |
1937 | |
1938 | bool ARMBaseInstrInfo:: |
1939 | isProfitableToIfCvt(MachineBasicBlock &MBB, |
1940 |                     unsigned NumCycles, unsigned ExtraPredCycles, |
1941 | BranchProbability Probability) const { |
1942 | if (!NumCycles) |
1943 | return false; |
1944 | |
1945 | // If we are optimizing for size, see if the branch in the predecessor can be |
1946 | // lowered to cbn?z by the constant island lowering pass, and return false if |
1947 | // so. This results in a shorter instruction sequence. |
1948 | if (MBB.getParent()->getFunction().hasOptSize()) { |
1949 | MachineBasicBlock *Pred = *MBB.pred_begin(); |
1950 | if (!Pred->empty()) { |
1951 | MachineInstr *LastMI = &*Pred->rbegin(); |
1952 | if (LastMI->getOpcode() == ARM::t2Bcc) { |
1953 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
1954 | MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br: LastMI, TRI); |
1955 | if (CmpMI) |
1956 | return false; |
1957 | } |
1958 | } |
1959 | } |
1960 | return isProfitableToIfCvt(TMBB&: MBB, NumT: NumCycles, ExtraT: ExtraPredCycles, |
1961 | FMBB&: MBB, NumF: 0, ExtraF: 0, Probability); |
1962 | } |
1963 | |
1964 | bool ARMBaseInstrInfo:: |
1965 | isProfitableToIfCvt(MachineBasicBlock &TBB, |
1966 |                     unsigned TCycles, unsigned TExtra, |
1967 | MachineBasicBlock &FBB, |
1968 |                     unsigned FCycles, unsigned FExtra, |
1969 | BranchProbability Probability) const { |
1970 | if (!TCycles) |
1971 | return false; |
1972 | |
1973 |   // In Thumb code we often end up trading one branch for an IT block, and |
1974 |   // if we are cloning, the duplicated instructions can increase code size. |
1975 |   // Prevent blocks with multiple predecessors from being ifcvted to prevent |
1976 |   // this cloning. |
1977 | if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) { |
1978 | if (TBB.pred_size() != 1 || FBB.pred_size() != 1) |
1979 | return false; |
1980 | } |
1981 | |
1982 | // Attempt to estimate the relative costs of predication versus branching. |
1983 |   // Here we scale up each component of UnpredCost to avoid precision issues |
1984 |   // when scaling TCycles/FCycles by Probability. |
1985 | const unsigned ScalingUpFactor = 1024; |
1986 | |
1987 | unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor; |
1988 | unsigned UnpredCost; |
1989 | if (!Subtarget.hasBranchPredictor()) { |
1990 | // When we don't have a branch predictor it's always cheaper to not take a |
1991 | // branch than take it, so we have to take that into account. |
1992 | unsigned NotTakenBranchCost = 1; |
1993 | unsigned TakenBranchCost = Subtarget.getMispredictionPenalty(); |
1994 | unsigned TUnpredCycles, FUnpredCycles; |
1995 | if (!FCycles) { |
1996 | // Triangle: TBB is the fallthrough |
1997 | TUnpredCycles = TCycles + NotTakenBranchCost; |
1998 | FUnpredCycles = TakenBranchCost; |
1999 | } else { |
2000 | // Diamond: TBB is the block that is branched to, FBB is the fallthrough |
2001 | TUnpredCycles = TCycles + TakenBranchCost; |
2002 | FUnpredCycles = FCycles + NotTakenBranchCost; |
2003 | // The branch at the end of FBB will disappear when it's predicated, so |
2004 | // discount it from PredCost. |
2005 | PredCost -= 1 * ScalingUpFactor; |
2006 | } |
2007 |     // The total cost is the cost of each path scaled by their probabilities. |
2008 | unsigned TUnpredCost = Probability.scale(Num: TUnpredCycles * ScalingUpFactor); |
2009 | unsigned FUnpredCost = Probability.getCompl().scale(Num: FUnpredCycles * ScalingUpFactor); |
2010 | UnpredCost = TUnpredCost + FUnpredCost; |
2011 |     // When predicating, assume that the first IT can be folded away but later |
2012 |     // ones cost one cycle each. |
2013 | if (Subtarget.isThumb2() && TCycles + FCycles > 4) { |
2014 | PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor; |
2015 | } |
2016 | } else { |
2017 | unsigned TUnpredCost = Probability.scale(Num: TCycles * ScalingUpFactor); |
2018 | unsigned FUnpredCost = |
2019 | Probability.getCompl().scale(Num: FCycles * ScalingUpFactor); |
2020 | UnpredCost = TUnpredCost + FUnpredCost; |
2021 | UnpredCost += 1 * ScalingUpFactor; // The branch itself |
2022 | UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; |
2023 | } |
2024 | |
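     |   // For instance (illustrative numbers): a triangle with TCycles = 2, no |
     |   // extra predication cycles, a 50% probability, no branch predictor and a |
     |   // misprediction penalty of 4 gives UnpredCost = (3 + 4) / 2 * 1024 = 3584 |
     |   // versus PredCost = 2 * 1024 = 2048, so predication is chosen. |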
2025 | return PredCost <= UnpredCost; |
2026 | } |
2027 | |
2028 | unsigned |
2029 | ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF, |
2030 |                                                     unsigned NumInsts) const { |
2031 | // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions. |
2032 |   // ARM has a condition code field in every predicable instruction; using it |
2033 |   // doesn't change code size. |
2034 | if (!Subtarget.isThumb2()) |
2035 | return 0; |
2036 | |
2037 |   // It's possible that each IT block is restricted to covering a single instruction. |
2038 | unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4; |
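     |   // For example, predicating 5 instructions costs divideCeil(5, 4) * 2 = 4 |
     |   // bytes normally, but divideCeil(5, 1) * 2 = 10 bytes under restrictIT(). |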
2039 | return divideCeil(Numerator: NumInsts, Denominator: MaxInsts) * 2; |
2040 | } |
2041 | |
2042 | unsigned |
2043 | ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const { |
2044 | // If this branch is likely to be folded into the comparison to form a |
2045 | // CB(N)Z, then removing it won't reduce code size at all, because that will |
2046 | // just replace the CB(N)Z with a CMP. |
2047 | if (MI.getOpcode() == ARM::t2Bcc && |
2048 | findCMPToFoldIntoCBZ(Br: &MI, TRI: &getRegisterInfo())) |
2049 | return 0; |
2050 | |
2051 | unsigned Size = getInstSizeInBytes(MI); |
2052 | |
2053 | // For Thumb2, all branches are 32-bit instructions during the if conversion |
2054 | // pass, but may be replaced with 16-bit instructions during size reduction. |
2055 | // Since the branches considered by if conversion tend to be forward branches |
2056 | // over small basic blocks, they are very likely to be in range for the |
2057 | // narrow instructions, so we assume the final code size will be half what it |
2058 | // currently is. |
2059 | if (Subtarget.isThumb2()) |
2060 | Size /= 2; |
2061 | |
2062 | return Size; |
2063 | } |
2064 | |
2065 | bool |
2066 | ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, |
2067 | MachineBasicBlock &FMBB) const { |
2068 | // Reduce false anti-dependencies to let the target's out-of-order execution |
2069 | // engine do its thing. |
2070 | return Subtarget.isProfitableToUnpredicate(); |
2071 | } |
2072 | |
2073 | /// getInstrPredicate - If instruction is predicated, returns its predicate |
2074 | /// condition, otherwise returns AL. It also returns the condition code |
2075 | /// register by reference. |
2076 | ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI, |
2077 | Register &PredReg) { |
2078 | int PIdx = MI.findFirstPredOperandIdx(); |
2079 | if (PIdx == -1) { |
2080 | PredReg = 0; |
2081 | return ARMCC::AL; |
2082 | } |
2083 | |
2084 | PredReg = MI.getOperand(i: PIdx+1).getReg(); |
2085 | return (ARMCC::CondCodes)MI.getOperand(i: PIdx).getImm(); |
2086 | } |
2087 | |
2088 | unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) { |
2089 | if (Opc == ARM::B) |
2090 | return ARM::Bcc; |
2091 | if (Opc == ARM::tB) |
2092 | return ARM::tBcc; |
2093 | if (Opc == ARM::t2B) |
2094 | return ARM::t2Bcc; |
2095 | |
2096 | llvm_unreachable("Unknown unconditional branch opcode!" ); |
2097 | } |
2098 | |
2099 | MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, |
2100 | bool NewMI, |
2101 | unsigned OpIdx1, |
2102 | unsigned OpIdx2) const { |
2103 | switch (MI.getOpcode()) { |
2104 | case ARM::MOVCCr: |
2105 | case ARM::t2MOVCCr: { |
2106 | // MOVCC can be commuted by inverting the condition. |
2107 | Register PredReg; |
2108 | ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); |
2109 | // MOVCC AL can't be inverted. Shouldn't happen. |
2110 | if (CC == ARMCC::AL || PredReg != ARM::CPSR) |
2111 | return nullptr; |
2112 | MachineInstr *CommutedMI = |
2113 | TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
2114 | if (!CommutedMI) |
2115 | return nullptr; |
2116 | // After swapping the MOVCC operands, also invert the condition. |
2117 | CommutedMI->getOperand(i: CommutedMI->findFirstPredOperandIdx()) |
2118 | .setImm(ARMCC::getOppositeCondition(CC)); |
2119 | return CommutedMI; |
2120 | } |
2121 | } |
2122 | return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
2123 | } |
2124 | |
2125 | /// Identify instructions that can be folded into a MOVCC instruction, and |
2126 | /// return the defining instruction. |
2127 | MachineInstr * |
2128 | ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI, |
2129 | const TargetInstrInfo *TII) const { |
2130 | if (!Reg.isVirtual()) |
2131 | return nullptr; |
2132 | if (!MRI.hasOneNonDBGUse(RegNo: Reg)) |
2133 | return nullptr; |
2134 | MachineInstr *MI = MRI.getVRegDef(Reg); |
2135 | if (!MI) |
2136 | return nullptr; |
2137 | // Check if MI can be predicated and folded into the MOVCC. |
2138 | if (!isPredicable(MI: *MI)) |
2139 | return nullptr; |
2140 | // Check if MI has any non-dead defs or physreg uses. This also detects |
2141 | // predicated instructions which will be reading CPSR. |
2142 | for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands(), N: 1)) { |
2143 | // Reject frame index operands, PEI can't handle the predicated pseudos. |
2144 | if (MO.isFI() || MO.isCPI() || MO.isJTI()) |
2145 | return nullptr; |
2146 | if (!MO.isReg()) |
2147 | continue; |
2148 | // MI can't have any tied operands, that would conflict with predication. |
2149 | if (MO.isTied()) |
2150 | return nullptr; |
2151 | if (MO.getReg().isPhysical()) |
2152 | return nullptr; |
2153 | if (MO.isDef() && !MO.isDead()) |
2154 | return nullptr; |
2155 | } |
2156 | bool DontMoveAcrossStores = true; |
2157 | if (!MI->isSafeToMove(SawStore&: DontMoveAcrossStores)) |
2158 | return nullptr; |
2159 | return MI; |
2160 | } |
2161 | |
2162 | bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI, |
2163 | SmallVectorImpl<MachineOperand> &Cond, |
2164 | unsigned &TrueOp, unsigned &FalseOp, |
2165 | bool &Optimizable) const { |
2166 | assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && |
2167 | "Unknown select instruction" ); |
2168 | // MOVCC operands: |
2169 | // 0: Def. |
2170 | // 1: True use. |
2171 | // 2: False use. |
2172 | // 3: Condition code. |
2173 | // 4: CPSR use. |
2174 | TrueOp = 1; |
2175 | FalseOp = 2; |
2176 | Cond.push_back(Elt: MI.getOperand(i: 3)); |
2177 | Cond.push_back(Elt: MI.getOperand(i: 4)); |
2178 | // We can always fold a def. |
2179 | Optimizable = true; |
2180 | return false; |
2181 | } |
2182 | |
2183 | MachineInstr * |
2184 | ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI, |
2185 | SmallPtrSetImpl<MachineInstr *> &SeenMIs, |
2186 | bool PreferFalse) const { |
2187 | assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && |
2188 | "Unknown select instruction" ); |
2189 | MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
2190 | MachineInstr *DefMI = canFoldIntoMOVCC(Reg: MI.getOperand(i: 2).getReg(), MRI, TII: this); |
2191 | bool Invert = !DefMI; |
2192 | if (!DefMI) |
2193 | DefMI = canFoldIntoMOVCC(Reg: MI.getOperand(i: 1).getReg(), MRI, TII: this); |
2194 | if (!DefMI) |
2195 | return nullptr; |
2196 | |
2197 | // Find new register class to use. |
2198 | MachineOperand FalseReg = MI.getOperand(i: Invert ? 2 : 1); |
2199 | MachineOperand TrueReg = MI.getOperand(i: Invert ? 1 : 2); |
2200 | Register DestReg = MI.getOperand(i: 0).getReg(); |
2201 | const TargetRegisterClass *FalseClass = MRI.getRegClass(Reg: FalseReg.getReg()); |
2202 | const TargetRegisterClass *TrueClass = MRI.getRegClass(Reg: TrueReg.getReg()); |
2203 | if (!MRI.constrainRegClass(Reg: DestReg, RC: FalseClass)) |
2204 | return nullptr; |
2205 | if (!MRI.constrainRegClass(Reg: DestReg, RC: TrueClass)) |
2206 | return nullptr; |
2207 | |
2208 | // Create a new predicated version of DefMI. |
2209 | // Rfalse is the first use. |
2210 | MachineInstrBuilder NewMI = |
2211 | BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: DefMI->getDesc(), DestReg); |
2212 | |
2213 | // Copy all the DefMI operands, excluding its (null) predicate. |
2214 | const MCInstrDesc &DefDesc = DefMI->getDesc(); |
2215 | for (unsigned i = 1, e = DefDesc.getNumOperands(); |
2216 | i != e && !DefDesc.operands()[i].isPredicate(); ++i) |
2217 | NewMI.add(MO: DefMI->getOperand(i)); |
2218 | |
2219 | unsigned CondCode = MI.getOperand(i: 3).getImm(); |
2220 | if (Invert) |
2221 | NewMI.addImm(Val: ARMCC::getOppositeCondition(CC: ARMCC::CondCodes(CondCode))); |
2222 | else |
2223 | NewMI.addImm(Val: CondCode); |
2224 | NewMI.add(MO: MI.getOperand(i: 4)); |
2225 | |
2226 | // DefMI is not the -S version that sets CPSR, so add an optional %noreg. |
2227 | if (NewMI->hasOptionalDef()) |
2228 | NewMI.add(MO: condCodeOp()); |
2229 | |
2230 | // The output register value when the predicate is false is an implicit |
2231 | // register operand tied to the first def. |
2232 | // The tie makes the register allocator ensure the FalseReg is allocated the |
2233 | // same register as operand 0. |
2234 | FalseReg.setImplicit(); |
2235 | NewMI.add(MO: FalseReg); |
2236 | NewMI->tieOperands(DefIdx: 0, UseIdx: NewMI->getNumOperands() - 1); |
2237 | |
2238 | // Update SeenMIs set: register newly created MI and erase removed DefMI. |
2239 | SeenMIs.insert(Ptr: NewMI); |
2240 | SeenMIs.erase(Ptr: DefMI); |
2241 | |
2242 | // If MI is inside a loop, and DefMI is outside the loop, then kill flags on |
2243 |   // DefMI would be invalid when transferred inside the loop. Checking for a |
2244 | // loop is expensive, but at least remove kill flags if they are in different |
2245 | // BBs. |
2246 | if (DefMI->getParent() != MI.getParent()) |
2247 | NewMI->clearKillInfo(); |
2248 | |
2249 | // The caller will erase MI, but not DefMI. |
2250 | DefMI->eraseFromParent(); |
2251 | return NewMI; |
2252 | } |
2253 | |
2254 | /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the |
2255 | /// instruction is encoded with an 'S' bit is determined by the optional CPSR |
2256 | /// def operand. |
2257 | /// |
2258 | /// This will go away once we can teach tblgen how to set the optional CPSR def |
2259 | /// operand itself. |
2260 | struct AddSubFlagsOpcodePair { |
2261 | uint16_t PseudoOpc; |
2262 | uint16_t MachineOpc; |
2263 | }; |
2264 | |
2265 | static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { |
2266 | {.PseudoOpc: ARM::ADDSri, .MachineOpc: ARM::ADDri}, |
2267 | {.PseudoOpc: ARM::ADDSrr, .MachineOpc: ARM::ADDrr}, |
2268 | {.PseudoOpc: ARM::ADDSrsi, .MachineOpc: ARM::ADDrsi}, |
2269 | {.PseudoOpc: ARM::ADDSrsr, .MachineOpc: ARM::ADDrsr}, |
2270 | |
2271 | {.PseudoOpc: ARM::SUBSri, .MachineOpc: ARM::SUBri}, |
2272 | {.PseudoOpc: ARM::SUBSrr, .MachineOpc: ARM::SUBrr}, |
2273 | {.PseudoOpc: ARM::SUBSrsi, .MachineOpc: ARM::SUBrsi}, |
2274 | {.PseudoOpc: ARM::SUBSrsr, .MachineOpc: ARM::SUBrsr}, |
2275 | |
2276 | {.PseudoOpc: ARM::RSBSri, .MachineOpc: ARM::RSBri}, |
2277 | {.PseudoOpc: ARM::RSBSrsi, .MachineOpc: ARM::RSBrsi}, |
2278 | {.PseudoOpc: ARM::RSBSrsr, .MachineOpc: ARM::RSBrsr}, |
2279 | |
2280 | {.PseudoOpc: ARM::tADDSi3, .MachineOpc: ARM::tADDi3}, |
2281 | {.PseudoOpc: ARM::tADDSi8, .MachineOpc: ARM::tADDi8}, |
2282 | {.PseudoOpc: ARM::tADDSrr, .MachineOpc: ARM::tADDrr}, |
2283 | {.PseudoOpc: ARM::tADCS, .MachineOpc: ARM::tADC}, |
2284 | |
2285 | {.PseudoOpc: ARM::tSUBSi3, .MachineOpc: ARM::tSUBi3}, |
2286 | {.PseudoOpc: ARM::tSUBSi8, .MachineOpc: ARM::tSUBi8}, |
2287 | {.PseudoOpc: ARM::tSUBSrr, .MachineOpc: ARM::tSUBrr}, |
2288 | {.PseudoOpc: ARM::tSBCS, .MachineOpc: ARM::tSBC}, |
2289 | {.PseudoOpc: ARM::tRSBS, .MachineOpc: ARM::tRSB}, |
2290 | {.PseudoOpc: ARM::tLSLSri, .MachineOpc: ARM::tLSLri}, |
2291 | |
2292 | {.PseudoOpc: ARM::t2ADDSri, .MachineOpc: ARM::t2ADDri}, |
2293 | {.PseudoOpc: ARM::t2ADDSrr, .MachineOpc: ARM::t2ADDrr}, |
2294 | {.PseudoOpc: ARM::t2ADDSrs, .MachineOpc: ARM::t2ADDrs}, |
2295 | |
2296 | {.PseudoOpc: ARM::t2SUBSri, .MachineOpc: ARM::t2SUBri}, |
2297 | {.PseudoOpc: ARM::t2SUBSrr, .MachineOpc: ARM::t2SUBrr}, |
2298 | {.PseudoOpc: ARM::t2SUBSrs, .MachineOpc: ARM::t2SUBrs}, |
2299 | |
2300 | {.PseudoOpc: ARM::t2RSBSri, .MachineOpc: ARM::t2RSBri}, |
2301 | {.PseudoOpc: ARM::t2RSBSrs, .MachineOpc: ARM::t2RSBrs}, |
2302 | }; |
2303 | |
2304 | unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { |
2305 | for (const auto &Entry : AddSubFlagsOpcodeMap) |
2306 | if (OldOpc == Entry.PseudoOpc) |
2307 | return Entry.MachineOpc; |
2308 | return 0; |
2309 | } |
2310 | |
2311 | void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, |
2312 | MachineBasicBlock::iterator &MBBI, |
2313 | const DebugLoc &dl, Register DestReg, |
2314 | Register BaseReg, int NumBytes, |
2315 | ARMCC::CondCodes Pred, Register PredReg, |
2316 | const ARMBaseInstrInfo &TII, |
2317 | unsigned MIFlags) { |
2318 | if (NumBytes == 0 && DestReg != BaseReg) { |
2319 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::MOVr), DestReg) |
2320 | .addReg(RegNo: BaseReg, flags: RegState::Kill) |
2321 | .add(MOs: predOps(Pred, PredReg)) |
2322 | .add(MO: condCodeOp()) |
2323 | .setMIFlags(MIFlags); |
2324 | return; |
2325 | } |
2326 | |
2327 | bool isSub = NumBytes < 0; |
2328 | if (isSub) NumBytes = -NumBytes; |
2329 | |
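     |   // For example (sketch): NumBytes = 0x10004 is not a single modified |
     |   // immediate, so the loop below emits "add rD, rN, #4" followed by |
     |   // "add rD, rD, #0x10000", one chunk per iteration via getSOImmValRotate. |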
2330 | while (NumBytes) { |
2331 | unsigned RotAmt = ARM_AM::getSOImmValRotate(Imm: NumBytes); |
2332 | unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(V: 0xFF, R: RotAmt); |
2333 | assert(ThisVal && "Didn't extract field correctly" ); |
2334 | |
2335 | // We will handle these bits from offset, clear them. |
2336 | NumBytes &= ~ThisVal; |
2337 | |
2338 | assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?" ); |
2339 | |
2340 | // Build the new ADD / SUB. |
2341 | unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; |
2342 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: Opc), DestReg) |
2343 | .addReg(RegNo: BaseReg, flags: RegState::Kill) |
2344 | .addImm(Val: ThisVal) |
2345 | .add(MOs: predOps(Pred, PredReg)) |
2346 | .add(MO: condCodeOp()) |
2347 | .setMIFlags(MIFlags); |
2348 | BaseReg = DestReg; |
2349 | } |
2350 | } |
2351 | |
2352 | bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, |
2353 | MachineFunction &MF, MachineInstr *MI, |
2354 | unsigned NumBytes) { |
2355 |   // This optimisation potentially adds lots of load and store |
2356 |   // micro-operations, so it is really only a benefit for code size. |
2357 | if (!Subtarget.hasMinSize()) |
2358 | return false; |
2359 | |
2360 | // If only one register is pushed/popped, LLVM can use an LDR/STR |
2361 | // instead. We can't modify those so make sure we're dealing with an |
2362 | // instruction we understand. |
2363 | bool IsPop = isPopOpcode(Opc: MI->getOpcode()); |
2364 | bool IsPush = isPushOpcode(Opc: MI->getOpcode()); |
2365 | if (!IsPush && !IsPop) |
2366 | return false; |
2367 | |
2368 | bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD || |
2369 | MI->getOpcode() == ARM::VLDMDIA_UPD; |
2370 | bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH || |
2371 | MI->getOpcode() == ARM::tPOP || |
2372 | MI->getOpcode() == ARM::tPOP_RET; |
2373 | |
2374 | assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP && |
2375 | MI->getOperand(1).getReg() == ARM::SP)) && |
2376 | "trying to fold sp update into non-sp-updating push/pop" ); |
2377 | |
2378 |   // The VFP push & pop act on D-registers, so we can only correctly fold in |
2379 |   // an adjustment that is a multiple of 8 bytes. Similarly, each rN is 4 bytes. |
2380 |   // Don't try if this is violated. |
2381 | if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0) |
2382 | return false; |
2383 | |
2384 | // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ |
2385 | // pred) so the list starts at 4. Thumb1 starts after the predicate. |
2386 | int RegListIdx = IsT1PushPop ? 2 : 4; |
2387 | |
2388 | // Calculate the space we'll need in terms of registers. |
2389 | unsigned RegsNeeded; |
2390 | const TargetRegisterClass *RegClass; |
2391 | if (IsVFPPushPop) { |
2392 | RegsNeeded = NumBytes / 8; |
2393 | RegClass = &ARM::DPRRegClass; |
2394 | } else { |
2395 | RegsNeeded = NumBytes / 4; |
2396 | RegClass = &ARM::GPRRegClass; |
2397 | } |
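     |   // E.g. an 8-byte SP adjustment needs two spare GPR slots in a GPR |
     |   // push/pop, but only a single D-register slot in a VFP push/pop. |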
2398 | |
2399 | // We're going to have to strip all list operands off before |
2400 | // re-adding them since the order matters, so save the existing ones |
2401 | // for later. |
2402 | SmallVector<MachineOperand, 4> RegList; |
2403 | |
2404 | // We're also going to need the first register transferred by this |
2405 | // instruction, which won't necessarily be the first register in the list. |
2406 | unsigned FirstRegEnc = -1; |
2407 | |
2408 | const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo(); |
2409 | for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) { |
2410 | MachineOperand &MO = MI->getOperand(i); |
2411 | RegList.push_back(Elt: MO); |
2412 | |
2413 | if (MO.isReg() && !MO.isImplicit() && |
2414 | TRI->getEncodingValue(Reg: MO.getReg()) < FirstRegEnc) |
2415 | FirstRegEnc = TRI->getEncodingValue(Reg: MO.getReg()); |
2416 | } |
2417 | |
2418 | const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(MF: &MF); |
2419 | |
2420 | // Now try to find enough space in the reglist to allocate NumBytes. |
2421 | for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded; |
2422 | --CurRegEnc) { |
2423 | MCRegister CurReg = RegClass->getRegister(i: CurRegEnc); |
2424 | if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(Reg: ARM::R7)) |
2425 | continue; |
2426 | if (!IsPop) { |
2427 | // Pushing any register is completely harmless, mark the register involved |
2428 | // as undef since we don't care about its value and must not restore it |
2429 | // during stack unwinding. |
2430 | RegList.push_back(Elt: MachineOperand::CreateReg(Reg: CurReg, isDef: false, isImp: false, |
2431 | isKill: false, isDead: false, isUndef: true)); |
2432 | --RegsNeeded; |
2433 | continue; |
2434 | } |
2435 | |
2436 | // However, we can only pop an extra register if it's not live. For |
2437 | // registers live within the function we might clobber a return value |
2438 | // register; the other way a register can be live here is if it's |
2439 | // callee-saved. |
2440 | if (isCalleeSavedRegister(Reg: CurReg, CSRegs) || |
2441 | MI->getParent()->computeRegisterLiveness(TRI, Reg: CurReg, Before: MI) != |
2442 | MachineBasicBlock::LQR_Dead) { |
2443 | // VFP pops don't allow holes in the register list, so any skip is fatal |
2444 | // for our transformation. GPR pops do, so we should just keep looking. |
2445 | if (IsVFPPushPop) |
2446 | return false; |
2447 | else |
2448 | continue; |
2449 | } |
2450 | |
2451 | // Mark the unimportant registers as <def,dead> in the POP. |
2452 | RegList.push_back(Elt: MachineOperand::CreateReg(Reg: CurReg, isDef: true, isImp: false, isKill: false, |
2453 | isDead: true)); |
2454 | --RegsNeeded; |
2455 | } |
2456 | |
2457 | if (RegsNeeded > 0) |
2458 | return false; |
2459 | |
2460 | // Finally we know we can profitably perform the optimisation so go |
2461 | // ahead: strip all existing registers off and add them back again |
2462 | // in the right order. |
2463 | for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) |
2464 | MI->removeOperand(OpNo: i); |
2465 | |
2466 | // Add the complete list back in. |
2467 | MachineInstrBuilder MIB(MF, &*MI); |
2468 | for (const MachineOperand &MO : llvm::reverse(C&: RegList)) |
2469 | MIB.add(MO); |
2470 | |
2471 | return true; |
2472 | } |
2473 | |
2474 | bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, |
2475 | Register FrameReg, int &Offset, |
2476 | const ARMBaseInstrInfo &TII) { |
2477 | unsigned Opcode = MI.getOpcode(); |
2478 | const MCInstrDesc &Desc = MI.getDesc(); |
2479 | unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); |
2480 | bool isSub = false; |
2481 | |
2482 | // Memory operands in inline assembly always use AddrMode2. |
2483 | if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR) |
2484 | AddrMode = ARMII::AddrMode2; |
2485 | |
2486 | if (Opcode == ARM::ADDri) { |
2487 | Offset += MI.getOperand(i: FrameRegIdx+1).getImm(); |
2488 | if (Offset == 0) { |
2489 | // Turn it into a move. |
2490 | MI.setDesc(TII.get(Opcode: ARM::MOVr)); |
2491 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
2492 | MI.removeOperand(OpNo: FrameRegIdx+1); |
2493 | Offset = 0; |
2494 | return true; |
2495 | } else if (Offset < 0) { |
2496 | Offset = -Offset; |
2497 | isSub = true; |
2498 | MI.setDesc(TII.get(Opcode: ARM::SUBri)); |
2499 | } |
2500 | |
2501 | // Common case: small offset, fits into instruction. |
2502 | if (ARM_AM::getSOImmVal(Arg: Offset) != -1) { |
2503 | // Replace the FrameIndex with sp / fp |
2504 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
2505 | MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: Offset); |
2506 | Offset = 0; |
2507 | return true; |
2508 | } |
2509 | |
2510 |     // Otherwise, pull as much of the immediate into this ADDri/SUBri |
2511 | // as possible. |
2512 | unsigned RotAmt = ARM_AM::getSOImmValRotate(Imm: Offset); |
2513 | unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(V: 0xFF, R: RotAmt); |
2514 | |
2515 | // We will handle these bits from offset, clear them. |
2516 | Offset &= ~ThisImmVal; |
2517 | |
2518 | // Get the properly encoded SOImmVal field. |
2519 | assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 && |
2520 | "Bit extraction didn't work?" ); |
2521 | MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: ThisImmVal); |
2522 | } else { |
2523 | unsigned ImmIdx = 0; |
2524 | int InstrOffs = 0; |
2525 | unsigned NumBits = 0; |
2526 | unsigned Scale = 1; |
2527 | switch (AddrMode) { |
2528 | case ARMII::AddrMode_i12: |
2529 | ImmIdx = FrameRegIdx + 1; |
2530 | InstrOffs = MI.getOperand(i: ImmIdx).getImm(); |
2531 | NumBits = 12; |
2532 | break; |
2533 | case ARMII::AddrMode2: |
2534 | ImmIdx = FrameRegIdx+2; |
2535 | InstrOffs = ARM_AM::getAM2Offset(AM2Opc: MI.getOperand(i: ImmIdx).getImm()); |
2536 | if (ARM_AM::getAM2Op(AM2Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
2537 | InstrOffs *= -1; |
2538 | NumBits = 12; |
2539 | break; |
2540 | case ARMII::AddrMode3: |
2541 | ImmIdx = FrameRegIdx+2; |
2542 | InstrOffs = ARM_AM::getAM3Offset(AM3Opc: MI.getOperand(i: ImmIdx).getImm()); |
2543 | if (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
2544 | InstrOffs *= -1; |
2545 | NumBits = 8; |
2546 | break; |
2547 | case ARMII::AddrMode4: |
2548 | case ARMII::AddrMode6: |
2549 | // Can't fold any offset even if it's zero. |
2550 | return false; |
2551 | case ARMII::AddrMode5: |
2552 | ImmIdx = FrameRegIdx+1; |
2553 | InstrOffs = ARM_AM::getAM5Offset(AM5Opc: MI.getOperand(i: ImmIdx).getImm()); |
2554 | if (ARM_AM::getAM5Op(AM5Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
2555 | InstrOffs *= -1; |
2556 | NumBits = 8; |
2557 | Scale = 4; |
2558 | break; |
2559 | case ARMII::AddrMode5FP16: |
2560 | ImmIdx = FrameRegIdx+1; |
2561 | InstrOffs = ARM_AM::getAM5Offset(AM5Opc: MI.getOperand(i: ImmIdx).getImm()); |
2562 | if (ARM_AM::getAM5Op(AM5Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
2563 | InstrOffs *= -1; |
2564 | NumBits = 8; |
2565 | Scale = 2; |
2566 | break; |
2567 | case ARMII::AddrModeT2_i7: |
2568 | case ARMII::AddrModeT2_i7s2: |
2569 | case ARMII::AddrModeT2_i7s4: |
2570 | ImmIdx = FrameRegIdx+1; |
2571 | InstrOffs = MI.getOperand(i: ImmIdx).getImm(); |
2572 | NumBits = 7; |
2573 | Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 : |
2574 | AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1); |
2575 | break; |
2576 | default: |
2577 | llvm_unreachable("Unsupported addressing mode!" ); |
2578 | } |
2579 | |
2580 | Offset += InstrOffs * Scale; |
2581 | assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!" ); |
2582 | if (Offset < 0) { |
2583 | Offset = -Offset; |
2584 | isSub = true; |
2585 | } |
2586 | |
2587 |     // Attempt to fold the address computation if the opcode has offset bits. |
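     |     // (For AddrMode5, for instance, NumBits = 8 and Scale = 4, so offsets up |
     |     // to 1020 that are a multiple of 4 fold directly; anything larger is |
     |     // only partially folded below.) |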
2588 | if (NumBits > 0) { |
2589 | // Common case: small offset, fits into instruction. |
2590 | MachineOperand &ImmOp = MI.getOperand(i: ImmIdx); |
2591 | int ImmedOffset = Offset / Scale; |
2592 | unsigned Mask = (1 << NumBits) - 1; |
2593 | if ((unsigned)Offset <= Mask * Scale) { |
2594 | // Replace the FrameIndex with sp |
2595 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
2596 | // FIXME: When addrmode2 goes away, this will simplify (like the |
2597 | // T2 version), as the LDR.i12 versions don't need the encoding |
2598 | // tricks for the offset value. |
2599 | if (isSub) { |
2600 | if (AddrMode == ARMII::AddrMode_i12) |
2601 | ImmedOffset = -ImmedOffset; |
2602 | else |
2603 | ImmedOffset |= 1 << NumBits; |
2604 | } |
2605 | ImmOp.ChangeToImmediate(ImmVal: ImmedOffset); |
2606 | Offset = 0; |
2607 | return true; |
2608 | } |
2609 | |
2610 | // Otherwise, it didn't fit. Pull in what we can to simplify the immed. |
2611 | ImmedOffset = ImmedOffset & Mask; |
2612 | if (isSub) { |
2613 | if (AddrMode == ARMII::AddrMode_i12) |
2614 | ImmedOffset = -ImmedOffset; |
2615 | else |
2616 | ImmedOffset |= 1 << NumBits; |
2617 | } |
2618 | ImmOp.ChangeToImmediate(ImmVal: ImmedOffset); |
2619 | Offset &= ~(Mask*Scale); |
2620 | } |
2621 | } |
2622 | |
2623 | Offset = (isSub) ? -Offset : Offset; |
2624 | return Offset == 0; |
2625 | } |
2626 | |
2627 | /// analyzeCompare - For a comparison instruction, return the source registers |
2628 | /// in SrcReg and SrcReg2 if having two register operands, and the value it |
2629 | /// compares against in CmpValue. Return true if the comparison instruction |
2630 | /// can be analyzed. |
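     | /// For example, "CMPri %0, 42" yields SrcReg = %0, SrcReg2 = 0, CmpMask = ~0 |
     | /// and CmpValue = 42, while "CMPrr %0, %1" yields SrcReg = %0, SrcReg2 = %1 |
     | /// and CmpValue = 0. |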
2631 | bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, |
2632 | Register &SrcReg2, int64_t &CmpMask, |
2633 | int64_t &CmpValue) const { |
2634 | switch (MI.getOpcode()) { |
2635 | default: break; |
2636 | case ARM::CMPri: |
2637 | case ARM::t2CMPri: |
2638 | case ARM::tCMPi8: |
2639 | SrcReg = MI.getOperand(i: 0).getReg(); |
2640 | SrcReg2 = 0; |
2641 | CmpMask = ~0; |
2642 | CmpValue = MI.getOperand(i: 1).getImm(); |
2643 | return true; |
2644 | case ARM::CMPrr: |
2645 | case ARM::t2CMPrr: |
2646 | case ARM::tCMPr: |
2647 | SrcReg = MI.getOperand(i: 0).getReg(); |
2648 | SrcReg2 = MI.getOperand(i: 1).getReg(); |
2649 | CmpMask = ~0; |
2650 | CmpValue = 0; |
2651 | return true; |
2652 | case ARM::TSTri: |
2653 | case ARM::t2TSTri: |
2654 | SrcReg = MI.getOperand(i: 0).getReg(); |
2655 | SrcReg2 = 0; |
2656 | CmpMask = MI.getOperand(i: 1).getImm(); |
2657 | CmpValue = 0; |
2658 | return true; |
2659 | } |
2660 | |
2661 | return false; |
2662 | } |
2663 | |
2664 | /// isSuitableForMask - Identify a suitable 'and' instruction that |
2665 | /// operates on the given source register and applies the same mask |
2666 | /// as a 'tst' instruction. Provide a limited look-through for copies. |
2667 | /// When successful, MI will hold the found instruction. |
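/// For example (illustrative): an 'and r1, r0, #255' applies the same mask as
/// a 'tst r0, #255', so once it is made flag-setting it can stand in for the
/// TST.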
2668 | static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg, |
2669 | int CmpMask, bool CommonUse) { |
2670 | switch (MI->getOpcode()) { |
2671 | case ARM::ANDri: |
2672 | case ARM::t2ANDri: |
2673 | if (CmpMask != MI->getOperand(i: 2).getImm()) |
2674 | return false; |
2675 | if (SrcReg == MI->getOperand(i: CommonUse ? 1 : 0).getReg()) |
2676 | return true; |
2677 | break; |
2678 | } |
2679 | |
2680 | return false; |
2681 | } |
2682 | |
2683 | /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return |
2684 | /// the condition code if we modify the instructions such that flags are |
2685 | /// set by ADD(a,b,X). |
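/// (Why only these four: the ADD's carry flag is the inverse of the
/// borrow-style carry that CMP(a,b) would produce, so HS and LO swap, while
/// the overflow flag is the same for both, so VS/VC are preserved. The other
/// conditions depend on N/Z, which do not translate; AL signals "cannot
/// convert".)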
2686 | inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) { |
2687 | switch (CC) { |
2688 | default: return ARMCC::AL; |
2689 | case ARMCC::HS: return ARMCC::LO; |
2690 | case ARMCC::LO: return ARMCC::HS; |
2691 | case ARMCC::VS: return ARMCC::VS; |
2692 | case ARMCC::VC: return ARMCC::VC; |
2693 | } |
2694 | } |
2695 | |
2696 | /// isRedundantFlagInstr - check whether the first instruction, whose only |
2697 | /// purpose is to update flags, can be made redundant. |
2698 | /// CMPrr can be made redundant by SUBrr if the operands are the same. |
2699 | /// CMPri can be made redundant by SUBri if the operands are the same. |
2700 | /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X). |
2701 | /// This function can be extended later on. |
2702 | inline static bool isRedundantFlagInstr(const MachineInstr *CmpI, |
2703 | Register SrcReg, Register SrcReg2, |
2704 | int64_t ImmValue, |
2705 | const MachineInstr *OI, |
2706 | bool &IsThumb1) { |
2707 | if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) && |
2708 | (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) && |
2709 | ((OI->getOperand(i: 1).getReg() == SrcReg && |
2710 | OI->getOperand(i: 2).getReg() == SrcReg2) || |
2711 | (OI->getOperand(i: 1).getReg() == SrcReg2 && |
2712 | OI->getOperand(i: 2).getReg() == SrcReg))) { |
2713 | IsThumb1 = false; |
2714 | return true; |
2715 | } |
2716 | |
2717 | if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr && |
2718 | ((OI->getOperand(i: 2).getReg() == SrcReg && |
2719 | OI->getOperand(i: 3).getReg() == SrcReg2) || |
2720 | (OI->getOperand(i: 2).getReg() == SrcReg2 && |
2721 | OI->getOperand(i: 3).getReg() == SrcReg))) { |
2722 | IsThumb1 = true; |
2723 | return true; |
2724 | } |
2725 | |
2726 | if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) && |
2727 | (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) && |
2728 | OI->getOperand(i: 1).getReg() == SrcReg && |
2729 | OI->getOperand(i: 2).getImm() == ImmValue) { |
2730 | IsThumb1 = false; |
2731 | return true; |
2732 | } |
2733 | |
2734 | if (CmpI->getOpcode() == ARM::tCMPi8 && |
2735 | (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) && |
2736 | OI->getOperand(i: 2).getReg() == SrcReg && |
2737 | OI->getOperand(i: 3).getImm() == ImmValue) { |
2738 | IsThumb1 = true; |
2739 | return true; |
2740 | } |
2741 | |
2742 | if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) && |
2743 | (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr || |
2744 | OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) && |
2745 | OI->getOperand(i: 0).isReg() && OI->getOperand(i: 1).isReg() && |
2746 | OI->getOperand(i: 0).getReg() == SrcReg && |
2747 | OI->getOperand(i: 1).getReg() == SrcReg2) { |
2748 | IsThumb1 = false; |
2749 | return true; |
2750 | } |
2751 | |
2752 | if (CmpI->getOpcode() == ARM::tCMPr && |
2753 | (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 || |
2754 | OI->getOpcode() == ARM::tADDrr) && |
2755 | OI->getOperand(i: 0).getReg() == SrcReg && |
2756 | OI->getOperand(i: 2).getReg() == SrcReg2) { |
2757 | IsThumb1 = true; |
2758 | return true; |
2759 | } |
2760 | |
2761 | return false; |
2762 | } |
2763 | |
2764 | static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) { |
2765 | switch (MI->getOpcode()) { |
2766 | default: return false; |
2767 | case ARM::tLSLri: |
2768 | case ARM::tLSRri: |
2769 | case ARM::tLSLrr: |
2770 | case ARM::tLSRrr: |
2771 | case ARM::tSUBrr: |
2772 | case ARM::tADDrr: |
2773 | case ARM::tADDi3: |
2774 | case ARM::tADDi8: |
2775 | case ARM::tSUBi3: |
2776 | case ARM::tSUBi8: |
2777 | case ARM::tMUL: |
2778 | case ARM::tADC: |
2779 | case ARM::tSBC: |
2780 | case ARM::tRSB: |
2781 | case ARM::tAND: |
2782 | case ARM::tORR: |
2783 | case ARM::tEOR: |
2784 | case ARM::tBIC: |
2785 | case ARM::tMVN: |
2786 | case ARM::tASRri: |
2787 | case ARM::tASRrr: |
2788 | case ARM::tROR: |
2789 | IsThumb1 = true; |
2790 | [[fallthrough]]; |
2791 | case ARM::RSBrr: |
2792 | case ARM::RSBri: |
2793 | case ARM::RSCrr: |
2794 | case ARM::RSCri: |
2795 | case ARM::ADDrr: |
2796 | case ARM::ADDri: |
2797 | case ARM::ADCrr: |
2798 | case ARM::ADCri: |
2799 | case ARM::SUBrr: |
2800 | case ARM::SUBri: |
2801 | case ARM::SBCrr: |
2802 | case ARM::SBCri: |
2803 | case ARM::t2RSBri: |
2804 | case ARM::t2ADDrr: |
2805 | case ARM::t2ADDri: |
2806 | case ARM::t2ADCrr: |
2807 | case ARM::t2ADCri: |
2808 | case ARM::t2SUBrr: |
2809 | case ARM::t2SUBri: |
2810 | case ARM::t2SBCrr: |
2811 | case ARM::t2SBCri: |
2812 | case ARM::ANDrr: |
2813 | case ARM::ANDri: |
2814 | case ARM::ANDrsr: |
2815 | case ARM::ANDrsi: |
2816 | case ARM::t2ANDrr: |
2817 | case ARM::t2ANDri: |
2818 | case ARM::t2ANDrs: |
2819 | case ARM::ORRrr: |
2820 | case ARM::ORRri: |
2821 | case ARM::ORRrsr: |
2822 | case ARM::ORRrsi: |
2823 | case ARM::t2ORRrr: |
2824 | case ARM::t2ORRri: |
2825 | case ARM::t2ORRrs: |
2826 | case ARM::EORrr: |
2827 | case ARM::EORri: |
2828 | case ARM::EORrsr: |
2829 | case ARM::EORrsi: |
2830 | case ARM::t2EORrr: |
2831 | case ARM::t2EORri: |
2832 | case ARM::t2EORrs: |
2833 | case ARM::BICri: |
2834 | case ARM::BICrr: |
2835 | case ARM::BICrsi: |
2836 | case ARM::BICrsr: |
2837 | case ARM::t2BICri: |
2838 | case ARM::t2BICrr: |
2839 | case ARM::t2BICrs: |
2840 | case ARM::t2LSRri: |
2841 | case ARM::t2LSRrr: |
2842 | case ARM::t2LSLri: |
2843 | case ARM::t2LSLrr: |
2844 | case ARM::MOVsr: |
2845 | case ARM::MOVsi: |
2846 | return true; |
2847 | } |
2848 | } |
2849 | |
/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register, and
/// remove the redundant Compare instruction if an earlier instruction can set
/// the flags in the same way as Compare.
2854 | /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two |
2855 | /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the |
2856 | /// condition code of instructions which use the flags. |
2857 | bool ARMBaseInstrInfo::optimizeCompareInstr( |
2858 | MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, |
2859 | int64_t CmpValue, const MachineRegisterInfo *MRI) const { |
2860 | // Get the unique definition of SrcReg. |
2861 | MachineInstr *MI = MRI->getUniqueVRegDef(Reg: SrcReg); |
2862 | if (!MI) return false; |
2863 | |
2864 | // Masked compares sometimes use the same register as the corresponding 'and'. |
2865 | if (CmpMask != ~0) { |
2866 | if (!isSuitableForMask(MI, SrcReg, CmpMask, CommonUse: false) || isPredicated(MI: *MI)) { |
2867 | MI = nullptr; |
2868 | for (MachineRegisterInfo::use_instr_iterator |
2869 | UI = MRI->use_instr_begin(RegNo: SrcReg), UE = MRI->use_instr_end(); |
2870 | UI != UE; ++UI) { |
2871 | if (UI->getParent() != CmpInstr.getParent()) |
2872 | continue; |
2873 | MachineInstr *PotentialAND = &*UI; |
2874 | if (!isSuitableForMask(MI&: PotentialAND, SrcReg, CmpMask, CommonUse: true) || |
2875 | isPredicated(MI: *PotentialAND)) |
2876 | continue; |
2877 | MI = PotentialAND; |
2878 | break; |
2879 | } |
2880 | if (!MI) return false; |
2881 | } |
2882 | } |
2883 | |
2884 | // Get ready to iterate backward from CmpInstr. |
2885 | MachineBasicBlock::iterator I = CmpInstr, E = MI, |
2886 | B = CmpInstr.getParent()->begin(); |
2887 | |
2888 | // Early exit if CmpInstr is at the beginning of the BB. |
2889 | if (I == B) return false; |
2890 | |
2891 | // There are two possible candidates which can be changed to set CPSR: |
2892 | // One is MI, the other is a SUB or ADD instruction. |
2893 | // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or |
2894 | // ADDr[ri](r1, r2, X). |
2895 | // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). |
2896 | MachineInstr *SubAdd = nullptr; |
2897 | if (SrcReg2 != 0) |
2898 | // MI is not a candidate for CMPrr. |
2899 | MI = nullptr; |
2900 | else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) { |
2901 | // Conservatively refuse to convert an instruction which isn't in the same |
2902 | // BB as the comparison. |
2903 | // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate. |
2904 | // Thus we cannot return here. |
2905 | if (CmpInstr.getOpcode() == ARM::CMPri || |
2906 | CmpInstr.getOpcode() == ARM::t2CMPri || |
2907 | CmpInstr.getOpcode() == ARM::tCMPi8) |
2908 | MI = nullptr; |
2909 | else |
2910 | return false; |
2911 | } |
2912 | |
2913 | bool IsThumb1 = false; |
2914 | if (MI && !isOptimizeCompareCandidate(MI, IsThumb1)) |
2915 | return false; |
2916 | |
2917 | // We also want to do this peephole for cases like this: if (a*b == 0), |
2918 | // and optimise away the CMP instruction from the generated code sequence: |
2919 | // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values |
2920 | // resulting from the select instruction, but these MOVS instructions for |
2921 | // Thumb1 (V6M) are flag setting and are thus preventing this optimisation. |
2922 | // However, if we only have MOVS instructions in between the CMP and the |
2923 | // other instruction (the MULS in this example), then the CPSR is dead so we |
2924 | // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this |
2925 | // reordering and then continue the analysis hoping we can eliminate the |
2926 | // CMP. This peephole works on the vregs, so is still in SSA form. As a |
2927 | // consequence, the movs won't redefine/kill the MUL operands which would |
2928 | // make this reordering illegal. |
2929 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
2930 | if (MI && IsThumb1) { |
2931 | --I; |
2932 | if (I != E && !MI->readsRegister(Reg: ARM::CPSR, TRI)) { |
2933 | bool CanReorder = true; |
2934 | for (; I != E; --I) { |
2935 | if (I->getOpcode() != ARM::tMOVi8) { |
2936 | CanReorder = false; |
2937 | break; |
2938 | } |
2939 | } |
2940 | if (CanReorder) { |
2941 | MI = MI->removeFromParent(); |
2942 | E = CmpInstr; |
2943 | CmpInstr.getParent()->insert(I: E, MI); |
2944 | } |
2945 | } |
2946 | I = CmpInstr; |
2947 | E = MI; |
2948 | } |
2949 | |
2950 | // Check that CPSR isn't set between the comparison instruction and the one we |
2951 | // want to change. At the same time, search for SubAdd. |
2952 | bool SubAddIsThumb1 = false; |
2953 | do { |
2954 | const MachineInstr &Instr = *--I; |
2955 | |
2956 | // Check whether CmpInstr can be made redundant by the current instruction. |
2957 | if (isRedundantFlagInstr(CmpI: &CmpInstr, SrcReg, SrcReg2, ImmValue: CmpValue, OI: &Instr, |
2958 | IsThumb1&: SubAddIsThumb1)) { |
2959 | SubAdd = &*I; |
2960 | break; |
2961 | } |
2962 | |
2963 | // Allow E (which was initially MI) to be SubAdd but do not search before E. |
2964 | if (I == E) |
2965 | break; |
2966 | |
2967 | if (Instr.modifiesRegister(Reg: ARM::CPSR, TRI) || |
2968 | Instr.readsRegister(Reg: ARM::CPSR, TRI)) |
2969 | // This instruction modifies or uses CPSR after the one we want to |
2970 | // change. We can't do this transformation. |
2971 | return false; |
2972 | |
2973 | if (I == B) { |
2974 | // In some cases, we scan the use-list of an instruction for an AND; |
2975 | // that AND is in the same BB, but may not be scheduled before the |
2976 | // corresponding TST. In that case, bail out. |
2977 | // |
2978 | // FIXME: We could try to reschedule the AND. |
2979 | return false; |
2980 | } |
2981 | } while (true); |
2982 | |
2983 | // Return false if no candidates exist. |
2984 | if (!MI && !SubAdd) |
2985 | return false; |
2986 | |
2987 | // If we found a SubAdd, use it as it will be closer to the CMP |
2988 | if (SubAdd) { |
2989 | MI = SubAdd; |
2990 | IsThumb1 = SubAddIsThumb1; |
2991 | } |
2992 | |
2993 | // We can't use a predicated instruction - it doesn't always write the flags. |
2994 | if (isPredicated(MI: *MI)) |
2995 | return false; |
2996 | |
2997 | // Scan forward for the use of CPSR |
2998 | // When checking against MI: if it's a conditional code that requires |
2999 | // checking of the V bit or C bit, then this is not safe to do. |
3000 | // It is safe to remove CmpInstr if CPSR is redefined or killed. |
3001 | // If we are done with the basic block, we need to check whether CPSR is |
3002 | // live-out. |
3003 | SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> |
3004 | OperandsToUpdate; |
3005 | bool isSafe = false; |
3006 | I = CmpInstr; |
3007 | E = CmpInstr.getParent()->end(); |
3008 | while (!isSafe && ++I != E) { |
3009 | const MachineInstr &Instr = *I; |
3010 | for (unsigned IO = 0, EO = Instr.getNumOperands(); |
3011 | !isSafe && IO != EO; ++IO) { |
3012 | const MachineOperand &MO = Instr.getOperand(i: IO); |
3013 | if (MO.isRegMask() && MO.clobbersPhysReg(PhysReg: ARM::CPSR)) { |
3014 | isSafe = true; |
3015 | break; |
3016 | } |
3017 | if (!MO.isReg() || MO.getReg() != ARM::CPSR) |
3018 | continue; |
3019 | if (MO.isDef()) { |
3020 | isSafe = true; |
3021 | break; |
3022 | } |
      // The condition code is the operand immediately before CPSR, except for
      // VSELs.
3024 | ARMCC::CondCodes CC; |
3025 | bool IsInstrVSel = true; |
3026 | switch (Instr.getOpcode()) { |
3027 | default: |
3028 | IsInstrVSel = false; |
3029 | CC = (ARMCC::CondCodes)Instr.getOperand(i: IO - 1).getImm(); |
3030 | break; |
3031 | case ARM::VSELEQD: |
3032 | case ARM::VSELEQS: |
3033 | case ARM::VSELEQH: |
3034 | CC = ARMCC::EQ; |
3035 | break; |
3036 | case ARM::VSELGTD: |
3037 | case ARM::VSELGTS: |
3038 | case ARM::VSELGTH: |
3039 | CC = ARMCC::GT; |
3040 | break; |
3041 | case ARM::VSELGED: |
3042 | case ARM::VSELGES: |
3043 | case ARM::VSELGEH: |
3044 | CC = ARMCC::GE; |
3045 | break; |
3046 | case ARM::VSELVSD: |
3047 | case ARM::VSELVSS: |
3048 | case ARM::VSELVSH: |
3049 | CC = ARMCC::VS; |
3050 | break; |
3051 | } |
3052 | |
3053 | if (SubAdd) { |
3054 | // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based |
3055 | // on CMP needs to be updated to be based on SUB. |
3056 | // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also |
3057 | // needs to be modified. |
3058 | // Push the condition code operands to OperandsToUpdate. |
3059 | // If it is safe to remove CmpInstr, the condition code of these |
3060 | // operands will be modified. |
3061 | unsigned Opc = SubAdd->getOpcode(); |
3062 | bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr || |
3063 | Opc == ARM::SUBri || Opc == ARM::t2SUBri || |
3064 | Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 || |
3065 | Opc == ARM::tSUBi8; |
3066 | unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2; |
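        // Thumb1 tSUBrr lists its CPSR def before the source registers (see
        // isRedundantFlagInstr above), so its first source operand is at
        // index 2 rather than 1.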
3067 | if (!IsSub || |
3068 | (SrcReg2 != 0 && SubAdd->getOperand(i: OpI).getReg() == SrcReg2 && |
3069 | SubAdd->getOperand(i: OpI + 1).getReg() == SrcReg)) { |
3070 | // VSel doesn't support condition code update. |
3071 | if (IsInstrVSel) |
3072 | return false; |
3073 | // Ensure we can swap the condition. |
3074 | ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC)); |
3075 | if (NewCC == ARMCC::AL) |
3076 | return false; |
3077 | OperandsToUpdate.push_back( |
3078 | Elt: std::make_pair(x: &((*I).getOperand(i: IO - 1)), y&: NewCC)); |
3079 | } |
3080 | } else { |
3081 | // No SubAdd, so this is x = <op> y, z; cmp x, 0. |
3082 | switch (CC) { |
3083 | case ARMCC::EQ: // Z |
3084 | case ARMCC::NE: // Z |
3085 | case ARMCC::MI: // N |
3086 | case ARMCC::PL: // N |
3087 | case ARMCC::AL: // none |
3088 | // CPSR can be used multiple times, we should continue. |
3089 | break; |
3090 | case ARMCC::HS: // C |
3091 | case ARMCC::LO: // C |
3092 | case ARMCC::VS: // V |
3093 | case ARMCC::VC: // V |
3094 | case ARMCC::HI: // C Z |
3095 | case ARMCC::LS: // C Z |
3096 | case ARMCC::GE: // N V |
3097 | case ARMCC::LT: // N V |
3098 | case ARMCC::GT: // Z N V |
3099 | case ARMCC::LE: // Z N V |
3100 | // The instruction uses the V bit or C bit which is not safe. |
3101 | return false; |
3102 | } |
3103 | } |
3104 | } |
3105 | } |
3106 | |
3107 | // If CPSR is not killed nor re-defined, we should check whether it is |
3108 | // live-out. If it is live-out, do not optimize. |
3109 | if (!isSafe) { |
3110 | MachineBasicBlock *MBB = CmpInstr.getParent(); |
3111 | for (MachineBasicBlock *Succ : MBB->successors()) |
3112 | if (Succ->isLiveIn(Reg: ARM::CPSR)) |
3113 | return false; |
3114 | } |
3115 | |
3116 | // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always |
3117 | // set CPSR so this is represented as an explicit output) |
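  // (Illustrative: the optional cc_out operand, normally %noreg, is rewritten
  // below into a def of CPSR, which turns MI into the flag-setting "S" form
  // of the instruction.)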
3118 | if (!IsThumb1) { |
3119 | unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1; |
3120 | MI->getOperand(i: CPSRRegNum).setReg(ARM::CPSR); |
3121 | MI->getOperand(i: CPSRRegNum).setIsDef(true); |
3122 | } |
  assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3124 | CmpInstr.eraseFromParent(); |
3125 | |
3126 | // Modify the condition code of operands in OperandsToUpdate. |
3127 | // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to |
3128 | // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. |
3129 | for (auto &[MO, Cond] : OperandsToUpdate) |
3130 | MO->setImm(Cond); |
3131 | |
3132 | MI->clearRegisterDeads(Reg: ARM::CPSR); |
3133 | |
3134 | return true; |
3135 | } |
3136 | |
3137 | bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const { |
3138 | // Do not sink MI if it might be used to optimize a redundant compare. |
3139 | // We heuristically only look at the instruction immediately following MI to |
3140 | // avoid potentially searching the entire basic block. |
3141 | if (isPredicated(MI)) |
3142 | return true; |
3143 | MachineBasicBlock::const_iterator Next = &MI; |
3144 | ++Next; |
3145 | Register SrcReg, SrcReg2; |
3146 | int64_t CmpMask, CmpValue; |
3147 | bool IsThumb1; |
3148 | if (Next != MI.getParent()->end() && |
3149 | analyzeCompare(MI: *Next, SrcReg, SrcReg2, CmpMask, CmpValue) && |
3150 | isRedundantFlagInstr(CmpI: &*Next, SrcReg, SrcReg2, ImmValue: CmpValue, OI: &MI, IsThumb1)) |
3151 | return false; |
3152 | return true; |
3153 | } |
3154 | |
3155 | bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, |
3156 | Register Reg, |
3157 | MachineRegisterInfo *MRI) const { |
3158 | // Fold large immediates into add, sub, or, xor. |
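  // Illustrative example: if %imm = MOVi32imm 0x00FF00FF feeds an ADDrr, the
  // constant splits into the two ARM-encodable parts 0x00FF0000 and
  // 0x000000FF, so the pair is rewritten as two ADDri instructions and the
  // MOVi32imm is deleted.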
3159 | unsigned DefOpc = DefMI.getOpcode(); |
3160 | if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm && |
3161 | DefOpc != ARM::tMOVi32imm) |
3162 | return false; |
3163 | if (!DefMI.getOperand(i: 1).isImm()) |
3164 | // Could be t2MOVi32imm @xx |
3165 | return false; |
3166 | |
3167 | if (!MRI->hasOneNonDBGUse(RegNo: Reg)) |
3168 | return false; |
3169 | |
3170 | const MCInstrDesc &DefMCID = DefMI.getDesc(); |
3171 | if (DefMCID.hasOptionalDef()) { |
3172 | unsigned NumOps = DefMCID.getNumOperands(); |
3173 | const MachineOperand &MO = DefMI.getOperand(i: NumOps - 1); |
3174 | if (MO.getReg() == ARM::CPSR && !MO.isDead()) |
3175 | // If DefMI defines CPSR and it is not dead, it's obviously not safe |
3176 | // to delete DefMI. |
3177 | return false; |
3178 | } |
3179 | |
3180 | const MCInstrDesc &UseMCID = UseMI.getDesc(); |
3181 | if (UseMCID.hasOptionalDef()) { |
3182 | unsigned NumOps = UseMCID.getNumOperands(); |
3183 | if (UseMI.getOperand(i: NumOps - 1).getReg() == ARM::CPSR) |
3184 | // If the instruction sets the flag, do not attempt this optimization |
3185 | // since it may change the semantics of the code. |
3186 | return false; |
3187 | } |
3188 | |
3189 | unsigned UseOpc = UseMI.getOpcode(); |
3190 | unsigned NewUseOpc = 0; |
3191 | uint32_t ImmVal = (uint32_t)DefMI.getOperand(i: 1).getImm(); |
3192 | uint32_t SOImmValV1 = 0, SOImmValV2 = 0; |
3193 | bool Commute = false; |
3194 | switch (UseOpc) { |
3195 | default: return false; |
3196 | case ARM::SUBrr: |
3197 | case ARM::ADDrr: |
3198 | case ARM::ORRrr: |
3199 | case ARM::EORrr: |
3200 | case ARM::t2SUBrr: |
3201 | case ARM::t2ADDrr: |
3202 | case ARM::t2ORRrr: |
3203 | case ARM::t2EORrr: { |
3204 | Commute = UseMI.getOperand(i: 2).getReg() != Reg; |
3205 | switch (UseOpc) { |
3206 | default: break; |
3207 | case ARM::ADDrr: |
3208 | case ARM::SUBrr: |
3209 | if (UseOpc == ARM::SUBrr && Commute) |
3210 | return false; |
3211 | |
3212 | // ADD/SUB are special because they're essentially the same operation, so |
3213 | // we can handle a larger range of immediates. |
3214 | if (ARM_AM::isSOImmTwoPartVal(V: ImmVal)) |
3215 | NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri; |
3216 | else if (ARM_AM::isSOImmTwoPartVal(V: -ImmVal)) { |
3217 | ImmVal = -ImmVal; |
3218 | NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri; |
3219 | } else |
3220 | return false; |
3221 | SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(V: ImmVal); |
3222 | SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(V: ImmVal); |
3223 | break; |
3224 | case ARM::ORRrr: |
3225 | case ARM::EORrr: |
3226 | if (!ARM_AM::isSOImmTwoPartVal(V: ImmVal)) |
3227 | return false; |
3228 | SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(V: ImmVal); |
3229 | SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(V: ImmVal); |
3230 | switch (UseOpc) { |
3231 | default: break; |
3232 | case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; |
3233 | case ARM::EORrr: NewUseOpc = ARM::EORri; break; |
3234 | } |
3235 | break; |
3236 | case ARM::t2ADDrr: |
3237 | case ARM::t2SUBrr: { |
3238 | if (UseOpc == ARM::t2SUBrr && Commute) |
3239 | return false; |
3240 | |
3241 | // ADD/SUB are special because they're essentially the same operation, so |
3242 | // we can handle a larger range of immediates. |
3243 | const bool ToSP = DefMI.getOperand(i: 0).getReg() == ARM::SP; |
3244 | const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri; |
3245 | const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri; |
3246 | if (ARM_AM::isT2SOImmTwoPartVal(Imm: ImmVal)) |
3247 | NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB; |
3248 | else if (ARM_AM::isT2SOImmTwoPartVal(Imm: -ImmVal)) { |
3249 | ImmVal = -ImmVal; |
3250 | NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD; |
3251 | } else |
3252 | return false; |
3253 | SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(Imm: ImmVal); |
3254 | SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(Imm: ImmVal); |
3255 | break; |
3256 | } |
3257 | case ARM::t2ORRrr: |
3258 | case ARM::t2EORrr: |
3259 | if (!ARM_AM::isT2SOImmTwoPartVal(Imm: ImmVal)) |
3260 | return false; |
3261 | SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(Imm: ImmVal); |
3262 | SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(Imm: ImmVal); |
3263 | switch (UseOpc) { |
3264 | default: break; |
3265 | case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; |
3266 | case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break; |
3267 | } |
3268 | break; |
3269 | } |
3270 | } |
3271 | } |
3272 | |
3273 | unsigned OpIdx = Commute ? 2 : 1; |
3274 | Register Reg1 = UseMI.getOperand(i: OpIdx).getReg(); |
3275 | bool isKill = UseMI.getOperand(i: OpIdx).isKill(); |
3276 | const TargetRegisterClass *TRC = MRI->getRegClass(Reg); |
3277 | Register NewReg = MRI->createVirtualRegister(RegClass: TRC); |
3278 | BuildMI(BB&: *UseMI.getParent(), I&: UseMI, MIMD: UseMI.getDebugLoc(), MCID: get(Opcode: NewUseOpc), |
3279 | DestReg: NewReg) |
3280 | .addReg(RegNo: Reg1, flags: getKillRegState(B: isKill)) |
3281 | .addImm(Val: SOImmValV1) |
3282 | .add(MOs: predOps(Pred: ARMCC::AL)) |
3283 | .add(MO: condCodeOp()); |
3284 | UseMI.setDesc(get(Opcode: NewUseOpc)); |
3285 | UseMI.getOperand(i: 1).setReg(NewReg); |
3286 | UseMI.getOperand(i: 1).setIsKill(); |
3287 | UseMI.getOperand(i: 2).ChangeToImmediate(ImmVal: SOImmValV2); |
3288 | DefMI.eraseFromParent(); |
  // FIXME: t2ADDrr should be split, as different rules apply when writing to SP.
  // Just as t2ADDri was split into [t2ADDri, t2ADDspImm], doing so would make
  // the code below unnecessary, as the input/output register classes would be
  // rgpr or gprSP.
  // For now, we fix the UseMI operand explicitly here:
  switch (NewUseOpc) {
3295 | case ARM::t2ADDspImm: |
3296 | case ARM::t2SUBspImm: |
3297 | case ARM::t2ADDri: |
3298 | case ARM::t2SUBri: |
3299 | MRI->constrainRegClass(Reg: UseMI.getOperand(i: 0).getReg(), RC: TRC); |
3300 | } |
3301 | return true; |
3302 | } |
3303 | |
3304 | static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, |
3305 | const MachineInstr &MI) { |
3306 | switch (MI.getOpcode()) { |
3307 | default: { |
3308 | const MCInstrDesc &Desc = MI.getDesc(); |
3309 | int UOps = ItinData->getNumMicroOps(ItinClassIndx: Desc.getSchedClass()); |
3310 | assert(UOps >= 0 && "bad # UOps" ); |
3311 | return UOps; |
3312 | } |
3313 | |
3314 | case ARM::LDRrs: |
3315 | case ARM::LDRBrs: |
3316 | case ARM::STRrs: |
3317 | case ARM::STRBrs: { |
3318 | unsigned ShOpVal = MI.getOperand(i: 3).getImm(); |
3319 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
3320 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
3321 | if (!isSub && |
3322 | (ShImm == 0 || |
3323 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3324 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
3325 | return 1; |
3326 | return 2; |
3327 | } |
3328 | |
3329 | case ARM::LDRH: |
3330 | case ARM::STRH: { |
3331 | if (!MI.getOperand(i: 2).getReg()) |
3332 | return 1; |
3333 | |
3334 | unsigned ShOpVal = MI.getOperand(i: 3).getImm(); |
3335 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
3336 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
3337 | if (!isSub && |
3338 | (ShImm == 0 || |
3339 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3340 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
3341 | return 1; |
3342 | return 2; |
3343 | } |
3344 | |
3345 | case ARM::LDRSB: |
3346 | case ARM::LDRSH: |
3347 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 3).getImm()) == ARM_AM::sub) ? 3 : 2; |
3348 | |
3349 | case ARM::LDRSB_POST: |
3350 | case ARM::LDRSH_POST: { |
3351 | Register Rt = MI.getOperand(i: 0).getReg(); |
3352 | Register Rm = MI.getOperand(i: 3).getReg(); |
3353 | return (Rt == Rm) ? 4 : 3; |
3354 | } |
3355 | |
3356 | case ARM::LDR_PRE_REG: |
3357 | case ARM::LDRB_PRE_REG: { |
3358 | Register Rt = MI.getOperand(i: 0).getReg(); |
3359 | Register Rm = MI.getOperand(i: 3).getReg(); |
3360 | if (Rt == Rm) |
3361 | return 3; |
3362 | unsigned ShOpVal = MI.getOperand(i: 4).getImm(); |
3363 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
3364 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
3365 | if (!isSub && |
3366 | (ShImm == 0 || |
3367 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3368 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
3369 | return 2; |
3370 | return 3; |
3371 | } |
3372 | |
3373 | case ARM::STR_PRE_REG: |
3374 | case ARM::STRB_PRE_REG: { |
3375 | unsigned ShOpVal = MI.getOperand(i: 4).getImm(); |
3376 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
3377 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
3378 | if (!isSub && |
3379 | (ShImm == 0 || |
3380 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3381 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
3382 | return 2; |
3383 | return 3; |
3384 | } |
3385 | |
3386 | case ARM::LDRH_PRE: |
3387 | case ARM::STRH_PRE: { |
3388 | Register Rt = MI.getOperand(i: 0).getReg(); |
3389 | Register Rm = MI.getOperand(i: 3).getReg(); |
3390 | if (!Rm) |
3391 | return 2; |
3392 | if (Rt == Rm) |
3393 | return 3; |
3394 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 4).getImm()) == ARM_AM::sub) ? 3 : 2; |
3395 | } |
3396 | |
3397 | case ARM::LDR_POST_REG: |
3398 | case ARM::LDRB_POST_REG: |
3399 | case ARM::LDRH_POST: { |
3400 | Register Rt = MI.getOperand(i: 0).getReg(); |
3401 | Register Rm = MI.getOperand(i: 3).getReg(); |
3402 | return (Rt == Rm) ? 3 : 2; |
3403 | } |
3404 | |
3405 | case ARM::LDR_PRE_IMM: |
3406 | case ARM::LDRB_PRE_IMM: |
3407 | case ARM::LDR_POST_IMM: |
3408 | case ARM::LDRB_POST_IMM: |
3409 | case ARM::STRB_POST_IMM: |
3410 | case ARM::STRB_POST_REG: |
3411 | case ARM::STRB_PRE_IMM: |
3412 | case ARM::STRH_POST: |
3413 | case ARM::STR_POST_IMM: |
3414 | case ARM::STR_POST_REG: |
3415 | case ARM::STR_PRE_IMM: |
3416 | return 2; |
3417 | |
3418 | case ARM::LDRSB_PRE: |
3419 | case ARM::LDRSH_PRE: { |
3420 | Register Rm = MI.getOperand(i: 3).getReg(); |
3421 | if (Rm == 0) |
3422 | return 3; |
3423 | Register Rt = MI.getOperand(i: 0).getReg(); |
3424 | if (Rt == Rm) |
3425 | return 4; |
3426 | unsigned ShOpVal = MI.getOperand(i: 4).getImm(); |
3427 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
3428 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
3429 | if (!isSub && |
3430 | (ShImm == 0 || |
3431 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3432 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
3433 | return 3; |
3434 | return 4; |
3435 | } |
3436 | |
3437 | case ARM::LDRD: { |
3438 | Register Rt = MI.getOperand(i: 0).getReg(); |
3439 | Register Rn = MI.getOperand(i: 2).getReg(); |
3440 | Register Rm = MI.getOperand(i: 3).getReg(); |
3441 | if (Rm) |
3442 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 4).getImm()) == ARM_AM::sub) ? 4 |
3443 | : 3; |
3444 | return (Rt == Rn) ? 3 : 2; |
3445 | } |
3446 | |
3447 | case ARM::STRD: { |
3448 | Register Rm = MI.getOperand(i: 3).getReg(); |
3449 | if (Rm) |
3450 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 4).getImm()) == ARM_AM::sub) ? 4 |
3451 | : 3; |
3452 | return 2; |
3453 | } |
3454 | |
3455 | case ARM::LDRD_POST: |
3456 | case ARM::t2LDRD_POST: |
3457 | return 3; |
3458 | |
3459 | case ARM::STRD_POST: |
3460 | case ARM::t2STRD_POST: |
3461 | return 4; |
3462 | |
3463 | case ARM::LDRD_PRE: { |
3464 | Register Rt = MI.getOperand(i: 0).getReg(); |
3465 | Register Rn = MI.getOperand(i: 3).getReg(); |
3466 | Register Rm = MI.getOperand(i: 4).getReg(); |
3467 | if (Rm) |
3468 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 5).getImm()) == ARM_AM::sub) ? 5 |
3469 | : 4; |
3470 | return (Rt == Rn) ? 4 : 3; |
3471 | } |
3472 | |
3473 | case ARM::t2LDRD_PRE: { |
3474 | Register Rt = MI.getOperand(i: 0).getReg(); |
3475 | Register Rn = MI.getOperand(i: 3).getReg(); |
3476 | return (Rt == Rn) ? 4 : 3; |
3477 | } |
3478 | |
3479 | case ARM::STRD_PRE: { |
3480 | Register Rm = MI.getOperand(i: 4).getReg(); |
3481 | if (Rm) |
3482 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 5).getImm()) == ARM_AM::sub) ? 5 |
3483 | : 4; |
3484 | return 3; |
3485 | } |
3486 | |
3487 | case ARM::t2STRD_PRE: |
3488 | return 3; |
3489 | |
3490 | case ARM::t2LDR_POST: |
3491 | case ARM::t2LDRB_POST: |
3492 | case ARM::t2LDRB_PRE: |
3493 | case ARM::t2LDRSBi12: |
3494 | case ARM::t2LDRSBi8: |
3495 | case ARM::t2LDRSBpci: |
3496 | case ARM::t2LDRSBs: |
3497 | case ARM::t2LDRH_POST: |
3498 | case ARM::t2LDRH_PRE: |
3499 | case ARM::t2LDRSBT: |
3500 | case ARM::t2LDRSB_POST: |
3501 | case ARM::t2LDRSB_PRE: |
3502 | case ARM::t2LDRSH_POST: |
3503 | case ARM::t2LDRSH_PRE: |
3504 | case ARM::t2LDRSHi12: |
3505 | case ARM::t2LDRSHi8: |
3506 | case ARM::t2LDRSHpci: |
3507 | case ARM::t2LDRSHs: |
3508 | return 2; |
3509 | |
3510 | case ARM::t2LDRDi8: { |
3511 | Register Rt = MI.getOperand(i: 0).getReg(); |
3512 | Register Rn = MI.getOperand(i: 2).getReg(); |
3513 | return (Rt == Rn) ? 3 : 2; |
3514 | } |
3515 | |
3516 | case ARM::t2STRB_POST: |
3517 | case ARM::t2STRB_PRE: |
3518 | case ARM::t2STRBs: |
3519 | case ARM::t2STRDi8: |
3520 | case ARM::t2STRH_POST: |
3521 | case ARM::t2STRH_PRE: |
3522 | case ARM::t2STRHs: |
3523 | case ARM::t2STR_POST: |
3524 | case ARM::t2STR_PRE: |
3525 | case ARM::t2STRs: |
3526 | return 2; |
3527 | } |
3528 | } |
3529 | |
3530 | // Return the number of 32-bit words loaded by LDM or stored by STM. If this |
3531 | // can't be easily determined return 0 (missing MachineMemOperand). |
3532 | // |
3533 | // FIXME: The current MachineInstr design does not support relying on machine |
3534 | // mem operands to determine the width of a memory access. Instead, we expect |
3535 | // the target to provide this information based on the instruction opcode and |
3536 | // operands. However, using MachineMemOperand is the best solution now for |
3537 | // two reasons: |
3538 | // |
3539 | // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI |
3540 | // operands. This is much more dangerous than using the MachineMemOperand |
3541 | // sizes because CodeGen passes can insert/remove optional machine operands. In |
3542 | // fact, it's totally incorrect for preRA passes and appears to be wrong for |
3543 | // postRA passes as well. |
3544 | // |
3545 | // 2) getNumLDMAddresses is only used by the scheduling machine model and any |
3546 | // machine model that calls this should handle the unknown (zero size) case. |
3547 | // |
3548 | // Long term, we should require a target hook that verifies MachineMemOperand |
3549 | // sizes during MC lowering. That target hook should be local to MC lowering |
3550 | // because we can't ensure that it is aware of other MI forms. Doing this will |
3551 | // ensure that MachineMemOperands are correctly propagated through all passes. |
3552 | unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const { |
3553 | unsigned Size = 0; |
3554 | for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), |
3555 | E = MI.memoperands_end(); |
3556 | I != E; ++I) { |
3557 | Size += (*I)->getSize().getValue(); |
3558 | } |
3559 | // FIXME: The scheduler currently can't handle values larger than 16. But |
3560 | // the values can actually go up to 32 for floating-point load/store |
3561 | // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory |
3562 | // operations isn't right; we could end up with "extra" memory operands for |
3563 | // various reasons, like tail merge merging two memory operations. |
3564 | return std::min(a: Size / 4, b: 16U); |
3565 | } |
3566 | |
static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
                                                    unsigned NumRegs) {
3569 | unsigned UOps = 1 + NumRegs; // 1 for address computation. |
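  // Illustrative example: an LDMIA_UPD of 4 registers costs 1 (address
  // computation) + 4 (one per register) + 1 (base writeback, added below)
  // = 6 uops.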
3570 | switch (Opc) { |
3571 | default: |
3572 | break; |
3573 | case ARM::VLDMDIA_UPD: |
3574 | case ARM::VLDMDDB_UPD: |
3575 | case ARM::VLDMSIA_UPD: |
3576 | case ARM::VLDMSDB_UPD: |
3577 | case ARM::VSTMDIA_UPD: |
3578 | case ARM::VSTMDDB_UPD: |
3579 | case ARM::VSTMSIA_UPD: |
3580 | case ARM::VSTMSDB_UPD: |
3581 | case ARM::LDMIA_UPD: |
3582 | case ARM::LDMDA_UPD: |
3583 | case ARM::LDMDB_UPD: |
3584 | case ARM::LDMIB_UPD: |
3585 | case ARM::STMIA_UPD: |
3586 | case ARM::STMDA_UPD: |
3587 | case ARM::STMDB_UPD: |
3588 | case ARM::STMIB_UPD: |
3589 | case ARM::tLDMIA_UPD: |
3590 | case ARM::tSTMIA_UPD: |
3591 | case ARM::t2LDMIA_UPD: |
3592 | case ARM::t2LDMDB_UPD: |
3593 | case ARM::t2STMIA_UPD: |
3594 | case ARM::t2STMDB_UPD: |
3595 | ++UOps; // One for base register writeback. |
3596 | break; |
3597 | case ARM::LDMIA_RET: |
3598 | case ARM::tPOP_RET: |
3599 | case ARM::t2LDMIA_RET: |
3600 | UOps += 2; // One for base reg wb, one for write to pc. |
3601 | break; |
3602 | } |
3603 | return UOps; |
3604 | } |
3605 | |
3606 | unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, |
3607 | const MachineInstr &MI) const { |
3608 | if (!ItinData || ItinData->isEmpty()) |
3609 | return 1; |
3610 | |
3611 | const MCInstrDesc &Desc = MI.getDesc(); |
3612 | unsigned Class = Desc.getSchedClass(); |
3613 | int ItinUOps = ItinData->getNumMicroOps(ItinClassIndx: Class); |
3614 | if (ItinUOps >= 0) { |
3615 | if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore())) |
3616 | return getNumMicroOpsSwiftLdSt(ItinData, MI); |
3617 | |
3618 | return ItinUOps; |
3619 | } |
3620 | |
3621 | unsigned Opc = MI.getOpcode(); |
3622 | switch (Opc) { |
3623 | default: |
3624 | llvm_unreachable("Unexpected multi-uops instruction!" ); |
3625 | case ARM::VLDMQIA: |
3626 | case ARM::VSTMQIA: |
3627 | return 2; |
3628 | |
  // The number of uOps for load / store multiple is determined by the number
  // of registers.
3631 | // |
3632 | // On Cortex-A8, each pair of register loads / stores can be scheduled on the |
3633 | // same cycle. The scheduling for the first load / store must be done |
3634 | // separately by assuming the address is not 64-bit aligned. |
3635 | // |
3636 | // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address |
3637 | // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON |
3638 | // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1. |
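  // Illustrative example (Cortex-A9 formula): a VLDMDIA of 5 D registers is
  // 5/2 + 5%2 + 1 = 2 + 1 + 1 = 4 uops.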
3639 | case ARM::VLDMDIA: |
3640 | case ARM::VLDMDIA_UPD: |
3641 | case ARM::VLDMDDB_UPD: |
3642 | case ARM::VLDMSIA: |
3643 | case ARM::VLDMSIA_UPD: |
3644 | case ARM::VLDMSDB_UPD: |
3645 | case ARM::VSTMDIA: |
3646 | case ARM::VSTMDIA_UPD: |
3647 | case ARM::VSTMDDB_UPD: |
3648 | case ARM::VSTMSIA: |
3649 | case ARM::VSTMSIA_UPD: |
3650 | case ARM::VSTMSDB_UPD: { |
3651 | unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands(); |
3652 | return (NumRegs / 2) + (NumRegs % 2) + 1; |
3653 | } |
3654 | |
3655 | case ARM::LDMIA_RET: |
3656 | case ARM::LDMIA: |
3657 | case ARM::LDMDA: |
3658 | case ARM::LDMDB: |
3659 | case ARM::LDMIB: |
3660 | case ARM::LDMIA_UPD: |
3661 | case ARM::LDMDA_UPD: |
3662 | case ARM::LDMDB_UPD: |
3663 | case ARM::LDMIB_UPD: |
3664 | case ARM::STMIA: |
3665 | case ARM::STMDA: |
3666 | case ARM::STMDB: |
3667 | case ARM::STMIB: |
3668 | case ARM::STMIA_UPD: |
3669 | case ARM::STMDA_UPD: |
3670 | case ARM::STMDB_UPD: |
3671 | case ARM::STMIB_UPD: |
3672 | case ARM::tLDMIA: |
3673 | case ARM::tLDMIA_UPD: |
3674 | case ARM::tSTMIA_UPD: |
3675 | case ARM::tPOP_RET: |
3676 | case ARM::tPOP: |
3677 | case ARM::tPUSH: |
3678 | case ARM::t2LDMIA_RET: |
3679 | case ARM::t2LDMIA: |
3680 | case ARM::t2LDMDB: |
3681 | case ARM::t2LDMIA_UPD: |
3682 | case ARM::t2LDMDB_UPD: |
3683 | case ARM::t2STMIA: |
3684 | case ARM::t2STMDB: |
3685 | case ARM::t2STMIA_UPD: |
3686 | case ARM::t2STMDB_UPD: { |
3687 | unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1; |
3688 | switch (Subtarget.getLdStMultipleTiming()) { |
3689 | case ARMSubtarget::SingleIssuePlusExtras: |
3690 | return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs); |
3691 | case ARMSubtarget::SingleIssue: |
3692 | // Assume the worst. |
3693 | return NumRegs; |
3694 | case ARMSubtarget::DoubleIssue: { |
3695 | if (NumRegs < 4) |
3696 | return 2; |
3697 | // 4 registers would be issued: 2, 2. |
3698 | // 5 registers would be issued: 2, 2, 1. |
3699 | unsigned UOps = (NumRegs / 2); |
3700 | if (NumRegs % 2) |
3701 | ++UOps; |
3702 | return UOps; |
3703 | } |
3704 | case ARMSubtarget::DoubleIssueCheckUnalignedAccess: { |
3705 | unsigned UOps = (NumRegs / 2); |
      // If there is an odd number of registers or if it's not 64-bit aligned,
3707 | // then it takes an extra AGU (Address Generation Unit) cycle. |
3708 | if ((NumRegs % 2) || !MI.hasOneMemOperand() || |
3709 | (*MI.memoperands_begin())->getAlign() < Align(8)) |
3710 | ++UOps; |
3711 | return UOps; |
3712 | } |
3713 | } |
3714 | } |
3715 | } |
3716 | llvm_unreachable("Didn't find the number of microops" ); |
3717 | } |
3718 | |
3719 | std::optional<unsigned> |
3720 | ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, |
3721 | const MCInstrDesc &DefMCID, unsigned DefClass, |
3722 | unsigned DefIdx, unsigned DefAlign) const { |
3723 | int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; |
3724 | if (RegNo <= 0) |
3725 | // Def is the address writeback. |
3726 | return ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx); |
3727 | |
3728 | unsigned DefCycle; |
3729 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
3730 | // (regno / 2) + (regno % 2) + 1 |
3731 | DefCycle = RegNo / 2 + 1; |
3732 | if (RegNo % 2) |
3733 | ++DefCycle; |
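    // Illustrative example: the 5th register in the list gets
    // DefCycle = 5/2 + 1 + 1 = 4.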
3734 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
3735 | DefCycle = RegNo; |
3736 | bool isSLoad = false; |
3737 | |
3738 | switch (DefMCID.getOpcode()) { |
3739 | default: break; |
3740 | case ARM::VLDMSIA: |
3741 | case ARM::VLDMSIA_UPD: |
3742 | case ARM::VLDMSDB_UPD: |
3743 | isSLoad = true; |
3744 | break; |
3745 | } |
3746 | |
    // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3748 | // then it takes an extra cycle. |
3749 | if ((isSLoad && (RegNo % 2)) || DefAlign < 8) |
3750 | ++DefCycle; |
3751 | } else { |
3752 | // Assume the worst. |
3753 | DefCycle = RegNo + 2; |
3754 | } |
3755 | |
3756 | return DefCycle; |
3757 | } |
3758 | |
3759 | std::optional<unsigned> |
3760 | ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, |
3761 | const MCInstrDesc &DefMCID, unsigned DefClass, |
3762 | unsigned DefIdx, unsigned DefAlign) const { |
3763 | int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; |
3764 | if (RegNo <= 0) |
3765 | // Def is the address writeback. |
3766 | return ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx); |
3767 | |
3768 | unsigned DefCycle; |
3769 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
3770 | // 4 registers would be issued: 1, 2, 1. |
3771 | // 5 registers would be issued: 1, 2, 2. |
3772 | DefCycle = RegNo / 2; |
3773 | if (DefCycle < 1) |
3774 | DefCycle = 1; |
3775 | // Result latency is issue cycle + 2: E2. |
3776 | DefCycle += 2; |
3777 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
3778 | DefCycle = (RegNo / 2); |
    // If there is an odd number of registers or if it's not 64-bit aligned,
3780 | // then it takes an extra AGU (Address Generation Unit) cycle. |
3781 | if ((RegNo % 2) || DefAlign < 8) |
3782 | ++DefCycle; |
3783 | // Result latency is AGU cycles + 2. |
3784 | DefCycle += 2; |
3785 | } else { |
3786 | // Assume the worst. |
3787 | DefCycle = RegNo + 2; |
3788 | } |
3789 | |
3790 | return DefCycle; |
3791 | } |
3792 | |
3793 | std::optional<unsigned> |
3794 | ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, |
3795 | const MCInstrDesc &UseMCID, unsigned UseClass, |
3796 | unsigned UseIdx, unsigned UseAlign) const { |
3797 | int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; |
3798 | if (RegNo <= 0) |
3799 | return ItinData->getOperandCycle(ItinClassIndx: UseClass, OperandIdx: UseIdx); |
3800 | |
3801 | unsigned UseCycle; |
3802 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
3803 | // (regno / 2) + (regno % 2) + 1 |
3804 | UseCycle = RegNo / 2 + 1; |
3805 | if (RegNo % 2) |
3806 | ++UseCycle; |
3807 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
3808 | UseCycle = RegNo; |
3809 | bool isSStore = false; |
3810 | |
3811 | switch (UseMCID.getOpcode()) { |
3812 | default: break; |
3813 | case ARM::VSTMSIA: |
3814 | case ARM::VSTMSIA_UPD: |
3815 | case ARM::VSTMSDB_UPD: |
3816 | isSStore = true; |
3817 | break; |
3818 | } |
3819 | |
    // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3821 | // then it takes an extra cycle. |
3822 | if ((isSStore && (RegNo % 2)) || UseAlign < 8) |
3823 | ++UseCycle; |
3824 | } else { |
3825 | // Assume the worst. |
3826 | UseCycle = RegNo + 2; |
3827 | } |
3828 | |
3829 | return UseCycle; |
3830 | } |
3831 | |
3832 | std::optional<unsigned> |
3833 | ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, |
3834 | const MCInstrDesc &UseMCID, unsigned UseClass, |
3835 | unsigned UseIdx, unsigned UseAlign) const { |
3836 | int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; |
3837 | if (RegNo <= 0) |
3838 | return ItinData->getOperandCycle(ItinClassIndx: UseClass, OperandIdx: UseIdx); |
3839 | |
3840 | unsigned UseCycle; |
3841 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
3842 | UseCycle = RegNo / 2; |
3843 | if (UseCycle < 2) |
3844 | UseCycle = 2; |
3845 | // Read in E3. |
3846 | UseCycle += 2; |
3847 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
3848 | UseCycle = (RegNo / 2); |
    // If there is an odd number of registers or if it's not 64-bit aligned,
3850 | // then it takes an extra AGU (Address Generation Unit) cycle. |
3851 | if ((RegNo % 2) || UseAlign < 8) |
3852 | ++UseCycle; |
3853 | } else { |
3854 | // Assume the worst. |
3855 | UseCycle = 1; |
3856 | } |
3857 | return UseCycle; |
3858 | } |
3859 | |
3860 | std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency( |
3861 | const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID, |
3862 | unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID, |
3863 | unsigned UseIdx, unsigned UseAlign) const { |
3864 | unsigned DefClass = DefMCID.getSchedClass(); |
3865 | unsigned UseClass = UseMCID.getSchedClass(); |
3866 | |
3867 | if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands()) |
3868 | return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); |
3869 | |
3870 | // This may be a def / use of a variable_ops instruction, the operand |
3871 | // latency might be determinable dynamically. Let the target try to |
3872 | // figure it out. |
3873 | std::optional<unsigned> DefCycle; |
3874 | bool LdmBypass = false; |
3875 | switch (DefMCID.getOpcode()) { |
3876 | default: |
3877 | DefCycle = ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx); |
3878 | break; |
3879 | |
3880 | case ARM::VLDMDIA: |
3881 | case ARM::VLDMDIA_UPD: |
3882 | case ARM::VLDMDDB_UPD: |
3883 | case ARM::VLDMSIA: |
3884 | case ARM::VLDMSIA_UPD: |
3885 | case ARM::VLDMSDB_UPD: |
3886 | DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); |
3887 | break; |
3888 | |
3889 | case ARM::LDMIA_RET: |
3890 | case ARM::LDMIA: |
3891 | case ARM::LDMDA: |
3892 | case ARM::LDMDB: |
3893 | case ARM::LDMIB: |
3894 | case ARM::LDMIA_UPD: |
3895 | case ARM::LDMDA_UPD: |
3896 | case ARM::LDMDB_UPD: |
3897 | case ARM::LDMIB_UPD: |
3898 | case ARM::tLDMIA: |
3899 | case ARM::tLDMIA_UPD: |
3900 | case ARM::tPUSH: |
3901 | case ARM::t2LDMIA_RET: |
3902 | case ARM::t2LDMIA: |
3903 | case ARM::t2LDMDB: |
3904 | case ARM::t2LDMIA_UPD: |
3905 | case ARM::t2LDMDB_UPD: |
3906 | LdmBypass = true; |
3907 | DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); |
3908 | break; |
3909 | } |
3910 | |
3911 | if (!DefCycle) |
3912 | // We can't seem to determine the result latency of the def, assume it's 2. |
3913 | DefCycle = 2; |
3914 | |
3915 | std::optional<unsigned> UseCycle; |
3916 | switch (UseMCID.getOpcode()) { |
3917 | default: |
3918 | UseCycle = ItinData->getOperandCycle(ItinClassIndx: UseClass, OperandIdx: UseIdx); |
3919 | break; |
3920 | |
3921 | case ARM::VSTMDIA: |
3922 | case ARM::VSTMDIA_UPD: |
3923 | case ARM::VSTMDDB_UPD: |
3924 | case ARM::VSTMSIA: |
3925 | case ARM::VSTMSIA_UPD: |
3926 | case ARM::VSTMSDB_UPD: |
3927 | UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); |
3928 | break; |
3929 | |
3930 | case ARM::STMIA: |
3931 | case ARM::STMDA: |
3932 | case ARM::STMDB: |
3933 | case ARM::STMIB: |
3934 | case ARM::STMIA_UPD: |
3935 | case ARM::STMDA_UPD: |
3936 | case ARM::STMDB_UPD: |
3937 | case ARM::STMIB_UPD: |
3938 | case ARM::tSTMIA_UPD: |
3939 | case ARM::tPOP_RET: |
3940 | case ARM::tPOP: |
3941 | case ARM::t2STMIA: |
3942 | case ARM::t2STMDB: |
3943 | case ARM::t2STMIA_UPD: |
3944 | case ARM::t2STMDB_UPD: |
3945 | UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); |
3946 | break; |
3947 | } |
3948 | |
3949 | if (!UseCycle) |
3950 | // Assume it's read in the first stage. |
3951 | UseCycle = 1; |
3952 | |
3953 | if (UseCycle > *DefCycle + 1) |
3954 | return std::nullopt; |
3955 | |
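  // Roughly: operand latency = cycle the value is produced - cycle the use
  // reads it + 1, so a same-cycle def/use still costs one cycle.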
3956 | UseCycle = *DefCycle - *UseCycle + 1; |
3957 | if (UseCycle > 0u) { |
3958 | if (LdmBypass) { |
3959 | // It's a variable_ops instruction so we can't use DefIdx here. Just use |
3960 | // first def operand. |
3961 | if (ItinData->hasPipelineForwarding(DefClass, DefIdx: DefMCID.getNumOperands()-1, |
3962 | UseClass, UseIdx)) |
3963 | UseCycle = *UseCycle - 1; |
3964 | } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, |
3965 | UseClass, UseIdx)) { |
3966 | UseCycle = *UseCycle - 1; |
3967 | } |
3968 | } |
3969 | |
3970 | return UseCycle; |
3971 | } |
3972 | |
3973 | static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, |
3974 | const MachineInstr *MI, unsigned Reg, |
3975 | unsigned &DefIdx, unsigned &Dist) { |
3976 | Dist = 0; |
3977 | |
3978 | MachineBasicBlock::const_iterator I = MI; ++I; |
3979 | MachineBasicBlock::const_instr_iterator II = std::prev(x: I.getInstrIterator()); |
3980 | assert(II->isInsideBundle() && "Empty bundle?" ); |
3981 | |
3982 | int Idx = -1; |
3983 | while (II->isInsideBundle()) { |
3984 | Idx = II->findRegisterDefOperandIdx(Reg, TRI, isDead: false, Overlap: true); |
3985 | if (Idx != -1) |
3986 | break; |
3987 | --II; |
3988 | ++Dist; |
3989 | } |
3990 | |
3991 | assert(Idx != -1 && "Cannot find bundled definition!" ); |
3992 | DefIdx = Idx; |
3993 | return &*II; |
3994 | } |
3995 | |
3996 | static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, |
3997 | const MachineInstr &MI, unsigned Reg, |
3998 | unsigned &UseIdx, unsigned &Dist) { |
3999 | Dist = 0; |
4000 | |
4001 | MachineBasicBlock::const_instr_iterator II = ++MI.getIterator(); |
4002 | assert(II->isInsideBundle() && "Empty bundle?" ); |
4003 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
4004 | |
4005 | // FIXME: This doesn't properly handle multiple uses. |
4006 | int Idx = -1; |
4007 | while (II != E && II->isInsideBundle()) { |
4008 | Idx = II->findRegisterUseOperandIdx(Reg, TRI, isKill: false); |
4009 | if (Idx != -1) |
4010 | break; |
4011 | if (II->getOpcode() != ARM::t2IT) |
4012 | ++Dist; |
4013 | ++II; |
4014 | } |
4015 | |
4016 | if (Idx == -1) { |
4017 | Dist = 0; |
4018 | return nullptr; |
4019 | } |
4020 | |
4021 | UseIdx = Idx; |
4022 | return &*II; |
4023 | } |
4024 | |
4025 | /// Return the number of cycles to add to (or subtract from) the static |
4026 | /// itinerary based on the def opcode and alignment. The caller will ensure that |
4027 | /// adjusted latency is at least one cycle. |
4028 | static int adjustDefLatency(const ARMSubtarget &Subtarget, |
4029 | const MachineInstr &DefMI, |
4030 | const MCInstrDesc &DefMCID, unsigned DefAlign) { |
4031 | int Adjust = 0; |
4032 | if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) { |
4033 | // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] |
4034 | // variants are one cycle cheaper. |
4035 | switch (DefMCID.getOpcode()) { |
4036 | default: break; |
4037 | case ARM::LDRrs: |
4038 | case ARM::LDRBrs: { |
4039 | unsigned ShOpVal = DefMI.getOperand(i: 3).getImm(); |
4040 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
4041 | if (ShImm == 0 || |
4042 | (ShImm == 2 && ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl)) |
4043 | --Adjust; |
4044 | break; |
4045 | } |
4046 | case ARM::t2LDRs: |
4047 | case ARM::t2LDRBs: |
4048 | case ARM::t2LDRHs: |
4049 | case ARM::t2LDRSHs: { |
4050 | // Thumb2 mode: lsl only. |
4051 | unsigned ShAmt = DefMI.getOperand(i: 3).getImm(); |
4052 | if (ShAmt == 0 || ShAmt == 2) |
4053 | --Adjust; |
4054 | break; |
4055 | } |
4056 | } |
4057 | } else if (Subtarget.isSwift()) { |
4058 | // FIXME: Properly handle all of the latency adjustments for address |
4059 | // writeback. |
4060 | switch (DefMCID.getOpcode()) { |
4061 | default: break; |
4062 | case ARM::LDRrs: |
4063 | case ARM::LDRBrs: { |
4064 | unsigned ShOpVal = DefMI.getOperand(i: 3).getImm(); |
4065 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
4066 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
4067 | if (!isSub && |
4068 | (ShImm == 0 || |
4069 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
4070 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
4071 | Adjust -= 2; |
4072 | else if (!isSub && |
4073 | ShImm == 1 && ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsr) |
4074 | --Adjust; |
4075 | break; |
4076 | } |
4077 | case ARM::t2LDRs: |
4078 | case ARM::t2LDRBs: |
4079 | case ARM::t2LDRHs: |
4080 | case ARM::t2LDRSHs: { |
4081 | // Thumb2 mode: lsl only. |
4082 | unsigned ShAmt = DefMI.getOperand(i: 3).getImm(); |
4083 | if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3) |
4084 | Adjust -= 2; |
4085 | break; |
4086 | } |
4087 | } |
4088 | } |
4089 | |
4090 | if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) { |
4091 | switch (DefMCID.getOpcode()) { |
4092 | default: break; |
4093 | case ARM::VLD1q8: |
4094 | case ARM::VLD1q16: |
4095 | case ARM::VLD1q32: |
4096 | case ARM::VLD1q64: |
4097 | case ARM::VLD1q8wb_fixed: |
4098 | case ARM::VLD1q16wb_fixed: |
4099 | case ARM::VLD1q32wb_fixed: |
4100 | case ARM::VLD1q64wb_fixed: |
4101 | case ARM::VLD1q8wb_register: |
4102 | case ARM::VLD1q16wb_register: |
4103 | case ARM::VLD1q32wb_register: |
4104 | case ARM::VLD1q64wb_register: |
4105 | case ARM::VLD2d8: |
4106 | case ARM::VLD2d16: |
4107 | case ARM::VLD2d32: |
4108 | case ARM::VLD2q8: |
4109 | case ARM::VLD2q16: |
4110 | case ARM::VLD2q32: |
4111 | case ARM::VLD2d8wb_fixed: |
4112 | case ARM::VLD2d16wb_fixed: |
4113 | case ARM::VLD2d32wb_fixed: |
4114 | case ARM::VLD2q8wb_fixed: |
4115 | case ARM::VLD2q16wb_fixed: |
4116 | case ARM::VLD2q32wb_fixed: |
4117 | case ARM::VLD2d8wb_register: |
4118 | case ARM::VLD2d16wb_register: |
4119 | case ARM::VLD2d32wb_register: |
4120 | case ARM::VLD2q8wb_register: |
4121 | case ARM::VLD2q16wb_register: |
4122 | case ARM::VLD2q32wb_register: |
4123 | case ARM::VLD3d8: |
4124 | case ARM::VLD3d16: |
4125 | case ARM::VLD3d32: |
4126 | case ARM::VLD1d64T: |
4127 | case ARM::VLD3d8_UPD: |
4128 | case ARM::VLD3d16_UPD: |
4129 | case ARM::VLD3d32_UPD: |
4130 | case ARM::VLD1d64Twb_fixed: |
4131 | case ARM::VLD1d64Twb_register: |
4132 | case ARM::VLD3q8_UPD: |
4133 | case ARM::VLD3q16_UPD: |
4134 | case ARM::VLD3q32_UPD: |
4135 | case ARM::VLD4d8: |
4136 | case ARM::VLD4d16: |
4137 | case ARM::VLD4d32: |
4138 | case ARM::VLD1d64Q: |
4139 | case ARM::VLD4d8_UPD: |
4140 | case ARM::VLD4d16_UPD: |
4141 | case ARM::VLD4d32_UPD: |
4142 | case ARM::VLD1d64Qwb_fixed: |
4143 | case ARM::VLD1d64Qwb_register: |
4144 | case ARM::VLD4q8_UPD: |
4145 | case ARM::VLD4q16_UPD: |
4146 | case ARM::VLD4q32_UPD: |
4147 | case ARM::VLD1DUPq8: |
4148 | case ARM::VLD1DUPq16: |
4149 | case ARM::VLD1DUPq32: |
4150 | case ARM::VLD1DUPq8wb_fixed: |
4151 | case ARM::VLD1DUPq16wb_fixed: |
4152 | case ARM::VLD1DUPq32wb_fixed: |
4153 | case ARM::VLD1DUPq8wb_register: |
4154 | case ARM::VLD1DUPq16wb_register: |
4155 | case ARM::VLD1DUPq32wb_register: |
4156 | case ARM::VLD2DUPd8: |
4157 | case ARM::VLD2DUPd16: |
4158 | case ARM::VLD2DUPd32: |
4159 | case ARM::VLD2DUPd8wb_fixed: |
4160 | case ARM::VLD2DUPd16wb_fixed: |
4161 | case ARM::VLD2DUPd32wb_fixed: |
4162 | case ARM::VLD2DUPd8wb_register: |
4163 | case ARM::VLD2DUPd16wb_register: |
4164 | case ARM::VLD2DUPd32wb_register: |
4165 | case ARM::VLD4DUPd8: |
4166 | case ARM::VLD4DUPd16: |
4167 | case ARM::VLD4DUPd32: |
4168 | case ARM::VLD4DUPd8_UPD: |
4169 | case ARM::VLD4DUPd16_UPD: |
4170 | case ARM::VLD4DUPd32_UPD: |
4171 | case ARM::VLD1LNd8: |
4172 | case ARM::VLD1LNd16: |
4173 | case ARM::VLD1LNd32: |
4174 | case ARM::VLD1LNd8_UPD: |
4175 | case ARM::VLD1LNd16_UPD: |
4176 | case ARM::VLD1LNd32_UPD: |
4177 | case ARM::VLD2LNd8: |
4178 | case ARM::VLD2LNd16: |
4179 | case ARM::VLD2LNd32: |
4180 | case ARM::VLD2LNq16: |
4181 | case ARM::VLD2LNq32: |
4182 | case ARM::VLD2LNd8_UPD: |
4183 | case ARM::VLD2LNd16_UPD: |
4184 | case ARM::VLD2LNd32_UPD: |
4185 | case ARM::VLD2LNq16_UPD: |
4186 | case ARM::VLD2LNq32_UPD: |
4187 | case ARM::VLD4LNd8: |
4188 | case ARM::VLD4LNd16: |
4189 | case ARM::VLD4LNd32: |
4190 | case ARM::VLD4LNq16: |
4191 | case ARM::VLD4LNq32: |
4192 | case ARM::VLD4LNd8_UPD: |
4193 | case ARM::VLD4LNd16_UPD: |
4194 | case ARM::VLD4LNd32_UPD: |
4195 | case ARM::VLD4LNq16_UPD: |
4196 | case ARM::VLD4LNq32_UPD: |
      // If the address is not 64-bit aligned, the latency of these
      // instructions increases by one.
4199 | ++Adjust; |
4200 | break; |
4201 | } |
4202 | } |
4203 | return Adjust; |
4204 | } |
4205 | |
4206 | std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency( |
4207 | const InstrItineraryData *ItinData, const MachineInstr &DefMI, |
4208 | unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const { |
4209 | // No operand latency. The caller may fall back to getInstrLatency. |
4210 | if (!ItinData || ItinData->isEmpty()) |
4211 | return std::nullopt; |
4212 | |
4213 | const MachineOperand &DefMO = DefMI.getOperand(i: DefIdx); |
4214 | Register Reg = DefMO.getReg(); |
4215 | |
4216 | const MachineInstr *ResolvedDefMI = &DefMI; |
4217 | unsigned DefAdj = 0; |
4218 | if (DefMI.isBundle()) |
4219 | ResolvedDefMI = |
4220 | getBundledDefMI(TRI: &getRegisterInfo(), MI: &DefMI, Reg, DefIdx, Dist&: DefAdj); |
4221 | if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() || |
4222 | ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) { |
4223 | return 1; |
4224 | } |
4225 | |
4226 | const MachineInstr *ResolvedUseMI = &UseMI; |
4227 | unsigned UseAdj = 0; |
4228 | if (UseMI.isBundle()) { |
4229 | ResolvedUseMI = |
4230 | getBundledUseMI(TRI: &getRegisterInfo(), MI: UseMI, Reg, UseIdx, Dist&: UseAdj); |
4231 | if (!ResolvedUseMI) |
4232 | return std::nullopt; |
4233 | } |
4234 | |
4235 | return getOperandLatencyImpl( |
4236 | ItinData, DefMI: *ResolvedDefMI, DefIdx, DefMCID: ResolvedDefMI->getDesc(), DefAdj, DefMO, |
4237 | Reg, UseMI: *ResolvedUseMI, UseIdx, UseMCID: ResolvedUseMI->getDesc(), UseAdj); |
4238 | } |
4239 | |
4240 | std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl( |
4241 | const InstrItineraryData *ItinData, const MachineInstr &DefMI, |
4242 | unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj, |
4243 | const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI, |
4244 | unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const { |
4245 | if (Reg == ARM::CPSR) { |
4246 | if (DefMI.getOpcode() == ARM::FMSTAT) { |
4247 | // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) |
4248 | return Subtarget.isLikeA9() ? 1 : 20; |
4249 | } |
4250 | |
4251 | // CPSR set and branch can be paired in the same cycle. |
4252 | if (UseMI.isBranch()) |
4253 | return 0; |
4254 | |
4255 | // Otherwise it takes the instruction latency (generally one). |
4256 | unsigned Latency = getInstrLatency(ItinData, MI: DefMI); |
4257 | |
    // For Thumb2 and -Os, prefer scheduling the CPSR-setting instruction
    // close to its uses. Instructions that would otherwise be scheduled
    // between them may incur a code size penalty (they cannot use the
    // 16-bit CPSR-setting instructions).
4262 | if (Latency > 0 && Subtarget.isThumb2()) { |
4263 | const MachineFunction *MF = DefMI.getParent()->getParent(); |
4264 | // FIXME: Use Function::hasOptSize(). |
4265 | if (MF->getFunction().hasFnAttribute(Kind: Attribute::OptimizeForSize)) |
4266 | --Latency; |
4267 | } |
4268 | return Latency; |
4269 | } |
4270 | |
4271 | if (DefMO.isImplicit() || UseMI.getOperand(i: UseIdx).isImplicit()) |
4272 | return std::nullopt; |
4273 | |
4274 | unsigned DefAlign = DefMI.hasOneMemOperand() |
4275 | ? (*DefMI.memoperands_begin())->getAlign().value() |
4276 | : 0; |
4277 | unsigned UseAlign = UseMI.hasOneMemOperand() |
4278 | ? (*UseMI.memoperands_begin())->getAlign().value() |
4279 | : 0; |
4280 | |
4281 | // Get the itinerary's latency if possible, and handle variable_ops. |
4282 | std::optional<unsigned> Latency = getOperandLatency( |
4283 | ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign); |
4284 | // Unable to find operand latency. The caller may resort to getInstrLatency. |
4285 | if (!Latency) |
4286 | return std::nullopt; |
4287 | |
4288 | // Adjust for IT block position. |
4289 | int Adj = DefAdj + UseAdj; |
4290 | |
4291 | // Adjust for dynamic def-side opcode variants not captured by the itinerary. |
4292 | Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign); |
4293 | if (Adj >= 0 || (int)*Latency > -Adj) { |
4294 | return *Latency + Adj; |
4295 | } |
4296 | // Return the itinerary latency, which may be zero but not less than zero. |
4297 | return Latency; |
4298 | } |
4299 | |
4300 | std::optional<unsigned> |
4301 | ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, |
4302 | SDNode *DefNode, unsigned DefIdx, |
4303 | SDNode *UseNode, unsigned UseIdx) const { |
4304 | if (!DefNode->isMachineOpcode()) |
4305 | return 1; |
4306 | |
4307 | const MCInstrDesc &DefMCID = get(Opcode: DefNode->getMachineOpcode()); |
4308 | |
4309 | if (isZeroCost(Opcode: DefMCID.Opcode)) |
4310 | return 0; |
4311 | |
4312 | if (!ItinData || ItinData->isEmpty()) |
4313 | return DefMCID.mayLoad() ? 3 : 1; |
4314 | |
4315 | if (!UseNode->isMachineOpcode()) { |
4316 | std::optional<unsigned> Latency = |
4317 | ItinData->getOperandCycle(ItinClassIndx: DefMCID.getSchedClass(), OperandIdx: DefIdx); |
4318 | int Adj = Subtarget.getPreISelOperandLatencyAdjustment(); |
4319 | int Threshold = 1 + Adj; |
4320 | return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj; |
4321 | } |
4322 | |
4323 | const MCInstrDesc &UseMCID = get(Opcode: UseNode->getMachineOpcode()); |
4324 | auto *DefMN = cast<MachineSDNode>(Val: DefNode); |
4325 | unsigned DefAlign = !DefMN->memoperands_empty() |
4326 | ? (*DefMN->memoperands_begin())->getAlign().value() |
4327 | : 0; |
4328 | auto *UseMN = cast<MachineSDNode>(Val: UseNode); |
4329 | unsigned UseAlign = !UseMN->memoperands_empty() |
4330 | ? (*UseMN->memoperands_begin())->getAlign().value() |
4331 | : 0; |
4332 | std::optional<unsigned> Latency = getOperandLatency( |
4333 | ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign); |
4334 | if (!Latency) |
4335 | return std::nullopt; |
4336 | |
4337 | if (Latency > 1U && |
4338 | (Subtarget.isCortexA8() || Subtarget.isLikeA9() || |
4339 | Subtarget.isCortexA7())) { |
4340 | // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] |
4341 | // variants are one cycle cheaper. |
4342 | switch (DefMCID.getOpcode()) { |
4343 | default: break; |
4344 | case ARM::LDRrs: |
4345 | case ARM::LDRBrs: { |
4346 | unsigned ShOpVal = DefNode->getConstantOperandVal(Num: 2); |
4347 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
4348 | if (ShImm == 0 || |
4349 | (ShImm == 2 && ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl)) |
4350 | Latency = *Latency - 1; |
4351 | break; |
4352 | } |
4353 | case ARM::t2LDRs: |
4354 | case ARM::t2LDRBs: |
4355 | case ARM::t2LDRHs: |
4356 | case ARM::t2LDRSHs: { |
4357 | // Thumb2 mode: lsl only. |
4358 | unsigned ShAmt = DefNode->getConstantOperandVal(Num: 2); |
4359 | if (ShAmt == 0 || ShAmt == 2) |
4360 | Latency = *Latency - 1; |
4361 | break; |
4362 | } |
4363 | } |
4364 | } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) { |
4365 | // FIXME: Properly handle all of the latency adjustments for address |
4366 | // writeback. |
4367 | switch (DefMCID.getOpcode()) { |
4368 | default: break; |
4369 | case ARM::LDRrs: |
4370 | case ARM::LDRBrs: { |
4371 | unsigned ShOpVal = DefNode->getConstantOperandVal(Num: 2); |
4372 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
4373 | if (ShImm == 0 || |
4374 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
4375 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl)) |
4376 | Latency = *Latency - 2; |
4377 | else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsr) |
4378 | Latency = *Latency - 1; |
4379 | break; |
4380 | } |
4381 | case ARM::t2LDRs: |
4382 | case ARM::t2LDRBs: |
4383 | case ARM::t2LDRHs: |
4384 | case ARM::t2LDRSHs: |
4385 | // Thumb2 mode: lsl 0-3 only. |
4386 | Latency = *Latency - 2; |
4387 | break; |
4388 | } |
4389 | } |
4390 | |
4391 | if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) |
4392 | switch (DefMCID.getOpcode()) { |
4393 | default: break; |
4394 | case ARM::VLD1q8: |
4395 | case ARM::VLD1q16: |
4396 | case ARM::VLD1q32: |
4397 | case ARM::VLD1q64: |
4398 | case ARM::VLD1q8wb_register: |
4399 | case ARM::VLD1q16wb_register: |
4400 | case ARM::VLD1q32wb_register: |
4401 | case ARM::VLD1q64wb_register: |
4402 | case ARM::VLD1q8wb_fixed: |
4403 | case ARM::VLD1q16wb_fixed: |
4404 | case ARM::VLD1q32wb_fixed: |
4405 | case ARM::VLD1q64wb_fixed: |
4406 | case ARM::VLD2d8: |
4407 | case ARM::VLD2d16: |
4408 | case ARM::VLD2d32: |
4409 | case ARM::VLD2q8Pseudo: |
4410 | case ARM::VLD2q16Pseudo: |
4411 | case ARM::VLD2q32Pseudo: |
4412 | case ARM::VLD2d8wb_fixed: |
4413 | case ARM::VLD2d16wb_fixed: |
4414 | case ARM::VLD2d32wb_fixed: |
4415 | case ARM::VLD2q8PseudoWB_fixed: |
4416 | case ARM::VLD2q16PseudoWB_fixed: |
4417 | case ARM::VLD2q32PseudoWB_fixed: |
4418 | case ARM::VLD2d8wb_register: |
4419 | case ARM::VLD2d16wb_register: |
4420 | case ARM::VLD2d32wb_register: |
4421 | case ARM::VLD2q8PseudoWB_register: |
4422 | case ARM::VLD2q16PseudoWB_register: |
4423 | case ARM::VLD2q32PseudoWB_register: |
4424 | case ARM::VLD3d8Pseudo: |
4425 | case ARM::VLD3d16Pseudo: |
4426 | case ARM::VLD3d32Pseudo: |
4427 | case ARM::VLD1d8TPseudo: |
4428 | case ARM::VLD1d16TPseudo: |
4429 | case ARM::VLD1d32TPseudo: |
4430 | case ARM::VLD1d64TPseudo: |
4431 | case ARM::VLD1d64TPseudoWB_fixed: |
4432 | case ARM::VLD1d64TPseudoWB_register: |
4433 | case ARM::VLD3d8Pseudo_UPD: |
4434 | case ARM::VLD3d16Pseudo_UPD: |
4435 | case ARM::VLD3d32Pseudo_UPD: |
4436 | case ARM::VLD3q8Pseudo_UPD: |
4437 | case ARM::VLD3q16Pseudo_UPD: |
4438 | case ARM::VLD3q32Pseudo_UPD: |
4439 | case ARM::VLD3q8oddPseudo: |
4440 | case ARM::VLD3q16oddPseudo: |
4441 | case ARM::VLD3q32oddPseudo: |
4442 | case ARM::VLD3q8oddPseudo_UPD: |
4443 | case ARM::VLD3q16oddPseudo_UPD: |
4444 | case ARM::VLD3q32oddPseudo_UPD: |
4445 | case ARM::VLD4d8Pseudo: |
4446 | case ARM::VLD4d16Pseudo: |
4447 | case ARM::VLD4d32Pseudo: |
4448 | case ARM::VLD1d8QPseudo: |
4449 | case ARM::VLD1d16QPseudo: |
4450 | case ARM::VLD1d32QPseudo: |
4451 | case ARM::VLD1d64QPseudo: |
4452 | case ARM::VLD1d64QPseudoWB_fixed: |
4453 | case ARM::VLD1d64QPseudoWB_register: |
4454 | case ARM::VLD1q8HighQPseudo: |
4455 | case ARM::VLD1q8LowQPseudo_UPD: |
4456 | case ARM::VLD1q8HighTPseudo: |
4457 | case ARM::VLD1q8LowTPseudo_UPD: |
4458 | case ARM::VLD1q16HighQPseudo: |
4459 | case ARM::VLD1q16LowQPseudo_UPD: |
4460 | case ARM::VLD1q16HighTPseudo: |
4461 | case ARM::VLD1q16LowTPseudo_UPD: |
4462 | case ARM::VLD1q32HighQPseudo: |
4463 | case ARM::VLD1q32LowQPseudo_UPD: |
4464 | case ARM::VLD1q32HighTPseudo: |
4465 | case ARM::VLD1q32LowTPseudo_UPD: |
4466 | case ARM::VLD1q64HighQPseudo: |
4467 | case ARM::VLD1q64LowQPseudo_UPD: |
4468 | case ARM::VLD1q64HighTPseudo: |
4469 | case ARM::VLD1q64LowTPseudo_UPD: |
4470 | case ARM::VLD4d8Pseudo_UPD: |
4471 | case ARM::VLD4d16Pseudo_UPD: |
4472 | case ARM::VLD4d32Pseudo_UPD: |
4473 | case ARM::VLD4q8Pseudo_UPD: |
4474 | case ARM::VLD4q16Pseudo_UPD: |
4475 | case ARM::VLD4q32Pseudo_UPD: |
4476 | case ARM::VLD4q8oddPseudo: |
4477 | case ARM::VLD4q16oddPseudo: |
4478 | case ARM::VLD4q32oddPseudo: |
4479 | case ARM::VLD4q8oddPseudo_UPD: |
4480 | case ARM::VLD4q16oddPseudo_UPD: |
4481 | case ARM::VLD4q32oddPseudo_UPD: |
4482 | case ARM::VLD1DUPq8: |
4483 | case ARM::VLD1DUPq16: |
4484 | case ARM::VLD1DUPq32: |
4485 | case ARM::VLD1DUPq8wb_fixed: |
4486 | case ARM::VLD1DUPq16wb_fixed: |
4487 | case ARM::VLD1DUPq32wb_fixed: |
4488 | case ARM::VLD1DUPq8wb_register: |
4489 | case ARM::VLD1DUPq16wb_register: |
4490 | case ARM::VLD1DUPq32wb_register: |
4491 | case ARM::VLD2DUPd8: |
4492 | case ARM::VLD2DUPd16: |
4493 | case ARM::VLD2DUPd32: |
4494 | case ARM::VLD2DUPd8wb_fixed: |
4495 | case ARM::VLD2DUPd16wb_fixed: |
4496 | case ARM::VLD2DUPd32wb_fixed: |
4497 | case ARM::VLD2DUPd8wb_register: |
4498 | case ARM::VLD2DUPd16wb_register: |
4499 | case ARM::VLD2DUPd32wb_register: |
4500 | case ARM::VLD2DUPq8EvenPseudo: |
4501 | case ARM::VLD2DUPq8OddPseudo: |
4502 | case ARM::VLD2DUPq16EvenPseudo: |
4503 | case ARM::VLD2DUPq16OddPseudo: |
4504 | case ARM::VLD2DUPq32EvenPseudo: |
4505 | case ARM::VLD2DUPq32OddPseudo: |
4506 | case ARM::VLD3DUPq8EvenPseudo: |
4507 | case ARM::VLD3DUPq8OddPseudo: |
4508 | case ARM::VLD3DUPq16EvenPseudo: |
4509 | case ARM::VLD3DUPq16OddPseudo: |
4510 | case ARM::VLD3DUPq32EvenPseudo: |
4511 | case ARM::VLD3DUPq32OddPseudo: |
4512 | case ARM::VLD4DUPd8Pseudo: |
4513 | case ARM::VLD4DUPd16Pseudo: |
4514 | case ARM::VLD4DUPd32Pseudo: |
4515 | case ARM::VLD4DUPd8Pseudo_UPD: |
4516 | case ARM::VLD4DUPd16Pseudo_UPD: |
4517 | case ARM::VLD4DUPd32Pseudo_UPD: |
4518 | case ARM::VLD4DUPq8EvenPseudo: |
4519 | case ARM::VLD4DUPq8OddPseudo: |
4520 | case ARM::VLD4DUPq16EvenPseudo: |
4521 | case ARM::VLD4DUPq16OddPseudo: |
4522 | case ARM::VLD4DUPq32EvenPseudo: |
4523 | case ARM::VLD4DUPq32OddPseudo: |
4524 | case ARM::VLD1LNq8Pseudo: |
4525 | case ARM::VLD1LNq16Pseudo: |
4526 | case ARM::VLD1LNq32Pseudo: |
4527 | case ARM::VLD1LNq8Pseudo_UPD: |
4528 | case ARM::VLD1LNq16Pseudo_UPD: |
4529 | case ARM::VLD1LNq32Pseudo_UPD: |
4530 | case ARM::VLD2LNd8Pseudo: |
4531 | case ARM::VLD2LNd16Pseudo: |
4532 | case ARM::VLD2LNd32Pseudo: |
4533 | case ARM::VLD2LNq16Pseudo: |
4534 | case ARM::VLD2LNq32Pseudo: |
4535 | case ARM::VLD2LNd8Pseudo_UPD: |
4536 | case ARM::VLD2LNd16Pseudo_UPD: |
4537 | case ARM::VLD2LNd32Pseudo_UPD: |
4538 | case ARM::VLD2LNq16Pseudo_UPD: |
4539 | case ARM::VLD2LNq32Pseudo_UPD: |
4540 | case ARM::VLD4LNd8Pseudo: |
4541 | case ARM::VLD4LNd16Pseudo: |
4542 | case ARM::VLD4LNd32Pseudo: |
4543 | case ARM::VLD4LNq16Pseudo: |
4544 | case ARM::VLD4LNq32Pseudo: |
4545 | case ARM::VLD4LNd8Pseudo_UPD: |
4546 | case ARM::VLD4LNd16Pseudo_UPD: |
4547 | case ARM::VLD4LNd32Pseudo_UPD: |
4548 | case ARM::VLD4LNq16Pseudo_UPD: |
4549 | case ARM::VLD4LNq32Pseudo_UPD: |
    // If the address is not 64-bit aligned, the latency of these
    // instructions increases by one.
4552 | Latency = *Latency + 1; |
4553 | break; |
4554 | } |
4555 | |
4556 | return Latency; |
4557 | } |
4558 | |
4559 | unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const { |
4560 | if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || |
4561 | MI.isImplicitDef()) |
4562 | return 0; |
4563 | |
4564 | if (MI.isBundle()) |
4565 | return 0; |
4566 | |
4567 | const MCInstrDesc &MCID = MI.getDesc(); |
4568 | |
4569 | if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(Reg: ARM::CPSR) && |
4570 | !Subtarget.cheapPredicableCPSRDef())) { |
    // When predicated, CPSR is an additional source operand for CPSR-updating
    // instructions; this apparently increases their latencies.
4573 | return 1; |
4574 | } |
4575 | return 0; |
4576 | } |
4577 | |
4578 | unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
4579 | const MachineInstr &MI, |
4580 | unsigned *PredCost) const { |
4581 | if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || |
4582 | MI.isImplicitDef()) |
4583 | return 1; |
4584 | |
  // An instruction scheduler typically runs on unbundled instructions;
  // however, other passes may query the latency of a bundled instruction.
4587 | if (MI.isBundle()) { |
4588 | unsigned Latency = 0; |
4589 | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
4590 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
4591 | while (++I != E && I->isInsideBundle()) { |
4592 | if (I->getOpcode() != ARM::t2IT) |
4593 | Latency += getInstrLatency(ItinData, MI: *I, PredCost); |
4594 | } |
4595 | return Latency; |
4596 | } |
4597 | |
4598 | const MCInstrDesc &MCID = MI.getDesc(); |
4599 | if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(Reg: ARM::CPSR) && |
4600 | !Subtarget.cheapPredicableCPSRDef()))) { |
    // When predicated, CPSR is an additional source operand for CPSR-updating
    // instructions; this apparently increases their latencies.
4603 | *PredCost = 1; |
4604 | } |
4605 | // Be sure to call getStageLatency for an empty itinerary in case it has a |
4606 | // valid MinLatency property. |
4607 | if (!ItinData) |
4608 | return MI.mayLoad() ? 3 : 1; |
4609 | |
4610 | unsigned Class = MCID.getSchedClass(); |
4611 | |
4612 | // For instructions with variable uops, use uops as latency. |
4613 | if (!ItinData->isEmpty() && ItinData->getNumMicroOps(ItinClassIndx: Class) < 0) |
4614 | return getNumMicroOps(ItinData, MI); |
4615 | |
4616 | // For the common case, fall back on the itinerary's latency. |
4617 | unsigned Latency = ItinData->getStageLatency(ItinClassIndx: Class); |
4618 | |
4619 | // Adjust for dynamic def-side opcode variants not captured by the itinerary. |
4620 | unsigned DefAlign = |
4621 | MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0; |
4622 | int Adj = adjustDefLatency(Subtarget, DefMI: MI, DefMCID: MCID, DefAlign); |
4623 | if (Adj >= 0 || (int)Latency > -Adj) { |
4624 | return Latency + Adj; |
4625 | } |
4626 | return Latency; |
4627 | } |
4628 | |
4629 | unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
4630 | SDNode *Node) const { |
4631 | if (!Node->isMachineOpcode()) |
4632 | return 1; |
4633 | |
4634 | if (!ItinData || ItinData->isEmpty()) |
4635 | return 1; |
4636 | |
4637 | unsigned Opcode = Node->getMachineOpcode(); |
4638 | switch (Opcode) { |
4639 | default: |
4640 | return ItinData->getStageLatency(ItinClassIndx: get(Opcode).getSchedClass()); |
4641 | case ARM::VLDMQIA: |
4642 | case ARM::VSTMQIA: |
4643 | return 2; |
4644 | } |
4645 | } |
4646 | |
4647 | bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel, |
4648 | const MachineRegisterInfo *MRI, |
4649 | const MachineInstr &DefMI, |
4650 | unsigned DefIdx, |
4651 | const MachineInstr &UseMI, |
4652 | unsigned UseIdx) const { |
4653 | unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; |
4654 | unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask; |
4655 | if (Subtarget.nonpipelinedVFP() && |
4656 | (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) |
4657 | return true; |
4658 | |
4659 | // Hoist VFP / NEON instructions with 4 or higher latency. |
4660 | unsigned Latency = |
4661 | SchedModel.computeOperandLatency(DefMI: &DefMI, DefOperIdx: DefIdx, UseMI: &UseMI, UseOperIdx: UseIdx); |
4662 | if (Latency <= 3) |
4663 | return false; |
4664 | return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || |
4665 | UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; |
4666 | } |
4667 | |
4668 | bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, |
4669 | const MachineInstr &DefMI, |
4670 | unsigned DefIdx) const { |
4671 | const InstrItineraryData *ItinData = SchedModel.getInstrItineraries(); |
4672 | if (!ItinData || ItinData->isEmpty()) |
4673 | return false; |
4674 | |
4675 | unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; |
4676 | if (DDomain == ARMII::DomainGeneral) { |
4677 | unsigned DefClass = DefMI.getDesc().getSchedClass(); |
4678 | std::optional<unsigned> DefCycle = |
4679 | ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx); |
4680 | return DefCycle && DefCycle <= 2U; |
4681 | } |
4682 | return false; |
4683 | } |
4684 | |
4685 | bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, |
4686 | StringRef &ErrInfo) const { |
4687 | if (convertAddSubFlagsOpcode(OldOpc: MI.getOpcode())) { |
4688 | ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG" ; |
4689 | return false; |
4690 | } |
4691 | if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) { |
4692 | // Make sure we don't generate a lo-lo mov that isn't supported. |
4693 | if (!ARM::hGPRRegClass.contains(Reg: MI.getOperand(i: 0).getReg()) && |
4694 | !ARM::hGPRRegClass.contains(Reg: MI.getOperand(i: 1).getReg())) { |
4695 | ErrInfo = "Non-flag-setting Thumb1 mov is v6-only" ; |
4696 | return false; |
4697 | } |
4698 | } |
4699 | if (MI.getOpcode() == ARM::tPUSH || |
4700 | MI.getOpcode() == ARM::tPOP || |
4701 | MI.getOpcode() == ARM::tPOP_RET) { |
4702 | for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands(), N: 2)) { |
4703 | if (MO.isImplicit() || !MO.isReg()) |
4704 | continue; |
4705 | Register Reg = MO.getReg(); |
4706 | if (Reg < ARM::R0 || Reg > ARM::R7) { |
4707 | if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) && |
4708 | !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) { |
4709 | ErrInfo = "Unsupported register in Thumb1 push/pop" ; |
4710 | return false; |
4711 | } |
4712 | } |
4713 | } |
4714 | } |
4715 | if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) { |
4716 | assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm()); |
4717 | if ((MI.getOperand(i: 4).getImm() != 2 && MI.getOperand(i: 4).getImm() != 3) || |
4718 | MI.getOperand(i: 4).getImm() != MI.getOperand(i: 5).getImm() + 2) { |
4719 | ErrInfo = "Incorrect array index for MVE_VMOV_q_rr" ; |
4720 | return false; |
4721 | } |
4722 | } |
4723 | |
  // Check the addressing mode by taking the first immediate operand and
  // checking that it is legal for that addressing mode.
4726 | ARMII::AddrMode AddrMode = |
4727 | (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask); |
4728 | switch (AddrMode) { |
4729 | default: |
4730 | break; |
4731 | case ARMII::AddrModeT2_i7: |
4732 | case ARMII::AddrModeT2_i7s2: |
4733 | case ARMII::AddrModeT2_i7s4: |
4734 | case ARMII::AddrModeT2_i8: |
4735 | case ARMII::AddrModeT2_i8pos: |
4736 | case ARMII::AddrModeT2_i8neg: |
4737 | case ARMII::AddrModeT2_i8s4: |
4738 | case ARMII::AddrModeT2_i12: { |
4739 | uint32_t Imm = 0; |
4740 | for (auto Op : MI.operands()) { |
4741 | if (Op.isImm()) { |
4742 | Imm = Op.getImm(); |
4743 | break; |
4744 | } |
4745 | } |
4746 | if (!isLegalAddressImm(Opcode: MI.getOpcode(), Imm, TII: this)) { |
4747 | ErrInfo = "Incorrect AddrMode Imm for instruction" ; |
4748 | return false; |
4749 | } |
4750 | break; |
4751 | } |
4752 | } |
4753 | return true; |
4754 | } |
4755 | |
4756 | void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, |
4757 | unsigned LoadImmOpc, |
4758 | unsigned LoadOpc) const { |
4759 | assert(!Subtarget.isROPI() && !Subtarget.isRWPI() && |
4760 | "ROPI/RWPI not currently supported with stack guard" ); |
4761 | |
4762 | MachineBasicBlock &MBB = *MI->getParent(); |
4763 | DebugLoc DL = MI->getDebugLoc(); |
4764 | Register Reg = MI->getOperand(i: 0).getReg(); |
4765 | MachineInstrBuilder MIB; |
4766 | unsigned int Offset = 0; |
4767 | |
4768 | if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) { |
4769 | assert(!Subtarget.isReadTPSoft() && |
4770 | "TLS stack protector requires hardware TLS register" ); |
4771 | |
4772 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: LoadImmOpc), DestReg: Reg) |
4773 | .addImm(Val: 15) |
4774 | .addImm(Val: 0) |
4775 | .addImm(Val: 13) |
4776 | .addImm(Val: 0) |
4777 | .addImm(Val: 3) |
4778 | .add(MOs: predOps(Pred: ARMCC::AL)); |
4779 | |
4780 | Module &M = *MBB.getParent()->getFunction().getParent(); |
4781 | Offset = M.getStackProtectorGuardOffset(); |
4782 | if (Offset & ~0xfffU) { |
4783 | // The offset won't fit in the LDR's 12-bit immediate field, so emit an |
4784 | // extra ADD to cover the delta. This gives us a guaranteed 8 additional |
4785 | // bits, resulting in a range of 0 to +1 MiB for the guard offset. |
4786 | unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri; |
4787 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: AddOpc), DestReg: Reg) |
4788 | .addReg(RegNo: Reg, flags: RegState::Kill) |
4789 | .addImm(Val: Offset & ~0xfffU) |
4790 | .add(MOs: predOps(Pred: ARMCC::AL)) |
4791 | .addReg(RegNo: 0); |
4792 | Offset &= 0xfffU; |
4793 | } |
4794 | } else { |
4795 | const GlobalValue *GV = |
4796 | cast<GlobalValue>(Val: (*MI->memoperands_begin())->getValue()); |
4797 | bool IsIndirect = Subtarget.isGVIndirectSymbol(GV); |
4798 | |
4799 | unsigned TargetFlags = ARMII::MO_NO_FLAG; |
4800 | if (Subtarget.isTargetMachO()) { |
4801 | TargetFlags |= ARMII::MO_NONLAZY; |
4802 | } else if (Subtarget.isTargetCOFF()) { |
4803 | if (GV->hasDLLImportStorageClass()) |
4804 | TargetFlags |= ARMII::MO_DLLIMPORT; |
4805 | else if (IsIndirect) |
4806 | TargetFlags |= ARMII::MO_COFFSTUB; |
4807 | } else if (IsIndirect) { |
4808 | TargetFlags |= ARMII::MO_GOT; |
4809 | } |
4810 | |
4811 | if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only |
4812 | Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register |
4813 | auto APSREncoding = |
          ARMSysReg::lookupMClassSysRegByName(Name: "apsr_nzcvq")->Encoding;
4815 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: ARM::t2MRS_M), DestReg: CPSRSaveReg) |
4816 | .addImm(Val: APSREncoding) |
4817 | .add(MOs: predOps(Pred: ARMCC::AL)); |
4818 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: LoadImmOpc), DestReg: Reg) |
4819 | .addGlobalAddress(GV, Offset: 0, TargetFlags); |
4820 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: ARM::t2MSR_M)) |
4821 | .addImm(Val: APSREncoding) |
4822 | .addReg(RegNo: CPSRSaveReg, flags: RegState::Kill) |
4823 | .add(MOs: predOps(Pred: ARMCC::AL)); |
4824 | } else { |
4825 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: LoadImmOpc), DestReg: Reg) |
4826 | .addGlobalAddress(GV, Offset: 0, TargetFlags); |
4827 | } |
4828 | |
4829 | if (IsIndirect) { |
4830 | MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: LoadOpc), DestReg: Reg); |
4831 | MIB.addReg(RegNo: Reg, flags: RegState::Kill).addImm(Val: 0); |
4832 | auto Flags = MachineMemOperand::MOLoad | |
4833 | MachineMemOperand::MODereferenceable | |
4834 | MachineMemOperand::MOInvariant; |
4835 | MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( |
4836 | PtrInfo: MachinePointerInfo::getGOT(MF&: *MBB.getParent()), F: Flags, Size: 4, BaseAlignment: Align(4)); |
4837 | MIB.addMemOperand(MMO).add(MOs: predOps(Pred: ARMCC::AL)); |
4838 | } |
4839 | } |
4840 | |
4841 | MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: LoadOpc), DestReg: Reg); |
4842 | MIB.addReg(RegNo: Reg, flags: RegState::Kill) |
4843 | .addImm(Val: Offset) |
4844 | .cloneMemRefs(OtherMI: *MI) |
4845 | .add(MOs: predOps(Pred: ARMCC::AL)); |
4846 | } |
4847 | |
4848 | bool |
4849 | ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, |
4850 | unsigned &AddSubOpc, |
4851 | bool &NegAcc, bool &HasLane) const { |
4852 | DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Val: Opcode); |
4853 | if (I == MLxEntryMap.end()) |
4854 | return false; |
4855 | |
4856 | const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; |
4857 | MulOpc = Entry.MulOpc; |
4858 | AddSubOpc = Entry.AddSubOpc; |
4859 | NegAcc = Entry.NegAcc; |
4860 | HasLane = Entry.HasLane; |
4861 | return true; |
4862 | } |
4863 | |
4864 | //===----------------------------------------------------------------------===// |
4865 | // Execution domains. |
4866 | //===----------------------------------------------------------------------===// |
4867 | // |
4868 | // Some instructions go down the NEON pipeline, some go down the VFP pipeline, |
4869 | // and some can go down both. The vmov instructions go down the VFP pipeline, |
4870 | // but they can be changed to vorr equivalents that are executed by the NEON |
4871 | // pipeline. |
4872 | // |
4873 | // We use the following execution domain numbering: |
4874 | // |
4875 | enum ARMExeDomain { |
4876 | ExeGeneric = 0, |
4877 | ExeVFP = 1, |
4878 | ExeNEON = 2 |
4879 | }; |
4880 | |
4881 | // |
4882 | // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h |
4883 | // |
4884 | std::pair<uint16_t, uint16_t> |
4885 | ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const { |
4886 | // If we don't have access to NEON instructions then we won't be able |
4887 | // to swizzle anything to the NEON domain. Check to make sure. |
4888 | if (Subtarget.hasNEON()) { |
4889 | // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON |
4890 | // if they are not predicated. |
4891 | if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI)) |
4892 | return std::make_pair(x: ExeVFP, y: (1 << ExeVFP) | (1 << ExeNEON)); |
4893 | |
4894 | // CortexA9 is particularly picky about mixing the two and wants these |
4895 | // converted. |
4896 | if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) && |
4897 | (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR || |
4898 | MI.getOpcode() == ARM::VMOVS)) |
4899 | return std::make_pair(x: ExeVFP, y: (1 << ExeVFP) | (1 << ExeNEON)); |
4900 | } |
4901 | // No other instructions can be swizzled, so just determine their domain. |
4902 | unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask; |
4903 | |
4904 | if (Domain & ARMII::DomainNEON) |
4905 | return std::make_pair(x: ExeNEON, y: 0); |
4906 | |
4907 | // Certain instructions can go either way on Cortex-A8. |
4908 | // Treat them as NEON instructions. |
4909 | if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8()) |
4910 | return std::make_pair(x: ExeNEON, y: 0); |
4911 | |
4912 | if (Domain & ARMII::DomainVFP) |
4913 | return std::make_pair(x: ExeVFP, y: 0); |
4914 | |
4915 | return std::make_pair(x: ExeGeneric, y: 0); |
4916 | } |
4917 | |
4918 | static MCRegister getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, |
4919 | unsigned SReg, unsigned &Lane) { |
4920 | MCRegister DReg = |
4921 | TRI->getMatchingSuperReg(Reg: SReg, SubIdx: ARM::ssub_0, RC: &ARM::DPRRegClass); |
4922 | Lane = 0; |
4923 | |
4924 | if (DReg) |
4925 | return DReg; |
4926 | |
4927 | Lane = 1; |
4928 | DReg = TRI->getMatchingSuperReg(Reg: SReg, SubIdx: ARM::ssub_1, RC: &ARM::DPRRegClass); |
4929 | |
  assert(DReg && "S-register with no D super-register?");
4931 | return DReg; |
4932 | } |
4933 | |
/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, set
/// ImplicitSReg to the register that must be marked as an implicit use, or
/// zero if no implicit use needs to be added.
4937 | /// |
4938 | /// If the function cannot determine if an SPR should be marked implicit use or |
4939 | /// not, it returns false. |
4940 | /// |
/// This function handles cases where an instruction is being modified from
/// taking an SPR to taking a DPR[Lane]. A use of the DPR is being added,
/// which may conflict with an earlier def of an SPR corresponding to
/// DPR[Lane^1] (i.e. the other lane of the DPR).
4945 | /// |
/// If the other SPR is defined, an implicit-use of it should be added.
/// Otherwise (including the case where the DPR itself is defined), it should
/// not.
4948 | /// |
4949 | static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, |
4950 | MachineInstr &MI, MCRegister DReg, |
4951 | unsigned Lane, |
4952 | MCRegister &ImplicitSReg) { |
4953 | // If the DPR is defined or used already, the other SPR lane will be chained |
4954 | // correctly, so there is nothing to be done. |
4955 | if (MI.definesRegister(Reg: DReg, TRI) || MI.readsRegister(Reg: DReg, TRI)) { |
4956 | ImplicitSReg = MCRegister(); |
4957 | return true; |
4958 | } |
4959 | |
4960 | // Otherwise we need to go searching to see if the SPR is set explicitly. |
4961 | ImplicitSReg = TRI->getSubReg(Reg: DReg, |
4962 | Idx: (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1); |
4963 | MachineBasicBlock::LivenessQueryResult LQR = |
4964 | MI.getParent()->computeRegisterLiveness(TRI, Reg: ImplicitSReg, Before: MI); |
4965 | |
4966 | if (LQR == MachineBasicBlock::LQR_Live) |
4967 | return true; |
4968 | else if (LQR == MachineBasicBlock::LQR_Unknown) |
4969 | return false; |
4970 | |
4971 | // If the register is known not to be live, there is no need to add an |
4972 | // implicit-use. |
4973 | ImplicitSReg = MCRegister(); |
4974 | return true; |
4975 | } |
4976 | |
4977 | void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, |
4978 | unsigned Domain) const { |
4979 | unsigned DstReg, SrcReg; |
4980 | MCRegister DReg; |
4981 | unsigned Lane; |
4982 | MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); |
4983 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
4984 | switch (MI.getOpcode()) { |
4985 | default: |
4986 | llvm_unreachable("cannot handle opcode!" ); |
4987 | break; |
4988 | case ARM::VMOVD: |
4989 | if (Domain != ExeNEON) |
4990 | break; |
4991 | |
4992 | // Zap the predicate operands. |
4993 | assert(!isPredicated(MI) && "Cannot predicate a VORRd" ); |
4994 | |
4995 | // Make sure we've got NEON instructions. |
    assert(Subtarget.hasNEON() && "VORRd requires NEON");
4997 | |
4998 | // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) |
4999 | DstReg = MI.getOperand(i: 0).getReg(); |
5000 | SrcReg = MI.getOperand(i: 1).getReg(); |
5001 | |
5002 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
5003 | MI.removeOperand(OpNo: i - 1); |
5004 | |
5005 | // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) |
5006 | MI.setDesc(get(Opcode: ARM::VORRd)); |
5007 | MIB.addReg(RegNo: DstReg, flags: RegState::Define) |
5008 | .addReg(RegNo: SrcReg) |
5009 | .addReg(RegNo: SrcReg) |
5010 | .add(MOs: predOps(Pred: ARMCC::AL)); |
5011 | break; |
5012 | case ARM::VMOVRS: |
5013 | if (Domain != ExeNEON) |
5014 | break; |
5015 | assert(!isPredicated(MI) && "Cannot predicate a VGETLN" ); |
5016 | |
5017 | // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) |
5018 | DstReg = MI.getOperand(i: 0).getReg(); |
5019 | SrcReg = MI.getOperand(i: 1).getReg(); |
5020 | |
5021 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
5022 | MI.removeOperand(OpNo: i - 1); |
5023 | |
5024 | DReg = getCorrespondingDRegAndLane(TRI, SReg: SrcReg, Lane); |
5025 | |
5026 | // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) |
5027 | // Note that DSrc has been widened and the other lane may be undef, which |
5028 | // contaminates the entire register. |
5029 | MI.setDesc(get(Opcode: ARM::VGETLNi32)); |
5030 | MIB.addReg(RegNo: DstReg, flags: RegState::Define) |
5031 | .addReg(RegNo: DReg, flags: RegState::Undef) |
5032 | .addImm(Val: Lane) |
5033 | .add(MOs: predOps(Pred: ARMCC::AL)); |
5034 | |
5035 | // The old source should be an implicit use, otherwise we might think it |
5036 | // was dead before here. |
5037 | MIB.addReg(RegNo: SrcReg, flags: RegState::Implicit); |
5038 | break; |
5039 | case ARM::VMOVSR: { |
5040 | if (Domain != ExeNEON) |
5041 | break; |
5042 | assert(!isPredicated(MI) && "Cannot predicate a VSETLN" ); |
5043 | |
5044 | // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) |
5045 | DstReg = MI.getOperand(i: 0).getReg(); |
5046 | SrcReg = MI.getOperand(i: 1).getReg(); |
5047 | |
5048 | DReg = getCorrespondingDRegAndLane(TRI, SReg: DstReg, Lane); |
5049 | |
5050 | MCRegister ImplicitSReg; |
5051 | if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg)) |
5052 | break; |
5053 | |
5054 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
5055 | MI.removeOperand(OpNo: i - 1); |
5056 | |
5057 | // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) |
5058 | // Again DDst may be undefined at the beginning of this instruction. |
5059 | MI.setDesc(get(Opcode: ARM::VSETLNi32)); |
5060 | MIB.addReg(RegNo: DReg, flags: RegState::Define) |
5061 | .addReg(RegNo: DReg, flags: getUndefRegState(B: !MI.readsRegister(Reg: DReg, TRI))) |
5062 | .addReg(RegNo: SrcReg) |
5063 | .addImm(Val: Lane) |
5064 | .add(MOs: predOps(Pred: ARMCC::AL)); |
5065 | |
5066 | // The narrower destination must be marked as set to keep previous chains |
5067 | // in place. |
5068 | MIB.addReg(RegNo: DstReg, flags: RegState::Define | RegState::Implicit); |
5069 | if (ImplicitSReg) |
5070 | MIB.addReg(RegNo: ImplicitSReg, flags: RegState::Implicit); |
5071 | break; |
5072 | } |
5073 | case ARM::VMOVS: { |
5074 | if (Domain != ExeNEON) |
5075 | break; |
5076 | |
5077 | // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) |
5078 | DstReg = MI.getOperand(i: 0).getReg(); |
5079 | SrcReg = MI.getOperand(i: 1).getReg(); |
5080 | |
5081 | unsigned DstLane = 0, SrcLane = 0; |
5082 | MCRegister DDst, DSrc; |
5083 | DDst = getCorrespondingDRegAndLane(TRI, SReg: DstReg, Lane&: DstLane); |
5084 | DSrc = getCorrespondingDRegAndLane(TRI, SReg: SrcReg, Lane&: SrcLane); |
5085 | |
5086 | MCRegister ImplicitSReg; |
5087 | if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg: DSrc, Lane: SrcLane, ImplicitSReg)) |
5088 | break; |
5089 | |
5090 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
5091 | MI.removeOperand(OpNo: i - 1); |
5092 | |
5093 | if (DSrc == DDst) { |
5094 | // Destination can be: |
5095 | // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) |
5096 | MI.setDesc(get(Opcode: ARM::VDUPLN32d)); |
5097 | MIB.addReg(RegNo: DDst, flags: RegState::Define) |
5098 | .addReg(RegNo: DDst, flags: getUndefRegState(B: !MI.readsRegister(Reg: DDst, TRI))) |
5099 | .addImm(Val: SrcLane) |
5100 | .add(MOs: predOps(Pred: ARMCC::AL)); |
5101 | |
      // Neither the source nor the destination is naturally represented any
      // more, so add them in manually.
5104 | MIB.addReg(RegNo: DstReg, flags: RegState::Implicit | RegState::Define); |
5105 | MIB.addReg(RegNo: SrcReg, flags: RegState::Implicit); |
5106 | if (ImplicitSReg) |
5107 | MIB.addReg(RegNo: ImplicitSReg, flags: RegState::Implicit); |
5108 | break; |
5109 | } |
5110 | |
5111 | // In general there's no single instruction that can perform an S <-> S |
5112 | // move in NEON space, but a pair of VEXT instructions *can* do the |
5113 | // job. It turns out that the VEXTs needed will only use DSrc once, with |
5114 | // the position based purely on the combination of lane-0 and lane-1 |
5115 | // involved. For example |
5116 | // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1 |
5117 | // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1 |
5118 | // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1 |
5119 | // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1 |
5120 | // |
5121 | // Pattern of the MachineInstrs is: |
5122 | // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) |
5123 | MachineInstrBuilder NewMIB; |
5124 | NewMIB = BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: ARM::VEXTd32), |
5125 | DestReg: DDst); |
5126 | |
5127 | // On the first instruction, both DSrc and DDst may be undef if present. |
5128 | // Specifically when the original instruction didn't have them as an |
5129 | // <imp-use>. |
5130 | MCRegister CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; |
5131 | bool CurUndef = !MI.readsRegister(Reg: CurReg, TRI); |
5132 | NewMIB.addReg(RegNo: CurReg, flags: getUndefRegState(B: CurUndef)); |
5133 | |
5134 | CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; |
5135 | CurUndef = !MI.readsRegister(Reg: CurReg, TRI); |
5136 | NewMIB.addReg(RegNo: CurReg, flags: getUndefRegState(B: CurUndef)) |
5137 | .addImm(Val: 1) |
5138 | .add(MOs: predOps(Pred: ARMCC::AL)); |
5139 | |
5140 | if (SrcLane == DstLane) |
5141 | NewMIB.addReg(RegNo: SrcReg, flags: RegState::Implicit); |
5142 | |
5143 | MI.setDesc(get(Opcode: ARM::VEXTd32)); |
5144 | MIB.addReg(RegNo: DDst, flags: RegState::Define); |
5145 | |
5146 | // On the second instruction, DDst has definitely been defined above, so |
5147 | // it is not undef. DSrc, if present, can be undef as above. |
5148 | CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; |
5149 | CurUndef = CurReg == DSrc && !MI.readsRegister(Reg: CurReg, TRI); |
5150 | MIB.addReg(RegNo: CurReg, flags: getUndefRegState(B: CurUndef)); |
5151 | |
5152 | CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst; |
5153 | CurUndef = CurReg == DSrc && !MI.readsRegister(Reg: CurReg, TRI); |
5154 | MIB.addReg(RegNo: CurReg, flags: getUndefRegState(B: CurUndef)) |
5155 | .addImm(Val: 1) |
5156 | .add(MOs: predOps(Pred: ARMCC::AL)); |
5157 | |
5158 | if (SrcLane != DstLane) |
5159 | MIB.addReg(RegNo: SrcReg, flags: RegState::Implicit); |
5160 | |
5161 | // As before, the original destination is no longer represented, add it |
5162 | // implicitly. |
5163 | MIB.addReg(RegNo: DstReg, flags: RegState::Define | RegState::Implicit); |
    if (ImplicitSReg)
5165 | MIB.addReg(RegNo: ImplicitSReg, flags: RegState::Implicit); |
5166 | break; |
5167 | } |
5168 | } |
5169 | } |
5170 | |
5171 | //===----------------------------------------------------------------------===// |
5172 | // Partial register updates |
5173 | //===----------------------------------------------------------------------===// |
5174 | // |
5175 | // Swift renames NEON registers with 64-bit granularity. That means any |
5176 | // instruction writing an S-reg implicitly reads the containing D-reg. The |
5177 | // problem is mostly avoided by translating f32 operations to v2f32 operations |
5178 | // on D-registers, but f32 loads are still a problem. |
5179 | // |
5180 | // These instructions can load an f32 into a NEON register: |
5181 | // |
5182 | // VLDRS - Only writes S, partial D update. |
5183 | // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops. |
5184 | // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. |
5185 | // |
5186 | // FCONSTD can be used as a dependency-breaking instruction. |
5187 | unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance( |
5188 | const MachineInstr &MI, unsigned OpNum, |
5189 | const TargetRegisterInfo *TRI) const { |
5190 | auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance(); |
5191 | if (!PartialUpdateClearance) |
5192 | return 0; |
5193 | |
  assert(TRI && "Need TRI instance");
5195 | |
5196 | const MachineOperand &MO = MI.getOperand(i: OpNum); |
5197 | if (MO.readsReg()) |
5198 | return 0; |
5199 | Register Reg = MO.getReg(); |
5200 | int UseOp = -1; |
5201 | |
5202 | switch (MI.getOpcode()) { |
5203 | // Normal instructions writing only an S-register. |
5204 | case ARM::VLDRS: |
5205 | case ARM::FCONSTS: |
5206 | case ARM::VMOVSR: |
5207 | case ARM::VMOVv8i8: |
5208 | case ARM::VMOVv4i16: |
5209 | case ARM::VMOVv2i32: |
5210 | case ARM::VMOVv2f32: |
5211 | case ARM::VMOVv1i64: |
5212 | UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, isKill: false); |
5213 | break; |
5214 | |
5215 | // Explicitly reads the dependency. |
5216 | case ARM::VLD1LNd32: |
5217 | UseOp = 3; |
5218 | break; |
5219 | default: |
5220 | return 0; |
5221 | } |
5222 | |
5223 | // If this instruction actually reads a value from Reg, there is no unwanted |
5224 | // dependency. |
5225 | if (UseOp != -1 && MI.getOperand(i: UseOp).readsReg()) |
5226 | return 0; |
5227 | |
5228 | // We must be able to clobber the whole D-reg. |
5229 | if (Reg.isVirtual()) { |
5230 | // Virtual register must be a def undef foo:ssub_0 operand. |
5231 | if (!MO.getSubReg() || MI.readsVirtualRegister(Reg)) |
5232 | return 0; |
5233 | } else if (ARM::SPRRegClass.contains(Reg)) { |
5234 | // Physical register: MI must define the full D-reg. |
5235 | MCRegister DReg = |
5236 | TRI->getMatchingSuperReg(Reg, SubIdx: ARM::ssub_0, RC: &ARM::DPRRegClass); |
5237 | if (!DReg || !MI.definesRegister(Reg: DReg, TRI)) |
5238 | return 0; |
5239 | } |
5240 | |
5241 | // MI has an unwanted D-register dependency. |
  // Avoid defs in the previous N instructions.
5243 | return PartialUpdateClearance; |
5244 | } |
5245 | |
5246 | // Break a partial register dependency after getPartialRegUpdateClearance |
5247 | // returned non-zero. |
5248 | void ARMBaseInstrInfo::breakPartialRegDependency( |
5249 | MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { |
  assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
  assert(TRI && "Need TRI instance");
5252 | |
5253 | const MachineOperand &MO = MI.getOperand(i: OpNum); |
5254 | Register Reg = MO.getReg(); |
  assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
5256 | unsigned DReg = Reg; |
5257 | |
5258 | // If MI defines an S-reg, find the corresponding D super-register. |
5259 | if (ARM::SPRRegClass.contains(Reg)) { |
5260 | DReg = ARM::D0 + (Reg - ARM::S0) / 2; |
    assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
5262 | } |
5263 | |
  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
  assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
5266 | |
5267 | // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines |
5268 | // the full D-register by loading the same value to both lanes. The |
5269 | // instruction is micro-coded with 2 uops, so don't do this until we can |
  // properly schedule micro-coded instructions. The dispatcher stalls cause
  // regressions that are too large.
5272 | |
5273 | // Insert the dependency-breaking FCONSTD before MI. |
5274 | // 96 is the encoding of 0.5, but the actual value doesn't matter here. |
5275 | BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: ARM::FCONSTD), DestReg: DReg) |
5276 | .addImm(Val: 96) |
5277 | .add(MOs: predOps(Pred: ARMCC::AL)); |
5278 | MI.addRegisterKilled(IncomingReg: DReg, RegInfo: TRI, AddIfNotFound: true); |
5279 | } |
5280 | |
5281 | bool ARMBaseInstrInfo::hasNOP() const { |
5282 | return Subtarget.hasFeature(Feature: ARM::HasV6KOps); |
5283 | } |
5284 | |
5285 | bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { |
5286 | if (MI->getNumOperands() < 4) |
5287 | return true; |
5288 | unsigned ShOpVal = MI->getOperand(i: 3).getImm(); |
5289 | unsigned ShImm = ARM_AM::getSORegOffset(Op: ShOpVal); |
5290 | // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1. |
5291 | if ((ShImm == 1 && ARM_AM::getSORegShOp(Op: ShOpVal) == ARM_AM::lsr) || |
5292 | ((ShImm == 1 || ShImm == 2) && |
5293 | ARM_AM::getSORegShOp(Op: ShOpVal) == ARM_AM::lsl)) |
5294 | return true; |
5295 | |
5296 | return false; |
5297 | } |
5298 | |
5299 | bool ARMBaseInstrInfo::getRegSequenceLikeInputs( |
5300 | const MachineInstr &MI, unsigned DefIdx, |
5301 | SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { |
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
  assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
5304 | |
5305 | switch (MI.getOpcode()) { |
5306 | case ARM::VMOVDRR: |
5307 | // dX = VMOVDRR rY, rZ |
5308 | // is the same as: |
5309 | // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1 |
5310 | // Populate the InputRegs accordingly. |
5311 | // rY |
5312 | const MachineOperand *MOReg = &MI.getOperand(i: 1); |
5313 | if (!MOReg->isUndef()) |
5314 | InputRegs.push_back(Elt: RegSubRegPairAndIdx(MOReg->getReg(), |
5315 | MOReg->getSubReg(), ARM::ssub_0)); |
5316 | // rZ |
5317 | MOReg = &MI.getOperand(i: 2); |
5318 | if (!MOReg->isUndef()) |
5319 | InputRegs.push_back(Elt: RegSubRegPairAndIdx(MOReg->getReg(), |
5320 | MOReg->getSubReg(), ARM::ssub_1)); |
5321 | return true; |
5322 | } |
5323 | llvm_unreachable("Target dependent opcode missing" ); |
5324 | } |
5325 | |
5326 | bool ARMBaseInstrInfo::getExtractSubregLikeInputs( |
5327 | const MachineInstr &MI, unsigned DefIdx, |
5328 | RegSubRegPairAndIdx &InputReg) const { |
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
  assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5331 | |
5332 | switch (MI.getOpcode()) { |
5333 | case ARM::VMOVRRD: |
5334 | // rX, rY = VMOVRRD dZ |
5335 | // is the same as: |
5336 | // rX = EXTRACT_SUBREG dZ, ssub_0 |
5337 | // rY = EXTRACT_SUBREG dZ, ssub_1 |
5338 | const MachineOperand &MOReg = MI.getOperand(i: 2); |
5339 | if (MOReg.isUndef()) |
5340 | return false; |
5341 | InputReg.Reg = MOReg.getReg(); |
5342 | InputReg.SubReg = MOReg.getSubReg(); |
5343 | InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1; |
5344 | return true; |
5345 | } |
5346 | llvm_unreachable("Target dependent opcode missing" ); |
5347 | } |
5348 | |
5349 | bool ARMBaseInstrInfo::getInsertSubregLikeInputs( |
5350 | const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, |
5351 | RegSubRegPairAndIdx &InsertedReg) const { |
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
  assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5354 | |
5355 | switch (MI.getOpcode()) { |
5356 | case ARM::VSETLNi32: |
5357 | case ARM::MVE_VMOV_to_lane_32: |
5358 | // dX = VSETLNi32 dY, rZ, imm |
5359 | // qX = MVE_VMOV_to_lane_32 qY, rZ, imm |
5360 | const MachineOperand &MOBaseReg = MI.getOperand(i: 1); |
5361 | const MachineOperand &MOInsertedReg = MI.getOperand(i: 2); |
5362 | if (MOInsertedReg.isUndef()) |
5363 | return false; |
5364 | const MachineOperand &MOIndex = MI.getOperand(i: 3); |
5365 | BaseReg.Reg = MOBaseReg.getReg(); |
5366 | BaseReg.SubReg = MOBaseReg.getSubReg(); |
5367 | |
5368 | InsertedReg.Reg = MOInsertedReg.getReg(); |
5369 | InsertedReg.SubReg = MOInsertedReg.getSubReg(); |
5370 | InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm(); |
5371 | return true; |
5372 | } |
5373 | llvm_unreachable("Target dependent opcode missing" ); |
5374 | } |
5375 | |
5376 | std::pair<unsigned, unsigned> |
5377 | ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { |
5378 | const unsigned Mask = ARMII::MO_OPTION_MASK; |
5379 | return std::make_pair(x: TF & Mask, y: TF & ~Mask); |
5380 | } |
5381 | |
5382 | ArrayRef<std::pair<unsigned, const char *>> |
5383 | ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { |
5384 | using namespace ARMII; |
5385 | |
5386 | static const std::pair<unsigned, const char *> TargetFlags[] = { |
      {MO_LO16, "arm-lo16"},       {MO_HI16, "arm-hi16"},
      {MO_LO_0_7, "arm-lo-0-7"},   {MO_HI_0_7, "arm-hi-0-7"},
      {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
5390 | }; |
5391 | return ArrayRef(TargetFlags); |
5392 | } |
5393 | |
5394 | ArrayRef<std::pair<unsigned, const char *>> |
5395 | ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { |
5396 | using namespace ARMII; |
5397 | |
5398 | static const std::pair<unsigned, const char *> TargetFlags[] = { |
      {MO_COFFSTUB, "arm-coffstub"},
      {MO_GOT, "arm-got"},
      {MO_SBREL, "arm-sbrel"},
      {MO_DLLIMPORT, "arm-dllimport"},
      {MO_SECREL, "arm-secrel"},
      {MO_NONLAZY, "arm-nonlazy"}};
5405 | return ArrayRef(TargetFlags); |
5406 | } |
5407 | |
5408 | std::optional<RegImmPair> |
5409 | ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const { |
5410 | int Sign = 1; |
5411 | unsigned Opcode = MI.getOpcode(); |
5412 | int64_t Offset = 0; |
5413 | |
5414 | // TODO: Handle cases where Reg is a super- or sub-register of the |
5415 | // destination register. |
5416 | const MachineOperand &Op0 = MI.getOperand(i: 0); |
5417 | if (!Op0.isReg() || Reg != Op0.getReg()) |
5418 | return std::nullopt; |
5419 | |
5420 | // We describe SUBri or ADDri instructions. |
5421 | if (Opcode == ARM::SUBri) |
5422 | Sign = -1; |
5423 | else if (Opcode != ARM::ADDri) |
5424 | return std::nullopt; |
5425 | |
  // TODO: The third operand can be a global address (usually some string).
  //       Since strings can be relocated, we cannot calculate their offsets
  //       for now.
5429 | if (!MI.getOperand(i: 1).isReg() || !MI.getOperand(i: 2).isImm()) |
5430 | return std::nullopt; |
5431 | |
5432 | Offset = MI.getOperand(i: 2).getImm() * Sign; |
5433 | return RegImmPair{MI.getOperand(i: 1).getReg(), Offset}; |
5434 | } |
5435 | |
5436 | bool llvm::registerDefinedBetween(unsigned Reg, |
5437 | MachineBasicBlock::iterator From, |
5438 | MachineBasicBlock::iterator To, |
5439 | const TargetRegisterInfo *TRI) { |
5440 | for (auto I = From; I != To; ++I) |
5441 | if (I->modifiesRegister(Reg, TRI)) |
5442 | return true; |
5443 | return false; |
5444 | } |
5445 | |
5446 | MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br, |
5447 | const TargetRegisterInfo *TRI) { |
  // Search backwards to the instruction that defines CPSR. This may or may
  // not be a CMP; we check that after this loop. If we find another
  // instruction that reads CPSR, we return nullptr.
5451 | MachineBasicBlock::iterator CmpMI = Br; |
5452 | while (CmpMI != Br->getParent()->begin()) { |
5453 | --CmpMI; |
5454 | if (CmpMI->modifiesRegister(Reg: ARM::CPSR, TRI)) |
5455 | break; |
5456 | if (CmpMI->readsRegister(Reg: ARM::CPSR, TRI)) |
5457 | break; |
5458 | } |
5459 | |
5460 | // Check that this inst is a CMP r[0-7], #0 and that the register |
5461 | // is not redefined between the cmp and the br. |
5462 | if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri) |
5463 | return nullptr; |
5464 | Register Reg = CmpMI->getOperand(i: 0).getReg(); |
5465 | Register PredReg; |
5466 | ARMCC::CondCodes Pred = getInstrPredicate(MI: *CmpMI, PredReg); |
5467 | if (Pred != ARMCC::AL || CmpMI->getOperand(i: 1).getImm() != 0) |
5468 | return nullptr; |
5469 | if (!isARMLowRegister(Reg)) |
5470 | return nullptr; |
5471 | if (registerDefinedBetween(Reg, From: CmpMI->getNextNode(), To: Br, TRI)) |
5472 | return nullptr; |
5473 | |
5474 | return &*CmpMI; |
5475 | } |
5476 | |
5477 | unsigned llvm::ConstantMaterializationCost(unsigned Val, |
5478 | const ARMSubtarget *Subtarget, |
5479 | bool ForCodesize) { |
5480 | if (Subtarget->isThumb()) { |
5481 | if (Val <= 255) // MOV |
5482 | return ForCodesize ? 2 : 1; |
5483 | if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV |
5484 | ARM_AM::getT2SOImmVal(Arg: Val) != -1 || // MOVW |
5485 | ARM_AM::getT2SOImmVal(Arg: ~Val) != -1)) // MVN |
5486 | return ForCodesize ? 4 : 1; |
5487 | if (Val <= 510) // MOV + ADDi8 |
5488 | return ForCodesize ? 4 : 2; |
5489 | if (~Val <= 255) // MOV + MVN |
5490 | return ForCodesize ? 4 : 2; |
5491 | if (ARM_AM::isThumbImmShiftedVal(V: Val)) // MOV + LSL |
5492 | return ForCodesize ? 4 : 2; |
5493 | } else { |
5494 | if (ARM_AM::getSOImmVal(Arg: Val) != -1) // MOV |
5495 | return ForCodesize ? 4 : 1; |
5496 | if (ARM_AM::getSOImmVal(Arg: ~Val) != -1) // MVN |
5497 | return ForCodesize ? 4 : 1; |
5498 | if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW |
5499 | return ForCodesize ? 4 : 1; |
5500 | if (ARM_AM::isSOImmTwoPartVal(V: Val)) // two instrs |
5501 | return ForCodesize ? 8 : 2; |
5502 | if (ARM_AM::isSOImmTwoPartValNeg(V: Val)) // two instrs |
5503 | return ForCodesize ? 8 : 2; |
5504 | } |
5505 | if (Subtarget->useMovt()) // MOVW + MOVT |
5506 | return ForCodesize ? 8 : 2; |
5507 | return ForCodesize ? 8 : 3; // Literal pool load |
5508 | } |
5509 | |
5510 | bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, |
5511 | const ARMSubtarget *Subtarget, |
5512 | bool ForCodesize) { |
5513 | // Check with ForCodesize |
5514 | unsigned Cost1 = ConstantMaterializationCost(Val: Val1, Subtarget, ForCodesize); |
5515 | unsigned Cost2 = ConstantMaterializationCost(Val: Val2, Subtarget, ForCodesize); |
5516 | if (Cost1 < Cost2) |
5517 | return true; |
5518 | if (Cost1 > Cost2) |
5519 | return false; |
5520 | |
5521 | // If they are equal, try with !ForCodesize |
5522 | return ConstantMaterializationCost(Val: Val1, Subtarget, ForCodesize: !ForCodesize) < |
5523 | ConstantMaterializationCost(Val: Val2, Subtarget, ForCodesize: !ForCodesize); |
5524 | } |
5525 | |
5526 | /// Constants defining how certain sequences should be outlined. |
5527 | /// This encompasses how an outlined function should be called, and what kind of |
5528 | /// frame should be emitted for that outlined function. |
5529 | /// |
5530 | /// \p MachineOutlinerTailCall implies that the function is being created from |
5531 | /// a sequence of instructions ending in a return. |
5532 | /// |
5533 | /// That is, |
5534 | /// |
5535 | /// I1 OUTLINED_FUNCTION: |
5536 | /// I2 --> B OUTLINED_FUNCTION I1 |
5537 | /// BX LR I2 |
5538 | /// BX LR |
5539 | /// |
5540 | /// +-------------------------+--------+-----+ |
5541 | /// | | Thumb2 | ARM | |
5542 | /// +-------------------------+--------+-----+ |
5543 | /// | Call overhead in Bytes | 4 | 4 | |
5544 | /// | Frame overhead in Bytes | 0 | 0 | |
5545 | /// | Stack fixup required | No | No | |
5546 | /// +-------------------------+--------+-----+ |
5547 | /// |
5548 | /// \p MachineOutlinerThunk implies that the function is being created from |
5549 | /// a sequence of instructions ending in a call. The outlined function is |
5550 | /// called with a BL instruction, and the outlined function tail-calls the |
5551 | /// original call destination. |
5552 | /// |
5553 | /// That is, |
5554 | /// |
5555 | /// I1 OUTLINED_FUNCTION: |
5556 | /// I2 --> BL OUTLINED_FUNCTION I1 |
5557 | /// BL f I2 |
5558 | /// B f |
5559 | /// |
5560 | /// +-------------------------+--------+-----+ |
5561 | /// | | Thumb2 | ARM | |
5562 | /// +-------------------------+--------+-----+ |
5563 | /// | Call overhead in Bytes | 4 | 4 | |
5564 | /// | Frame overhead in Bytes | 0 | 0 | |
5565 | /// | Stack fixup required | No | No | |
5566 | /// +-------------------------+--------+-----+ |
5567 | /// |
5568 | /// \p MachineOutlinerNoLRSave implies that the function should be called using |
5569 | /// a BL instruction, but doesn't require LR to be saved and restored. This |
5570 | /// happens when LR is known to be dead. |
5571 | /// |
5572 | /// That is, |
5573 | /// |
5574 | /// I1 OUTLINED_FUNCTION: |
5575 | /// I2 --> BL OUTLINED_FUNCTION I1 |
5576 | /// I3 I2 |
5577 | /// I3 |
5578 | /// BX LR |
5579 | /// |
5580 | /// +-------------------------+--------+-----+ |
5581 | /// | | Thumb2 | ARM | |
5582 | /// +-------------------------+--------+-----+ |
5583 | /// | Call overhead in Bytes | 4 | 4 | |
5584 | /// | Frame overhead in Bytes | 2 | 4 | |
5585 | /// | Stack fixup required | No | No | |
5586 | /// +-------------------------+--------+-----+ |
5587 | /// |
5588 | /// \p MachineOutlinerRegSave implies that the function should be called with a |
5589 | /// save and restore of LR to an available register. This allows us to avoid |
5590 | /// stack fixups. Note that this outlining variant is compatible with the |
5591 | /// NoLRSave case. |
5592 | /// |
5593 | /// That is, |
5594 | /// |
5595 | /// I1 Save LR OUTLINED_FUNCTION: |
5596 | /// I2 --> BL OUTLINED_FUNCTION I1 |
5597 | /// I3 Restore LR I2 |
5598 | /// I3 |
5599 | /// BX LR |
5600 | /// |
5601 | /// +-------------------------+--------+-----+ |
5602 | /// | | Thumb2 | ARM | |
5603 | /// +-------------------------+--------+-----+ |
5604 | /// | Call overhead in Bytes | 8 | 12 | |
5605 | /// | Frame overhead in Bytes | 2 | 4 | |
5606 | /// | Stack fixup required | No | No | |
5607 | /// +-------------------------+--------+-----+ |
5608 | /// |
5609 | /// \p MachineOutlinerDefault implies that the function should be called with |
5610 | /// a save and restore of LR to the stack. |
5611 | /// |
5612 | /// That is, |
5613 | /// |
5614 | /// I1 Save LR OUTLINED_FUNCTION: |
5615 | /// I2 --> BL OUTLINED_FUNCTION I1 |
5616 | /// I3 Restore LR I2 |
5617 | /// I3 |
5618 | /// BX LR |
5619 | /// |
5620 | /// +-------------------------+--------+-----+ |
5621 | /// | | Thumb2 | ARM | |
5622 | /// +-------------------------+--------+-----+ |
5623 | /// | Call overhead in Bytes | 8 | 12 | |
5624 | /// | Frame overhead in Bytes | 2 | 4 | |
5625 | /// | Stack fixup required | Yes | Yes | |
5626 | /// +-------------------------+--------+-----+ |
5627 | |
5628 | enum MachineOutlinerClass { |
5629 | MachineOutlinerTailCall, |
5630 | MachineOutlinerThunk, |
5631 | MachineOutlinerNoLRSave, |
5632 | MachineOutlinerRegSave, |
5633 | MachineOutlinerDefault |
5634 | }; |
5635 | |
5636 | enum MachineOutlinerMBBFlags { |
5637 | LRUnavailableSomewhere = 0x2, |
5638 | HasCalls = 0x4, |
5639 | UnsafeRegsDead = 0x8 |
5640 | }; |
5641 | |
5642 | struct OutlinerCosts { |
5643 | int CallTailCall; |
5644 | int FrameTailCall; |
5645 | int CallThunk; |
5646 | int FrameThunk; |
5647 | int CallNoLRSave; |
5648 | int FrameNoLRSave; |
5649 | int CallRegSave; |
5650 | int FrameRegSave; |
5651 | int CallDefault; |
5652 | int FrameDefault; |
5653 | int SaveRestoreLROnStack; |
5654 | |
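  // All costs are in bytes and are meant to mirror the call/frame overhead
  // tables in the MachineOutlinerClass documentation above; several entries
  // happen to be identical for Thumb and ARM but are kept as per-target
  // ternaries anyway.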
5655 | OutlinerCosts(const ARMSubtarget &target) |
5656 | : CallTailCall(target.isThumb() ? 4 : 4), |
5657 | FrameTailCall(target.isThumb() ? 0 : 0), |
5658 | CallThunk(target.isThumb() ? 4 : 4), |
5659 | FrameThunk(target.isThumb() ? 0 : 0), |
5660 | CallNoLRSave(target.isThumb() ? 4 : 4), |
5661 | FrameNoLRSave(target.isThumb() ? 2 : 4), |
5662 | CallRegSave(target.isThumb() ? 8 : 12), |
5663 | FrameRegSave(target.isThumb() ? 2 : 4), |
5664 | CallDefault(target.isThumb() ? 8 : 12), |
5665 | FrameDefault(target.isThumb() ? 2 : 4), |
5666 | SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {} |
5667 | }; |
5668 | |
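// Find a register that an outlined call can use to keep LR alive across the
// call: a non-reserved rGPR other than LR itself and R12 (which is not
// guaranteed to be preserved across a linker-inserted veneer), free both
// inside and across the candidate sequence. Returns an invalid Register if
// none is available.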
5669 | Register |
5670 | ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const { |
5671 | MachineFunction *MF = C.getMF(); |
5672 | const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); |
5673 | const ARMBaseRegisterInfo *ARI = |
5674 | static_cast<const ARMBaseRegisterInfo *>(&TRI); |
5675 | |
5676 | BitVector regsReserved = ARI->getReservedRegs(MF: *MF); |
5677 | // Check if there is an available register across the sequence that we can |
5678 | // use. |
5679 | for (Register Reg : ARM::rGPRRegClass) { |
5680 | if (!(Reg < regsReserved.size() && regsReserved.test(Idx: Reg)) && |
5681 | Reg != ARM::LR && // LR is not reserved, but don't use it. |
5682 | Reg != ARM::R12 && // R12 is not guaranteed to be preserved. |
5683 | C.isAvailableAcrossAndOutOfSeq(Reg, TRI) && |
5684 | C.isAvailableInsideSeq(Reg, TRI)) |
5685 | return Reg; |
5686 | } |
5687 | return Register(); |
5688 | } |
5689 | |
5690 | // Compute liveness of LR at the point after the interval [I, E), which |
5691 | // denotes a *backward* iteration through instructions. Used only for return |
5692 | // basic blocks, which do not end with a tail call. |
5693 | static bool isLRAvailable(const TargetRegisterInfo &TRI, |
5694 | MachineBasicBlock::reverse_iterator I, |
5695 | MachineBasicBlock::reverse_iterator E) { |
  // At the end of the function LR is dead.
5697 | bool Live = false; |
5698 | for (; I != E; ++I) { |
5699 | const MachineInstr &MI = *I; |
5700 | |
5701 | // Check defs of LR. |
5702 | if (MI.modifiesRegister(Reg: ARM::LR, TRI: &TRI)) |
5703 | Live = false; |
5704 | |
5705 | // Check uses of LR. |
5706 | unsigned Opcode = MI.getOpcode(); |
5707 | if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR || |
5708 | Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET || |
5709 | Opcode == ARM::tBXNS_RET) { |
5710 | // These instructions use LR, but it's not an (explicit or implicit) |
5711 | // operand. |
5712 | Live = true; |
5713 | continue; |
5714 | } |
5715 | if (MI.readsRegister(Reg: ARM::LR, TRI: &TRI)) |
5716 | Live = true; |
5717 | } |
5718 | return !Live; |
5719 | } |
5720 | |
5721 | std::optional<std::unique_ptr<outliner::OutlinedFunction>> |
5722 | ARMBaseInstrInfo::getOutliningCandidateInfo( |
5723 | const MachineModuleInfo &MMI, |
5724 | std::vector<outliner::Candidate> &RepeatedSequenceLocs, |
5725 | unsigned MinRepeats) const { |
5726 | unsigned SequenceSize = 0; |
5727 | for (auto &MI : RepeatedSequenceLocs[0]) |
5728 | SequenceSize += getInstSizeInBytes(MI); |
5729 | |
5730 | // Properties about candidate MBBs that hold for all of them. |
5731 | unsigned FlagsSetInAll = 0xF; |
5732 | |
5733 | // Compute liveness information for each candidate, and set FlagsSetInAll. |
5734 | const TargetRegisterInfo &TRI = getRegisterInfo(); |
5735 | for (outliner::Candidate &C : RepeatedSequenceLocs) |
5736 | FlagsSetInAll &= C.Flags; |
5737 | |
5738 | // According to the ARM Procedure Call Standard, the following are |
5739 | // undefined on entry/exit from a function call: |
5740 | // |
5741 | // * Register R12(IP), |
5742 | // * Condition codes (and thus the CPSR register) |
5743 | // |
5744 | // Since we control the instructions which are part of the outlined regions |
5745 | // we don't need to be fully compliant with the AAPCS, but we have to |
5746 | // guarantee that if a veneer is inserted at link time the code is still |
5747 | // correct. Because of this, we can't outline any sequence of instructions |
5748 | // where one of these registers is live into/across it. Thus, we need to |
5749 | // delete those candidates. |
5750 | auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) { |
5751 | // If the unsafe registers in this block are all dead, then we don't need |
5752 | // to compute liveness here. |
5753 | if (C.Flags & UnsafeRegsDead) |
5754 | return false; |
5755 | return C.isAnyUnavailableAcrossOrOutOfSeq(Regs: {ARM::R12, ARM::CPSR}, TRI); |
5756 | }; |
5757 | |
5758 | // Are there any candidates where those registers are live? |
5759 | if (!(FlagsSetInAll & UnsafeRegsDead)) { |
    // Erase every candidate that violates the restrictions above. (It could be
    // true that we still have viable candidates, so it's not worth bailing out
    // if, say, 1 out of 20 candidates violates the restrictions.)
5763 | llvm::erase_if(C&: RepeatedSequenceLocs, P: CantGuaranteeValueAcrossCall); |
5764 | |
5765 | // If the sequence doesn't have enough candidates left, then we're done. |
5766 | if (RepeatedSequenceLocs.size() < MinRepeats) |
5767 | return std::nullopt; |
5768 | } |
5769 | |
  // We expect the majority of the outlining candidates to agree on return
  // address signing and authentication, and on branch target enforcement. In
  // other words, partitioning according to all four possible combinations of
  // PAC-RET and BTI is going to yield one big subset and three small (likely
  // empty) subsets. That allows us to cull incompatible candidates separately
  // for PAC-RET and BTI.
5776 | |
5777 | // Partition the candidates in two sets: one with BTI enabled and one with BTI |
5778 | // disabled. Remove the candidates from the smaller set. If they are the same |
5779 | // number prefer the non-BTI ones for outlining, since they have less |
5780 | // overhead. |
5781 | auto NoBTI = |
5782 | llvm::partition(Range&: RepeatedSequenceLocs, P: [](const outliner::Candidate &C) { |
5783 | const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>(); |
5784 | return AFI.branchTargetEnforcement(); |
5785 | }); |
5786 | if (std::distance(first: RepeatedSequenceLocs.begin(), last: NoBTI) > |
5787 | std::distance(first: NoBTI, last: RepeatedSequenceLocs.end())) |
5788 | RepeatedSequenceLocs.erase(first: NoBTI, last: RepeatedSequenceLocs.end()); |
5789 | else |
5790 | RepeatedSequenceLocs.erase(first: RepeatedSequenceLocs.begin(), last: NoBTI); |
5791 | |
5792 | if (RepeatedSequenceLocs.size() < MinRepeats) |
5793 | return std::nullopt; |
5794 | |
5795 | // Likewise, partition the candidates according to PAC-RET enablement. |
5796 | auto NoPAC = |
5797 | llvm::partition(Range&: RepeatedSequenceLocs, P: [](const outliner::Candidate &C) { |
5798 | const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>(); |
5799 | // If the function happens to not spill the LR, do not disqualify it |
5800 | // from the outlining. |
5801 | return AFI.shouldSignReturnAddress(SpillsLR: true); |
5802 | }); |
5803 | if (std::distance(first: RepeatedSequenceLocs.begin(), last: NoPAC) > |
5804 | std::distance(first: NoPAC, last: RepeatedSequenceLocs.end())) |
5805 | RepeatedSequenceLocs.erase(first: NoPAC, last: RepeatedSequenceLocs.end()); |
5806 | else |
5807 | RepeatedSequenceLocs.erase(first: RepeatedSequenceLocs.begin(), last: NoPAC); |
5808 | |
5809 | if (RepeatedSequenceLocs.size() < MinRepeats) |
5810 | return std::nullopt; |
5811 | |
5812 | // At this point, we have only "safe" candidates to outline. Figure out |
5813 | // frame + call instruction information. |
5814 | |
5815 | unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode(); |
5816 | |
5817 | // Helper lambda which sets call information for every candidate. |
5818 | auto SetCandidateCallInfo = |
5819 | [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) { |
5820 | for (outliner::Candidate &C : RepeatedSequenceLocs) |
5821 | C.setCallInfo(CID: CallID, CO: NumBytesForCall); |
5822 | }; |
5823 | |
5824 | OutlinerCosts Costs(Subtarget); |
5825 | |
5826 | const auto &SomeMFI = |
5827 | *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>(); |
5828 | // Adjust costs to account for the BTI instructions. |
5829 | if (SomeMFI.branchTargetEnforcement()) { |
5830 | Costs.FrameDefault += 4; |
5831 | Costs.FrameNoLRSave += 4; |
5832 | Costs.FrameRegSave += 4; |
5833 | Costs.FrameTailCall += 4; |
5834 | Costs.FrameThunk += 4; |
5835 | } |
5836 | |
5837 | // Adjust costs to account for sign and authentication instructions. |
5838 | if (SomeMFI.shouldSignReturnAddress(SpillsLR: true)) { |
5839 | Costs.CallDefault += 8; // +PAC instr, +AUT instr |
5840 | Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr |
5841 | } |
5842 | |
5843 | unsigned FrameID = MachineOutlinerDefault; |
5844 | unsigned NumBytesToCreateFrame = Costs.FrameDefault; |
5845 | |
5846 | // If the last instruction in any candidate is a terminator, then we should |
5847 | // tail call all of the candidates. |
5848 | if (RepeatedSequenceLocs[0].back().isTerminator()) { |
5849 | FrameID = MachineOutlinerTailCall; |
5850 | NumBytesToCreateFrame = Costs.FrameTailCall; |
5851 | SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall); |
5852 | } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX || |
5853 | LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL || |
5854 | LastInstrOpcode == ARM::tBLXr || |
5855 | LastInstrOpcode == ARM::tBLXr_noip || |
5856 | LastInstrOpcode == ARM::tBLXi) { |
5857 | FrameID = MachineOutlinerThunk; |
5858 | NumBytesToCreateFrame = Costs.FrameThunk; |
5859 | SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk); |
5860 | } else { |
5861 | // We need to decide how to emit calls + frames. We can always emit the same |
5862 | // frame if we don't need to save to the stack. If we have to save to the |
5863 | // stack, then we need a different frame. |
5864 | unsigned NumBytesNoStackCalls = 0; |
5865 | std::vector<outliner::Candidate> CandidatesWithoutStackFixups; |
5866 | |
5867 | for (outliner::Candidate &C : RepeatedSequenceLocs) { |
5868 | // LR liveness is overestimated in return blocks, unless they end with a |
5869 | // tail call. |
5870 | const auto Last = C.getMBB()->rbegin(); |
5871 | const bool LRIsAvailable = |
5872 | C.getMBB()->isReturnBlock() && !Last->isCall() |
5873 | ? isLRAvailable(TRI, I: Last, |
5874 | E: (MachineBasicBlock::reverse_iterator)C.begin()) |
5875 | : C.isAvailableAcrossAndOutOfSeq(Reg: ARM::LR, TRI); |
5876 | if (LRIsAvailable) { |
5877 | FrameID = MachineOutlinerNoLRSave; |
5878 | NumBytesNoStackCalls += Costs.CallNoLRSave; |
5879 | C.setCallInfo(CID: MachineOutlinerNoLRSave, CO: Costs.CallNoLRSave); |
5880 | CandidatesWithoutStackFixups.push_back(x: C); |
5881 | } |
5882 | |
5883 | // Is an unused register available? If so, we won't modify the stack, so |
5884 | // we can outline with the same frame type as those that don't save LR. |
5885 | else if (findRegisterToSaveLRTo(C)) { |
5886 | FrameID = MachineOutlinerRegSave; |
5887 | NumBytesNoStackCalls += Costs.CallRegSave; |
5888 | C.setCallInfo(CID: MachineOutlinerRegSave, CO: Costs.CallRegSave); |
5889 | CandidatesWithoutStackFixups.push_back(x: C); |
5890 | } |
5891 | |
5892 | // Is SP used in the sequence at all? If not, we don't have to modify |
5893 | // the stack, so we are guaranteed to get the same frame. |
5894 | else if (C.isAvailableInsideSeq(Reg: ARM::SP, TRI)) { |
5895 | NumBytesNoStackCalls += Costs.CallDefault; |
5896 | C.setCallInfo(CID: MachineOutlinerDefault, CO: Costs.CallDefault); |
5897 | CandidatesWithoutStackFixups.push_back(x: C); |
5898 | } |
5899 | |
5900 | // If we outline this, we need to modify the stack. Pretend we don't |
5901 | // outline this by saving all of its bytes. |
5902 | else |
5903 | NumBytesNoStackCalls += SequenceSize; |
5904 | } |
5905 | |
    // If there are no places where we have to save LR, then note that we don't
    // have to update the stack. Otherwise, give every candidate the default
    // call type.
5909 | if (NumBytesNoStackCalls <= |
5910 | RepeatedSequenceLocs.size() * Costs.CallDefault) { |
5911 | RepeatedSequenceLocs = CandidatesWithoutStackFixups; |
5912 | FrameID = MachineOutlinerNoLRSave; |
5913 | if (RepeatedSequenceLocs.size() < MinRepeats) |
5914 | return std::nullopt; |
5915 | } else |
5916 | SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault); |
5917 | } |
5918 | |
5919 | // Does every candidate's MBB contain a call? If so, then we might have a |
5920 | // call in the range. |
5921 | if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { |
    // Check if the range contains a call. These require a save + restore of
    // the link register.
5924 | outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; |
5925 | if (any_of(Range: drop_end(RangeOrContainer&: FirstCand), |
5926 | P: [](const MachineInstr &MI) { return MI.isCall(); })) |
5927 | NumBytesToCreateFrame += Costs.SaveRestoreLROnStack; |
5928 | |
    // Handle the last instruction separately. If it is a tail call, then the
    // last instruction is really a call and we don't want to save + restore in
    // that case. However, it is possible that the last instruction is a call
    // without it being valid to tail call this sequence. We should consider
    // this as well.
5934 | else if (FrameID != MachineOutlinerThunk && |
5935 | FrameID != MachineOutlinerTailCall && FirstCand.back().isCall()) |
5936 | NumBytesToCreateFrame += Costs.SaveRestoreLROnStack; |
5937 | } |
5938 | |
5939 | return std::make_unique<outliner::OutlinedFunction>( |
5940 | args&: RepeatedSequenceLocs, args&: SequenceSize, args&: NumBytesToCreateFrame, args&: FrameID); |
5941 | } |
5942 | |
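// Check whether MI is an SP-relative memory access whose immediate offset can
// absorb an extra Fixup bytes (e.g. the slot an outlined frame uses to save
// LR) while remaining encodable in MI's addressing mode. Returns true if MI
// has no SP operand at all or if the adjusted offset still fits; when Updt is
// set, the offset operand is rewritten in place.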
5943 | bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, |
5944 | int64_t Fixup, |
5945 | bool Updt) const { |
5946 | int SPIdx = MI->findRegisterUseOperandIdx(Reg: ARM::SP, /*TRI=*/nullptr); |
5947 | unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask); |
5948 | if (SPIdx < 0) |
5949 | // No SP operand |
5950 | return true; |
5951 | else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2)) |
5952 | // If SP is not the base register we can't do much |
5953 | return false; |
5954 | |
  // Stack might be involved but the addressing mode doesn't handle any offset.
  // Note: AddrModeT1_[1|2|4] don't operate on SP.
5957 | if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions |
5958 | AddrMode == ARMII::AddrMode4 || // Load/Store Multiple |
5959 | AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple |
      AddrMode == ARMII::AddrModeT2_so || // SP can't be used as base register
5961 | AddrMode == ARMII::AddrModeT2_pc || // PCrel access |
5962 | AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST |
5963 | AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE |
5964 | AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE |
5965 | AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR |
5966 | AddrMode == ARMII::AddrModeNone || |
5967 | AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions |
5968 | AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm |
5969 | return false; |
5970 | |
5971 | unsigned NumOps = MI->getDesc().getNumOperands(); |
5972 | unsigned ImmIdx = NumOps - 3; |
5973 | |
5974 | const MachineOperand &Offset = MI->getOperand(i: ImmIdx); |
  assert(Offset.isImm() && "Is not an immediate");
5976 | int64_t OffVal = Offset.getImm(); |
5977 | |
5978 | if (OffVal < 0) |
    // Don't override data if they are below SP.
5980 | return false; |
5981 | |
5982 | unsigned NumBits = 0; |
5983 | unsigned Scale = 1; |
5984 | |
5985 | switch (AddrMode) { |
5986 | case ARMII::AddrMode3: |
5987 | if (ARM_AM::getAM3Op(AM3Opc: OffVal) == ARM_AM::sub) |
5988 | return false; |
5989 | OffVal = ARM_AM::getAM3Offset(AM3Opc: OffVal); |
5990 | NumBits = 8; |
5991 | break; |
5992 | case ARMII::AddrMode5: |
5993 | if (ARM_AM::getAM5Op(AM5Opc: OffVal) == ARM_AM::sub) |
5994 | return false; |
5995 | OffVal = ARM_AM::getAM5Offset(AM5Opc: OffVal); |
5996 | NumBits = 8; |
5997 | Scale = 4; |
5998 | break; |
5999 | case ARMII::AddrMode5FP16: |
6000 | if (ARM_AM::getAM5FP16Op(AM5Opc: OffVal) == ARM_AM::sub) |
6001 | return false; |
6002 | OffVal = ARM_AM::getAM5FP16Offset(AM5Opc: OffVal); |
6003 | NumBits = 8; |
6004 | Scale = 2; |
6005 | break; |
6006 | case ARMII::AddrModeT2_i8pos: |
6007 | NumBits = 8; |
6008 | break; |
6009 | case ARMII::AddrModeT2_i8s4: |
6010 | // FIXME: Values are already scaled in this addressing mode. |
    assert((Fixup & 3) == 0 && "Can't encode this offset!");
6012 | NumBits = 10; |
6013 | break; |
6014 | case ARMII::AddrModeT2_ldrex: |
6015 | NumBits = 8; |
6016 | Scale = 4; |
6017 | break; |
6018 | case ARMII::AddrModeT2_i12: |
6019 | case ARMII::AddrMode_i12: |
6020 | NumBits = 12; |
6021 | break; |
6022 | case ARMII::AddrModeT1_s: // SP-relative LD/ST |
6023 | NumBits = 8; |
6024 | Scale = 4; |
6025 | break; |
6026 | default: |
    llvm_unreachable("Unsupported addressing mode!");
6028 | } |
6029 | // Make sure the offset is encodable for instructions that scale the |
6030 | // immediate. |
  assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
         "Can't encode this offset!");
6033 | OffVal += Fixup / Scale; |
6034 | |
6035 | unsigned Mask = (1 << NumBits) - 1; |
6036 | |
6037 | if (OffVal <= Mask) { |
6038 | if (Updt) |
6039 | MI->getOperand(i: ImmIdx).setImm(OffVal); |
6040 | return true; |
6041 | } |
6042 | |
6043 | return false; |
6044 | } |
6045 | |
6046 | void ARMBaseInstrInfo::mergeOutliningCandidateAttributes( |
6047 | Function &F, std::vector<outliner::Candidate> &Candidates) const { |
6048 | outliner::Candidate &C = Candidates.front(); |
6049 | // branch-target-enforcement is guaranteed to be consistent between all |
6050 | // candidates, so we only need to look at one. |
6051 | const Function &CFn = C.getMF()->getFunction(); |
  if (CFn.hasFnAttribute(Kind: "branch-target-enforcement"))
    F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "branch-target-enforcement"));

  if (CFn.hasFnAttribute(Kind: "sign-return-address"))
    F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "sign-return-address"));
6057 | |
6058 | ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates); |
6059 | } |
6060 | |
6061 | bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom( |
6062 | MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { |
6063 | const Function &F = MF.getFunction(); |
6064 | |
6065 | // Can F be deduplicated by the linker? If it can, don't outline from it. |
6066 | if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) |
6067 | return false; |
6068 | |
6069 | // Don't outline from functions with section markings; the program could |
6070 | // expect that all the code is in the named section. |
6071 | // FIXME: Allow outlining from multiple functions with the same section |
6072 | // marking. |
6073 | if (F.hasSection()) |
6074 | return false; |
6075 | |
6076 | // FIXME: Thumb1 outlining is not handled |
6077 | if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction()) |
6078 | return false; |
6079 | |
6080 | // It's safe to outline from MF. |
6081 | return true; |
6082 | } |
6083 | |
6084 | bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, |
6085 | unsigned &Flags) const { |
6086 | // Check if LR is available through all of the MBB. If it's not, then set |
6087 | // a flag. |
  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
         "Suitable Machine Function for outlining must track liveness");
6090 | |
6091 | LiveRegUnits LRU(getRegisterInfo()); |
6092 | |
6093 | for (MachineInstr &MI : llvm::reverse(C&: MBB)) |
6094 | LRU.accumulate(MI); |
6095 | |
6096 | // Check if each of the unsafe registers are available... |
6097 | bool R12AvailableInBlock = LRU.available(Reg: ARM::R12); |
6098 | bool CPSRAvailableInBlock = LRU.available(Reg: ARM::CPSR); |
6099 | |
6100 | // If all of these are dead (and not live out), we know we don't have to check |
6101 | // them later. |
6102 | if (R12AvailableInBlock && CPSRAvailableInBlock) |
6103 | Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead; |
6104 | |
6105 | // Now, add the live outs to the set. |
6106 | LRU.addLiveOuts(MBB); |
6107 | |
6108 | // If any of these registers is available in the MBB, but also a live out of |
6109 | // the block, then we know outlining is unsafe. |
6110 | if (R12AvailableInBlock && !LRU.available(Reg: ARM::R12)) |
6111 | return false; |
6112 | if (CPSRAvailableInBlock && !LRU.available(Reg: ARM::CPSR)) |
6113 | return false; |
6114 | |
6115 | // Check if there's a call inside this MachineBasicBlock. If there is, then |
6116 | // set a flag. |
6117 | if (any_of(Range&: MBB, P: [](MachineInstr &MI) { return MI.isCall(); })) |
6118 | Flags |= MachineOutlinerMBBFlags::HasCalls; |
6119 | |
6120 | // LR liveness is overestimated in return blocks. |
6121 | |
6122 | bool LRIsAvailable = |
6123 | MBB.isReturnBlock() && !MBB.back().isCall() |
6124 | ? isLRAvailable(TRI: getRegisterInfo(), I: MBB.rbegin(), E: MBB.rend()) |
6125 | : LRU.available(Reg: ARM::LR); |
6126 | if (!LRIsAvailable) |
6127 | Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; |
6128 | |
6129 | return true; |
6130 | } |
6131 | |
6132 | outliner::InstrType |
6133 | ARMBaseInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI, |
6134 | MachineBasicBlock::iterator &MIT, |
6135 | unsigned Flags) const { |
6136 | MachineInstr &MI = *MIT; |
6137 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
6138 | |
  // PIC instructions contain labels; outlining them would break offset
  // computing.
6141 | unsigned Opc = MI.getOpcode(); |
6142 | if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR || |
6143 | Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR || |
6144 | Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB || |
6145 | Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic || |
6146 | Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel || |
6147 | Opc == ARM::t2MOV_ga_pcrel) |
6148 | return outliner::InstrType::Illegal; |
6149 | |
6150 | // Be conservative with ARMv8.1 MVE instructions. |
6151 | if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart || |
6152 | Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart || |
6153 | Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP || |
6154 | Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd || |
6155 | Opc == ARM::t2LoopEndDec) |
6156 | return outliner::InstrType::Illegal; |
6157 | |
6158 | const MCInstrDesc &MCID = MI.getDesc(); |
6159 | uint64_t MIFlags = MCID.TSFlags; |
6160 | if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE) |
6161 | return outliner::InstrType::Illegal; |
6162 | |
6163 | // Is this a terminator for a basic block? |
6164 | if (MI.isTerminator()) |
6165 | // TargetInstrInfo::getOutliningType has already filtered out anything |
6166 | // that would break this, so we can allow it here. |
6167 | return outliner::InstrType::Legal; |
6168 | |
6169 | // Don't outline if link register or program counter value are used. |
6170 | if (MI.readsRegister(Reg: ARM::LR, TRI) || MI.readsRegister(Reg: ARM::PC, TRI)) |
6171 | return outliner::InstrType::Illegal; |
6172 | |
6173 | if (MI.isCall()) { |
6174 | // Get the function associated with the call. Look at each operand and find |
    // the one that represents the callee and get its name.
6176 | const Function *Callee = nullptr; |
6177 | for (const MachineOperand &MOP : MI.operands()) { |
6178 | if (MOP.isGlobal()) { |
6179 | Callee = dyn_cast<Function>(Val: MOP.getGlobal()); |
6180 | break; |
6181 | } |
6182 | } |
6183 | |
    // Don't outline calls to "mcount"-like functions; in particular, Linux
    // kernel function tracing relies on them.
    if (Callee &&
        (Callee->getName() == "\01__gnu_mcount_nc" ||
         Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
6189 | return outliner::InstrType::Illegal; |
6190 | |
6191 | // If we don't know anything about the callee, assume it depends on the |
6192 | // stack layout of the caller. In that case, it's only legal to outline |
6193 | // as a tail-call. Explicitly list the call instructions we know about so |
6194 | // we don't get unexpected results with call pseudo-instructions. |
6195 | auto UnknownCallOutlineType = outliner::InstrType::Illegal; |
6196 | if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX || |
6197 | Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip || |
6198 | Opc == ARM::tBLXi) |
6199 | UnknownCallOutlineType = outliner::InstrType::LegalTerminator; |
6200 | |
6201 | if (!Callee) |
6202 | return UnknownCallOutlineType; |
6203 | |
6204 | // We have a function we have information about. Check if it's something we |
6205 | // can safely outline. |
6206 | MachineFunction *CalleeMF = MMI.getMachineFunction(F: *Callee); |
6207 | |
6208 | // We don't know what's going on with the callee at all. Don't touch it. |
6209 | if (!CalleeMF) |
6210 | return UnknownCallOutlineType; |
6211 | |
6212 | // Check if we know anything about the callee saves on the function. If we |
6213 | // don't, then don't touch it, since that implies that we haven't computed |
6214 | // anything about its stack frame yet. |
6215 | MachineFrameInfo &MFI = CalleeMF->getFrameInfo(); |
6216 | if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 || |
6217 | MFI.getNumObjects() > 0) |
6218 | return UnknownCallOutlineType; |
6219 | |
6220 | // At this point, we can say that CalleeMF ought to not pass anything on the |
6221 | // stack. Therefore, we can outline it. |
6222 | return outliner::InstrType::Legal; |
6223 | } |
6224 | |
6225 | // Since calls are handled, don't touch LR or PC |
6226 | if (MI.modifiesRegister(Reg: ARM::LR, TRI) || MI.modifiesRegister(Reg: ARM::PC, TRI)) |
6227 | return outliner::InstrType::Illegal; |
6228 | |
6229 | // Does this use the stack? |
6230 | if (MI.modifiesRegister(Reg: ARM::SP, TRI) || MI.readsRegister(Reg: ARM::SP, TRI)) { |
6231 | // True if there is no chance that any outlined candidate from this range |
6232 | // could require stack fixups. That is, both |
6233 | // * LR is available in the range (No save/restore around call) |
6234 | // * The range doesn't include calls (No save/restore in outlined frame) |
6235 | // are true. |
    // These conditions also ensure correctness of return address
    // authentication: we insert sign and authentication instructions only if
    // we save/restore LR on the stack, and this condition ensures that the
    // outlined range does not modify SP, so the SP value used for signing is
    // the same as the one used for authentication.
6241 | // FIXME: This is very restrictive; the flags check the whole block, |
6242 | // not just the bit we will try to outline. |
6243 | bool MightNeedStackFixUp = |
6244 | (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere | |
6245 | MachineOutlinerMBBFlags::HasCalls)); |
6246 | |
6247 | if (!MightNeedStackFixUp) |
6248 | return outliner::InstrType::Legal; |
6249 | |
6250 | // Any modification of SP will break our code to save/restore LR. |
6251 | // FIXME: We could handle some instructions which add a constant offset to |
6252 | // SP, with a bit more work. |
6253 | if (MI.modifiesRegister(Reg: ARM::SP, TRI)) |
6254 | return outliner::InstrType::Illegal; |
6255 | |
    // At this point, we have a stack instruction that we might need to fix
    // up. We'll handle it if it's a load or store.
6258 | if (checkAndUpdateStackOffset(MI: &MI, Fixup: Subtarget.getStackAlignment().value(), |
6259 | Updt: false)) |
6260 | return outliner::InstrType::Legal; |
6261 | |
6262 | // We can't fix it up, so don't outline it. |
6263 | return outliner::InstrType::Illegal; |
6264 | } |
6265 | |
6266 | // Be conservative with IT blocks. |
6267 | if (MI.readsRegister(Reg: ARM::ITSTATE, TRI) || |
6268 | MI.modifiesRegister(Reg: ARM::ITSTATE, TRI)) |
6269 | return outliner::InstrType::Illegal; |
6270 | |
6271 | // Don't outline CFI instructions. |
6272 | if (MI.isCFIInstruction()) |
6273 | return outliner::InstrType::Illegal; |
6274 | |
6275 | return outliner::InstrType::Legal; |
6276 | } |
6277 | |
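// Once the outliner has decided to spill LR in the outlined frame, every
// SP-relative access inside the outlined block is off by one stack-alignment
// slot; walk the block and bump each such offset in place.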
6278 | void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { |
6279 | for (MachineInstr &MI : MBB) { |
6280 | checkAndUpdateStackOffset(MI: &MI, Fixup: Subtarget.getStackAlignment().value(), Updt: true); |
6281 | } |
6282 | } |
6283 | |
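// Emit the LR save used by outlined frames: a pre-indexed store pushing LR
// onto the stack (or, when Auth is set, a PAC value computed into R12 plus LR
// via STRD), followed by the matching CFI directives when CFI is requested.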
6284 | void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB, |
6285 | MachineBasicBlock::iterator It, bool CFI, |
6286 | bool Auth) const { |
6287 | int Align = std::max(a: Subtarget.getStackAlignment().value(), b: uint64_t(8)); |
6288 | unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0; |
6289 | assert(Align >= 8 && Align <= 256); |
6290 | if (Auth) { |
6291 | assert(Subtarget.isThumb2()); |
6292 | // Compute PAC in R12. Outlining ensures R12 is dead across the outlined |
6293 | // sequence. |
6294 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2PAC)).setMIFlags(MIFlags); |
6295 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2STRD_PRE), DestReg: ARM::SP) |
6296 | .addReg(RegNo: ARM::R12, flags: RegState::Kill) |
6297 | .addReg(RegNo: ARM::LR, flags: RegState::Kill) |
6298 | .addReg(RegNo: ARM::SP) |
6299 | .addImm(Val: -Align) |
6300 | .add(MOs: predOps(Pred: ARMCC::AL)) |
6301 | .setMIFlags(MIFlags); |
6302 | } else { |
6303 | unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM; |
6304 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: Opc), DestReg: ARM::SP) |
6305 | .addReg(RegNo: ARM::LR, flags: RegState::Kill) |
6306 | .addReg(RegNo: ARM::SP) |
6307 | .addImm(Val: -Align) |
6308 | .add(MOs: predOps(Pred: ARMCC::AL)) |
6309 | .setMIFlags(MIFlags); |
6310 | } |
6311 | |
6312 | if (!CFI) |
6313 | return; |
6314 | |
6315 | // Add a CFI, saying CFA is offset by Align bytes from SP. |
6316 | CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup); |
6317 | CFIBuilder.buildDefCFAOffset(Offset: Align); |
6318 | |
6319 | // Add a CFI saying that the LR that we want to find is now higher than |
6320 | // before. |
6321 | int LROffset = Auth ? Align - 4 : Align; |
6322 | CFIBuilder.buildOffset(Reg: ARM::LR, Offset: -LROffset); |
6323 | if (Auth) { |
    // Add a CFI for the location of the return address PAC.
6325 | CFIBuilder.buildOffset(Reg: ARM::RA_AUTH_CODE, Offset: -Align); |
6326 | } |
6327 | } |
6328 | |
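// Mirror of saveLROnStack: reload LR (and the PAC value into R12 when Auth is
// set) with a post-indexed load, emit the matching CFI directives when
// requested, and, if Auth is set, authenticate the return address with AUT.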
6329 | void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB, |
6330 | MachineBasicBlock::iterator It, |
6331 | bool CFI, bool Auth) const { |
6332 | int Align = Subtarget.getStackAlignment().value(); |
6333 | unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0; |
6334 | if (Auth) { |
6335 | assert(Subtarget.isThumb2()); |
6336 | // Restore return address PAC and LR. |
6337 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2LDRD_POST)) |
6338 | .addReg(RegNo: ARM::R12, flags: RegState::Define) |
6339 | .addReg(RegNo: ARM::LR, flags: RegState::Define) |
6340 | .addReg(RegNo: ARM::SP, flags: RegState::Define) |
6341 | .addReg(RegNo: ARM::SP) |
6342 | .addImm(Val: Align) |
6343 | .add(MOs: predOps(Pred: ARMCC::AL)) |
6344 | .setMIFlags(MIFlags); |
6345 | // LR authentication is after the CFI instructions, below. |
6346 | } else { |
6347 | unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; |
6348 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: Opc), DestReg: ARM::LR) |
6349 | .addReg(RegNo: ARM::SP, flags: RegState::Define) |
6350 | .addReg(RegNo: ARM::SP); |
6351 | if (!Subtarget.isThumb()) |
6352 | MIB.addReg(RegNo: 0); |
6353 | MIB.addImm(Val: Subtarget.getStackAlignment().value()) |
6354 | .add(MOs: predOps(Pred: ARMCC::AL)) |
6355 | .setMIFlags(MIFlags); |
6356 | } |
6357 | |
6358 | if (CFI) { |
6359 | // Now stack has moved back up and we have restored LR. |
6360 | CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameDestroy); |
6361 | CFIBuilder.buildDefCFAOffset(Offset: 0); |
6362 | CFIBuilder.buildRestore(Reg: ARM::LR); |
6363 | if (Auth) |
6364 | CFIBuilder.buildUndefined(Reg: ARM::RA_AUTH_CODE); |
6365 | } |
6366 | |
6367 | if (Auth) |
6368 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2AUT)); |
6369 | } |
6370 | |
6371 | void ARMBaseInstrInfo::buildOutlinedFrame( |
6372 | MachineBasicBlock &MBB, MachineFunction &MF, |
6373 | const outliner::OutlinedFunction &OF) const { |
6374 | // For thunk outlining, rewrite the last instruction from a call to a |
6375 | // tail-call. |
6376 | if (OF.FrameConstructionID == MachineOutlinerThunk) { |
6377 | MachineInstr *Call = &*--MBB.instr_end(); |
6378 | bool isThumb = Subtarget.isThumb(); |
6379 | unsigned FuncOp = isThumb ? 2 : 0; |
6380 | unsigned Opc = Call->getOperand(i: FuncOp).isReg() |
6381 | ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr |
6382 | : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd |
6383 | : ARM::tTAILJMPdND |
6384 | : ARM::TAILJMPd; |
6385 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: get(Opcode: Opc)) |
6386 | .add(MO: Call->getOperand(i: FuncOp)); |
6387 | if (isThumb && !Call->getOperand(i: FuncOp).isReg()) |
6388 | MIB.add(MOs: predOps(Pred: ARMCC::AL)); |
6389 | Call->eraseFromParent(); |
6390 | } |
6391 | |
6392 | // Is there a call in the outlined range? |
6393 | auto IsNonTailCall = [](MachineInstr &MI) { |
6394 | return MI.isCall() && !MI.isReturn(); |
6395 | }; |
6396 | if (llvm::any_of(Range: MBB.instrs(), P: IsNonTailCall)) { |
6397 | MachineBasicBlock::iterator It = MBB.begin(); |
6398 | MachineBasicBlock::iterator Et = MBB.end(); |
6399 | |
6400 | if (OF.FrameConstructionID == MachineOutlinerTailCall || |
6401 | OF.FrameConstructionID == MachineOutlinerThunk) |
6402 | Et = std::prev(x: MBB.end()); |
6403 | |
    // We have to save and restore LR, so we need to add it to the liveins if
    // it is not already part of the set. This is sufficient since outlined
    // functions only have one block.
6407 | if (!MBB.isLiveIn(Reg: ARM::LR)) |
6408 | MBB.addLiveIn(PhysReg: ARM::LR); |
6409 | |
6410 | // Insert a save before the outlined region |
6411 | bool Auth = MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(SpillsLR: true); |
6412 | saveLROnStack(MBB, It, CFI: true, Auth); |
6413 | |
6414 | // Fix up the instructions in the range, since we're going to modify the |
6415 | // stack. |
    assert(OF.FrameConstructionID != MachineOutlinerDefault &&
           "Can only fix up stack references once");
6418 | fixupPostOutline(MBB); |
6419 | |
6420 | // Insert a restore before the terminator for the function. Restore LR. |
6421 | restoreLRFromStack(MBB, It: Et, CFI: true, Auth); |
6422 | } |
6423 | |
6424 | // If this is a tail call outlined function, then there's already a return. |
6425 | if (OF.FrameConstructionID == MachineOutlinerTailCall || |
6426 | OF.FrameConstructionID == MachineOutlinerThunk) |
6427 | return; |
6428 | |
6429 | // Here we have to insert the return ourselves. Get the correct opcode from |
  // the current feature set.
6431 | BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: get(Opcode: Subtarget.getReturnOpcode())) |
6432 | .add(MOs: predOps(Pred: ARMCC::AL)); |
6433 | |
6434 | // Did we have to modify the stack by saving the link register? |
6435 | if (OF.FrameConstructionID != MachineOutlinerDefault && |
6436 | OF.Candidates[0].CallConstructionID != MachineOutlinerDefault) |
6437 | return; |
6438 | |
6439 | // We modified the stack. |
6440 | // Walk over the basic block and fix up all the stack accesses. |
6441 | fixupPostOutline(MBB); |
6442 | } |
6443 | |
6444 | MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall( |
6445 | Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, |
6446 | MachineFunction &MF, outliner::Candidate &C) const { |
6447 | MachineInstrBuilder MIB; |
6448 | MachineBasicBlock::iterator CallPt; |
6449 | unsigned Opc; |
6450 | bool isThumb = Subtarget.isThumb(); |
6451 | |
6452 | // Are we tail calling? |
6453 | if (C.CallConstructionID == MachineOutlinerTailCall) { |
6454 | // If yes, then we can just branch to the label. |
6455 | Opc = isThumb |
6456 | ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND |
6457 | : ARM::TAILJMPd; |
6458 | MIB = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: Opc)) |
6459 | .addGlobalAddress(GV: M.getNamedValue(Name: MF.getName())); |
6460 | if (isThumb) |
6461 | MIB.add(MOs: predOps(Pred: ARMCC::AL)); |
6462 | It = MBB.insert(I: It, MI: MIB); |
6463 | return It; |
6464 | } |
6465 | |
6466 | // Create the call instruction. |
6467 | Opc = isThumb ? ARM::tBL : ARM::BL; |
6468 | MachineInstrBuilder CallMIB = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: Opc)); |
6469 | if (isThumb) |
6470 | CallMIB.add(MOs: predOps(Pred: ARMCC::AL)); |
6471 | CallMIB.addGlobalAddress(GV: M.getNamedValue(Name: MF.getName())); |
6472 | |
6473 | if (C.CallConstructionID == MachineOutlinerNoLRSave || |
6474 | C.CallConstructionID == MachineOutlinerThunk) { |
6475 | // No, so just insert the call. |
6476 | It = MBB.insert(I: It, MI: CallMIB); |
6477 | return It; |
6478 | } |
6479 | |
6480 | const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>(); |
6481 | // Can we save to a register? |
6482 | if (C.CallConstructionID == MachineOutlinerRegSave) { |
6483 | Register Reg = findRegisterToSaveLRTo(C); |
    assert(Reg != 0 && "No callee-saved register available?");
6485 | |
6486 | // Save and restore LR from that register. |
6487 | copyPhysReg(MBB, I: It, DL: DebugLoc(), DestReg: Reg, SrcReg: ARM::LR, KillSrc: true); |
6488 | if (!AFI.isLRSpilled()) |
6489 | CFIInstBuilder(MBB, It, MachineInstr::FrameSetup) |
6490 | .buildRegister(Reg1: ARM::LR, Reg2: Reg); |
6491 | CallPt = MBB.insert(I: It, MI: CallMIB); |
6492 | copyPhysReg(MBB, I: It, DL: DebugLoc(), DestReg: ARM::LR, SrcReg: Reg, KillSrc: true); |
6493 | if (!AFI.isLRSpilled()) |
6494 | CFIInstBuilder(MBB, It, MachineInstr::FrameDestroy).buildRestore(Reg: ARM::LR); |
6495 | It--; |
6496 | return CallPt; |
6497 | } |
6498 | // We have the default case. Save and restore from SP. |
6499 | if (!MBB.isLiveIn(Reg: ARM::LR)) |
6500 | MBB.addLiveIn(PhysReg: ARM::LR); |
6501 | bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(SpillsLR: true); |
6502 | saveLROnStack(MBB, It, CFI: !AFI.isLRSpilled(), Auth); |
6503 | CallPt = MBB.insert(I: It, MI: CallMIB); |
6504 | restoreLRFromStack(MBB, It, CFI: !AFI.isLRSpilled(), Auth); |
6505 | It--; |
6506 | return CallPt; |
6507 | } |
6508 | |
6509 | bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault( |
6510 | MachineFunction &MF) const { |
6511 | return Subtarget.isMClass() && MF.getFunction().hasMinSize(); |
6512 | } |
6513 | |
6514 | bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable( |
6515 | const MachineInstr &MI) const { |
6516 | // Try hard to rematerialize any VCTPs because if we spill P0, it will block |
6517 | // the tail predication conversion. This means that the element count |
6518 | // register has to be live for longer, but that has to be better than |
6519 | // spill/restore and VPT predication. |
6520 | return (isVCTP(MI: &MI) && !isPredicated(MI)) || |
6521 | TargetInstrInfo::isReallyTriviallyReMaterializable(MI); |
6522 | } |
6523 | |
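// Helpers selecting between the normal BLX-family call opcodes and their
// "_noip" variants; the latter are used when SLS (straight-line speculation)
// hardening of BLR-style calls is enabled for the function.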
6524 | unsigned llvm::getBLXOpcode(const MachineFunction &MF) { |
6525 | return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip |
6526 | : ARM::BLX; |
6527 | } |
6528 | |
6529 | unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) { |
6530 | return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip |
6531 | : ARM::tBLXr; |
6532 | } |
6533 | |
6534 | unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) { |
6535 | return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip |
6536 | : ARM::BLX_pred; |
6537 | } |
6538 | |
6539 | namespace { |
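// PipelinerLoopInfo implementation used by the MachinePipeliner for ARM. It
// handles loops ending either in a t2Bcc fed by a CPSR-setting instruction or
// in a t2LoopEnd/t2LoopDec pair (see analyzeLoopForPipelining below), and it
// rejects proposed software-pipelined schedules whose loop-carried values
// would push register pressure over the target's limits.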
6540 | class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { |
6541 | MachineInstr *EndLoop, *LoopCount; |
6542 | MachineFunction *MF; |
6543 | const TargetInstrInfo *TII; |
6544 | |
6545 | // Bitset[0 .. MAX_STAGES-1] ... iterations needed |
6546 | // [LAST_IS_USE] : last reference to register in schedule is a use |
6547 | // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live |
6548 | static int constexpr MAX_STAGES = 30; |
6549 | static int constexpr LAST_IS_USE = MAX_STAGES; |
6550 | static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1; |
6551 | typedef std::bitset<MAX_STAGES + 2> IterNeed; |
6552 | typedef std::map<unsigned, IterNeed> IterNeeds; |
6553 | |
6554 | void bumpCrossIterationPressure(RegPressureTracker &RPT, |
6555 | const IterNeeds &CIN); |
6556 | bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS); |
6557 | |
  // Meaning of EndLoop and LoopCount for the supported loop types:
6559 | // t2Bcc: |
6560 | // EndLoop = branch at end of original BB that will become a kernel |
6561 | // LoopCount = CC setter live into branch |
6562 | // t2LoopEnd: |
6563 | // EndLoop = branch at end of original BB |
6564 | // LoopCount = t2LoopDec |
6565 | public: |
6566 | ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount) |
6567 | : EndLoop(EndLoop), LoopCount(LoopCount), |
6568 | MF(EndLoop->getParent()->getParent()), |
6569 | TII(MF->getSubtarget().getInstrInfo()) {} |
6570 | |
6571 | bool shouldIgnoreForPipelining(const MachineInstr *MI) const override { |
6572 | // Only ignore the terminator. |
6573 | return MI == EndLoop || MI == LoopCount; |
6574 | } |
6575 | |
6576 | bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override { |
6577 | if (tooMuchRegisterPressure(SSD, SMS)) |
6578 | return false; |
6579 | |
6580 | return true; |
6581 | } |
6582 | |
6583 | std::optional<bool> createTripCountGreaterCondition( |
6584 | int TC, MachineBasicBlock &MBB, |
6585 | SmallVectorImpl<MachineOperand> &Cond) override { |
6586 | |
6587 | if (isCondBranchOpcode(Opc: EndLoop->getOpcode())) { |
6588 | Cond.push_back(Elt: EndLoop->getOperand(i: 1)); |
6589 | Cond.push_back(Elt: EndLoop->getOperand(i: 2)); |
6590 | if (EndLoop->getOperand(i: 0).getMBB() == EndLoop->getParent()) { |
6591 | TII->reverseBranchCondition(Cond); |
6592 | } |
6593 | return {}; |
6594 | } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) { |
6595 | // General case just lets the unrolled t2LoopDec do the subtraction and |
6596 | // therefore just needs to check if zero has been reached. |
6597 | MachineInstr *LoopDec = nullptr; |
6598 | for (auto &I : MBB.instrs()) |
6599 | if (I.getOpcode() == ARM::t2LoopDec) |
6600 | LoopDec = &I; |
      assert(LoopDec && "Unable to find copied LoopDec");
6602 | // Check if we're done with the loop. |
6603 | BuildMI(BB: &MBB, MIMD: LoopDec->getDebugLoc(), MCID: TII->get(Opcode: ARM::t2CMPri)) |
6604 | .addReg(RegNo: LoopDec->getOperand(i: 0).getReg()) |
6605 | .addImm(Val: 0) |
6606 | .addImm(Val: ARMCC::AL) |
6607 | .addReg(RegNo: ARM::NoRegister); |
6608 | Cond.push_back(Elt: MachineOperand::CreateImm(Val: ARMCC::EQ)); |
6609 | Cond.push_back(Elt: MachineOperand::CreateReg(Reg: ARM::CPSR, isDef: false)); |
6610 | return {}; |
6611 | } else |
      llvm_unreachable("Unknown EndLoop");
6613 | } |
6614 | |
  void setPreheader(MachineBasicBlock *NewPreheader) override {}
6616 | |
6617 | void adjustTripCount(int TripCountAdjust) override {} |
6618 | }; |
6619 | |
6620 | void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT, |
6621 | const IterNeeds &CIN) { |
6622 | // Increase pressure by the amounts in CrossIterationNeeds |
6623 | for (const auto &N : CIN) { |
6624 | int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2; |
6625 | for (int I = 0; I < Cnt; ++I) |
6626 | RPT.increaseRegPressure(RegUnit: Register(N.first), PreviousMask: LaneBitmask::getNone(), |
6627 | NewMask: LaneBitmask::getAll()); |
6628 | } |
6629 | // Decrease pressure by the amounts in CrossIterationNeeds |
6630 | for (const auto &N : CIN) { |
6631 | int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2; |
6632 | for (int I = 0; I < Cnt; ++I) |
6633 | RPT.decreaseRegPressure(RegUnit: Register(N.first), PreviousMask: LaneBitmask::getAll(), |
6634 | NewMask: LaneBitmask::getNone()); |
6635 | } |
6636 | } |
6637 | |
6638 | bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD, |
6639 | SMSchedule &SMS) { |
6640 | IterNeeds CrossIterationNeeds; |
6641 | |
6642 | // Determine which values will be loop-carried after the schedule is |
6643 | // applied |
6644 | |
6645 | for (auto &SU : SSD.SUnits) { |
6646 | const MachineInstr *MI = SU.getInstr(); |
6647 | int Stg = SMS.stageScheduled(SU: const_cast<SUnit *>(&SU)); |
6648 | for (auto &S : SU.Succs) |
6649 | if (MI->isPHI() && S.getKind() == SDep::Anti) { |
6650 | Register Reg = S.getReg(); |
6651 | if (Reg.isVirtual()) |
6652 | CrossIterationNeeds[Reg.id()].set(position: 0); |
6653 | } else if (S.isAssignedRegDep()) { |
6654 | int OStg = SMS.stageScheduled(SU: S.getSUnit()); |
6655 | if (OStg >= 0 && OStg != Stg) { |
6656 | Register Reg = S.getReg(); |
6657 | if (Reg.isVirtual()) |
6658 | CrossIterationNeeds[Reg.id()] |= ((1 << (OStg - Stg)) - 1); |
6659 | } |
6660 | } |
6661 | } |
6662 | |
6663 | // Determine more-or-less what the proposed schedule (reversed) is going to |
6664 | // be; it might not be quite the same because the within-cycle ordering |
6665 | // created by SMSchedule depends upon changes to help with address offsets and |
6666 | // the like. |
6667 | std::vector<SUnit *> ProposedSchedule; |
6668 | for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle) |
6669 | for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd; |
6670 | ++Stage) { |
6671 | std::deque<SUnit *> Instrs = |
6672 | SMS.getInstructions(cycle: Cycle + Stage * SMS.getInitiationInterval()); |
6673 | std::sort(first: Instrs.begin(), last: Instrs.end(), |
6674 | comp: [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; }); |
6675 | llvm::append_range(C&: ProposedSchedule, R&: Instrs); |
6676 | } |
6677 | |
6678 | // Learn whether the last use/def of each cross-iteration register is a use or |
6679 | // def. If it is a def, RegisterPressure will implicitly increase max pressure |
6680 | // and we do not have to add the pressure. |
6681 | for (auto *SU : ProposedSchedule) |
6682 | for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid(); |
6683 | ++OperI) { |
6684 | auto MO = *OperI; |
6685 | if (!MO.isReg() || !MO.getReg()) |
6686 | continue; |
6687 | Register Reg = MO.getReg(); |
6688 | auto CIter = CrossIterationNeeds.find(x: Reg.id()); |
6689 | if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] || |
6690 | CIter->second[SEEN_AS_LIVE]) |
6691 | continue; |
6692 | if (MO.isDef() && !MO.isDead()) |
6693 | CIter->second.set(position: SEEN_AS_LIVE); |
6694 | else if (MO.isUse()) |
6695 | CIter->second.set(position: LAST_IS_USE); |
6696 | } |
6697 | for (auto &CI : CrossIterationNeeds) |
6698 | CI.second.reset(position: LAST_IS_USE); |
6699 | |
6700 | RegionPressure RecRegPressure; |
6701 | RegPressureTracker RPTracker(RecRegPressure); |
6702 | RegisterClassInfo RegClassInfo; |
6703 | RegClassInfo.runOnMachineFunction(MF: *MF); |
6704 | RPTracker.init(mf: MF, rci: &RegClassInfo, lis: nullptr, mbb: EndLoop->getParent(), |
6705 | pos: EndLoop->getParent()->end(), TrackLaneMasks: false, TrackUntiedDefs: false); |
6706 | |
6707 | bumpCrossIterationPressure(RPT&: RPTracker, CIN: CrossIterationNeeds); |
6708 | |
6709 | for (auto *SU : ProposedSchedule) { |
6710 | MachineBasicBlock::const_iterator CurInstI = SU->getInstr(); |
6711 | RPTracker.setPos(std::next(x: CurInstI)); |
6712 | RPTracker.recede(); |
6713 | |
6714 | // Track what cross-iteration registers would be seen as live |
6715 | for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) { |
6716 | auto MO = *OperI; |
6717 | if (!MO.isReg() || !MO.getReg()) |
6718 | continue; |
6719 | Register Reg = MO.getReg(); |
6720 | if (MO.isDef() && !MO.isDead()) { |
6721 | auto CIter = CrossIterationNeeds.find(x: Reg.id()); |
6722 | if (CIter != CrossIterationNeeds.end()) { |
6723 | CIter->second.reset(position: 0); |
6724 | CIter->second.reset(position: SEEN_AS_LIVE); |
6725 | } |
6726 | } |
6727 | } |
6728 | for (auto &S : SU->Preds) { |
6729 | auto Stg = SMS.stageScheduled(SU); |
6730 | if (S.isAssignedRegDep()) { |
6731 | Register Reg = S.getReg(); |
6732 | auto CIter = CrossIterationNeeds.find(x: Reg.id()); |
6733 | if (CIter != CrossIterationNeeds.end()) { |
6734 | auto Stg2 = SMS.stageScheduled(SU: const_cast<SUnit *>(S.getSUnit())); |
          assert(Stg2 <= Stg && "Data dependence upon earlier stage");
6736 | if (Stg - Stg2 < MAX_STAGES) |
6737 | CIter->second.set(position: Stg - Stg2); |
6738 | CIter->second.set(position: SEEN_AS_LIVE); |
6739 | } |
6740 | } |
6741 | } |
6742 | |
6743 | bumpCrossIterationPressure(RPT&: RPTracker, CIN: CrossIterationNeeds); |
6744 | } |
6745 | |
6746 | auto &P = RPTracker.getPressure().MaxSetPressure; |
6747 | for (unsigned I = 0, E = P.size(); I < E; ++I) { |
6748 | // Exclude some Neon register classes. |
6749 | if (I == ARM::DQuad_with_ssub_0 || I == ARM::DTripleSpc_with_ssub_0 || |
6750 | I == ARM::DTriple_with_qsub_0_in_QPR) |
6751 | continue; |
6752 | |
6753 | if (P[I] > RegClassInfo.getRegPressureSetLimit(Idx: I)) { |
6754 | return true; |
6755 | } |
6756 | } |
6757 | return false; |
6758 | } |
6759 | |
6760 | } // namespace |
6761 | |
6762 | std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> |
6763 | ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { |
6764 | MachineBasicBlock::iterator I = LoopBB->getFirstTerminator(); |
  MachineBasicBlock *Preheader = *LoopBB->pred_begin();
6766 | if (Preheader == LoopBB) |
6767 | Preheader = *std::next(x: LoopBB->pred_begin()); |
6768 | |
6769 | if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) { |
6770 | // If the branch is a Bcc, then the CPSR should be set somewhere within the |
6771 | // block. We need to determine the reaching definition of CPSR so that |
6772 | // it can be marked as non-pipelineable, allowing the pipeliner to force |
6773 | // it into stage 0 or give up if it cannot or will not do so. |
6774 | MachineInstr *CCSetter = nullptr; |
6775 | for (auto &L : LoopBB->instrs()) { |
6776 | if (L.isCall()) |
6777 | return nullptr; |
6778 | if (isCPSRDefined(MI: L)) |
6779 | CCSetter = &L; |
6780 | } |
6781 | if (CCSetter) |
6782 | return std::make_unique<ARMPipelinerLoopInfo>(args: &*I, args&: CCSetter); |
6783 | else |
      return nullptr; // Unable to find the CC setter, so unable to guarantee
                      // that pipelining will work.
6786 | } |
6787 | |
6788 | // Recognize: |
6789 | // preheader: |
6790 | // %1 = t2DoopLoopStart %0 |
6791 | // loop: |
6792 | // %2 = phi %1, <not loop>, %..., %loop |
6793 | // %3 = t2LoopDec %2, <imm> |
6794 | // t2LoopEnd %3, %loop |
6795 | |
6796 | if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) { |
6797 | for (auto &L : LoopBB->instrs()) |
6798 | if (L.isCall()) |
6799 | return nullptr; |
6800 | else if (isVCTP(MI: &L)) |
6801 | return nullptr; |
6802 | Register LoopDecResult = I->getOperand(i: 0).getReg(); |
6803 | MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo(); |
6804 | MachineInstr *LoopDec = MRI.getUniqueVRegDef(Reg: LoopDecResult); |
6805 | if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) |
6806 | return nullptr; |
6807 | MachineInstr *LoopStart = nullptr; |
6808 | for (auto &J : Preheader->instrs()) |
6809 | if (J.getOpcode() == ARM::t2DoLoopStart) |
6810 | LoopStart = &J; |
6811 | if (!LoopStart) |
6812 | return nullptr; |
6813 | return std::make_unique<ARMPipelinerLoopInfo>(args: &*I, args&: LoopDec); |
6814 | } |
6815 | return nullptr; |
6816 | } |
6817 | |