1//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the Base ARM implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARMBaseInstrInfo.h"
14#include "ARMBaseRegisterInfo.h"
15#include "ARMConstantPoolValue.h"
16#include "ARMFeatures.h"
17#include "ARMHazardRecognizer.h"
18#include "ARMMachineFunctionInfo.h"
19#include "ARMSubtarget.h"
20#include "MCTargetDesc/ARMAddressingModes.h"
21#include "MCTargetDesc/ARMBaseInfo.h"
22#include "MVETailPredUtils.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/SmallVector.h"
27#include "llvm/CodeGen/CFIInstBuilder.h"
28#include "llvm/CodeGen/DFAPacketizer.h"
29#include "llvm/CodeGen/LiveVariables.h"
30#include "llvm/CodeGen/MachineBasicBlock.h"
31#include "llvm/CodeGen/MachineConstantPool.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/CodeGen/MachineFunction.h"
34#include "llvm/CodeGen/MachineInstr.h"
35#include "llvm/CodeGen/MachineInstrBuilder.h"
36#include "llvm/CodeGen/MachineMemOperand.h"
37#include "llvm/CodeGen/MachineModuleInfo.h"
38#include "llvm/CodeGen/MachineOperand.h"
39#include "llvm/CodeGen/MachinePipeliner.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/MachineScheduler.h"
42#include "llvm/CodeGen/MultiHazardRecognizer.h"
43#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
44#include "llvm/CodeGen/SelectionDAGNodes.h"
45#include "llvm/CodeGen/TargetInstrInfo.h"
46#include "llvm/CodeGen/TargetRegisterInfo.h"
47#include "llvm/CodeGen/TargetSchedule.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/DebugLoc.h"
50#include "llvm/IR/Function.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/Module.h"
53#include "llvm/MC/MCAsmInfo.h"
54#include "llvm/MC/MCInstrDesc.h"
55#include "llvm/MC/MCInstrItineraries.h"
56#include "llvm/Support/BranchProbability.h"
57#include "llvm/Support/Casting.h"
58#include "llvm/Support/Compiler.h"
59#include "llvm/Support/Debug.h"
60#include "llvm/Support/ErrorHandling.h"
61#include "llvm/Support/raw_ostream.h"
62#include "llvm/Target/TargetMachine.h"
63#include <algorithm>
64#include <cassert>
65#include <cstdint>
66#include <iterator>
67#include <new>
68#include <utility>
69#include <vector>
70
71using namespace llvm;
72
73#define DEBUG_TYPE "arm-instrinfo"
74
75#define GET_INSTRINFO_CTOR_DTOR
76#include "ARMGenInstrInfo.inc"
77
78/// ARM_MLxEntry - Record information about MLA / MLS instructions.
79struct ARM_MLxEntry {
80 uint16_t MLxOpc; // MLA / MLS opcode
81 uint16_t MulOpc; // Expanded multiplication opcode
82 uint16_t AddSubOpc; // Expanded add / sub opcode
83 bool NegAcc; // True if the acc is negated before the add / sub.
84 bool HasLane; // True if instruction has an extra "lane" operand.
85};
86
static const ARM_MLxEntry ARM_MLxTable[] = {
  // MLxOpc,          MulOpc,           AddSubOpc,       NegAcc, HasLane
  // fp scalar ops
  { ARM::VMLAS,       ARM::VMULS,       ARM::VADDS,      false,  false },
  { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,      false,  false },
  { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,      false,  false },
  { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,      false,  false },
  { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,      true,   false },
  { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,      true,   false },
  { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,      true,   false },
  { ARM::VNMLSD,      ARM::VMULD,       ARM::VSUBD,      true,   false },

  // fp SIMD ops
  { ARM::VMLAfd,      ARM::VMULfd,      ARM::VADDfd,     false,  false },
  { ARM::VMLSfd,      ARM::VMULfd,      ARM::VSUBfd,     false,  false },
  { ARM::VMLAfq,      ARM::VMULfq,      ARM::VADDfq,     false,  false },
  { ARM::VMLSfq,      ARM::VMULfq,      ARM::VSUBfq,     false,  false },
  { ARM::VMLAslfd,    ARM::VMULslfd,    ARM::VADDfd,     false,  true  },
  { ARM::VMLSslfd,    ARM::VMULslfd,    ARM::VSUBfd,     false,  true  },
  { ARM::VMLAslfq,    ARM::VMULslfq,    ARM::VADDfq,     false,  true  },
  { ARM::VMLSslfq,    ARM::VMULslfq,    ARM::VSUBfq,     false,  true  },
};
109
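// The constructor builds MLxEntryMap (mapping each MLx opcode to its index in
// ARM_MLxTable) and MLxHazardOpcodes (the set of expanded multiply and
// add/sub opcodes) from the table above.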
ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
    : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
      Subtarget(STI) {
  for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
      llvm_unreachable("Duplicated entries?");
    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
  }
}
120
121// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
122// currently defaults to no prepass hazard recognizer.
123ScheduleHazardRecognizer *
124ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
125 const ScheduleDAG *DAG) const {
126 if (usePreRAHazardRecognizer()) {
127 const InstrItineraryData *II =
128 static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
129 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
130 }
131 return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
132}
133
134// Called during:
135// - pre-RA scheduling
136// - post-RA scheduling when FeatureUseMISched is set
137ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
138 const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
139 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
140
141 // We would like to restrict this hazard recognizer to only
142 // post-RA scheduling; we can tell that we're post-RA because we don't
143 // track VRegLiveness.
144 // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
145 // banks banked on bit 2. Assume that TCMs are in use.
  if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
    MHR->AddHazardRecognizer(
        std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
149
150 // Not inserting ARMHazardRecognizerFPMLx because that would change
151 // legacy behavior
152
153 auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
154 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
155 return MHR;
156}
157
158// Called during post-RA scheduling when FeatureUseMISched is not set
159ScheduleHazardRecognizer *ARMBaseInstrInfo::
160CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
161 const ScheduleDAG *DAG) const {
162 MultiHazardRecognizer *MHR = new MultiHazardRecognizer();
163
164 if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
165 MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());
166
167 auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
168 if (BHR)
169 MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
170 return MHR;
171}
172
173// Branch analysis.
174// Cond vector output format:
175// 0 elements indicates an unconditional branch
176// 2 elements indicates a conditional branch; the elements are
177// the condition to check and the CPSR.
178// 3 elements indicates a hardware loop end; the elements
179// are the opcode, the operand value to test, and a dummy
180// operand used to pad out to 3 operands.
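// For example, a conditional branch "Bcc %bb.1, ARMCC::EQ, %cpsr" produces
// Cond = {ARMCC::EQ, %cpsr}, while a t2LoopEnd hardware-loop terminator
// produces Cond = {ARM::t2LoopEnd, <loop counter operand>, 0}.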
181bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
182 MachineBasicBlock *&TBB,
183 MachineBasicBlock *&FBB,
184 SmallVectorImpl<MachineOperand> &Cond,
185 bool AllowModify) const {
186 TBB = nullptr;
187 FBB = nullptr;
188
189 MachineBasicBlock::instr_iterator I = MBB.instr_end();
190 if (I == MBB.instr_begin())
191 return false; // Empty blocks are easy.
192 --I;
193
194 // Walk backwards from the end of the basic block until the branch is
195 // analyzed or we give up.
196 while (isPredicated(MI: *I) || I->isTerminator() || I->isDebugValue()) {
    // Flag to be raised on unanalyzable instructions. This is useful in cases
    // where we want to clean up at the end of the basic block before we bail
    // out.
200 bool CantAnalyze = false;
201
202 // Skip over DEBUG values, predicated nonterminators and speculation
203 // barrier terminators.
204 while (I->isDebugInstr() || !I->isTerminator() ||
205 isSpeculationBarrierEndBBOpcode(Opc: I->getOpcode()) ||
206 I->getOpcode() == ARM::t2DoLoopStartTP){
207 if (I == MBB.instr_begin())
208 return false;
209 --I;
210 }
211
212 if (isIndirectBranchOpcode(Opc: I->getOpcode()) ||
213 isJumpTableBranchOpcode(Opc: I->getOpcode())) {
214 // Indirect branches and jump tables can't be analyzed, but we still want
215 // to clean up any instructions at the tail of the basic block.
216 CantAnalyze = true;
217 } else if (isUncondBranchOpcode(Opc: I->getOpcode())) {
218 TBB = I->getOperand(i: 0).getMBB();
219 } else if (isCondBranchOpcode(Opc: I->getOpcode())) {
220 // Bail out if we encounter multiple conditional branches.
221 if (!Cond.empty())
222 return true;
223
224 assert(!FBB && "FBB should have been null.");
225 FBB = TBB;
226 TBB = I->getOperand(i: 0).getMBB();
227 Cond.push_back(Elt: I->getOperand(i: 1));
228 Cond.push_back(Elt: I->getOperand(i: 2));
229 } else if (I->isReturn()) {
230 // Returns can't be analyzed, but we should run cleanup.
231 CantAnalyze = true;
232 } else if (I->getOpcode() == ARM::t2LoopEnd &&
233 MBB.getParent()
234 ->getSubtarget<ARMSubtarget>()
235 .enableMachinePipeliner()) {
236 if (!Cond.empty())
237 return true;
238 FBB = TBB;
239 TBB = I->getOperand(i: 1).getMBB();
240 Cond.push_back(Elt: MachineOperand::CreateImm(Val: I->getOpcode()));
241 Cond.push_back(Elt: I->getOperand(i: 0));
242 Cond.push_back(Elt: MachineOperand::CreateImm(Val: 0));
243 } else {
      // We encountered an unrecognized terminator. Bail out immediately.
245 return true;
246 }
247
248 // Cleanup code - to be run for unpredicated unconditional branches and
249 // returns.
250 if (!isPredicated(MI: *I) &&
251 (isUncondBranchOpcode(Opc: I->getOpcode()) ||
252 isIndirectBranchOpcode(Opc: I->getOpcode()) ||
253 isJumpTableBranchOpcode(Opc: I->getOpcode()) ||
254 I->isReturn())) {
      // Forget any previous conditional branch information - it no longer
      // applies.
256 Cond.clear();
257 FBB = nullptr;
258
259 // If we can modify the function, delete everything below this
260 // unconditional branch.
261 if (AllowModify) {
262 MachineBasicBlock::iterator DI = std::next(x: I);
263 while (DI != MBB.instr_end()) {
264 MachineInstr &InstToDelete = *DI;
265 ++DI;
266 // Speculation barriers must not be deleted.
267 if (isSpeculationBarrierEndBBOpcode(Opc: InstToDelete.getOpcode()))
268 continue;
269 InstToDelete.eraseFromParent();
270 }
271 }
272 }
273
274 if (CantAnalyze) {
275 // We may not be able to analyze the block, but we could still have
276 // an unconditional branch as the last instruction in the block, which
277 // just branches to layout successor. If this is the case, then just
278 // remove it if we're allowed to make modifications.
279 if (AllowModify && !isPredicated(MI: MBB.back()) &&
280 isUncondBranchOpcode(Opc: MBB.back().getOpcode()) &&
281 TBB && MBB.isLayoutSuccessor(MBB: TBB))
282 removeBranch(MBB);
283 return true;
284 }
285
286 if (I == MBB.instr_begin())
287 return false;
288
289 --I;
290 }
291
292 // We made it past the terminators without bailing out - we must have
293 // analyzed this branch successfully.
294 return false;
295}
296
297unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
298 int *BytesRemoved) const {
299 assert(!BytesRemoved && "code size not handled");
300
301 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
302 if (I == MBB.end())
303 return 0;
304
305 if (!isUncondBranchOpcode(Opc: I->getOpcode()) &&
306 !isCondBranchOpcode(Opc: I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
307 return 0;
308
309 // Remove the branch.
310 I->eraseFromParent();
311
312 I = MBB.end();
313
314 if (I == MBB.begin()) return 1;
315 --I;
316 if (!isCondBranchOpcode(Opc: I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
317 return 1;
318
319 // Remove the branch.
320 I->eraseFromParent();
321 return 2;
322}
323
324unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
325 MachineBasicBlock *TBB,
326 MachineBasicBlock *FBB,
327 ArrayRef<MachineOperand> Cond,
328 const DebugLoc &DL,
329 int *BytesAdded) const {
330 assert(!BytesAdded && "code size not handled");
331 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
332 int BOpc = !AFI->isThumbFunction()
333 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
334 int BccOpc = !AFI->isThumbFunction()
335 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
336 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
337
338 // Shouldn't be a fall through.
339 assert(TBB && "insertBranch must not be told to insert a fallthrough");
340 assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
341 "ARM branch conditions have two or three components!");
342
343 // For conditional branches, we use addOperand to preserve CPSR flags.
344
345 if (!FBB) {
346 if (Cond.empty()) { // Unconditional branch?
347 if (isThumb)
348 BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: BOpc)).addMBB(MBB: TBB).add(MOs: predOps(Pred: ARMCC::AL));
349 else
350 BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: BOpc)).addMBB(MBB: TBB);
351 } else if (Cond.size() == 2) {
352 BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: BccOpc))
353 .addMBB(MBB: TBB)
354 .addImm(Val: Cond[0].getImm())
355 .add(MO: Cond[1]);
356 } else
357 BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: Cond[0].getImm())).add(MO: Cond[1]).addMBB(MBB: TBB);
358 return 1;
359 }
360
361 // Two-way conditional branch.
362 if (Cond.size() == 2)
363 BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: BccOpc))
364 .addMBB(MBB: TBB)
365 .addImm(Val: Cond[0].getImm())
366 .add(MO: Cond[1]);
367 else if (Cond.size() == 3)
368 BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: Cond[0].getImm())).add(MO: Cond[1]).addMBB(MBB: TBB);
369 if (isThumb)
370 BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: BOpc)).addMBB(MBB: FBB).add(MOs: predOps(Pred: ARMCC::AL));
371 else
372 BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: BOpc)).addMBB(MBB: FBB);
373 return 2;
374}
375
376bool ARMBaseInstrInfo::
377reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
378 if (Cond.size() == 2) {
379 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
380 Cond[0].setImm(ARMCC::getOppositeCondition(CC));
381 return false;
382 }
383 return true;
384}
385
386bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
387 if (MI.isBundle()) {
388 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
389 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
390 while (++I != E && I->isInsideBundle()) {
391 int PIdx = I->findFirstPredOperandIdx();
392 if (PIdx != -1 && I->getOperand(i: PIdx).getImm() != ARMCC::AL)
393 return true;
394 }
395 return false;
396 }
397
398 int PIdx = MI.findFirstPredOperandIdx();
399 return PIdx != -1 && MI.getOperand(i: PIdx).getImm() != ARMCC::AL;
400}
401
402std::string ARMBaseInstrInfo::createMIROperandComment(
403 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
404 const TargetRegisterInfo *TRI) const {
405
406 // First, let's see if there is a generic comment for this operand
407 std::string GenericComment =
408 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
409 if (!GenericComment.empty())
410 return GenericComment;
411
412 // If not, check if we have an immediate operand.
413 if (!Op.isImm())
414 return std::string();
415
416 // And print its corresponding condition code if the immediate is a
417 // predicate.
418 int FirstPredOp = MI.findFirstPredOperandIdx();
419 if (FirstPredOp != (int) OpIdx)
420 return std::string();
421
422 std::string CC = "CC::";
423 CC += ARMCondCodeToString(CC: (ARMCC::CondCodes)Op.getImm());
424 return CC;
425}
426
427bool ARMBaseInstrInfo::PredicateInstruction(
428 MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
429 unsigned Opc = MI.getOpcode();
430 if (isUncondBranchOpcode(Opc)) {
431 MI.setDesc(get(Opcode: getMatchingCondBranchOpcode(Opc)));
432 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
433 .addImm(Val: Pred[0].getImm())
434 .addReg(RegNo: Pred[1].getReg());
435 return true;
436 }
437
438 int PIdx = MI.findFirstPredOperandIdx();
439 if (PIdx != -1) {
440 MachineOperand &PMO = MI.getOperand(i: PIdx);
441 PMO.setImm(Pred[0].getImm());
442 MI.getOperand(i: PIdx+1).setReg(Pred[1].getReg());
443
444 // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
445 // IT block. This affects how they are printed.
446 const MCInstrDesc &MCID = MI.getDesc();
447 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
448 assert(MCID.operands()[1].isOptionalDef() &&
449 "CPSR def isn't expected operand");
450 assert((MI.getOperand(1).isDead() ||
451 MI.getOperand(1).getReg() != ARM::CPSR) &&
452 "if conversion tried to stop defining used CPSR");
453 MI.getOperand(i: 1).setReg(ARM::NoRegister);
454 }
455
456 return true;
457 }
458 return false;
459}
460
461bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
462 ArrayRef<MachineOperand> Pred2) const {
463 if (Pred1.size() > 2 || Pred2.size() > 2)
464 return false;
465
466 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
467 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
468 if (CC1 == CC2)
469 return true;
470
471 switch (CC1) {
472 default:
473 return false;
474 case ARMCC::AL:
475 return true;
476 case ARMCC::HS:
477 return CC2 == ARMCC::HI;
478 case ARMCC::LS:
479 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
480 case ARMCC::GE:
481 return CC2 == ARMCC::GT;
482 case ARMCC::LE:
483 return CC2 == ARMCC::LT;
484 }
485}
486
487bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
488 std::vector<MachineOperand> &Pred,
489 bool SkipDead) const {
490 bool Found = false;
491 for (const MachineOperand &MO : MI.operands()) {
492 bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(PhysReg: ARM::CPSR);
493 bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
494 if (ClobbersCPSR || IsCPSR) {
495
496 // Filter out T1 instructions that have a dead CPSR,
497 // allowing IT blocks to be generated containing T1 instructions
498 const MCInstrDesc &MCID = MI.getDesc();
499 if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
500 SkipDead)
501 continue;
502
503 Pred.push_back(x: MO);
504 Found = true;
505 }
506 }
507
508 return Found;
509}
510
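/// Return true if MI has a definition of CPSR that is not marked dead.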
511bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
512 for (const auto &MO : MI.operands())
513 if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
514 return true;
515 return false;
516}
517
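// Thumb1 flag-setting arithmetic/logic instructions may only be placed in an
// IT block if their CPSR definition is dead, since inside an IT block they do
// not update the flags.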
518static bool isEligibleForITBlock(const MachineInstr *MI) {
519 switch (MI->getOpcode()) {
520 default: return true;
521 case ARM::tADC: // ADC (register) T1
522 case ARM::tADDi3: // ADD (immediate) T1
523 case ARM::tADDi8: // ADD (immediate) T2
524 case ARM::tADDrr: // ADD (register) T1
525 case ARM::tAND: // AND (register) T1
526 case ARM::tASRri: // ASR (immediate) T1
527 case ARM::tASRrr: // ASR (register) T1
528 case ARM::tBIC: // BIC (register) T1
529 case ARM::tEOR: // EOR (register) T1
530 case ARM::tLSLri: // LSL (immediate) T1
531 case ARM::tLSLrr: // LSL (register) T1
532 case ARM::tLSRri: // LSR (immediate) T1
533 case ARM::tLSRrr: // LSR (register) T1
534 case ARM::tMUL: // MUL T1
535 case ARM::tMVN: // MVN (register) T1
536 case ARM::tORR: // ORR (register) T1
537 case ARM::tROR: // ROR (register) T1
538 case ARM::tRSB: // RSB (immediate) T1
539 case ARM::tSBC: // SBC (register) T1
540 case ARM::tSUBi3: // SUB (immediate) T1
541 case ARM::tSUBi8: // SUB (immediate) T2
542 case ARM::tSUBrr: // SUB (register) T1
543 return !ARMBaseInstrInfo::isCPSRDefined(MI: *MI);
544 }
545}
546
547/// isPredicable - Return true if the specified instruction can be predicated.
548/// By default, this returns true for every instruction with a
549/// PredicateOperand.
550bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
551 if (!MI.isPredicable())
552 return false;
553
554 if (MI.isBundle())
555 return false;
556
557 if (!isEligibleForITBlock(MI: &MI))
558 return false;
559
560 const MachineFunction *MF = MI.getParent()->getParent();
561 const ARMFunctionInfo *AFI =
562 MF->getInfo<ARMFunctionInfo>();
563
564 // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
565 // In their ARM encoding, they can't be encoded in a conditional form.
566 if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
567 return false;
568
569 // Make indirect control flow changes unpredicable when SLS mitigation is
570 // enabled.
571 const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
572 if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
573 return false;
574 if (ST.hardenSlsBlr() && isIndirectCall(MI))
575 return false;
576
577 if (AFI->isThumb2Function()) {
578 if (getSubtarget().restrictIT())
579 return isV8EligibleForIT(Instr: &MI);
580 }
581
582 return true;
583}
584
585namespace llvm {
586
587template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
588 for (const MachineOperand &MO : MI->operands()) {
589 if (!MO.isReg() || MO.isUndef() || MO.isUse())
590 continue;
591 if (MO.getReg() != ARM::CPSR)
592 continue;
593 if (!MO.isDead())
594 return false;
595 }
596 // all definitions of CPSR are dead
597 return true;
598}
599
600} // end namespace llvm
601
602/// GetInstSize - Return the size of the specified MachineInstr.
603///
604unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
605 const MachineBasicBlock &MBB = *MI.getParent();
606 const MachineFunction *MF = MBB.getParent();
607 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
608
609 const MCInstrDesc &MCID = MI.getDesc();
610
611 switch (MI.getOpcode()) {
612 default:
613 // Return the size specified in .td file. If there's none, return 0, as we
614 // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
615 // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
616 // contrast to AArch64 instructions which have a default size of 4 bytes for
617 // example.
618 return MCID.getSize();
619 case TargetOpcode::BUNDLE:
620 return getInstBundleLength(MI);
621 case ARM::CONSTPOOL_ENTRY:
622 case ARM::JUMPTABLE_INSTS:
623 case ARM::JUMPTABLE_ADDRS:
624 case ARM::JUMPTABLE_TBB:
625 case ARM::JUMPTABLE_TBH:
626 // If this machine instr is a constant pool entry, its size is recorded as
627 // operand #2.
628 return MI.getOperand(i: 2).getImm();
629 case ARM::SPACE:
630 return MI.getOperand(i: 1).getImm();
631 case ARM::INLINEASM:
632 case ARM::INLINEASM_BR: {
633 // If this machine instr is an inline asm, measure it.
634 unsigned Size = getInlineAsmLength(Str: MI.getOperand(i: 0).getSymbolName(), MAI: *MAI);
635 if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
636 Size = alignTo(Value: Size, Align: 4);
637 return Size;
638 }
639 }
640}
641
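/// Return the total size in bytes of the instructions contained in the
/// bundle headed by MI.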
unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}
652
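/// Emit an MRS-style instruction (MRS, t2MRS_M or t2MRS_AR depending on the
/// subtarget) that copies the status flags out of CPSR/APSR into DestReg.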
void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    MCRegister DestReg, bool KillSrc,
                                    const ARMSubtarget &Subtarget) const {
  unsigned Opc = Subtarget.isThumb()
                     ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
                     : ARM::MRS;

  MachineInstrBuilder MIB =
      BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);

  // There is only 1 A/R class MRS instruction, and it always refers to
  // APSR. However, there are lots of other possibilities on M-class cores.
  if (Subtarget.isMClass())
    MIB.addImm(0x800);

  MIB.add(predOps(ARMCC::AL))
      .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
}
672
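/// Emit an MSR-style instruction (MSR, t2MSR_M or t2MSR_AR depending on the
/// subtarget) that copies SrcReg into the status flags of CPSR/APSR.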
void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  MCRegister SrcReg, bool KillSrc,
                                  const ARMSubtarget &Subtarget) const {
  unsigned Opc = Subtarget.isThumb()
                     ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
                     : ARM::MSR;

  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));

  if (Subtarget.isMClass())
    MIB.addImm(0x800);
  else
    MIB.addImm(8);

  MIB.addReg(SrcReg, getKillRegState(KillSrc))
      .add(predOps(ARMCC::AL))
      .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
}
692
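// Helpers for adding MVE vector predication (VPT) operands to an instruction:
// the "N" variants add the predicate condition, the VPR register and the
// tail-predication register operand, and the "R" variants additionally add
// the inactive-lanes source register.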
void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
  MIB.addImm(ARMVCC::None);
  MIB.addReg(0);
  MIB.addReg(0); // tp_reg
}

void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
                                      Register DestReg) {
  addUnpredicatedMveVpredNOp(MIB);
  MIB.addReg(DestReg, RegState::Undef);
}

void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
  MIB.addImm(Cond);
  MIB.addReg(ARM::VPR, RegState::Implicit);
  MIB.addReg(0); // tp_reg
}

void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
                                    unsigned Cond, unsigned Inactive) {
  addPredicatedMveVpredNOp(MIB, Cond);
  MIB.addReg(Inactive);
}
716
717void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
718 MachineBasicBlock::iterator I,
719 const DebugLoc &DL, Register DestReg,
720 Register SrcReg, bool KillSrc,
721 bool RenamableDest,
722 bool RenamableSrc) const {
723 bool GPRDest = ARM::GPRRegClass.contains(Reg: DestReg);
724 bool GPRSrc = ARM::GPRRegClass.contains(Reg: SrcReg);
725
726 if (GPRDest && GPRSrc) {
727 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::MOVr), DestReg)
728 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc))
729 .add(MOs: predOps(Pred: ARMCC::AL))
730 .add(MO: condCodeOp());
731 return;
732 }
733
734 bool SPRDest = ARM::SPRRegClass.contains(Reg: DestReg);
735 bool SPRSrc = ARM::SPRRegClass.contains(Reg: SrcReg);
736
737 unsigned Opc = 0;
738 if (SPRDest && SPRSrc)
739 Opc = ARM::VMOVS;
740 else if (GPRDest && SPRSrc)
741 Opc = ARM::VMOVRS;
742 else if (SPRDest && GPRSrc)
743 Opc = ARM::VMOVSR;
744 else if (ARM::DPRRegClass.contains(Reg1: DestReg, Reg2: SrcReg) && Subtarget.hasFP64())
745 Opc = ARM::VMOVD;
746 else if (ARM::QPRRegClass.contains(Reg1: DestReg, Reg2: SrcReg))
747 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
748
749 if (Opc) {
750 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: Opc), DestReg);
751 MIB.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
752 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
753 MIB.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
754 if (Opc == ARM::MVE_VORR)
755 addUnpredicatedMveVpredROp(MIB, DestReg);
756 else if (Opc != ARM::MQPRCopy)
757 MIB.add(MOs: predOps(Pred: ARMCC::AL));
758 return;
759 }
760
761 // Handle register classes that require multiple instructions.
762 unsigned BeginIdx = 0;
763 unsigned SubRegs = 0;
764 int Spacing = 1;
765
766 // Use VORRq when possible.
767 if (ARM::QQPRRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) {
768 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
769 BeginIdx = ARM::qsub_0;
770 SubRegs = 2;
771 } else if (ARM::QQQQPRRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) {
772 Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
773 BeginIdx = ARM::qsub_0;
774 SubRegs = 4;
775 // Fall back to VMOVD.
776 } else if (ARM::DPairRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) {
777 Opc = ARM::VMOVD;
778 BeginIdx = ARM::dsub_0;
779 SubRegs = 2;
780 } else if (ARM::DTripleRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) {
781 Opc = ARM::VMOVD;
782 BeginIdx = ARM::dsub_0;
783 SubRegs = 3;
784 } else if (ARM::DQuadRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) {
785 Opc = ARM::VMOVD;
786 BeginIdx = ARM::dsub_0;
787 SubRegs = 4;
788 } else if (ARM::GPRPairRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) {
789 Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
790 BeginIdx = ARM::gsub_0;
791 SubRegs = 2;
792 } else if (ARM::DPairSpcRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) {
793 Opc = ARM::VMOVD;
794 BeginIdx = ARM::dsub_0;
795 SubRegs = 2;
796 Spacing = 2;
797 } else if (ARM::DTripleSpcRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) {
798 Opc = ARM::VMOVD;
799 BeginIdx = ARM::dsub_0;
800 SubRegs = 3;
801 Spacing = 2;
802 } else if (ARM::DQuadSpcRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) {
803 Opc = ARM::VMOVD;
804 BeginIdx = ARM::dsub_0;
805 SubRegs = 4;
806 Spacing = 2;
807 } else if (ARM::DPRRegClass.contains(Reg1: DestReg, Reg2: SrcReg) &&
808 !Subtarget.hasFP64()) {
809 Opc = ARM::VMOVS;
810 BeginIdx = ARM::ssub_0;
811 SubRegs = 2;
812 } else if (SrcReg == ARM::CPSR) {
813 copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
814 return;
815 } else if (DestReg == ARM::CPSR) {
816 copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
817 return;
818 } else if (DestReg == ARM::VPR) {
819 assert(ARM::GPRRegClass.contains(SrcReg));
820 BuildMI(BB&: MBB, I, MIMD: I->getDebugLoc(), MCID: get(Opcode: ARM::VMSR_P0), DestReg)
821 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc))
822 .add(MOs: predOps(Pred: ARMCC::AL));
823 return;
824 } else if (SrcReg == ARM::VPR) {
825 assert(ARM::GPRRegClass.contains(DestReg));
826 BuildMI(BB&: MBB, I, MIMD: I->getDebugLoc(), MCID: get(Opcode: ARM::VMRS_P0), DestReg)
827 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc))
828 .add(MOs: predOps(Pred: ARMCC::AL));
829 return;
830 } else if (DestReg == ARM::FPSCR_NZCV) {
831 assert(ARM::GPRRegClass.contains(SrcReg));
832 BuildMI(BB&: MBB, I, MIMD: I->getDebugLoc(), MCID: get(Opcode: ARM::VMSR_FPSCR_NZCVQC), DestReg)
833 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc))
834 .add(MOs: predOps(Pred: ARMCC::AL));
835 return;
836 } else if (SrcReg == ARM::FPSCR_NZCV) {
837 assert(ARM::GPRRegClass.contains(DestReg));
838 BuildMI(BB&: MBB, I, MIMD: I->getDebugLoc(), MCID: get(Opcode: ARM::VMRS_FPSCR_NZCVQC), DestReg)
839 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc))
840 .add(MOs: predOps(Pred: ARMCC::AL));
841 return;
842 }
843
844 assert(Opc && "Impossible reg-to-reg copy");
845
846 const TargetRegisterInfo *TRI = &getRegisterInfo();
847 MachineInstrBuilder Mov;
848
849 // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
850 if (TRI->regsOverlap(RegA: SrcReg, RegB: TRI->getSubReg(Reg: DestReg, Idx: BeginIdx))) {
851 BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
852 Spacing = -Spacing;
853 }
854#ifndef NDEBUG
855 SmallSet<unsigned, 4> DstRegs;
856#endif
857 for (unsigned i = 0; i != SubRegs; ++i) {
858 Register Dst = TRI->getSubReg(Reg: DestReg, Idx: BeginIdx + i * Spacing);
859 Register Src = TRI->getSubReg(Reg: SrcReg, Idx: BeginIdx + i * Spacing);
860 assert(Dst && Src && "Bad sub-register");
861#ifndef NDEBUG
862 assert(!DstRegs.count(Src) && "destructive vector copy");
863 DstRegs.insert(Dst);
864#endif
865 Mov = BuildMI(BB&: MBB, I, MIMD: I->getDebugLoc(), MCID: get(Opcode: Opc), DestReg: Dst).addReg(RegNo: Src);
866 // VORR (NEON or MVE) takes two source operands.
867 if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
868 Mov.addReg(RegNo: Src);
869 }
870 // MVE VORR takes predicate operands in place of an ordinary condition.
871 if (Opc == ARM::MVE_VORR)
872 addUnpredicatedMveVpredROp(MIB&: Mov, DestReg: Dst);
873 else
874 Mov = Mov.add(MOs: predOps(Pred: ARMCC::AL));
875 // MOVr can set CC.
876 if (Opc == ARM::MOVr)
877 Mov = Mov.add(MO: condCodeOp());
878 }
879 // Add implicit super-register defs and kills to the last instruction.
880 Mov->addRegisterDefined(Reg: DestReg, RegInfo: TRI);
881 if (KillSrc)
882 Mov->addRegisterKilled(IncomingReg: SrcReg, RegInfo: TRI);
883}
884
std::optional<DestSourcePair>
ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
  // VMOVRRD is also a copy instruction, but it requires special handling: it
  // is a more complex form of copy, so it is not considered here. To
  // recognize such instructions, the isExtractSubregLike MI interface
  // function could be used.
  // VORRq is considered a move only if its two source operands are the same
  // register.
  if (!MI.isMoveReg() ||
      (MI.getOpcode() == ARM::VORRq &&
       MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
    return std::nullopt;
  return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
}
900
901std::optional<ParamLoadedValue>
902ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
903 Register Reg) const {
904 if (auto DstSrcPair = isCopyInstrImpl(MI)) {
905 Register DstReg = DstSrcPair->Destination->getReg();
906
907 // TODO: We don't handle cases where the forwarding reg is narrower/wider
908 // than the copy registers. Consider for example:
909 //
910 // s16 = VMOVS s0
911 // s17 = VMOVS s1
912 // call @callee(d0)
913 //
914 // We'd like to describe the call site value of d0 as d8, but this requires
915 // gathering and merging the descriptions for the two VMOVS instructions.
916 //
917 // We also don't handle the reverse situation, where the forwarding reg is
918 // narrower than the copy destination:
919 //
920 // d8 = VMOVD d0
921 // call @callee(s1)
922 //
923 // We need to produce a fragment description (the call site value of s1 is
924 // /not/ just d8).
925 if (DstReg != Reg)
926 return std::nullopt;
927 }
928 return TargetInstrInfo::describeLoadedValue(MI, Reg);
929}
930
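/// Add Reg (with the given sub-register index and register state) as an
/// operand: physical registers are resolved to the concrete sub-register,
/// while virtual registers keep the sub-register index on the operand.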
const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
                          unsigned SubIdx, unsigned State,
                          const TargetRegisterInfo *TRI) const {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (Register::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}
942
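// Spill SrcReg to stack slot FI. The store instruction chosen depends on the
// spill size of the register class and on the available subtarget features
// (e.g. NEON aligned stores, MVE stores, or STRD/STM for GPR pairs).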
943void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
944 MachineBasicBlock::iterator I,
945 Register SrcReg, bool isKill, int FI,
946 const TargetRegisterClass *RC,
947 const TargetRegisterInfo *TRI,
948 Register VReg,
949 MachineInstr::MIFlag Flags) const {
950 MachineFunction &MF = *MBB.getParent();
951 MachineFrameInfo &MFI = MF.getFrameInfo();
952 Align Alignment = MFI.getObjectAlign(ObjectIdx: FI);
953
954 MachineMemOperand *MMO = MF.getMachineMemOperand(
955 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI), F: MachineMemOperand::MOStore,
956 Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: Alignment);
957
958 switch (TRI->getSpillSize(RC: *RC)) {
959 case 2:
960 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
961 BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VSTRH))
962 .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
963 .addFrameIndex(Idx: FI)
964 .addImm(Val: 0)
965 .addMemOperand(MMO)
966 .add(MOs: predOps(Pred: ARMCC::AL));
967 } else
968 llvm_unreachable("Unknown reg class!");
969 break;
970 case 4:
971 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
972 BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::STRi12))
973 .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
974 .addFrameIndex(Idx: FI)
975 .addImm(Val: 0)
976 .addMemOperand(MMO)
977 .add(MOs: predOps(Pred: ARMCC::AL));
978 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
979 BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VSTRS))
980 .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
981 .addFrameIndex(Idx: FI)
982 .addImm(Val: 0)
983 .addMemOperand(MMO)
984 .add(MOs: predOps(Pred: ARMCC::AL));
985 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
986 BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VSTR_P0_off))
987 .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
988 .addFrameIndex(Idx: FI)
989 .addImm(Val: 0)
990 .addMemOperand(MMO)
991 .add(MOs: predOps(Pred: ARMCC::AL));
992 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
993 BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VSTR_FPSCR_NZCVQC_off))
994 .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
995 .addFrameIndex(Idx: FI)
996 .addImm(Val: 0)
997 .addMemOperand(MMO)
998 .add(MOs: predOps(Pred: ARMCC::AL));
999 } else
1000 llvm_unreachable("Unknown reg class!");
1001 break;
1002 case 8:
1003 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1004 BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VSTRD))
1005 .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
1006 .addFrameIndex(Idx: FI)
1007 .addImm(Val: 0)
1008 .addMemOperand(MMO)
1009 .add(MOs: predOps(Pred: ARMCC::AL));
1010 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1011 if (Subtarget.hasV5TEOps()) {
1012 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::STRD));
1013 AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::gsub_0, State: getKillRegState(B: isKill), TRI);
1014 AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::gsub_1, State: 0, TRI);
1015 MIB.addFrameIndex(Idx: FI).addReg(RegNo: 0).addImm(Val: 0).addMemOperand(MMO)
1016 .add(MOs: predOps(Pred: ARMCC::AL));
1017 } else {
1018 // Fallback to STM instruction, which has existed since the dawn of
1019 // time.
1020 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::STMIA))
1021 .addFrameIndex(Idx: FI)
1022 .addMemOperand(MMO)
1023 .add(MOs: predOps(Pred: ARMCC::AL));
1024 AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::gsub_0, State: getKillRegState(B: isKill), TRI);
1025 AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::gsub_1, State: 0, TRI);
1026 }
1027 } else
1028 llvm_unreachable("Unknown reg class!");
1029 break;
1030 case 16:
1031 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1032 // Use aligned spills if the stack can be realigned.
1033 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1034 BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VST1q64))
1035 .addFrameIndex(Idx: FI)
1036 .addImm(Val: 16)
1037 .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
1038 .addMemOperand(MMO)
1039 .add(MOs: predOps(Pred: ARMCC::AL));
1040 } else {
1041 BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VSTMQIA))
1042 .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
1043 .addFrameIndex(Idx: FI)
1044 .addMemOperand(MMO)
1045 .add(MOs: predOps(Pred: ARMCC::AL));
1046 }
1047 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1048 Subtarget.hasMVEIntegerOps()) {
1049 auto MIB = BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::MVE_VSTRWU32));
1050 MIB.addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
1051 .addFrameIndex(Idx: FI)
1052 .addImm(Val: 0)
1053 .addMemOperand(MMO);
1054 addUnpredicatedMveVpredNOp(MIB);
1055 } else
1056 llvm_unreachable("Unknown reg class!");
1057 break;
1058 case 24:
1059 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1060 // Use aligned spills if the stack can be realigned.
1061 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1062 Subtarget.hasNEON()) {
1063 BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VST1d64TPseudo))
1064 .addFrameIndex(Idx: FI)
1065 .addImm(Val: 16)
1066 .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
1067 .addMemOperand(MMO)
1068 .add(MOs: predOps(Pred: ARMCC::AL));
1069 } else {
1070 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DebugLoc(),
1071 MCID: get(Opcode: ARM::VSTMDIA))
1072 .addFrameIndex(Idx: FI)
1073 .add(MOs: predOps(Pred: ARMCC::AL))
1074 .addMemOperand(MMO);
1075 MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_0, State: getKillRegState(B: isKill), TRI);
1076 MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_1, State: 0, TRI);
1077 AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_2, State: 0, TRI);
1078 }
1079 } else
1080 llvm_unreachable("Unknown reg class!");
1081 break;
1082 case 32:
1083 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1084 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1085 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1086 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1087 Subtarget.hasNEON()) {
1088 // FIXME: It's possible to only store part of the QQ register if the
1089 // spilled def has a sub-register index.
1090 BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VST1d64QPseudo))
1091 .addFrameIndex(Idx: FI)
1092 .addImm(Val: 16)
1093 .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
1094 .addMemOperand(MMO)
1095 .add(MOs: predOps(Pred: ARMCC::AL));
1096 } else if (Subtarget.hasMVEIntegerOps()) {
1097 BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::MQQPRStore))
1098 .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
1099 .addFrameIndex(Idx: FI)
1100 .addMemOperand(MMO);
1101 } else {
1102 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DebugLoc(),
1103 MCID: get(Opcode: ARM::VSTMDIA))
1104 .addFrameIndex(Idx: FI)
1105 .add(MOs: predOps(Pred: ARMCC::AL))
1106 .addMemOperand(MMO);
1107 MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_0, State: getKillRegState(B: isKill), TRI);
1108 MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_1, State: 0, TRI);
1109 MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_2, State: 0, TRI);
1110 AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_3, State: 0, TRI);
1111 }
1112 } else
1113 llvm_unreachable("Unknown reg class!");
1114 break;
1115 case 64:
1116 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1117 Subtarget.hasMVEIntegerOps()) {
1118 BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::MQQQQPRStore))
1119 .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
1120 .addFrameIndex(Idx: FI)
1121 .addMemOperand(MMO);
1122 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1123 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VSTMDIA))
1124 .addFrameIndex(Idx: FI)
1125 .add(MOs: predOps(Pred: ARMCC::AL))
1126 .addMemOperand(MMO);
1127 MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_0, State: getKillRegState(B: isKill), TRI);
1128 MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_1, State: 0, TRI);
1129 MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_2, State: 0, TRI);
1130 MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_3, State: 0, TRI);
1131 MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_4, State: 0, TRI);
1132 MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_5, State: 0, TRI);
1133 MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_6, State: 0, TRI);
1134 AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_7, State: 0, TRI);
1135 } else
1136 llvm_unreachable("Unknown reg class!");
1137 break;
1138 default:
1139 llvm_unreachable("Unknown reg class!");
1140 }
1141}
1142
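/// If MI is a store of a register directly to a stack slot (frame index with
/// zero offset and no index register), return the stored register and set
/// FrameIndex; otherwise return 0.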
1143Register ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1144 int &FrameIndex) const {
1145 switch (MI.getOpcode()) {
1146 default: break;
1147 case ARM::STRrs:
1148 case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
1149 if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isReg() &&
1150 MI.getOperand(i: 3).isImm() && MI.getOperand(i: 2).getReg() == 0 &&
1151 MI.getOperand(i: 3).getImm() == 0) {
1152 FrameIndex = MI.getOperand(i: 1).getIndex();
1153 return MI.getOperand(i: 0).getReg();
1154 }
1155 break;
1156 case ARM::STRi12:
1157 case ARM::t2STRi12:
1158 case ARM::tSTRspi:
1159 case ARM::VSTRD:
1160 case ARM::VSTRS:
1161 case ARM::VSTRH:
1162 case ARM::VSTR_P0_off:
1163 case ARM::VSTR_FPSCR_NZCVQC_off:
1164 case ARM::MVE_VSTRWU32:
1165 if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() &&
1166 MI.getOperand(i: 2).getImm() == 0) {
1167 FrameIndex = MI.getOperand(i: 1).getIndex();
1168 return MI.getOperand(i: 0).getReg();
1169 }
1170 break;
1171 case ARM::VST1q64:
1172 case ARM::VST1d64TPseudo:
1173 case ARM::VST1d64QPseudo:
1174 if (MI.getOperand(i: 0).isFI() && MI.getOperand(i: 2).getSubReg() == 0) {
1175 FrameIndex = MI.getOperand(i: 0).getIndex();
1176 return MI.getOperand(i: 2).getReg();
1177 }
1178 break;
1179 case ARM::VSTMQIA:
1180 if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 0).getSubReg() == 0) {
1181 FrameIndex = MI.getOperand(i: 1).getIndex();
1182 return MI.getOperand(i: 0).getReg();
1183 }
1184 break;
1185 case ARM::MQQPRStore:
1186 case ARM::MQQQQPRStore:
1187 if (MI.getOperand(i: 1).isFI()) {
1188 FrameIndex = MI.getOperand(i: 1).getIndex();
1189 return MI.getOperand(i: 0).getReg();
1190 }
1191 break;
1192 }
1193
1194 return 0;
1195}
1196
1197Register ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
1198 int &FrameIndex) const {
1199 SmallVector<const MachineMemOperand *, 1> Accesses;
1200 if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
1201 Accesses.size() == 1) {
1202 FrameIndex =
1203 cast<FixedStackPseudoSourceValue>(Val: Accesses.front()->getPseudoValue())
1204 ->getFrameIndex();
1205 return true;
1206 }
1207 return false;
1208}
1209
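// Reload DestReg from stack slot FI, mirroring storeRegToStackSlot: the load
// instruction chosen depends on the spill size of the register class and on
// the available subtarget features.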
1210void ARMBaseInstrInfo::loadRegFromStackSlot(
1211 MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
1212 int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
1213 Register VReg, MachineInstr::MIFlag Flags) const {
1214 DebugLoc DL;
1215 if (I != MBB.end()) DL = I->getDebugLoc();
1216 MachineFunction &MF = *MBB.getParent();
1217 MachineFrameInfo &MFI = MF.getFrameInfo();
1218 const Align Alignment = MFI.getObjectAlign(ObjectIdx: FI);
1219 MachineMemOperand *MMO = MF.getMachineMemOperand(
1220 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI), F: MachineMemOperand::MOLoad,
1221 Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: Alignment);
1222
1223 switch (TRI->getSpillSize(RC: *RC)) {
1224 case 2:
1225 if (ARM::HPRRegClass.hasSubClassEq(RC)) {
1226 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDRH), DestReg)
1227 .addFrameIndex(Idx: FI)
1228 .addImm(Val: 0)
1229 .addMemOperand(MMO)
1230 .add(MOs: predOps(Pred: ARMCC::AL));
1231 } else
1232 llvm_unreachable("Unknown reg class!");
1233 break;
1234 case 4:
1235 if (ARM::GPRRegClass.hasSubClassEq(RC)) {
1236 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::LDRi12), DestReg)
1237 .addFrameIndex(Idx: FI)
1238 .addImm(Val: 0)
1239 .addMemOperand(MMO)
1240 .add(MOs: predOps(Pred: ARMCC::AL));
1241 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
1242 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDRS), DestReg)
1243 .addFrameIndex(Idx: FI)
1244 .addImm(Val: 0)
1245 .addMemOperand(MMO)
1246 .add(MOs: predOps(Pred: ARMCC::AL));
1247 } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
1248 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDR_P0_off), DestReg)
1249 .addFrameIndex(Idx: FI)
1250 .addImm(Val: 0)
1251 .addMemOperand(MMO)
1252 .add(MOs: predOps(Pred: ARMCC::AL));
1253 } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) {
1254 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDR_FPSCR_NZCVQC_off), DestReg)
1255 .addFrameIndex(Idx: FI)
1256 .addImm(Val: 0)
1257 .addMemOperand(MMO)
1258 .add(MOs: predOps(Pred: ARMCC::AL));
1259 } else
1260 llvm_unreachable("Unknown reg class!");
1261 break;
1262 case 8:
1263 if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1264 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDRD), DestReg)
1265 .addFrameIndex(Idx: FI)
1266 .addImm(Val: 0)
1267 .addMemOperand(MMO)
1268 .add(MOs: predOps(Pred: ARMCC::AL));
1269 } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1270 MachineInstrBuilder MIB;
1271
1272 if (Subtarget.hasV5TEOps()) {
1273 MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::LDRD));
1274 AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_0, State: RegState::DefineNoRead, TRI);
1275 AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_1, State: RegState::DefineNoRead, TRI);
1276 MIB.addFrameIndex(Idx: FI).addReg(RegNo: 0).addImm(Val: 0).addMemOperand(MMO)
1277 .add(MOs: predOps(Pred: ARMCC::AL));
1278 } else {
1279 // Fallback to LDM instruction, which has existed since the dawn of
1280 // time.
1281 MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::LDMIA))
1282 .addFrameIndex(Idx: FI)
1283 .addMemOperand(MMO)
1284 .add(MOs: predOps(Pred: ARMCC::AL));
1285 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_0, State: RegState::DefineNoRead, TRI);
1286 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_1, State: RegState::DefineNoRead, TRI);
1287 }
1288
1289 if (DestReg.isPhysical())
1290 MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine);
1291 } else
1292 llvm_unreachable("Unknown reg class!");
1293 break;
1294 case 16:
1295 if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
1296 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
1297 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLD1q64), DestReg)
1298 .addFrameIndex(Idx: FI)
1299 .addImm(Val: 16)
1300 .addMemOperand(MMO)
1301 .add(MOs: predOps(Pred: ARMCC::AL));
1302 } else {
1303 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDMQIA), DestReg)
1304 .addFrameIndex(Idx: FI)
1305 .addMemOperand(MMO)
1306 .add(MOs: predOps(Pred: ARMCC::AL));
1307 }
1308 } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
1309 Subtarget.hasMVEIntegerOps()) {
1310 auto MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::MVE_VLDRWU32), DestReg);
1311 MIB.addFrameIndex(Idx: FI)
1312 .addImm(Val: 0)
1313 .addMemOperand(MMO);
1314 addUnpredicatedMveVpredNOp(MIB);
1315 } else
1316 llvm_unreachable("Unknown reg class!");
1317 break;
1318 case 24:
1319 if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1320 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1321 Subtarget.hasNEON()) {
1322 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLD1d64TPseudo), DestReg)
1323 .addFrameIndex(Idx: FI)
1324 .addImm(Val: 16)
1325 .addMemOperand(MMO)
1326 .add(MOs: predOps(Pred: ARMCC::AL));
1327 } else {
1328 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDMDIA))
1329 .addFrameIndex(Idx: FI)
1330 .addMemOperand(MMO)
1331 .add(MOs: predOps(Pred: ARMCC::AL));
1332 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_0, State: RegState::DefineNoRead, TRI);
1333 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_1, State: RegState::DefineNoRead, TRI);
1334 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_2, State: RegState::DefineNoRead, TRI);
1335 if (DestReg.isPhysical())
1336 MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine);
1337 }
1338 } else
1339 llvm_unreachable("Unknown reg class!");
1340 break;
1341 case 32:
1342 if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
1343 ARM::MQQPRRegClass.hasSubClassEq(RC) ||
1344 ARM::DQuadRegClass.hasSubClassEq(RC)) {
1345 if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
1346 Subtarget.hasNEON()) {
1347 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLD1d64QPseudo), DestReg)
1348 .addFrameIndex(Idx: FI)
1349 .addImm(Val: 16)
1350 .addMemOperand(MMO)
1351 .add(MOs: predOps(Pred: ARMCC::AL));
1352 } else if (Subtarget.hasMVEIntegerOps()) {
1353 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::MQQPRLoad), DestReg)
1354 .addFrameIndex(Idx: FI)
1355 .addMemOperand(MMO);
1356 } else {
1357 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDMDIA))
1358 .addFrameIndex(Idx: FI)
1359 .add(MOs: predOps(Pred: ARMCC::AL))
1360 .addMemOperand(MMO);
1361 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_0, State: RegState::DefineNoRead, TRI);
1362 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_1, State: RegState::DefineNoRead, TRI);
1363 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_2, State: RegState::DefineNoRead, TRI);
1364 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_3, State: RegState::DefineNoRead, TRI);
1365 if (DestReg.isPhysical())
1366 MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine);
1367 }
1368 } else
1369 llvm_unreachable("Unknown reg class!");
1370 break;
1371 case 64:
1372 if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
1373 Subtarget.hasMVEIntegerOps()) {
1374 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::MQQQQPRLoad), DestReg)
1375 .addFrameIndex(Idx: FI)
1376 .addMemOperand(MMO);
1377 } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1378 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDMDIA))
1379 .addFrameIndex(Idx: FI)
1380 .add(MOs: predOps(Pred: ARMCC::AL))
1381 .addMemOperand(MMO);
1382 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_0, State: RegState::DefineNoRead, TRI);
1383 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_1, State: RegState::DefineNoRead, TRI);
1384 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_2, State: RegState::DefineNoRead, TRI);
1385 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_3, State: RegState::DefineNoRead, TRI);
1386 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_4, State: RegState::DefineNoRead, TRI);
1387 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_5, State: RegState::DefineNoRead, TRI);
1388 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_6, State: RegState::DefineNoRead, TRI);
1389 MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_7, State: RegState::DefineNoRead, TRI);
1390 if (DestReg.isPhysical())
1391 MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine);
1392 } else
1393 llvm_unreachable("Unknown reg class!");
1394 break;
1395 default:
1396 llvm_unreachable("Unknown regclass!");
1397 }
1398}
1399
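/// If MI is a load of a register directly from a stack slot (frame index with
/// zero offset and no index register), return the loaded register and set
/// FrameIndex; otherwise return 0.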
1400Register ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1401 int &FrameIndex) const {
1402 switch (MI.getOpcode()) {
1403 default: break;
1404 case ARM::LDRrs:
1405 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
1406 if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isReg() &&
1407 MI.getOperand(i: 3).isImm() && MI.getOperand(i: 2).getReg() == 0 &&
1408 MI.getOperand(i: 3).getImm() == 0) {
1409 FrameIndex = MI.getOperand(i: 1).getIndex();
1410 return MI.getOperand(i: 0).getReg();
1411 }
1412 break;
1413 case ARM::LDRi12:
1414 case ARM::t2LDRi12:
1415 case ARM::tLDRspi:
1416 case ARM::VLDRD:
1417 case ARM::VLDRS:
1418 case ARM::VLDRH:
1419 case ARM::VLDR_P0_off:
1420 case ARM::VLDR_FPSCR_NZCVQC_off:
1421 case ARM::MVE_VLDRWU32:
1422 if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() &&
1423 MI.getOperand(i: 2).getImm() == 0) {
1424 FrameIndex = MI.getOperand(i: 1).getIndex();
1425 return MI.getOperand(i: 0).getReg();
1426 }
1427 break;
1428 case ARM::VLD1q64:
1429 case ARM::VLD1d8TPseudo:
1430 case ARM::VLD1d16TPseudo:
1431 case ARM::VLD1d32TPseudo:
1432 case ARM::VLD1d64TPseudo:
1433 case ARM::VLD1d8QPseudo:
1434 case ARM::VLD1d16QPseudo:
1435 case ARM::VLD1d32QPseudo:
1436 case ARM::VLD1d64QPseudo:
1437 if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 0).getSubReg() == 0) {
1438 FrameIndex = MI.getOperand(i: 1).getIndex();
1439 return MI.getOperand(i: 0).getReg();
1440 }
1441 break;
1442 case ARM::VLDMQIA:
1443 if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 0).getSubReg() == 0) {
1444 FrameIndex = MI.getOperand(i: 1).getIndex();
1445 return MI.getOperand(i: 0).getReg();
1446 }
1447 break;
1448 case ARM::MQQPRLoad:
1449 case ARM::MQQQQPRLoad:
1450 if (MI.getOperand(i: 1).isFI()) {
1451 FrameIndex = MI.getOperand(i: 1).getIndex();
1452 return MI.getOperand(i: 0).getReg();
1453 }
1454 break;
1455 }
1456
1457 return 0;
1458}
1459
1460Register ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
1461 int &FrameIndex) const {
1462 SmallVector<const MachineMemOperand *, 1> Accesses;
1463 if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
1464 Accesses.size() == 1) {
1465 FrameIndex =
1466 cast<FixedStackPseudoSourceValue>(Val: Accesses.front()->getPseudoValue())
1467 ->getFrameIndex();
1468 return true;
1469 }
1470 return false;
1471}
1472
1473/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
1474/// depending on whether the result is used.
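///
/// For example (illustrative register choices), a MEMCPY pseudo whose scratch
/// registers are r4-r6 expands to roughly:
///   ldmia src!, {r4, r5, r6}
///   stmia dst!, {r4, r5, r6}
/// using the non-writeback LDMIA/STMIA forms instead when the updated base
/// pointers are dead (Thumb1 always uses the writeback forms).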
1475void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
1476 bool isThumb1 = Subtarget.isThumb1Only();
1477 bool isThumb2 = Subtarget.isThumb2();
1478 const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
1479
1480 DebugLoc dl = MI->getDebugLoc();
1481 MachineBasicBlock *BB = MI->getParent();
1482
1483 MachineInstrBuilder LDM, STM;
1484 if (isThumb1 || !MI->getOperand(i: 1).isDead()) {
1485 MachineOperand LDWb(MI->getOperand(i: 1));
1486 LDM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2LDMIA_UPD
1487 : isThumb1 ? ARM::tLDMIA_UPD
1488 : ARM::LDMIA_UPD))
1489 .add(MO: LDWb);
1490 } else {
1491 LDM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
1492 }
1493
1494 if (isThumb1 || !MI->getOperand(i: 0).isDead()) {
1495 MachineOperand STWb(MI->getOperand(i: 0));
1496 STM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2STMIA_UPD
1497 : isThumb1 ? ARM::tSTMIA_UPD
1498 : ARM::STMIA_UPD))
1499 .add(MO: STWb);
1500 } else {
1501 STM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2STMIA : ARM::STMIA));
1502 }
1503
1504 MachineOperand LDBase(MI->getOperand(i: 3));
1505 LDM.add(MO: LDBase).add(MOs: predOps(Pred: ARMCC::AL));
1506
1507 MachineOperand STBase(MI->getOperand(i: 2));
1508 STM.add(MO: STBase).add(MOs: predOps(Pred: ARMCC::AL));
1509
1510 // Sort the scratch registers into ascending order.
1511 const TargetRegisterInfo &TRI = getRegisterInfo();
1512 SmallVector<unsigned, 6> ScratchRegs;
1513 for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands(), N: 5))
1514 ScratchRegs.push_back(Elt: MO.getReg());
1515 llvm::sort(C&: ScratchRegs,
1516 Comp: [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
1517 return TRI.getEncodingValue(Reg: Reg1) <
1518 TRI.getEncodingValue(Reg: Reg2);
1519 });
1520
1521 for (const auto &Reg : ScratchRegs) {
1522 LDM.addReg(RegNo: Reg, flags: RegState::Define);
1523 STM.addReg(RegNo: Reg, flags: RegState::Kill);
1524 }
1525
1526 BB->erase(I: MI);
1527}
1528
1529bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1530 if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
1531 expandLoadStackGuard(MI);
1532 MI.getParent()->erase(I: MI);
1533 return true;
1534 }
1535
1536 if (MI.getOpcode() == ARM::MEMCPY) {
1537 expandMEMCPY(MI);
1538 return true;
1539 }
1540
1541 // This hook gets to expand COPY instructions before they become
1542 // copyPhysReg() calls. Look for VMOVS instructions that can legally be
1543 // widened to VMOVD. We prefer the VMOVD when possible because it may be
1544 // changed into a VORR that can go down the NEON pipeline.
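  // For example (MIR sketch, operand details assumed):
  //   $s0 = COPY killed $s2
  // is rewritten below to
  //   $d0 = VMOVD undef $d1, 14 /* al */, $noreg, implicit killed $s2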
1545 if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
1546 return false;
1547
1548 // Look for a copy between even S-registers. That is where we keep floats
1549 // when using NEON v2f32 instructions for f32 arithmetic.
1550 Register DstRegS = MI.getOperand(i: 0).getReg();
1551 Register SrcRegS = MI.getOperand(i: 1).getReg();
1552 if (!ARM::SPRRegClass.contains(Reg1: DstRegS, Reg2: SrcRegS))
1553 return false;
1554
1555 const TargetRegisterInfo *TRI = &getRegisterInfo();
1556 MCRegister DstRegD =
1557 TRI->getMatchingSuperReg(Reg: DstRegS, SubIdx: ARM::ssub_0, RC: &ARM::DPRRegClass);
1558 MCRegister SrcRegD =
1559 TRI->getMatchingSuperReg(Reg: SrcRegS, SubIdx: ARM::ssub_0, RC: &ARM::DPRRegClass);
1560 if (!DstRegD || !SrcRegD)
1561 return false;
1562
1563 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
1564 // legal if the COPY already defines the full DstRegD, and it isn't a
1565 // sub-register insertion.
1566 if (!MI.definesRegister(Reg: DstRegD, TRI) || MI.readsRegister(Reg: DstRegD, TRI))
1567 return false;
1568
1569 // A dead copy shouldn't show up here, but reject it just in case.
1570 if (MI.getOperand(i: 0).isDead())
1571 return false;
1572
1573 // All clear, widen the COPY.
1574 LLVM_DEBUG(dbgs() << "widening: " << MI);
1575 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
1576
1577 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
1578 // or some other super-register.
1579 int ImpDefIdx = MI.findRegisterDefOperandIdx(Reg: DstRegD, /*TRI=*/nullptr);
1580 if (ImpDefIdx != -1)
1581 MI.removeOperand(OpNo: ImpDefIdx);
1582
1583 // Change the opcode and operands.
1584 MI.setDesc(get(Opcode: ARM::VMOVD));
1585 MI.getOperand(i: 0).setReg(DstRegD);
1586 MI.getOperand(i: 1).setReg(SrcRegD);
1587 MIB.add(MOs: predOps(Pred: ARMCC::AL));
1588
1589 // We are now reading SrcRegD instead of SrcRegS. This may upset the
1590 // register scavenger and machine verifier, so we need to indicate that we
1591 // are reading an undefined value from SrcRegD, but a proper value from
1592 // SrcRegS.
1593 MI.getOperand(i: 1).setIsUndef();
1594 MIB.addReg(RegNo: SrcRegS, flags: RegState::Implicit);
1595
1596 // SrcRegD may actually contain an unrelated value in the ssub_1
1597 // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
1598 if (MI.getOperand(i: 1).isKill()) {
1599 MI.getOperand(i: 1).setIsKill(false);
1600 MI.addRegisterKilled(IncomingReg: SrcRegS, RegInfo: TRI, AddIfNotFound: true);
1601 }
1602
1603 LLVM_DEBUG(dbgs() << "replaced by: " << MI);
1604 return true;
1605}
1606
1607/// Create a copy of a const pool value. Update CPI to the new index and return
1608/// the label UID.
1609static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1610 MachineConstantPool *MCP = MF.getConstantPool();
1611 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1612
1613 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1614 assert(MCPE.isMachineConstantPoolEntry() &&
1615 "Expecting a machine constantpool entry!");
1616 ARMConstantPoolValue *ACPV =
1617 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1618
1619 unsigned PCLabelId = AFI->createPICLabelUId();
1620 ARMConstantPoolValue *NewCPV = nullptr;
1621
1622 // FIXME: The below assumes PIC relocation model and that the function
1623 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1624 // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1625 // instructions, so that's probably OK, but is PIC always correct when
1626 // we get here?
1627 if (ACPV->isGlobalValue())
1628 NewCPV = ARMConstantPoolConstant::Create(
1629 C: cast<ARMConstantPoolConstant>(Val: ACPV)->getGV(), ID: PCLabelId, Kind: ARMCP::CPValue,
1630 PCAdj: 4, Modifier: ACPV->getModifier(), AddCurrentAddress: ACPV->mustAddCurrentAddress());
1631 else if (ACPV->isExtSymbol())
1632 NewCPV = ARMConstantPoolSymbol::
1633 Create(C&: MF.getFunction().getContext(),
1634 s: cast<ARMConstantPoolSymbol>(Val: ACPV)->getSymbol(), ID: PCLabelId, PCAdj: 4);
1635 else if (ACPV->isBlockAddress())
1636 NewCPV = ARMConstantPoolConstant::
1637 Create(C: cast<ARMConstantPoolConstant>(Val: ACPV)->getBlockAddress(), ID: PCLabelId,
1638 Kind: ARMCP::CPBlockAddress, PCAdj: 4);
1639 else if (ACPV->isLSDA())
1640 NewCPV = ARMConstantPoolConstant::Create(C: &MF.getFunction(), ID: PCLabelId,
1641 Kind: ARMCP::CPLSDA, PCAdj: 4);
1642 else if (ACPV->isMachineBasicBlock())
1643 NewCPV = ARMConstantPoolMBB::
1644 Create(C&: MF.getFunction().getContext(),
1645 mbb: cast<ARMConstantPoolMBB>(Val: ACPV)->getMBB(), ID: PCLabelId, PCAdj: 4);
1646 else
1647 llvm_unreachable("Unexpected ARM constantpool value type!!");
1648 CPI = MCP->getConstantPoolIndex(V: NewCPV, Alignment: MCPE.getAlign());
1649 return PCLabelId;
1650}
1651
1652void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
1653 MachineBasicBlock::iterator I,
1654 Register DestReg, unsigned SubIdx,
1655 const MachineInstr &Orig,
1656 const TargetRegisterInfo &TRI) const {
1657 unsigned Opcode = Orig.getOpcode();
1658 switch (Opcode) {
1659 default: {
1660 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig: &Orig);
1661 MI->substituteRegister(FromReg: Orig.getOperand(i: 0).getReg(), ToReg: DestReg, SubIdx, RegInfo: TRI);
1662 MBB.insert(I, MI);
1663 break;
1664 }
1665 case ARM::tLDRpci_pic:
1666 case ARM::t2LDRpci_pic: {
1667 MachineFunction &MF = *MBB.getParent();
1668 unsigned CPI = Orig.getOperand(i: 1).getIndex();
1669 unsigned PCLabelId = duplicateCPV(MF, CPI);
1670 BuildMI(BB&: MBB, I, MIMD: Orig.getDebugLoc(), MCID: get(Opcode), DestReg)
1671 .addConstantPoolIndex(Idx: CPI)
1672 .addImm(Val: PCLabelId)
1673 .cloneMemRefs(OtherMI: Orig);
1674 break;
1675 }
1676 }
1677}
1678
1679MachineInstr &
1680ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
1681 MachineBasicBlock::iterator InsertBefore,
1682 const MachineInstr &Orig) const {
1683 MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
1684 MachineBasicBlock::instr_iterator I = Cloned.getIterator();
1685 for (;;) {
1686 switch (I->getOpcode()) {
1687 case ARM::tLDRpci_pic:
1688 case ARM::t2LDRpci_pic: {
1689 MachineFunction &MF = *MBB.getParent();
1690 unsigned CPI = I->getOperand(i: 1).getIndex();
1691 unsigned PCLabelId = duplicateCPV(MF, CPI);
1692 I->getOperand(i: 1).setIndex(CPI);
1693 I->getOperand(i: 2).setImm(PCLabelId);
1694 break;
1695 }
1696 }
1697 if (!I->isBundledWithSucc())
1698 break;
1699 ++I;
1700 }
1701 return Cloned;
1702}
1703
1704bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
1705 const MachineInstr &MI1,
1706 const MachineRegisterInfo *MRI) const {
1707 unsigned Opcode = MI0.getOpcode();
1708 if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
1709 Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
1710 Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1711 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1712 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1713 Opcode == ARM::t2MOV_ga_pcrel) {
1714 if (MI1.getOpcode() != Opcode)
1715 return false;
1716 if (MI0.getNumOperands() != MI1.getNumOperands())
1717 return false;
1718
1719 const MachineOperand &MO0 = MI0.getOperand(i: 1);
1720 const MachineOperand &MO1 = MI1.getOperand(i: 1);
1721 if (MO0.getOffset() != MO1.getOffset())
1722 return false;
1723
1724 if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
1725 Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
1726 Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
1727 Opcode == ARM::t2MOV_ga_pcrel)
1728 // Ignore the PC labels.
1729 return MO0.getGlobal() == MO1.getGlobal();
1730
1731 const MachineFunction *MF = MI0.getParent()->getParent();
1732 const MachineConstantPool *MCP = MF->getConstantPool();
1733 int CPI0 = MO0.getIndex();
1734 int CPI1 = MO1.getIndex();
1735 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1736 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1737 bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1738 bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1739 if (isARMCP0 && isARMCP1) {
1740 ARMConstantPoolValue *ACPV0 =
1741 static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1742 ARMConstantPoolValue *ACPV1 =
1743 static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1744 return ACPV0->hasSameValue(ACPV: ACPV1);
1745 } else if (!isARMCP0 && !isARMCP1) {
1746 return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1747 }
1748 return false;
1749 } else if (Opcode == ARM::PICLDR) {
1750 if (MI1.getOpcode() != Opcode)
1751 return false;
1752 if (MI0.getNumOperands() != MI1.getNumOperands())
1753 return false;
1754
1755 Register Addr0 = MI0.getOperand(i: 1).getReg();
1756 Register Addr1 = MI1.getOperand(i: 1).getReg();
1757 if (Addr0 != Addr1) {
1758 if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual())
1759 return false;
1760
1761 // This assumes SSA form.
1762 MachineInstr *Def0 = MRI->getVRegDef(Reg: Addr0);
1763 MachineInstr *Def1 = MRI->getVRegDef(Reg: Addr1);
1764      // Check if the loaded values, e.g. a constantpool or a global address, are
1765      // the same.
1766 if (!produceSameValue(MI0: *Def0, MI1: *Def1, MRI))
1767 return false;
1768 }
1769
1770 for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1771 // %12 = PICLDR %11, 0, 14, %noreg
1772 const MachineOperand &MO0 = MI0.getOperand(i);
1773 const MachineOperand &MO1 = MI1.getOperand(i);
1774 if (!MO0.isIdenticalTo(Other: MO1))
1775 return false;
1776 }
1777 return true;
1778 }
1779
1780 return MI0.isIdenticalTo(Other: MI1, Check: MachineInstr::IgnoreVRegDefs);
1781}
1782
1783/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1784/// determine if two loads are loading from the same base address. It should
1785/// only return true if the base pointers are the same and the only difference
1786/// between the two addresses is the offset. It also returns the offsets by
1787/// reference.
1788///
1789/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1790/// is permanently disabled.
1791bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1792 int64_t &Offset1,
1793 int64_t &Offset2) const {
1794 // Don't worry about Thumb: just ARM and Thumb2.
1795 if (Subtarget.isThumb1Only()) return false;
1796
1797 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1798 return false;
1799
1800 auto IsLoadOpcode = [&](unsigned Opcode) {
1801 switch (Opcode) {
1802 default:
1803 return false;
1804 case ARM::LDRi12:
1805 case ARM::LDRBi12:
1806 case ARM::LDRD:
1807 case ARM::LDRH:
1808 case ARM::LDRSB:
1809 case ARM::LDRSH:
1810 case ARM::VLDRD:
1811 case ARM::VLDRS:
1812 case ARM::t2LDRi8:
1813 case ARM::t2LDRBi8:
1814 case ARM::t2LDRDi8:
1815 case ARM::t2LDRSHi8:
1816 case ARM::t2LDRi12:
1817 case ARM::t2LDRBi12:
1818 case ARM::t2LDRSHi12:
1819 return true;
1820 }
1821 };
1822
1823 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
1824 !IsLoadOpcode(Load2->getMachineOpcode()))
1825 return false;
1826
1827 // Check if base addresses and chain operands match.
1828 if (Load1->getOperand(Num: 0) != Load2->getOperand(Num: 0) ||
1829 Load1->getOperand(Num: 4) != Load2->getOperand(Num: 4))
1830 return false;
1831
1832 // Index should be Reg0.
1833 if (Load1->getOperand(Num: 3) != Load2->getOperand(Num: 3))
1834 return false;
1835
1836 // Determine the offsets.
1837 if (isa<ConstantSDNode>(Val: Load1->getOperand(Num: 1)) &&
1838 isa<ConstantSDNode>(Val: Load2->getOperand(Num: 1))) {
1839 Offset1 = cast<ConstantSDNode>(Val: Load1->getOperand(Num: 1))->getSExtValue();
1840 Offset2 = cast<ConstantSDNode>(Val: Load2->getOperand(Num: 1))->getSExtValue();
1841 return true;
1842 }
1843
1844 return false;
1845}
1846
1847/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
1848/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1849/// be scheduled together. On some targets if two loads are loading from
1850/// addresses in the same cache line, it's better if they are scheduled
1851/// together. This function takes two integers that represent the load offsets
1852/// from the common base address. It returns true if it decides it's desirable
1853/// to schedule the two loads together. "NumLoads" is the number of loads that
1854/// have already been scheduled after Load1.
1855///
1856/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1857/// is permanently disabled.
1858bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1859 int64_t Offset1, int64_t Offset2,
1860 unsigned NumLoads) const {
1861 // Don't worry about Thumb: just ARM and Thumb2.
1862 if (Subtarget.isThumb1Only()) return false;
1863
1864 assert(Offset2 > Offset1);
1865
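  // Assume loads whose offsets differ by more than 64 double-words (over 512
  // bytes) are too far apart to benefit from being scheduled together.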
1866 if ((Offset2 - Offset1) / 8 > 64)
1867 return false;
1868
1869  // Check if the machine opcodes are different. If they are, we conservatively
1870  // treat the loads as not being from the same base address, EXCEPT for Thumb2
1871  // byte loads where one is t2LDRBi8 and the other t2LDRBi12. Those are
1872  // considered the same because they are just different encoding forms of the
1873  // same basic instruction.
1874 if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1875 !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1876 Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1877 (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1878 Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1879 return false; // FIXME: overly conservative?
1880
1881 // Four loads in a row should be sufficient.
1882 if (NumLoads >= 3)
1883 return false;
1884
1885 return true;
1886}
1887
1888bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1889 const MachineBasicBlock *MBB,
1890 const MachineFunction &MF) const {
1891 // Debug info is never a scheduling boundary. It's necessary to be explicit
1892 // due to the special treatment of IT instructions below, otherwise a
1893 // dbg_value followed by an IT will result in the IT instruction being
1894 // considered a scheduling hazard, which is wrong. It should be the actual
1895 // instruction preceding the dbg_value instruction(s), just like it is
1896 // when debug info is not present.
1897 if (MI.isDebugInstr())
1898 return false;
1899
1900 // Terminators and labels can't be scheduled around.
1901 if (MI.isTerminator() || MI.isPosition())
1902 return true;
1903
1904 // INLINEASM_BR can jump to another block
1905 if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
1906 return true;
1907
1908 if (isSEHInstruction(MI))
1909 return true;
1910
1911 // Treat the start of the IT block as a scheduling boundary, but schedule
1912 // t2IT along with all instructions following it.
1913 // FIXME: This is a big hammer. But the alternative is to add all potential
1914 // true and anti dependencies to IT block instructions as implicit operands
1915 // to the t2IT instruction. The added compile time and complexity does not
1916 // seem worth it.
1917 MachineBasicBlock::const_iterator I = MI;
1918 // Make sure to skip any debug instructions
1919 while (++I != MBB->end() && I->isDebugInstr())
1920 ;
1921 if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1922 return true;
1923
1924 // Don't attempt to schedule around any instruction that defines
1925 // a stack-oriented pointer, as it's unlikely to be profitable. This
1926 // saves compile time, because it doesn't require every single
1927 // stack slot reference to depend on the instruction that does the
1928 // modification.
1929 // Calls don't actually change the stack pointer, even if they have imp-defs.
1930 // No ARM calling conventions change the stack pointer. (X86 calling
1931 // conventions sometimes do).
1932 if (!MI.isCall() && MI.definesRegister(Reg: ARM::SP, /*TRI=*/nullptr))
1933 return true;
1934
1935 return false;
1936}
1937
1938bool ARMBaseInstrInfo::
1939isProfitableToIfCvt(MachineBasicBlock &MBB,
1940 unsigned NumCycles, unsigned ExtraPredCycles,
1941 BranchProbability Probability) const {
1942 if (!NumCycles)
1943 return false;
1944
1945 // If we are optimizing for size, see if the branch in the predecessor can be
1946 // lowered to cbn?z by the constant island lowering pass, and return false if
1947 // so. This results in a shorter instruction sequence.
1948 if (MBB.getParent()->getFunction().hasOptSize()) {
1949 MachineBasicBlock *Pred = *MBB.pred_begin();
1950 if (!Pred->empty()) {
1951 MachineInstr *LastMI = &*Pred->rbegin();
1952 if (LastMI->getOpcode() == ARM::t2Bcc) {
1953 const TargetRegisterInfo *TRI = &getRegisterInfo();
1954 MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br: LastMI, TRI);
1955 if (CmpMI)
1956 return false;
1957 }
1958 }
1959 }
1960 return isProfitableToIfCvt(TMBB&: MBB, NumT: NumCycles, ExtraT: ExtraPredCycles,
1961 FMBB&: MBB, NumF: 0, ExtraF: 0, Probability);
1962}
1963
1964bool ARMBaseInstrInfo::
1965isProfitableToIfCvt(MachineBasicBlock &TBB,
1966 unsigned TCycles, unsigned TExtra,
1967 MachineBasicBlock &FBB,
1968 unsigned FCycles, unsigned FExtra,
1969 BranchProbability Probability) const {
1970 if (!TCycles)
1971 return false;
1972
1973  // In Thumb code we often end up trading one branch for an IT block, and
1974  // if the block has to be cloned, that can increase code size. Prevent
1975  // blocks with multiple predecessors from being if-converted to avoid this
1976  // cloning.
1977 if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
1978 if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
1979 return false;
1980 }
1981
1982 // Attempt to estimate the relative costs of predication versus branching.
1983  // Here we scale up each component of UnpredCost to avoid precision issues
1984  // when scaling TCycles/FCycles by Probability.
1985 const unsigned ScalingUpFactor = 1024;
1986
1987 unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
1988 unsigned UnpredCost;
1989 if (!Subtarget.hasBranchPredictor()) {
1990 // When we don't have a branch predictor it's always cheaper to not take a
1991 // branch than take it, so we have to take that into account.
1992 unsigned NotTakenBranchCost = 1;
1993 unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
1994 unsigned TUnpredCycles, FUnpredCycles;
1995 if (!FCycles) {
1996 // Triangle: TBB is the fallthrough
1997 TUnpredCycles = TCycles + NotTakenBranchCost;
1998 FUnpredCycles = TakenBranchCost;
1999 } else {
2000 // Diamond: TBB is the block that is branched to, FBB is the fallthrough
2001 TUnpredCycles = TCycles + TakenBranchCost;
2002 FUnpredCycles = FCycles + NotTakenBranchCost;
2003 // The branch at the end of FBB will disappear when it's predicated, so
2004 // discount it from PredCost.
2005 PredCost -= 1 * ScalingUpFactor;
2006 }
2007    // The total cost is the cost of each path scaled by its probability.
2008 unsigned TUnpredCost = Probability.scale(Num: TUnpredCycles * ScalingUpFactor);
2009 unsigned FUnpredCost = Probability.getCompl().scale(Num: FUnpredCycles * ScalingUpFactor);
2010 UnpredCost = TUnpredCost + FUnpredCost;
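    // Illustrative numbers (assumed, not measured): a triangle with TCycles = 2,
    // TExtra = 1, FCycles = FExtra = 0, Probability = 1/2 and a misprediction
    // penalty of 4 gives TUnpredCycles = 3 and FUnpredCycles = 4, so
    // UnpredCost = (3 + 4) / 2 * 1024 = 3584, versus
    // PredCost = (2 + 0 + 1 + 0) * 1024 = 3072, so predication is preferred.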
2011    // When predicating, assume that the first IT can be folded away but later
2012    // ones cost one cycle each.
2013 if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
2014 PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
2015 }
2016 } else {
2017 unsigned TUnpredCost = Probability.scale(Num: TCycles * ScalingUpFactor);
2018 unsigned FUnpredCost =
2019 Probability.getCompl().scale(Num: FCycles * ScalingUpFactor);
2020 UnpredCost = TUnpredCost + FUnpredCost;
2021 UnpredCost += 1 * ScalingUpFactor; // The branch itself
2022 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
2023 }
2024
2025 return PredCost <= UnpredCost;
2026}
2027
2028unsigned
2029ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
2030 unsigned NumInsts) const {
2031 // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
2032 // ARM has a condition code field in every predicable instruction, using it
2033 // doesn't change code size.
2034 if (!Subtarget.isThumb2())
2035 return 0;
2036
2037  // It's possible that IT blocks are restricted to a single instruction.
2038 unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
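  // e.g. predicating 6 instructions normally needs divideCeil(6, 4) = 2 IT
  // instructions (4 bytes), but 6 ITs (12 bytes) when each IT may only cover
  // a single instruction.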
2039 return divideCeil(Numerator: NumInsts, Denominator: MaxInsts) * 2;
2040}
2041
2042unsigned
2043ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
2044 // If this branch is likely to be folded into the comparison to form a
2045 // CB(N)Z, then removing it won't reduce code size at all, because that will
2046 // just replace the CB(N)Z with a CMP.
2047 if (MI.getOpcode() == ARM::t2Bcc &&
2048 findCMPToFoldIntoCBZ(Br: &MI, TRI: &getRegisterInfo()))
2049 return 0;
2050
2051 unsigned Size = getInstSizeInBytes(MI);
2052
2053 // For Thumb2, all branches are 32-bit instructions during the if conversion
2054 // pass, but may be replaced with 16-bit instructions during size reduction.
2055 // Since the branches considered by if conversion tend to be forward branches
2056 // over small basic blocks, they are very likely to be in range for the
2057 // narrow instructions, so we assume the final code size will be half what it
2058 // currently is.
2059 if (Subtarget.isThumb2())
2060 Size /= 2;
2061
2062 return Size;
2063}
2064
2065bool
2066ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
2067 MachineBasicBlock &FMBB) const {
2068 // Reduce false anti-dependencies to let the target's out-of-order execution
2069 // engine do its thing.
2070 return Subtarget.isProfitableToUnpredicate();
2071}
2072
2073/// getInstrPredicate - If instruction is predicated, returns its predicate
2074/// condition, otherwise returns AL. It also returns the condition code
2075/// register by reference.
2076ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
2077 Register &PredReg) {
2078 int PIdx = MI.findFirstPredOperandIdx();
2079 if (PIdx == -1) {
2080 PredReg = 0;
2081 return ARMCC::AL;
2082 }
2083
2084 PredReg = MI.getOperand(i: PIdx+1).getReg();
2085 return (ARMCC::CondCodes)MI.getOperand(i: PIdx).getImm();
2086}
2087
2088unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
2089 if (Opc == ARM::B)
2090 return ARM::Bcc;
2091 if (Opc == ARM::tB)
2092 return ARM::tBcc;
2093 if (Opc == ARM::t2B)
2094 return ARM::t2Bcc;
2095
2096 llvm_unreachable("Unknown unconditional branch opcode!");
2097}
2098
2099MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2100 bool NewMI,
2101 unsigned OpIdx1,
2102 unsigned OpIdx2) const {
2103 switch (MI.getOpcode()) {
2104 case ARM::MOVCCr:
2105 case ARM::t2MOVCCr: {
2106 // MOVCC can be commuted by inverting the condition.
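    // e.g. (operand rendering assumed)
    //   $r0 = MOVCCr $r1, $r2, 0 /* eq */, $cpsr
    // computes the same value as
    //   $r0 = MOVCCr $r2, $r1, 1 /* ne */, $cpsr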
2107 Register PredReg;
2108 ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
2109 // MOVCC AL can't be inverted. Shouldn't happen.
2110 if (CC == ARMCC::AL || PredReg != ARM::CPSR)
2111 return nullptr;
2112 MachineInstr *CommutedMI =
2113 TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2114 if (!CommutedMI)
2115 return nullptr;
2116 // After swapping the MOVCC operands, also invert the condition.
2117 CommutedMI->getOperand(i: CommutedMI->findFirstPredOperandIdx())
2118 .setImm(ARMCC::getOppositeCondition(CC));
2119 return CommutedMI;
2120 }
2121 }
2122 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2123}
2124
2125/// Identify instructions that can be folded into a MOVCC instruction, and
2126/// return the defining instruction.
2127MachineInstr *
2128ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
2129 const TargetInstrInfo *TII) const {
2130 if (!Reg.isVirtual())
2131 return nullptr;
2132 if (!MRI.hasOneNonDBGUse(RegNo: Reg))
2133 return nullptr;
2134 MachineInstr *MI = MRI.getVRegDef(Reg);
2135 if (!MI)
2136 return nullptr;
2137 // Check if MI can be predicated and folded into the MOVCC.
2138 if (!isPredicable(MI: *MI))
2139 return nullptr;
2140 // Check if MI has any non-dead defs or physreg uses. This also detects
2141 // predicated instructions which will be reading CPSR.
2142 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands(), N: 1)) {
2143 // Reject frame index operands, PEI can't handle the predicated pseudos.
2144 if (MO.isFI() || MO.isCPI() || MO.isJTI())
2145 return nullptr;
2146 if (!MO.isReg())
2147 continue;
2148 // MI can't have any tied operands, that would conflict with predication.
2149 if (MO.isTied())
2150 return nullptr;
2151 if (MO.getReg().isPhysical())
2152 return nullptr;
2153 if (MO.isDef() && !MO.isDead())
2154 return nullptr;
2155 }
2156 bool DontMoveAcrossStores = true;
2157 if (!MI->isSafeToMove(SawStore&: DontMoveAcrossStores))
2158 return nullptr;
2159 return MI;
2160}
2161
2162bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
2163 SmallVectorImpl<MachineOperand> &Cond,
2164 unsigned &TrueOp, unsigned &FalseOp,
2165 bool &Optimizable) const {
2166 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2167 "Unknown select instruction");
2168 // MOVCC operands:
2169 // 0: Def.
2170 // 1: True use.
2171 // 2: False use.
2172 // 3: Condition code.
2173 // 4: CPSR use.
2174 TrueOp = 1;
2175 FalseOp = 2;
2176 Cond.push_back(Elt: MI.getOperand(i: 3));
2177 Cond.push_back(Elt: MI.getOperand(i: 4));
2178 // We can always fold a def.
2179 Optimizable = true;
2180 return false;
2181}
2182
2183MachineInstr *
2184ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
2185 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
2186 bool PreferFalse) const {
2187 assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
2188 "Unknown select instruction");
2189 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2190 MachineInstr *DefMI = canFoldIntoMOVCC(Reg: MI.getOperand(i: 2).getReg(), MRI, TII: this);
2191 bool Invert = !DefMI;
2192 if (!DefMI)
2193 DefMI = canFoldIntoMOVCC(Reg: MI.getOperand(i: 1).getReg(), MRI, TII: this);
2194 if (!DefMI)
2195 return nullptr;
2196
2197 // Find new register class to use.
2198 MachineOperand FalseReg = MI.getOperand(i: Invert ? 2 : 1);
2199 MachineOperand TrueReg = MI.getOperand(i: Invert ? 1 : 2);
2200 Register DestReg = MI.getOperand(i: 0).getReg();
2201 const TargetRegisterClass *FalseClass = MRI.getRegClass(Reg: FalseReg.getReg());
2202 const TargetRegisterClass *TrueClass = MRI.getRegClass(Reg: TrueReg.getReg());
2203 if (!MRI.constrainRegClass(Reg: DestReg, RC: FalseClass))
2204 return nullptr;
2205 if (!MRI.constrainRegClass(Reg: DestReg, RC: TrueClass))
2206 return nullptr;
2207
2208 // Create a new predicated version of DefMI.
2209 // Rfalse is the first use.
2210 MachineInstrBuilder NewMI =
2211 BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: DefMI->getDesc(), DestReg);
2212
2213 // Copy all the DefMI operands, excluding its (null) predicate.
2214 const MCInstrDesc &DefDesc = DefMI->getDesc();
2215 for (unsigned i = 1, e = DefDesc.getNumOperands();
2216 i != e && !DefDesc.operands()[i].isPredicate(); ++i)
2217 NewMI.add(MO: DefMI->getOperand(i));
2218
2219 unsigned CondCode = MI.getOperand(i: 3).getImm();
2220 if (Invert)
2221 NewMI.addImm(Val: ARMCC::getOppositeCondition(CC: ARMCC::CondCodes(CondCode)));
2222 else
2223 NewMI.addImm(Val: CondCode);
2224 NewMI.add(MO: MI.getOperand(i: 4));
2225
2226 // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
2227 if (NewMI->hasOptionalDef())
2228 NewMI.add(MO: condCodeOp());
2229
2230 // The output register value when the predicate is false is an implicit
2231 // register operand tied to the first def.
2232 // The tie makes the register allocator ensure the FalseReg is allocated the
2233 // same register as operand 0.
2234 FalseReg.setImplicit();
2235 NewMI.add(MO: FalseReg);
2236 NewMI->tieOperands(DefIdx: 0, UseIdx: NewMI->getNumOperands() - 1);
2237
2238 // Update SeenMIs set: register newly created MI and erase removed DefMI.
2239 SeenMIs.insert(Ptr: NewMI);
2240 SeenMIs.erase(Ptr: DefMI);
2241
2242 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
2243  // DefMI would be invalid when transferred inside the loop. Checking for a
2244 // loop is expensive, but at least remove kill flags if they are in different
2245 // BBs.
2246 if (DefMI->getParent() != MI.getParent())
2247 NewMI->clearKillInfo();
2248
2249 // The caller will erase MI, but not DefMI.
2250 DefMI->eraseFromParent();
2251 return NewMI;
2252}
2253
2254/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
2255/// instruction is encoded with an 'S' bit is determined by the optional CPSR
2256/// def operand.
2257///
2258/// This will go away once we can teach tblgen how to set the optional CPSR def
2259/// operand itself.
2260struct AddSubFlagsOpcodePair {
2261 uint16_t PseudoOpc;
2262 uint16_t MachineOpc;
2263};
2264
2265static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
2266 {.PseudoOpc: ARM::ADDSri, .MachineOpc: ARM::ADDri},
2267 {.PseudoOpc: ARM::ADDSrr, .MachineOpc: ARM::ADDrr},
2268 {.PseudoOpc: ARM::ADDSrsi, .MachineOpc: ARM::ADDrsi},
2269 {.PseudoOpc: ARM::ADDSrsr, .MachineOpc: ARM::ADDrsr},
2270
2271 {.PseudoOpc: ARM::SUBSri, .MachineOpc: ARM::SUBri},
2272 {.PseudoOpc: ARM::SUBSrr, .MachineOpc: ARM::SUBrr},
2273 {.PseudoOpc: ARM::SUBSrsi, .MachineOpc: ARM::SUBrsi},
2274 {.PseudoOpc: ARM::SUBSrsr, .MachineOpc: ARM::SUBrsr},
2275
2276 {.PseudoOpc: ARM::RSBSri, .MachineOpc: ARM::RSBri},
2277 {.PseudoOpc: ARM::RSBSrsi, .MachineOpc: ARM::RSBrsi},
2278 {.PseudoOpc: ARM::RSBSrsr, .MachineOpc: ARM::RSBrsr},
2279
2280 {.PseudoOpc: ARM::tADDSi3, .MachineOpc: ARM::tADDi3},
2281 {.PseudoOpc: ARM::tADDSi8, .MachineOpc: ARM::tADDi8},
2282 {.PseudoOpc: ARM::tADDSrr, .MachineOpc: ARM::tADDrr},
2283 {.PseudoOpc: ARM::tADCS, .MachineOpc: ARM::tADC},
2284
2285 {.PseudoOpc: ARM::tSUBSi3, .MachineOpc: ARM::tSUBi3},
2286 {.PseudoOpc: ARM::tSUBSi8, .MachineOpc: ARM::tSUBi8},
2287 {.PseudoOpc: ARM::tSUBSrr, .MachineOpc: ARM::tSUBrr},
2288 {.PseudoOpc: ARM::tSBCS, .MachineOpc: ARM::tSBC},
2289 {.PseudoOpc: ARM::tRSBS, .MachineOpc: ARM::tRSB},
2290 {.PseudoOpc: ARM::tLSLSri, .MachineOpc: ARM::tLSLri},
2291
2292 {.PseudoOpc: ARM::t2ADDSri, .MachineOpc: ARM::t2ADDri},
2293 {.PseudoOpc: ARM::t2ADDSrr, .MachineOpc: ARM::t2ADDrr},
2294 {.PseudoOpc: ARM::t2ADDSrs, .MachineOpc: ARM::t2ADDrs},
2295
2296 {.PseudoOpc: ARM::t2SUBSri, .MachineOpc: ARM::t2SUBri},
2297 {.PseudoOpc: ARM::t2SUBSrr, .MachineOpc: ARM::t2SUBrr},
2298 {.PseudoOpc: ARM::t2SUBSrs, .MachineOpc: ARM::t2SUBrs},
2299
2300 {.PseudoOpc: ARM::t2RSBSri, .MachineOpc: ARM::t2RSBri},
2301 {.PseudoOpc: ARM::t2RSBSrs, .MachineOpc: ARM::t2RSBrs},
2302};
2303
2304unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
2305 for (const auto &Entry : AddSubFlagsOpcodeMap)
2306 if (OldOpc == Entry.PseudoOpc)
2307 return Entry.MachineOpc;
2308 return 0;
2309}
2310
2311void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
2312 MachineBasicBlock::iterator &MBBI,
2313 const DebugLoc &dl, Register DestReg,
2314 Register BaseReg, int NumBytes,
2315 ARMCC::CondCodes Pred, Register PredReg,
2316 const ARMBaseInstrInfo &TII,
2317 unsigned MIFlags) {
2318 if (NumBytes == 0 && DestReg != BaseReg) {
2319 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::MOVr), DestReg)
2320 .addReg(RegNo: BaseReg, flags: RegState::Kill)
2321 .add(MOs: predOps(Pred, PredReg))
2322 .add(MO: condCodeOp())
2323 .setMIFlags(MIFlags);
2324 return;
2325 }
2326
2327 bool isSub = NumBytes < 0;
2328 if (isSub) NumBytes = -NumBytes;
2329
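  // Decompose the immediate into rotated 8-bit chunks, emitting one ADD/SUB
  // per chunk. For example (illustrative), adding 0x10004 to BaseReg emits
  //   add Dest, Base, #4
  //   add Dest, Dest, #0x10000
  // because 0x10004 is not itself a valid shifter-operand immediate.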
2330 while (NumBytes) {
2331 unsigned RotAmt = ARM_AM::getSOImmValRotate(Imm: NumBytes);
2332 unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(V: 0xFF, R: RotAmt);
2333 assert(ThisVal && "Didn't extract field correctly");
2334
2335 // We will handle these bits from offset, clear them.
2336 NumBytes &= ~ThisVal;
2337
2338 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
2339
2340 // Build the new ADD / SUB.
2341 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
2342 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: Opc), DestReg)
2343 .addReg(RegNo: BaseReg, flags: RegState::Kill)
2344 .addImm(Val: ThisVal)
2345 .add(MOs: predOps(Pred, PredReg))
2346 .add(MO: condCodeOp())
2347 .setMIFlags(MIFlags);
2348 BaseReg = DestReg;
2349 }
2350}
2351
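/// Try to fold an SP adjustment of NumBytes into the push/pop instruction MI
/// by transferring extra registers. For example (illustrative, minsize only),
/// "push {r4, lr}" plus "sub sp, sp, #8" can become "push {r2, r3, r4, lr}",
/// pushing two don't-care registers instead of adjusting SP separately.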
2352bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
2353 MachineFunction &MF, MachineInstr *MI,
2354 unsigned NumBytes) {
2355  // This optimisation potentially adds lots of load and store
2356  // micro-operations; it is really only a benefit for code size.
2357 if (!Subtarget.hasMinSize())
2358 return false;
2359
2360 // If only one register is pushed/popped, LLVM can use an LDR/STR
2361 // instead. We can't modify those so make sure we're dealing with an
2362 // instruction we understand.
2363 bool IsPop = isPopOpcode(Opc: MI->getOpcode());
2364 bool IsPush = isPushOpcode(Opc: MI->getOpcode());
2365 if (!IsPush && !IsPop)
2366 return false;
2367
2368 bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
2369 MI->getOpcode() == ARM::VLDMDIA_UPD;
2370 bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
2371 MI->getOpcode() == ARM::tPOP ||
2372 MI->getOpcode() == ARM::tPOP_RET;
2373
2374 assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
2375 MI->getOperand(1).getReg() == ARM::SP)) &&
2376 "trying to fold sp update into non-sp-updating push/pop");
2377
2378  // The VFP push & pop act on D-registers, so we can only correctly fold in an
2379  // adjustment that is a multiple of 8 bytes. Similarly, each GPR is 4 bytes.
2380  // Don't try if this is violated.
2381 if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
2382 return false;
2383
2384 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
2385 // pred) so the list starts at 4. Thumb1 starts after the predicate.
2386 int RegListIdx = IsT1PushPop ? 2 : 4;
2387
2388 // Calculate the space we'll need in terms of registers.
2389 unsigned RegsNeeded;
2390 const TargetRegisterClass *RegClass;
2391 if (IsVFPPushPop) {
2392 RegsNeeded = NumBytes / 8;
2393 RegClass = &ARM::DPRRegClass;
2394 } else {
2395 RegsNeeded = NumBytes / 4;
2396 RegClass = &ARM::GPRRegClass;
2397 }
2398
2399 // We're going to have to strip all list operands off before
2400 // re-adding them since the order matters, so save the existing ones
2401 // for later.
2402 SmallVector<MachineOperand, 4> RegList;
2403
2404 // We're also going to need the first register transferred by this
2405 // instruction, which won't necessarily be the first register in the list.
2406 unsigned FirstRegEnc = -1;
2407
2408 const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
2409 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
2410 MachineOperand &MO = MI->getOperand(i);
2411 RegList.push_back(Elt: MO);
2412
2413 if (MO.isReg() && !MO.isImplicit() &&
2414 TRI->getEncodingValue(Reg: MO.getReg()) < FirstRegEnc)
2415 FirstRegEnc = TRI->getEncodingValue(Reg: MO.getReg());
2416 }
2417
2418 const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(MF: &MF);
2419
2420 // Now try to find enough space in the reglist to allocate NumBytes.
2421 for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
2422 --CurRegEnc) {
2423 MCRegister CurReg = RegClass->getRegister(i: CurRegEnc);
2424 if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(Reg: ARM::R7))
2425 continue;
2426 if (!IsPop) {
2427 // Pushing any register is completely harmless, mark the register involved
2428 // as undef since we don't care about its value and must not restore it
2429 // during stack unwinding.
2430 RegList.push_back(Elt: MachineOperand::CreateReg(Reg: CurReg, isDef: false, isImp: false,
2431 isKill: false, isDead: false, isUndef: true));
2432 --RegsNeeded;
2433 continue;
2434 }
2435
2436 // However, we can only pop an extra register if it's not live. For
2437 // registers live within the function we might clobber a return value
2438 // register; the other way a register can be live here is if it's
2439 // callee-saved.
2440 if (isCalleeSavedRegister(Reg: CurReg, CSRegs) ||
2441 MI->getParent()->computeRegisterLiveness(TRI, Reg: CurReg, Before: MI) !=
2442 MachineBasicBlock::LQR_Dead) {
2443 // VFP pops don't allow holes in the register list, so any skip is fatal
2444 // for our transformation. GPR pops do, so we should just keep looking.
2445 if (IsVFPPushPop)
2446 return false;
2447 else
2448 continue;
2449 }
2450
2451 // Mark the unimportant registers as <def,dead> in the POP.
2452 RegList.push_back(Elt: MachineOperand::CreateReg(Reg: CurReg, isDef: true, isImp: false, isKill: false,
2453 isDead: true));
2454 --RegsNeeded;
2455 }
2456
2457 if (RegsNeeded > 0)
2458 return false;
2459
2460 // Finally we know we can profitably perform the optimisation so go
2461 // ahead: strip all existing registers off and add them back again
2462 // in the right order.
2463 for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
2464 MI->removeOperand(OpNo: i);
2465
2466 // Add the complete list back in.
2467 MachineInstrBuilder MIB(MF, &*MI);
2468 for (const MachineOperand &MO : llvm::reverse(C&: RegList))
2469 MIB.add(MO);
2470
2471 return true;
2472}
2473
2474bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
2475 Register FrameReg, int &Offset,
2476 const ARMBaseInstrInfo &TII) {
2477 unsigned Opcode = MI.getOpcode();
2478 const MCInstrDesc &Desc = MI.getDesc();
2479 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
2480 bool isSub = false;
2481
2482 // Memory operands in inline assembly always use AddrMode2.
2483 if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
2484 AddrMode = ARMII::AddrMode2;
2485
2486 if (Opcode == ARM::ADDri) {
2487 Offset += MI.getOperand(i: FrameRegIdx+1).getImm();
2488 if (Offset == 0) {
2489 // Turn it into a move.
2490 MI.setDesc(TII.get(Opcode: ARM::MOVr));
2491 MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
2492 MI.removeOperand(OpNo: FrameRegIdx+1);
2493 Offset = 0;
2494 return true;
2495 } else if (Offset < 0) {
2496 Offset = -Offset;
2497 isSub = true;
2498 MI.setDesc(TII.get(Opcode: ARM::SUBri));
2499 }
2500
2501 // Common case: small offset, fits into instruction.
2502 if (ARM_AM::getSOImmVal(Arg: Offset) != -1) {
2503 // Replace the FrameIndex with sp / fp
2504 MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
2505 MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: Offset);
2506 Offset = 0;
2507 return true;
2508 }
2509
2510    // Otherwise, pull as much of the immediate into this ADDri/SUBri
2511 // as possible.
2512 unsigned RotAmt = ARM_AM::getSOImmValRotate(Imm: Offset);
2513 unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(V: 0xFF, R: RotAmt);
2514
2515 // We will handle these bits from offset, clear them.
2516 Offset &= ~ThisImmVal;
2517
2518 // Get the properly encoded SOImmVal field.
2519 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2520 "Bit extraction didn't work?");
2521 MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: ThisImmVal);
2522 } else {
2523 unsigned ImmIdx = 0;
2524 int InstrOffs = 0;
2525 unsigned NumBits = 0;
2526 unsigned Scale = 1;
2527 switch (AddrMode) {
2528 case ARMII::AddrMode_i12:
2529 ImmIdx = FrameRegIdx + 1;
2530 InstrOffs = MI.getOperand(i: ImmIdx).getImm();
2531 NumBits = 12;
2532 break;
2533 case ARMII::AddrMode2:
2534 ImmIdx = FrameRegIdx+2;
2535 InstrOffs = ARM_AM::getAM2Offset(AM2Opc: MI.getOperand(i: ImmIdx).getImm());
2536 if (ARM_AM::getAM2Op(AM2Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub)
2537 InstrOffs *= -1;
2538 NumBits = 12;
2539 break;
2540 case ARMII::AddrMode3:
2541 ImmIdx = FrameRegIdx+2;
2542 InstrOffs = ARM_AM::getAM3Offset(AM3Opc: MI.getOperand(i: ImmIdx).getImm());
2543 if (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub)
2544 InstrOffs *= -1;
2545 NumBits = 8;
2546 break;
2547 case ARMII::AddrMode4:
2548 case ARMII::AddrMode6:
2549 // Can't fold any offset even if it's zero.
2550 return false;
2551 case ARMII::AddrMode5:
2552 ImmIdx = FrameRegIdx+1;
2553 InstrOffs = ARM_AM::getAM5Offset(AM5Opc: MI.getOperand(i: ImmIdx).getImm());
2554 if (ARM_AM::getAM5Op(AM5Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub)
2555 InstrOffs *= -1;
2556 NumBits = 8;
2557 Scale = 4;
2558 break;
2559 case ARMII::AddrMode5FP16:
2560 ImmIdx = FrameRegIdx+1;
2561 InstrOffs = ARM_AM::getAM5Offset(AM5Opc: MI.getOperand(i: ImmIdx).getImm());
2562 if (ARM_AM::getAM5Op(AM5Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub)
2563 InstrOffs *= -1;
2564 NumBits = 8;
2565 Scale = 2;
2566 break;
2567 case ARMII::AddrModeT2_i7:
2568 case ARMII::AddrModeT2_i7s2:
2569 case ARMII::AddrModeT2_i7s4:
2570 ImmIdx = FrameRegIdx+1;
2571 InstrOffs = MI.getOperand(i: ImmIdx).getImm();
2572 NumBits = 7;
2573 Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
2574 AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
2575 break;
2576 default:
2577 llvm_unreachable("Unsupported addressing mode!");
2578 }
2579
2580 Offset += InstrOffs * Scale;
2581 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2582 if (Offset < 0) {
2583 Offset = -Offset;
2584 isSub = true;
2585 }
2586
2587    // Attempt to fold the address computation if the opcode has offset bits.
2588 if (NumBits > 0) {
2589 // Common case: small offset, fits into instruction.
2590 MachineOperand &ImmOp = MI.getOperand(i: ImmIdx);
2591 int ImmedOffset = Offset / Scale;
2592 unsigned Mask = (1 << NumBits) - 1;
2593 if ((unsigned)Offset <= Mask * Scale) {
2594 // Replace the FrameIndex with sp
2595 MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
2596 // FIXME: When addrmode2 goes away, this will simplify (like the
2597 // T2 version), as the LDR.i12 versions don't need the encoding
2598 // tricks for the offset value.
2599 if (isSub) {
2600 if (AddrMode == ARMII::AddrMode_i12)
2601 ImmedOffset = -ImmedOffset;
2602 else
2603 ImmedOffset |= 1 << NumBits;
2604 }
2605 ImmOp.ChangeToImmediate(ImmVal: ImmedOffset);
2606 Offset = 0;
2607 return true;
2608 }
2609
2610 // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2611 ImmedOffset = ImmedOffset & Mask;
2612 if (isSub) {
2613 if (AddrMode == ARMII::AddrMode_i12)
2614 ImmedOffset = -ImmedOffset;
2615 else
2616 ImmedOffset |= 1 << NumBits;
2617 }
2618 ImmOp.ChangeToImmediate(ImmVal: ImmedOffset);
2619 Offset &= ~(Mask*Scale);
2620 }
2621 }
2622
2623 Offset = (isSub) ? -Offset : Offset;
2624 return Offset == 0;
2625}
2626
2627/// analyzeCompare - For a comparison instruction, return the source registers
2628/// in SrcReg and SrcReg2 if having two register operands, and the value it
2629/// compares against in CmpValue. Return true if the comparison instruction
2630/// can be analyzed.
2631bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2632 Register &SrcReg2, int64_t &CmpMask,
2633 int64_t &CmpValue) const {
2634 switch (MI.getOpcode()) {
2635 default: break;
2636 case ARM::CMPri:
2637 case ARM::t2CMPri:
2638 case ARM::tCMPi8:
2639 SrcReg = MI.getOperand(i: 0).getReg();
2640 SrcReg2 = 0;
2641 CmpMask = ~0;
2642 CmpValue = MI.getOperand(i: 1).getImm();
2643 return true;
2644 case ARM::CMPrr:
2645 case ARM::t2CMPrr:
2646 case ARM::tCMPr:
2647 SrcReg = MI.getOperand(i: 0).getReg();
2648 SrcReg2 = MI.getOperand(i: 1).getReg();
2649 CmpMask = ~0;
2650 CmpValue = 0;
2651 return true;
2652 case ARM::TSTri:
2653 case ARM::t2TSTri:
2654 SrcReg = MI.getOperand(i: 0).getReg();
2655 SrcReg2 = 0;
2656 CmpMask = MI.getOperand(i: 1).getImm();
2657 CmpValue = 0;
2658 return true;
2659 }
2660
2661 return false;
2662}
2663
2664/// isSuitableForMask - Identify a suitable 'and' instruction that
2665/// operates on the given source register and applies the same mask
2666/// as a 'tst' instruction. Provide a limited look-through for copies.
2667/// When successful, MI will hold the found instruction.
2668static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
2669 int CmpMask, bool CommonUse) {
2670 switch (MI->getOpcode()) {
2671 case ARM::ANDri:
2672 case ARM::t2ANDri:
2673 if (CmpMask != MI->getOperand(i: 2).getImm())
2674 return false;
2675 if (SrcReg == MI->getOperand(i: CommonUse ? 1 : 0).getReg())
2676 return true;
2677 break;
2678 }
2679
2680 return false;
2681}
2682
2683/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
2684/// the condition code if we modify the instructions such that flags are
2685/// set by ADD(a,b,X).
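/// For example, if a = b + X, then HS after CMP(a, b) means the subtraction
/// a - b did not borrow, which is exactly the case where the ADD producing a
/// did not carry, i.e. LO when the flags come from the ADD; hence HS and LO
/// are swapped.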
2686inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
2687 switch (CC) {
2688 default: return ARMCC::AL;
2689 case ARMCC::HS: return ARMCC::LO;
2690 case ARMCC::LO: return ARMCC::HS;
2691 case ARMCC::VS: return ARMCC::VS;
2692 case ARMCC::VC: return ARMCC::VC;
2693 }
2694}
2695
2696/// isRedundantFlagInstr - check whether the first instruction, whose only
2697/// purpose is to update flags, can be made redundant.
2698/// CMPrr can be made redundant by SUBrr if the operands are the same.
2699/// CMPri can be made redundant by SUBri if the operands are the same.
2700/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
2701/// This function can be extended later on.
2702inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
2703 Register SrcReg, Register SrcReg2,
2704 int64_t ImmValue,
2705 const MachineInstr *OI,
2706 bool &IsThumb1) {
2707 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2708 (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
2709 ((OI->getOperand(i: 1).getReg() == SrcReg &&
2710 OI->getOperand(i: 2).getReg() == SrcReg2) ||
2711 (OI->getOperand(i: 1).getReg() == SrcReg2 &&
2712 OI->getOperand(i: 2).getReg() == SrcReg))) {
2713 IsThumb1 = false;
2714 return true;
2715 }
2716
2717 if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
2718 ((OI->getOperand(i: 2).getReg() == SrcReg &&
2719 OI->getOperand(i: 3).getReg() == SrcReg2) ||
2720 (OI->getOperand(i: 2).getReg() == SrcReg2 &&
2721 OI->getOperand(i: 3).getReg() == SrcReg))) {
2722 IsThumb1 = true;
2723 return true;
2724 }
2725
2726 if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
2727 (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
2728 OI->getOperand(i: 1).getReg() == SrcReg &&
2729 OI->getOperand(i: 2).getImm() == ImmValue) {
2730 IsThumb1 = false;
2731 return true;
2732 }
2733
2734 if (CmpI->getOpcode() == ARM::tCMPi8 &&
2735 (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
2736 OI->getOperand(i: 2).getReg() == SrcReg &&
2737 OI->getOperand(i: 3).getImm() == ImmValue) {
2738 IsThumb1 = true;
2739 return true;
2740 }
2741
2742 if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
2743 (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
2744 OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
2745 OI->getOperand(i: 0).isReg() && OI->getOperand(i: 1).isReg() &&
2746 OI->getOperand(i: 0).getReg() == SrcReg &&
2747 OI->getOperand(i: 1).getReg() == SrcReg2) {
2748 IsThumb1 = false;
2749 return true;
2750 }
2751
2752 if (CmpI->getOpcode() == ARM::tCMPr &&
2753 (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
2754 OI->getOpcode() == ARM::tADDrr) &&
2755 OI->getOperand(i: 0).getReg() == SrcReg &&
2756 OI->getOperand(i: 2).getReg() == SrcReg2) {
2757 IsThumb1 = true;
2758 return true;
2759 }
2760
2761 return false;
2762}
2763
2764static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
2765 switch (MI->getOpcode()) {
2766 default: return false;
2767 case ARM::tLSLri:
2768 case ARM::tLSRri:
2769 case ARM::tLSLrr:
2770 case ARM::tLSRrr:
2771 case ARM::tSUBrr:
2772 case ARM::tADDrr:
2773 case ARM::tADDi3:
2774 case ARM::tADDi8:
2775 case ARM::tSUBi3:
2776 case ARM::tSUBi8:
2777 case ARM::tMUL:
2778 case ARM::tADC:
2779 case ARM::tSBC:
2780 case ARM::tRSB:
2781 case ARM::tAND:
2782 case ARM::tORR:
2783 case ARM::tEOR:
2784 case ARM::tBIC:
2785 case ARM::tMVN:
2786 case ARM::tASRri:
2787 case ARM::tASRrr:
2788 case ARM::tROR:
2789 IsThumb1 = true;
2790 [[fallthrough]];
2791 case ARM::RSBrr:
2792 case ARM::RSBri:
2793 case ARM::RSCrr:
2794 case ARM::RSCri:
2795 case ARM::ADDrr:
2796 case ARM::ADDri:
2797 case ARM::ADCrr:
2798 case ARM::ADCri:
2799 case ARM::SUBrr:
2800 case ARM::SUBri:
2801 case ARM::SBCrr:
2802 case ARM::SBCri:
2803 case ARM::t2RSBri:
2804 case ARM::t2ADDrr:
2805 case ARM::t2ADDri:
2806 case ARM::t2ADCrr:
2807 case ARM::t2ADCri:
2808 case ARM::t2SUBrr:
2809 case ARM::t2SUBri:
2810 case ARM::t2SBCrr:
2811 case ARM::t2SBCri:
2812 case ARM::ANDrr:
2813 case ARM::ANDri:
2814 case ARM::ANDrsr:
2815 case ARM::ANDrsi:
2816 case ARM::t2ANDrr:
2817 case ARM::t2ANDri:
2818 case ARM::t2ANDrs:
2819 case ARM::ORRrr:
2820 case ARM::ORRri:
2821 case ARM::ORRrsr:
2822 case ARM::ORRrsi:
2823 case ARM::t2ORRrr:
2824 case ARM::t2ORRri:
2825 case ARM::t2ORRrs:
2826 case ARM::EORrr:
2827 case ARM::EORri:
2828 case ARM::EORrsr:
2829 case ARM::EORrsi:
2830 case ARM::t2EORrr:
2831 case ARM::t2EORri:
2832 case ARM::t2EORrs:
2833 case ARM::BICri:
2834 case ARM::BICrr:
2835 case ARM::BICrsi:
2836 case ARM::BICrsr:
2837 case ARM::t2BICri:
2838 case ARM::t2BICrr:
2839 case ARM::t2BICrs:
2840 case ARM::t2LSRri:
2841 case ARM::t2LSRrr:
2842 case ARM::t2LSLri:
2843 case ARM::t2LSLrr:
2844 case ARM::MOVsr:
2845 case ARM::MOVsi:
2846 return true;
2847 }
2848}
2849
2850/// optimizeCompareInstr - Convert the instruction supplying the argument to the
2851/// comparison into one that sets the zero bit in the flags register;
2852/// Remove a redundant Compare instruction if an earlier instruction can set the
2853/// flags in the same way as Compare.
2854/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2855/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2856/// condition code of instructions which use the flags.
2857bool ARMBaseInstrInfo::optimizeCompareInstr(
2858 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
2859 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
2860 // Get the unique definition of SrcReg.
2861 MachineInstr *MI = MRI->getUniqueVRegDef(Reg: SrcReg);
2862 if (!MI) return false;
2863
2864 // Masked compares sometimes use the same register as the corresponding 'and'.
2865 if (CmpMask != ~0) {
2866 if (!isSuitableForMask(MI, SrcReg, CmpMask, CommonUse: false) || isPredicated(MI: *MI)) {
2867 MI = nullptr;
2868 for (MachineRegisterInfo::use_instr_iterator
2869 UI = MRI->use_instr_begin(RegNo: SrcReg), UE = MRI->use_instr_end();
2870 UI != UE; ++UI) {
2871 if (UI->getParent() != CmpInstr.getParent())
2872 continue;
2873 MachineInstr *PotentialAND = &*UI;
2874 if (!isSuitableForMask(MI&: PotentialAND, SrcReg, CmpMask, CommonUse: true) ||
2875 isPredicated(MI: *PotentialAND))
2876 continue;
2877 MI = PotentialAND;
2878 break;
2879 }
2880 if (!MI) return false;
2881 }
2882 }
2883
2884 // Get ready to iterate backward from CmpInstr.
2885 MachineBasicBlock::iterator I = CmpInstr, E = MI,
2886 B = CmpInstr.getParent()->begin();
2887
2888 // Early exit if CmpInstr is at the beginning of the BB.
2889 if (I == B) return false;
2890
2891 // There are two possible candidates which can be changed to set CPSR:
2892 // One is MI, the other is a SUB or ADD instruction.
2893 // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
2894 // ADDr[ri](r1, r2, X).
2895 // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2896 MachineInstr *SubAdd = nullptr;
2897 if (SrcReg2 != 0)
2898 // MI is not a candidate for CMPrr.
2899 MI = nullptr;
2900 else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
2901 // Conservatively refuse to convert an instruction which isn't in the same
2902 // BB as the comparison.
2903 // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
2904 // Thus we cannot return here.
2905 if (CmpInstr.getOpcode() == ARM::CMPri ||
2906 CmpInstr.getOpcode() == ARM::t2CMPri ||
2907 CmpInstr.getOpcode() == ARM::tCMPi8)
2908 MI = nullptr;
2909 else
2910 return false;
2911 }
2912
2913 bool IsThumb1 = false;
2914 if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
2915 return false;
2916
2917 // We also want to do this peephole for cases like this: if (a*b == 0),
2918 // and optimise away the CMP instruction from the generated code sequence:
2919 // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
2920 // resulting from the select instruction, but these MOVS instructions for
2921 // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
2922 // However, if we only have MOVS instructions in between the CMP and the
2923 // other instruction (the MULS in this example), then the CPSR is dead so we
2924 // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
2925 // reordering and then continue the analysis hoping we can eliminate the
2926 // CMP. This peephole works on the vregs, so is still in SSA form. As a
2927 // consequence, the movs won't redefine/kill the MUL operands which would
2928 // make this reordering illegal.
2929 const TargetRegisterInfo *TRI = &getRegisterInfo();
2930 if (MI && IsThumb1) {
2931 --I;
2932 if (I != E && !MI->readsRegister(Reg: ARM::CPSR, TRI)) {
2933 bool CanReorder = true;
2934 for (; I != E; --I) {
2935 if (I->getOpcode() != ARM::tMOVi8) {
2936 CanReorder = false;
2937 break;
2938 }
2939 }
2940 if (CanReorder) {
2941 MI = MI->removeFromParent();
2942 E = CmpInstr;
2943 CmpInstr.getParent()->insert(I: E, MI);
2944 }
2945 }
2946 I = CmpInstr;
2947 E = MI;
2948 }
2949
2950 // Check that CPSR isn't set between the comparison instruction and the one we
2951 // want to change. At the same time, search for SubAdd.
2952 bool SubAddIsThumb1 = false;
2953 do {
2954 const MachineInstr &Instr = *--I;
2955
2956 // Check whether CmpInstr can be made redundant by the current instruction.
2957 if (isRedundantFlagInstr(CmpI: &CmpInstr, SrcReg, SrcReg2, ImmValue: CmpValue, OI: &Instr,
2958 IsThumb1&: SubAddIsThumb1)) {
2959 SubAdd = &*I;
2960 break;
2961 }
2962
2963 // Allow E (which was initially MI) to be SubAdd but do not search before E.
2964 if (I == E)
2965 break;
2966
2967 if (Instr.modifiesRegister(Reg: ARM::CPSR, TRI) ||
2968 Instr.readsRegister(Reg: ARM::CPSR, TRI))
2969 // This instruction modifies or uses CPSR after the one we want to
2970 // change. We can't do this transformation.
2971 return false;
2972
2973 if (I == B) {
2974 // In some cases, we scan the use-list of an instruction for an AND;
2975 // that AND is in the same BB, but may not be scheduled before the
2976 // corresponding TST. In that case, bail out.
2977 //
2978 // FIXME: We could try to reschedule the AND.
2979 return false;
2980 }
2981 } while (true);
2982
2983 // Return false if no candidates exist.
2984 if (!MI && !SubAdd)
2985 return false;
2986
2987  // If we found a SubAdd, use it, as it will be closer to the CMP.
2988 if (SubAdd) {
2989 MI = SubAdd;
2990 IsThumb1 = SubAddIsThumb1;
2991 }
2992
2993 // We can't use a predicated instruction - it doesn't always write the flags.
2994 if (isPredicated(MI: *MI))
2995 return false;
2996
2997  // Scan forward for uses of CPSR.
2998  // When checking against MI: if a use has a condition code that requires
2999  // checking the V bit or the C bit, then this is not safe to do.
3000 // It is safe to remove CmpInstr if CPSR is redefined or killed.
3001 // If we are done with the basic block, we need to check whether CPSR is
3002 // live-out.
3003 SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
3004 OperandsToUpdate;
3005 bool isSafe = false;
3006 I = CmpInstr;
3007 E = CmpInstr.getParent()->end();
3008 while (!isSafe && ++I != E) {
3009 const MachineInstr &Instr = *I;
3010 for (unsigned IO = 0, EO = Instr.getNumOperands();
3011 !isSafe && IO != EO; ++IO) {
3012 const MachineOperand &MO = Instr.getOperand(i: IO);
3013 if (MO.isRegMask() && MO.clobbersPhysReg(PhysReg: ARM::CPSR)) {
3014 isSafe = true;
3015 break;
3016 }
3017 if (!MO.isReg() || MO.getReg() != ARM::CPSR)
3018 continue;
3019 if (MO.isDef()) {
3020 isSafe = true;
3021 break;
3022 }
3023      // The condition code operand immediately precedes CPSR, except for VSELs.
3024 ARMCC::CondCodes CC;
3025 bool IsInstrVSel = true;
3026 switch (Instr.getOpcode()) {
3027 default:
3028 IsInstrVSel = false;
3029 CC = (ARMCC::CondCodes)Instr.getOperand(i: IO - 1).getImm();
3030 break;
3031 case ARM::VSELEQD:
3032 case ARM::VSELEQS:
3033 case ARM::VSELEQH:
3034 CC = ARMCC::EQ;
3035 break;
3036 case ARM::VSELGTD:
3037 case ARM::VSELGTS:
3038 case ARM::VSELGTH:
3039 CC = ARMCC::GT;
3040 break;
3041 case ARM::VSELGED:
3042 case ARM::VSELGES:
3043 case ARM::VSELGEH:
3044 CC = ARMCC::GE;
3045 break;
3046 case ARM::VSELVSD:
3047 case ARM::VSELVSS:
3048 case ARM::VSELVSH:
3049 CC = ARMCC::VS;
3050 break;
3051 }
3052
3053 if (SubAdd) {
3054 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
3055 // on CMP needs to be updated to be based on SUB.
3056 // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
3057 // needs to be modified.
3058 // Push the condition code operands to OperandsToUpdate.
3059 // If it is safe to remove CmpInstr, the condition code of these
3060 // operands will be modified.
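          // For example, a GE that was evaluated against CMP(r2, r1) must
          // become LE once it is evaluated against SUB(r1, r2).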
3061 unsigned Opc = SubAdd->getOpcode();
3062 bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
3063 Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
3064 Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
3065 Opc == ARM::tSUBi8;
3066 unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
3067 if (!IsSub ||
3068 (SrcReg2 != 0 && SubAdd->getOperand(i: OpI).getReg() == SrcReg2 &&
3069 SubAdd->getOperand(i: OpI + 1).getReg() == SrcReg)) {
3070 // VSel doesn't support condition code update.
3071 if (IsInstrVSel)
3072 return false;
3073 // Ensure we can swap the condition.
3074 ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
3075 if (NewCC == ARMCC::AL)
3076 return false;
3077 OperandsToUpdate.push_back(
3078 Elt: std::make_pair(x: &((*I).getOperand(i: IO - 1)), y&: NewCC));
3079 }
3080 } else {
3081 // No SubAdd, so this is x = <op> y, z; cmp x, 0.
3082 switch (CC) {
3083 case ARMCC::EQ: // Z
3084 case ARMCC::NE: // Z
3085 case ARMCC::MI: // N
3086 case ARMCC::PL: // N
3087 case ARMCC::AL: // none
3088 // CPSR can be used multiple times, we should continue.
3089 break;
3090 case ARMCC::HS: // C
3091 case ARMCC::LO: // C
3092 case ARMCC::VS: // V
3093 case ARMCC::VC: // V
3094 case ARMCC::HI: // C Z
3095 case ARMCC::LS: // C Z
3096 case ARMCC::GE: // N V
3097 case ARMCC::LT: // N V
3098 case ARMCC::GT: // Z N V
3099 case ARMCC::LE: // Z N V
3100 // The instruction uses the V bit or C bit which is not safe.
3101 return false;
3102 }
3103 }
3104 }
3105 }
3106
3107 // If CPSR is not killed nor re-defined, we should check whether it is
3108 // live-out. If it is live-out, do not optimize.
3109 if (!isSafe) {
3110 MachineBasicBlock *MBB = CmpInstr.getParent();
3111 for (MachineBasicBlock *Succ : MBB->successors())
3112 if (Succ->isLiveIn(Reg: ARM::CPSR))
3113 return false;
3114 }
3115
3116  // Toggle the optional operand to CPSR (if it exists; in Thumb1 we always
3117  // set CPSR, so this is represented as an explicit output).
3118 if (!IsThumb1) {
3119 unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
3120 MI->getOperand(i: CPSRRegNum).setReg(ARM::CPSR);
3121 MI->getOperand(i: CPSRRegNum).setIsDef(true);
3122 }
3123 assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
3124 CmpInstr.eraseFromParent();
3125
3126 // Modify the condition code of operands in OperandsToUpdate.
3127 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
3128 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3129 for (auto &[MO, Cond] : OperandsToUpdate)
3130 MO->setImm(Cond);
3131
3132 MI->clearRegisterDeads(Reg: ARM::CPSR);
3133
3134 return true;
3135}
3136
3137bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
3138 // Do not sink MI if it might be used to optimize a redundant compare.
3139 // We heuristically only look at the instruction immediately following MI to
3140 // avoid potentially searching the entire basic block.
3141 if (isPredicated(MI))
3142 return true;
3143 MachineBasicBlock::const_iterator Next = &MI;
3144 ++Next;
3145 Register SrcReg, SrcReg2;
3146 int64_t CmpMask, CmpValue;
3147 bool IsThumb1;
3148 if (Next != MI.getParent()->end() &&
3149 analyzeCompare(MI: *Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
3150 isRedundantFlagInstr(CmpI: &*Next, SrcReg, SrcReg2, ImmValue: CmpValue, OI: &MI, IsThumb1))
3151 return false;
3152 return true;
3153}
3154
3155bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
3156 Register Reg,
3157 MachineRegisterInfo *MRI) const {
3158 // Fold large immediates into add, sub, or, xor.
3159 unsigned DefOpc = DefMI.getOpcode();
3160 if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
3161 DefOpc != ARM::tMOVi32imm)
3162 return false;
3163 if (!DefMI.getOperand(i: 1).isImm())
3164 // Could be t2MOVi32imm @xx
3165 return false;
3166
3167 if (!MRI->hasOneNonDBGUse(RegNo: Reg))
3168 return false;
3169
3170 const MCInstrDesc &DefMCID = DefMI.getDesc();
3171 if (DefMCID.hasOptionalDef()) {
3172 unsigned NumOps = DefMCID.getNumOperands();
3173 const MachineOperand &MO = DefMI.getOperand(i: NumOps - 1);
3174 if (MO.getReg() == ARM::CPSR && !MO.isDead())
3175 // If DefMI defines CPSR and it is not dead, it's obviously not safe
3176 // to delete DefMI.
3177 return false;
3178 }
3179
3180 const MCInstrDesc &UseMCID = UseMI.getDesc();
3181 if (UseMCID.hasOptionalDef()) {
3182 unsigned NumOps = UseMCID.getNumOperands();
3183 if (UseMI.getOperand(i: NumOps - 1).getReg() == ARM::CPSR)
3184 // If the instruction sets the flag, do not attempt this optimization
3185 // since it may change the semantics of the code.
3186 return false;
3187 }
3188
3189 unsigned UseOpc = UseMI.getOpcode();
3190 unsigned NewUseOpc = 0;
3191 uint32_t ImmVal = (uint32_t)DefMI.getOperand(i: 1).getImm();
3192 uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
3193 bool Commute = false;
3194 switch (UseOpc) {
3195 default: return false;
3196 case ARM::SUBrr:
3197 case ARM::ADDrr:
3198 case ARM::ORRrr:
3199 case ARM::EORrr:
3200 case ARM::t2SUBrr:
3201 case ARM::t2ADDrr:
3202 case ARM::t2ORRrr:
3203 case ARM::t2EORrr: {
3204 Commute = UseMI.getOperand(i: 2).getReg() != Reg;
3205 switch (UseOpc) {
3206 default: break;
3207 case ARM::ADDrr:
3208 case ARM::SUBrr:
3209 if (UseOpc == ARM::SUBrr && Commute)
3210 return false;
3211
3212 // ADD/SUB are special because they're essentially the same operation, so
3213 // we can handle a larger range of immediates.
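    // For example, an immediate such as 0x00F000F0 is not a single so_imm
    // value, but it splits into the two so_imm parts 0x000000F0 and
    // 0x00F00000, so the use is rewritten below as two immediate instructions.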
3214 if (ARM_AM::isSOImmTwoPartVal(V: ImmVal))
3215 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
3216 else if (ARM_AM::isSOImmTwoPartVal(V: -ImmVal)) {
3217 ImmVal = -ImmVal;
3218 NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
3219 } else
3220 return false;
3221 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(V: ImmVal);
3222 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(V: ImmVal);
3223 break;
3224 case ARM::ORRrr:
3225 case ARM::EORrr:
3226 if (!ARM_AM::isSOImmTwoPartVal(V: ImmVal))
3227 return false;
3228 SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(V: ImmVal);
3229 SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(V: ImmVal);
3230 switch (UseOpc) {
3231 default: break;
3232 case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
3233 case ARM::EORrr: NewUseOpc = ARM::EORri; break;
3234 }
3235 break;
3236 case ARM::t2ADDrr:
3237 case ARM::t2SUBrr: {
3238 if (UseOpc == ARM::t2SUBrr && Commute)
3239 return false;
3240
3241 // ADD/SUB are special because they're essentially the same operation, so
3242 // we can handle a larger range of immediates.
3243 const bool ToSP = DefMI.getOperand(i: 0).getReg() == ARM::SP;
3244 const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
3245 const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
3246 if (ARM_AM::isT2SOImmTwoPartVal(Imm: ImmVal))
3247 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
3248 else if (ARM_AM::isT2SOImmTwoPartVal(Imm: -ImmVal)) {
3249 ImmVal = -ImmVal;
3250 NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
3251 } else
3252 return false;
3253 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(Imm: ImmVal);
3254 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(Imm: ImmVal);
3255 break;
3256 }
3257 case ARM::t2ORRrr:
3258 case ARM::t2EORrr:
3259 if (!ARM_AM::isT2SOImmTwoPartVal(Imm: ImmVal))
3260 return false;
3261 SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(Imm: ImmVal);
3262 SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(Imm: ImmVal);
3263 switch (UseOpc) {
3264 default: break;
3265 case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
3266 case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
3267 }
3268 break;
3269 }
3270 }
3271 }
3272
3273 unsigned OpIdx = Commute ? 2 : 1;
3274 Register Reg1 = UseMI.getOperand(i: OpIdx).getReg();
3275 bool isKill = UseMI.getOperand(i: OpIdx).isKill();
3276 const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
3277 Register NewReg = MRI->createVirtualRegister(RegClass: TRC);
3278 BuildMI(BB&: *UseMI.getParent(), I&: UseMI, MIMD: UseMI.getDebugLoc(), MCID: get(Opcode: NewUseOpc),
3279 DestReg: NewReg)
3280 .addReg(RegNo: Reg1, flags: getKillRegState(B: isKill))
3281 .addImm(Val: SOImmValV1)
3282 .add(MOs: predOps(Pred: ARMCC::AL))
3283 .add(MO: condCodeOp());
3284 UseMI.setDesc(get(Opcode: NewUseOpc));
3285 UseMI.getOperand(i: 1).setReg(NewReg);
3286 UseMI.getOperand(i: 1).setIsKill();
3287 UseMI.getOperand(i: 2).ChangeToImmediate(ImmVal: SOImmValV2);
3288 DefMI.eraseFromParent();
3289  // FIXME: t2ADDrr should be split, as different rules apply when writing to
3290  // SP, just as t2ADDri was split into [t2ADDri, t2ADDspImm]. Then the code
3291  // below will not be needed, as the input/output register classes will be
3292  // rgpr or gprSP.
3293 // For now, we fix the UseMI operand explicitly here:
3294  switch (NewUseOpc) {
3295 case ARM::t2ADDspImm:
3296 case ARM::t2SUBspImm:
3297 case ARM::t2ADDri:
3298 case ARM::t2SUBri:
3299 MRI->constrainRegClass(Reg: UseMI.getOperand(i: 0).getReg(), RC: TRC);
3300 }
3301 return true;
3302}
3303
3304static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
3305 const MachineInstr &MI) {
3306 switch (MI.getOpcode()) {
3307 default: {
3308 const MCInstrDesc &Desc = MI.getDesc();
3309 int UOps = ItinData->getNumMicroOps(ItinClassIndx: Desc.getSchedClass());
3310 assert(UOps >= 0 && "bad # UOps");
3311 return UOps;
3312 }
3313
3314 case ARM::LDRrs:
3315 case ARM::LDRBrs:
3316 case ARM::STRrs:
3317 case ARM::STRBrs: {
3318 unsigned ShOpVal = MI.getOperand(i: 3).getImm();
3319 bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub;
3320 unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal);
3321 if (!isSub &&
3322 (ShImm == 0 ||
3323 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3324 ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl)))
3325 return 1;
3326 return 2;
3327 }
3328
3329 case ARM::LDRH:
3330 case ARM::STRH: {
3331 if (!MI.getOperand(i: 2).getReg())
3332 return 1;
3333
3334 unsigned ShOpVal = MI.getOperand(i: 3).getImm();
3335 bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub;
3336 unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal);
3337 if (!isSub &&
3338 (ShImm == 0 ||
3339 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3340 ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl)))
3341 return 1;
3342 return 2;
3343 }
3344
3345 case ARM::LDRSB:
3346 case ARM::LDRSH:
3347 return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 3).getImm()) == ARM_AM::sub) ? 3 : 2;
3348
3349 case ARM::LDRSB_POST:
3350 case ARM::LDRSH_POST: {
3351 Register Rt = MI.getOperand(i: 0).getReg();
3352 Register Rm = MI.getOperand(i: 3).getReg();
3353 return (Rt == Rm) ? 4 : 3;
3354 }
3355
3356 case ARM::LDR_PRE_REG:
3357 case ARM::LDRB_PRE_REG: {
3358 Register Rt = MI.getOperand(i: 0).getReg();
3359 Register Rm = MI.getOperand(i: 3).getReg();
3360 if (Rt == Rm)
3361 return 3;
3362 unsigned ShOpVal = MI.getOperand(i: 4).getImm();
3363 bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub;
3364 unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal);
3365 if (!isSub &&
3366 (ShImm == 0 ||
3367 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3368 ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl)))
3369 return 2;
3370 return 3;
3371 }
3372
3373 case ARM::STR_PRE_REG:
3374 case ARM::STRB_PRE_REG: {
3375 unsigned ShOpVal = MI.getOperand(i: 4).getImm();
3376 bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub;
3377 unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal);
3378 if (!isSub &&
3379 (ShImm == 0 ||
3380 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3381 ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl)))
3382 return 2;
3383 return 3;
3384 }
3385
3386 case ARM::LDRH_PRE:
3387 case ARM::STRH_PRE: {
3388 Register Rt = MI.getOperand(i: 0).getReg();
3389 Register Rm = MI.getOperand(i: 3).getReg();
3390 if (!Rm)
3391 return 2;
3392 if (Rt == Rm)
3393 return 3;
3394 return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 4).getImm()) == ARM_AM::sub) ? 3 : 2;
3395 }
3396
3397 case ARM::LDR_POST_REG:
3398 case ARM::LDRB_POST_REG:
3399 case ARM::LDRH_POST: {
3400 Register Rt = MI.getOperand(i: 0).getReg();
3401 Register Rm = MI.getOperand(i: 3).getReg();
3402 return (Rt == Rm) ? 3 : 2;
3403 }
3404
3405 case ARM::LDR_PRE_IMM:
3406 case ARM::LDRB_PRE_IMM:
3407 case ARM::LDR_POST_IMM:
3408 case ARM::LDRB_POST_IMM:
3409 case ARM::STRB_POST_IMM:
3410 case ARM::STRB_POST_REG:
3411 case ARM::STRB_PRE_IMM:
3412 case ARM::STRH_POST:
3413 case ARM::STR_POST_IMM:
3414 case ARM::STR_POST_REG:
3415 case ARM::STR_PRE_IMM:
3416 return 2;
3417
3418 case ARM::LDRSB_PRE:
3419 case ARM::LDRSH_PRE: {
3420 Register Rm = MI.getOperand(i: 3).getReg();
3421 if (Rm == 0)
3422 return 3;
3423 Register Rt = MI.getOperand(i: 0).getReg();
3424 if (Rt == Rm)
3425 return 4;
3426 unsigned ShOpVal = MI.getOperand(i: 4).getImm();
3427 bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub;
3428 unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal);
3429 if (!isSub &&
3430 (ShImm == 0 ||
3431 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3432 ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl)))
3433 return 3;
3434 return 4;
3435 }
3436
3437 case ARM::LDRD: {
3438 Register Rt = MI.getOperand(i: 0).getReg();
3439 Register Rn = MI.getOperand(i: 2).getReg();
3440 Register Rm = MI.getOperand(i: 3).getReg();
3441 if (Rm)
3442 return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 4).getImm()) == ARM_AM::sub) ? 4
3443 : 3;
3444 return (Rt == Rn) ? 3 : 2;
3445 }
3446
3447 case ARM::STRD: {
3448 Register Rm = MI.getOperand(i: 3).getReg();
3449 if (Rm)
3450 return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 4).getImm()) == ARM_AM::sub) ? 4
3451 : 3;
3452 return 2;
3453 }
3454
3455 case ARM::LDRD_POST:
3456 case ARM::t2LDRD_POST:
3457 return 3;
3458
3459 case ARM::STRD_POST:
3460 case ARM::t2STRD_POST:
3461 return 4;
3462
3463 case ARM::LDRD_PRE: {
3464 Register Rt = MI.getOperand(i: 0).getReg();
3465 Register Rn = MI.getOperand(i: 3).getReg();
3466 Register Rm = MI.getOperand(i: 4).getReg();
3467 if (Rm)
3468 return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 5).getImm()) == ARM_AM::sub) ? 5
3469 : 4;
3470 return (Rt == Rn) ? 4 : 3;
3471 }
3472
3473 case ARM::t2LDRD_PRE: {
3474 Register Rt = MI.getOperand(i: 0).getReg();
3475 Register Rn = MI.getOperand(i: 3).getReg();
3476 return (Rt == Rn) ? 4 : 3;
3477 }
3478
3479 case ARM::STRD_PRE: {
3480 Register Rm = MI.getOperand(i: 4).getReg();
3481 if (Rm)
3482 return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 5).getImm()) == ARM_AM::sub) ? 5
3483 : 4;
3484 return 3;
3485 }
3486
3487 case ARM::t2STRD_PRE:
3488 return 3;
3489
3490 case ARM::t2LDR_POST:
3491 case ARM::t2LDRB_POST:
3492 case ARM::t2LDRB_PRE:
3493 case ARM::t2LDRSBi12:
3494 case ARM::t2LDRSBi8:
3495 case ARM::t2LDRSBpci:
3496 case ARM::t2LDRSBs:
3497 case ARM::t2LDRH_POST:
3498 case ARM::t2LDRH_PRE:
3499 case ARM::t2LDRSBT:
3500 case ARM::t2LDRSB_POST:
3501 case ARM::t2LDRSB_PRE:
3502 case ARM::t2LDRSH_POST:
3503 case ARM::t2LDRSH_PRE:
3504 case ARM::t2LDRSHi12:
3505 case ARM::t2LDRSHi8:
3506 case ARM::t2LDRSHpci:
3507 case ARM::t2LDRSHs:
3508 return 2;
3509
3510 case ARM::t2LDRDi8: {
3511 Register Rt = MI.getOperand(i: 0).getReg();
3512 Register Rn = MI.getOperand(i: 2).getReg();
3513 return (Rt == Rn) ? 3 : 2;
3514 }
3515
3516 case ARM::t2STRB_POST:
3517 case ARM::t2STRB_PRE:
3518 case ARM::t2STRBs:
3519 case ARM::t2STRDi8:
3520 case ARM::t2STRH_POST:
3521 case ARM::t2STRH_PRE:
3522 case ARM::t2STRHs:
3523 case ARM::t2STR_POST:
3524 case ARM::t2STR_PRE:
3525 case ARM::t2STRs:
3526 return 2;
3527 }
3528}
3529
3530// Return the number of 32-bit words loaded by LDM or stored by STM. If this
3531 // can't be easily determined, return 0 (missing MachineMemOperand).
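// For example, an LDM whose memory operands cover 16 bytes in total yields 4
// (16 / 4); with no memory operands the sum is 0 and 0 is returned.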
3532//
3533// FIXME: The current MachineInstr design does not support relying on machine
3534// mem operands to determine the width of a memory access. Instead, we expect
3535// the target to provide this information based on the instruction opcode and
3536// operands. However, using MachineMemOperand is the best solution now for
3537// two reasons:
3538//
3539// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
3540// operands. This is much more dangerous than using the MachineMemOperand
3541// sizes because CodeGen passes can insert/remove optional machine operands. In
3542// fact, it's totally incorrect for preRA passes and appears to be wrong for
3543// postRA passes as well.
3544//
3545// 2) getNumLDMAddresses is only used by the scheduling machine model and any
3546// machine model that calls this should handle the unknown (zero size) case.
3547//
3548// Long term, we should require a target hook that verifies MachineMemOperand
3549// sizes during MC lowering. That target hook should be local to MC lowering
3550// because we can't ensure that it is aware of other MI forms. Doing this will
3551// ensure that MachineMemOperands are correctly propagated through all passes.
3552unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
3553 unsigned Size = 0;
3554 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
3555 E = MI.memoperands_end();
3556 I != E; ++I) {
3557 Size += (*I)->getSize().getValue();
3558 }
3559 // FIXME: The scheduler currently can't handle values larger than 16. But
3560 // the values can actually go up to 32 for floating-point load/store
3561 // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
3562 // operations isn't right; we could end up with "extra" memory operands for
3563 // various reasons, like tail merge merging two memory operations.
3564 return std::min(a: Size / 4, b: 16U);
3565}
3566
3567static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
3568 unsigned NumRegs) {
3569 unsigned UOps = 1 + NumRegs; // 1 for address computation.
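  // For example, a 3-register LDMIA_UPD costs 1 (address) + 3 (registers) +
  // 1 (writeback) = 5 uops, and LDMIA_RET costs one more for the write to pc.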
3570 switch (Opc) {
3571 default:
3572 break;
3573 case ARM::VLDMDIA_UPD:
3574 case ARM::VLDMDDB_UPD:
3575 case ARM::VLDMSIA_UPD:
3576 case ARM::VLDMSDB_UPD:
3577 case ARM::VSTMDIA_UPD:
3578 case ARM::VSTMDDB_UPD:
3579 case ARM::VSTMSIA_UPD:
3580 case ARM::VSTMSDB_UPD:
3581 case ARM::LDMIA_UPD:
3582 case ARM::LDMDA_UPD:
3583 case ARM::LDMDB_UPD:
3584 case ARM::LDMIB_UPD:
3585 case ARM::STMIA_UPD:
3586 case ARM::STMDA_UPD:
3587 case ARM::STMDB_UPD:
3588 case ARM::STMIB_UPD:
3589 case ARM::tLDMIA_UPD:
3590 case ARM::tSTMIA_UPD:
3591 case ARM::t2LDMIA_UPD:
3592 case ARM::t2LDMDB_UPD:
3593 case ARM::t2STMIA_UPD:
3594 case ARM::t2STMDB_UPD:
3595 ++UOps; // One for base register writeback.
3596 break;
3597 case ARM::LDMIA_RET:
3598 case ARM::tPOP_RET:
3599 case ARM::t2LDMIA_RET:
3600 UOps += 2; // One for base reg wb, one for write to pc.
3601 break;
3602 }
3603 return UOps;
3604}
3605
3606unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
3607 const MachineInstr &MI) const {
3608 if (!ItinData || ItinData->isEmpty())
3609 return 1;
3610
3611 const MCInstrDesc &Desc = MI.getDesc();
3612 unsigned Class = Desc.getSchedClass();
3613 int ItinUOps = ItinData->getNumMicroOps(ItinClassIndx: Class);
3614 if (ItinUOps >= 0) {
3615 if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
3616 return getNumMicroOpsSwiftLdSt(ItinData, MI);
3617
3618 return ItinUOps;
3619 }
3620
3621 unsigned Opc = MI.getOpcode();
3622 switch (Opc) {
3623 default:
3624 llvm_unreachable("Unexpected multi-uops instruction!");
3625 case ARM::VLDMQIA:
3626 case ARM::VSTMQIA:
3627 return 2;
3628
3629  // The number of uOps for load / store multiple is determined by the number
3630  // of registers.
3631 //
3632 // On Cortex-A8, each pair of register loads / stores can be scheduled on the
3633 // same cycle. The scheduling for the first load / store must be done
3634 // separately by assuming the address is not 64-bit aligned.
3635 //
3636 // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
3637 // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
3638 // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
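  // For example, on a Cortex-A9 a 5-register VLDMDIA takes
  // (5 / 2) + (5 % 2) + 1 = 4 uops with the formula below.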
3639 case ARM::VLDMDIA:
3640 case ARM::VLDMDIA_UPD:
3641 case ARM::VLDMDDB_UPD:
3642 case ARM::VLDMSIA:
3643 case ARM::VLDMSIA_UPD:
3644 case ARM::VLDMSDB_UPD:
3645 case ARM::VSTMDIA:
3646 case ARM::VSTMDIA_UPD:
3647 case ARM::VSTMDDB_UPD:
3648 case ARM::VSTMSIA:
3649 case ARM::VSTMSIA_UPD:
3650 case ARM::VSTMSDB_UPD: {
3651 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
3652 return (NumRegs / 2) + (NumRegs % 2) + 1;
3653 }
3654
3655 case ARM::LDMIA_RET:
3656 case ARM::LDMIA:
3657 case ARM::LDMDA:
3658 case ARM::LDMDB:
3659 case ARM::LDMIB:
3660 case ARM::LDMIA_UPD:
3661 case ARM::LDMDA_UPD:
3662 case ARM::LDMDB_UPD:
3663 case ARM::LDMIB_UPD:
3664 case ARM::STMIA:
3665 case ARM::STMDA:
3666 case ARM::STMDB:
3667 case ARM::STMIB:
3668 case ARM::STMIA_UPD:
3669 case ARM::STMDA_UPD:
3670 case ARM::STMDB_UPD:
3671 case ARM::STMIB_UPD:
3672 case ARM::tLDMIA:
3673 case ARM::tLDMIA_UPD:
3674 case ARM::tSTMIA_UPD:
3675 case ARM::tPOP_RET:
3676 case ARM::tPOP:
3677 case ARM::tPUSH:
3678 case ARM::t2LDMIA_RET:
3679 case ARM::t2LDMIA:
3680 case ARM::t2LDMDB:
3681 case ARM::t2LDMIA_UPD:
3682 case ARM::t2LDMDB_UPD:
3683 case ARM::t2STMIA:
3684 case ARM::t2STMDB:
3685 case ARM::t2STMIA_UPD:
3686 case ARM::t2STMDB_UPD: {
3687 unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
3688 switch (Subtarget.getLdStMultipleTiming()) {
3689 case ARMSubtarget::SingleIssuePlusExtras:
3690 return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
3691 case ARMSubtarget::SingleIssue:
3692 // Assume the worst.
3693 return NumRegs;
3694 case ARMSubtarget::DoubleIssue: {
3695 if (NumRegs < 4)
3696 return 2;
3697 // 4 registers would be issued: 2, 2.
3698 // 5 registers would be issued: 2, 2, 1.
3699 unsigned UOps = (NumRegs / 2);
3700 if (NumRegs % 2)
3701 ++UOps;
3702 return UOps;
3703 }
3704 case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
3705 unsigned UOps = (NumRegs / 2);
3706       // If there is an odd number of registers or if it's not 64-bit aligned,
3707 // then it takes an extra AGU (Address Generation Unit) cycle.
3708 if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
3709 (*MI.memoperands_begin())->getAlign() < Align(8))
3710 ++UOps;
3711 return UOps;
3712 }
3713 }
3714 }
3715 }
3716 llvm_unreachable("Didn't find the number of microops");
3717}
3718
3719std::optional<unsigned>
3720ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
3721 const MCInstrDesc &DefMCID, unsigned DefClass,
3722 unsigned DefIdx, unsigned DefAlign) const {
3723 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3724 if (RegNo <= 0)
3725 // Def is the address writeback.
3726 return ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx);
3727
3728 unsigned DefCycle;
3729 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3730 // (regno / 2) + (regno % 2) + 1
3731 DefCycle = RegNo / 2 + 1;
3732 if (RegNo % 2)
3733 ++DefCycle;
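    // For example, RegNo == 5 gives 5 / 2 + 1 = 3, plus one for the odd
    // register, i.e. a def cycle of 4.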
3734 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3735 DefCycle = RegNo;
3736 bool isSLoad = false;
3737
3738 switch (DefMCID.getOpcode()) {
3739 default: break;
3740 case ARM::VLDMSIA:
3741 case ARM::VLDMSIA_UPD:
3742 case ARM::VLDMSDB_UPD:
3743 isSLoad = true;
3744 break;
3745 }
3746
3747     // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3748 // then it takes an extra cycle.
3749 if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3750 ++DefCycle;
3751 } else {
3752 // Assume the worst.
3753 DefCycle = RegNo + 2;
3754 }
3755
3756 return DefCycle;
3757}
3758
3759std::optional<unsigned>
3760ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3761 const MCInstrDesc &DefMCID, unsigned DefClass,
3762 unsigned DefIdx, unsigned DefAlign) const {
3763 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3764 if (RegNo <= 0)
3765 // Def is the address writeback.
3766 return ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx);
3767
3768 unsigned DefCycle;
3769 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3770 // 4 registers would be issued: 1, 2, 1.
3771 // 5 registers would be issued: 1, 2, 2.
3772 DefCycle = RegNo / 2;
3773 if (DefCycle < 1)
3774 DefCycle = 1;
3775 // Result latency is issue cycle + 2: E2.
3776 DefCycle += 2;
3777 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3778 DefCycle = (RegNo / 2);
3779     // If there is an odd number of registers or if it's not 64-bit aligned,
3780 // then it takes an extra AGU (Address Generation Unit) cycle.
3781 if ((RegNo % 2) || DefAlign < 8)
3782 ++DefCycle;
3783 // Result latency is AGU cycles + 2.
3784 DefCycle += 2;
3785 } else {
3786 // Assume the worst.
3787 DefCycle = RegNo + 2;
3788 }
3789
3790 return DefCycle;
3791}
3792
3793std::optional<unsigned>
3794ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3795 const MCInstrDesc &UseMCID, unsigned UseClass,
3796 unsigned UseIdx, unsigned UseAlign) const {
3797 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3798 if (RegNo <= 0)
3799 return ItinData->getOperandCycle(ItinClassIndx: UseClass, OperandIdx: UseIdx);
3800
3801 unsigned UseCycle;
3802 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3803 // (regno / 2) + (regno % 2) + 1
3804 UseCycle = RegNo / 2 + 1;
3805 if (RegNo % 2)
3806 ++UseCycle;
3807 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3808 UseCycle = RegNo;
3809 bool isSStore = false;
3810
3811 switch (UseMCID.getOpcode()) {
3812 default: break;
3813 case ARM::VSTMSIA:
3814 case ARM::VSTMSIA_UPD:
3815 case ARM::VSTMSDB_UPD:
3816 isSStore = true;
3817 break;
3818 }
3819
3820     // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
3821 // then it takes an extra cycle.
3822 if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3823 ++UseCycle;
3824 } else {
3825 // Assume the worst.
3826 UseCycle = RegNo + 2;
3827 }
3828
3829 return UseCycle;
3830}
3831
3832std::optional<unsigned>
3833ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3834 const MCInstrDesc &UseMCID, unsigned UseClass,
3835 unsigned UseIdx, unsigned UseAlign) const {
3836 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3837 if (RegNo <= 0)
3838 return ItinData->getOperandCycle(ItinClassIndx: UseClass, OperandIdx: UseIdx);
3839
3840 unsigned UseCycle;
3841 if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
3842 UseCycle = RegNo / 2;
3843 if (UseCycle < 2)
3844 UseCycle = 2;
3845 // Read in E3.
3846 UseCycle += 2;
3847 } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3848 UseCycle = (RegNo / 2);
3849     // If there is an odd number of registers or if it's not 64-bit aligned,
3850 // then it takes an extra AGU (Address Generation Unit) cycle.
3851 if ((RegNo % 2) || UseAlign < 8)
3852 ++UseCycle;
3853 } else {
3854 // Assume the worst.
3855 UseCycle = 1;
3856 }
3857 return UseCycle;
3858}
3859
3860std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
3861 const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,
3862 unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,
3863 unsigned UseIdx, unsigned UseAlign) const {
3864 unsigned DefClass = DefMCID.getSchedClass();
3865 unsigned UseClass = UseMCID.getSchedClass();
3866
3867 if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3868 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3869
3870 // This may be a def / use of a variable_ops instruction, the operand
3871 // latency might be determinable dynamically. Let the target try to
3872 // figure it out.
3873 std::optional<unsigned> DefCycle;
3874 bool LdmBypass = false;
3875 switch (DefMCID.getOpcode()) {
3876 default:
3877 DefCycle = ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx);
3878 break;
3879
3880 case ARM::VLDMDIA:
3881 case ARM::VLDMDIA_UPD:
3882 case ARM::VLDMDDB_UPD:
3883 case ARM::VLDMSIA:
3884 case ARM::VLDMSIA_UPD:
3885 case ARM::VLDMSDB_UPD:
3886 DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3887 break;
3888
3889 case ARM::LDMIA_RET:
3890 case ARM::LDMIA:
3891 case ARM::LDMDA:
3892 case ARM::LDMDB:
3893 case ARM::LDMIB:
3894 case ARM::LDMIA_UPD:
3895 case ARM::LDMDA_UPD:
3896 case ARM::LDMDB_UPD:
3897 case ARM::LDMIB_UPD:
3898 case ARM::tLDMIA:
3899 case ARM::tLDMIA_UPD:
3900 case ARM::tPUSH:
3901 case ARM::t2LDMIA_RET:
3902 case ARM::t2LDMIA:
3903 case ARM::t2LDMDB:
3904 case ARM::t2LDMIA_UPD:
3905 case ARM::t2LDMDB_UPD:
3906 LdmBypass = true;
3907 DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3908 break;
3909 }
3910
3911 if (!DefCycle)
3912     // We can't seem to determine the result latency of the def; assume it's 2.
3913 DefCycle = 2;
3914
3915 std::optional<unsigned> UseCycle;
3916 switch (UseMCID.getOpcode()) {
3917 default:
3918 UseCycle = ItinData->getOperandCycle(ItinClassIndx: UseClass, OperandIdx: UseIdx);
3919 break;
3920
3921 case ARM::VSTMDIA:
3922 case ARM::VSTMDIA_UPD:
3923 case ARM::VSTMDDB_UPD:
3924 case ARM::VSTMSIA:
3925 case ARM::VSTMSIA_UPD:
3926 case ARM::VSTMSDB_UPD:
3927 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3928 break;
3929
3930 case ARM::STMIA:
3931 case ARM::STMDA:
3932 case ARM::STMDB:
3933 case ARM::STMIB:
3934 case ARM::STMIA_UPD:
3935 case ARM::STMDA_UPD:
3936 case ARM::STMDB_UPD:
3937 case ARM::STMIB_UPD:
3938 case ARM::tSTMIA_UPD:
3939 case ARM::tPOP_RET:
3940 case ARM::tPOP:
3941 case ARM::t2STMIA:
3942 case ARM::t2STMDB:
3943 case ARM::t2STMIA_UPD:
3944 case ARM::t2STMDB_UPD:
3945 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3946 break;
3947 }
3948
3949 if (!UseCycle)
3950 // Assume it's read in the first stage.
3951 UseCycle = 1;
3952
3953 if (UseCycle > *DefCycle + 1)
3954 return std::nullopt;
3955
3956 UseCycle = *DefCycle - *UseCycle + 1;
3957 if (UseCycle > 0u) {
3958 if (LdmBypass) {
3959 // It's a variable_ops instruction so we can't use DefIdx here. Just use
3960 // first def operand.
3961 if (ItinData->hasPipelineForwarding(DefClass, DefIdx: DefMCID.getNumOperands()-1,
3962 UseClass, UseIdx))
3963 UseCycle = *UseCycle - 1;
3964 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
3965 UseClass, UseIdx)) {
3966 UseCycle = *UseCycle - 1;
3967 }
3968 }
3969
3970 return UseCycle;
3971}
3972
3973static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
3974 const MachineInstr *MI, unsigned Reg,
3975 unsigned &DefIdx, unsigned &Dist) {
3976 Dist = 0;
3977
3978 MachineBasicBlock::const_iterator I = MI; ++I;
3979 MachineBasicBlock::const_instr_iterator II = std::prev(x: I.getInstrIterator());
3980 assert(II->isInsideBundle() && "Empty bundle?");
3981
3982 int Idx = -1;
3983 while (II->isInsideBundle()) {
3984 Idx = II->findRegisterDefOperandIdx(Reg, TRI, isDead: false, Overlap: true);
3985 if (Idx != -1)
3986 break;
3987 --II;
3988 ++Dist;
3989 }
3990
3991 assert(Idx != -1 && "Cannot find bundled definition!");
3992 DefIdx = Idx;
3993 return &*II;
3994}
3995
3996static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
3997 const MachineInstr &MI, unsigned Reg,
3998 unsigned &UseIdx, unsigned &Dist) {
3999 Dist = 0;
4000
4001 MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
4002 assert(II->isInsideBundle() && "Empty bundle?");
4003 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4004
4005 // FIXME: This doesn't properly handle multiple uses.
4006 int Idx = -1;
4007 while (II != E && II->isInsideBundle()) {
4008 Idx = II->findRegisterUseOperandIdx(Reg, TRI, isKill: false);
4009 if (Idx != -1)
4010 break;
4011 if (II->getOpcode() != ARM::t2IT)
4012 ++Dist;
4013 ++II;
4014 }
4015
4016 if (Idx == -1) {
4017 Dist = 0;
4018 return nullptr;
4019 }
4020
4021 UseIdx = Idx;
4022 return &*II;
4023}
4024
4025/// Return the number of cycles to add to (or subtract from) the static
4026/// itinerary based on the def opcode and alignment. The caller will ensure that
4027 /// the adjusted latency is at least one cycle.
4028static int adjustDefLatency(const ARMSubtarget &Subtarget,
4029 const MachineInstr &DefMI,
4030 const MCInstrDesc &DefMCID, unsigned DefAlign) {
4031 int Adjust = 0;
4032 if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
4033 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4034 // variants are one cycle cheaper.
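    // For example, "ldr r0, [r1, r2]" or "ldr r0, [r1, r2, lsl #2]" gets the
    // one-cycle credit below, while "ldr r0, [r1, r2, lsl #1]" does not.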
4035 switch (DefMCID.getOpcode()) {
4036 default: break;
4037 case ARM::LDRrs:
4038 case ARM::LDRBrs: {
4039 unsigned ShOpVal = DefMI.getOperand(i: 3).getImm();
4040 unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal);
4041 if (ShImm == 0 ||
4042 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))
4043 --Adjust;
4044 break;
4045 }
4046 case ARM::t2LDRs:
4047 case ARM::t2LDRBs:
4048 case ARM::t2LDRHs:
4049 case ARM::t2LDRSHs: {
4050 // Thumb2 mode: lsl only.
4051 unsigned ShAmt = DefMI.getOperand(i: 3).getImm();
4052 if (ShAmt == 0 || ShAmt == 2)
4053 --Adjust;
4054 break;
4055 }
4056 }
4057 } else if (Subtarget.isSwift()) {
4058 // FIXME: Properly handle all of the latency adjustments for address
4059 // writeback.
4060 switch (DefMCID.getOpcode()) {
4061 default: break;
4062 case ARM::LDRrs:
4063 case ARM::LDRBrs: {
4064 unsigned ShOpVal = DefMI.getOperand(i: 3).getImm();
4065 bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub;
4066 unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal);
4067 if (!isSub &&
4068 (ShImm == 0 ||
4069 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4070 ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl)))
4071 Adjust -= 2;
4072 else if (!isSub &&
4073 ShImm == 1 && ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsr)
4074 --Adjust;
4075 break;
4076 }
4077 case ARM::t2LDRs:
4078 case ARM::t2LDRBs:
4079 case ARM::t2LDRHs:
4080 case ARM::t2LDRSHs: {
4081 // Thumb2 mode: lsl only.
4082 unsigned ShAmt = DefMI.getOperand(i: 3).getImm();
4083 if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
4084 Adjust -= 2;
4085 break;
4086 }
4087 }
4088 }
4089
4090 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
4091 switch (DefMCID.getOpcode()) {
4092 default: break;
4093 case ARM::VLD1q8:
4094 case ARM::VLD1q16:
4095 case ARM::VLD1q32:
4096 case ARM::VLD1q64:
4097 case ARM::VLD1q8wb_fixed:
4098 case ARM::VLD1q16wb_fixed:
4099 case ARM::VLD1q32wb_fixed:
4100 case ARM::VLD1q64wb_fixed:
4101 case ARM::VLD1q8wb_register:
4102 case ARM::VLD1q16wb_register:
4103 case ARM::VLD1q32wb_register:
4104 case ARM::VLD1q64wb_register:
4105 case ARM::VLD2d8:
4106 case ARM::VLD2d16:
4107 case ARM::VLD2d32:
4108 case ARM::VLD2q8:
4109 case ARM::VLD2q16:
4110 case ARM::VLD2q32:
4111 case ARM::VLD2d8wb_fixed:
4112 case ARM::VLD2d16wb_fixed:
4113 case ARM::VLD2d32wb_fixed:
4114 case ARM::VLD2q8wb_fixed:
4115 case ARM::VLD2q16wb_fixed:
4116 case ARM::VLD2q32wb_fixed:
4117 case ARM::VLD2d8wb_register:
4118 case ARM::VLD2d16wb_register:
4119 case ARM::VLD2d32wb_register:
4120 case ARM::VLD2q8wb_register:
4121 case ARM::VLD2q16wb_register:
4122 case ARM::VLD2q32wb_register:
4123 case ARM::VLD3d8:
4124 case ARM::VLD3d16:
4125 case ARM::VLD3d32:
4126 case ARM::VLD1d64T:
4127 case ARM::VLD3d8_UPD:
4128 case ARM::VLD3d16_UPD:
4129 case ARM::VLD3d32_UPD:
4130 case ARM::VLD1d64Twb_fixed:
4131 case ARM::VLD1d64Twb_register:
4132 case ARM::VLD3q8_UPD:
4133 case ARM::VLD3q16_UPD:
4134 case ARM::VLD3q32_UPD:
4135 case ARM::VLD4d8:
4136 case ARM::VLD4d16:
4137 case ARM::VLD4d32:
4138 case ARM::VLD1d64Q:
4139 case ARM::VLD4d8_UPD:
4140 case ARM::VLD4d16_UPD:
4141 case ARM::VLD4d32_UPD:
4142 case ARM::VLD1d64Qwb_fixed:
4143 case ARM::VLD1d64Qwb_register:
4144 case ARM::VLD4q8_UPD:
4145 case ARM::VLD4q16_UPD:
4146 case ARM::VLD4q32_UPD:
4147 case ARM::VLD1DUPq8:
4148 case ARM::VLD1DUPq16:
4149 case ARM::VLD1DUPq32:
4150 case ARM::VLD1DUPq8wb_fixed:
4151 case ARM::VLD1DUPq16wb_fixed:
4152 case ARM::VLD1DUPq32wb_fixed:
4153 case ARM::VLD1DUPq8wb_register:
4154 case ARM::VLD1DUPq16wb_register:
4155 case ARM::VLD1DUPq32wb_register:
4156 case ARM::VLD2DUPd8:
4157 case ARM::VLD2DUPd16:
4158 case ARM::VLD2DUPd32:
4159 case ARM::VLD2DUPd8wb_fixed:
4160 case ARM::VLD2DUPd16wb_fixed:
4161 case ARM::VLD2DUPd32wb_fixed:
4162 case ARM::VLD2DUPd8wb_register:
4163 case ARM::VLD2DUPd16wb_register:
4164 case ARM::VLD2DUPd32wb_register:
4165 case ARM::VLD4DUPd8:
4166 case ARM::VLD4DUPd16:
4167 case ARM::VLD4DUPd32:
4168 case ARM::VLD4DUPd8_UPD:
4169 case ARM::VLD4DUPd16_UPD:
4170 case ARM::VLD4DUPd32_UPD:
4171 case ARM::VLD1LNd8:
4172 case ARM::VLD1LNd16:
4173 case ARM::VLD1LNd32:
4174 case ARM::VLD1LNd8_UPD:
4175 case ARM::VLD1LNd16_UPD:
4176 case ARM::VLD1LNd32_UPD:
4177 case ARM::VLD2LNd8:
4178 case ARM::VLD2LNd16:
4179 case ARM::VLD2LNd32:
4180 case ARM::VLD2LNq16:
4181 case ARM::VLD2LNq32:
4182 case ARM::VLD2LNd8_UPD:
4183 case ARM::VLD2LNd16_UPD:
4184 case ARM::VLD2LNd32_UPD:
4185 case ARM::VLD2LNq16_UPD:
4186 case ARM::VLD2LNq32_UPD:
4187 case ARM::VLD4LNd8:
4188 case ARM::VLD4LNd16:
4189 case ARM::VLD4LNd32:
4190 case ARM::VLD4LNq16:
4191 case ARM::VLD4LNq32:
4192 case ARM::VLD4LNd8_UPD:
4193 case ARM::VLD4LNd16_UPD:
4194 case ARM::VLD4LNd32_UPD:
4195 case ARM::VLD4LNq16_UPD:
4196 case ARM::VLD4LNq32_UPD:
4197 // If the address is not 64-bit aligned, the latencies of these
4198       // instructions increase by one.
4199 ++Adjust;
4200 break;
4201 }
4202 }
4203 return Adjust;
4204}
4205
4206std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(
4207 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4208 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
4209 // No operand latency. The caller may fall back to getInstrLatency.
4210 if (!ItinData || ItinData->isEmpty())
4211 return std::nullopt;
4212
4213 const MachineOperand &DefMO = DefMI.getOperand(i: DefIdx);
4214 Register Reg = DefMO.getReg();
4215
4216 const MachineInstr *ResolvedDefMI = &DefMI;
4217 unsigned DefAdj = 0;
4218 if (DefMI.isBundle())
4219 ResolvedDefMI =
4220 getBundledDefMI(TRI: &getRegisterInfo(), MI: &DefMI, Reg, DefIdx, Dist&: DefAdj);
4221 if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
4222 ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
4223 return 1;
4224 }
4225
4226 const MachineInstr *ResolvedUseMI = &UseMI;
4227 unsigned UseAdj = 0;
4228 if (UseMI.isBundle()) {
4229 ResolvedUseMI =
4230 getBundledUseMI(TRI: &getRegisterInfo(), MI: UseMI, Reg, UseIdx, Dist&: UseAdj);
4231 if (!ResolvedUseMI)
4232 return std::nullopt;
4233 }
4234
4235 return getOperandLatencyImpl(
4236 ItinData, DefMI: *ResolvedDefMI, DefIdx, DefMCID: ResolvedDefMI->getDesc(), DefAdj, DefMO,
4237 Reg, UseMI: *ResolvedUseMI, UseIdx, UseMCID: ResolvedUseMI->getDesc(), UseAdj);
4238}
4239
4240std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(
4241 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
4242 unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
4243 const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
4244 unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
4245 if (Reg == ARM::CPSR) {
4246 if (DefMI.getOpcode() == ARM::FMSTAT) {
4247 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
4248 return Subtarget.isLikeA9() ? 1 : 20;
4249 }
4250
4251 // CPSR set and branch can be paired in the same cycle.
4252 if (UseMI.isBranch())
4253 return 0;
4254
4255 // Otherwise it takes the instruction latency (generally one).
4256 unsigned Latency = getInstrLatency(ItinData, MI: DefMI);
4257
4258 // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
4259 // its uses. Instructions which are otherwise scheduled between them may
4260 // incur a code size penalty (not able to use the CPSR setting 16-bit
4261 // instructions).
4262 if (Latency > 0 && Subtarget.isThumb2()) {
4263 const MachineFunction *MF = DefMI.getParent()->getParent();
4264 // FIXME: Use Function::hasOptSize().
4265 if (MF->getFunction().hasFnAttribute(Kind: Attribute::OptimizeForSize))
4266 --Latency;
4267 }
4268 return Latency;
4269 }
4270
4271 if (DefMO.isImplicit() || UseMI.getOperand(i: UseIdx).isImplicit())
4272 return std::nullopt;
4273
4274 unsigned DefAlign = DefMI.hasOneMemOperand()
4275 ? (*DefMI.memoperands_begin())->getAlign().value()
4276 : 0;
4277 unsigned UseAlign = UseMI.hasOneMemOperand()
4278 ? (*UseMI.memoperands_begin())->getAlign().value()
4279 : 0;
4280
4281 // Get the itinerary's latency if possible, and handle variable_ops.
4282 std::optional<unsigned> Latency = getOperandLatency(
4283 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4284 // Unable to find operand latency. The caller may resort to getInstrLatency.
4285 if (!Latency)
4286 return std::nullopt;
4287
4288 // Adjust for IT block position.
4289 int Adj = DefAdj + UseAdj;
4290
4291 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4292 Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
4293 if (Adj >= 0 || (int)*Latency > -Adj) {
4294 return *Latency + Adj;
4295 }
4296  // Return the itinerary latency, which may be zero but is never negative.
4297 return Latency;
4298}
4299
4300std::optional<unsigned>
4301ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
4302 SDNode *DefNode, unsigned DefIdx,
4303 SDNode *UseNode, unsigned UseIdx) const {
4304 if (!DefNode->isMachineOpcode())
4305 return 1;
4306
4307 const MCInstrDesc &DefMCID = get(Opcode: DefNode->getMachineOpcode());
4308
4309 if (isZeroCost(Opcode: DefMCID.Opcode))
4310 return 0;
4311
4312 if (!ItinData || ItinData->isEmpty())
4313 return DefMCID.mayLoad() ? 3 : 1;
4314
4315 if (!UseNode->isMachineOpcode()) {
4316 std::optional<unsigned> Latency =
4317 ItinData->getOperandCycle(ItinClassIndx: DefMCID.getSchedClass(), OperandIdx: DefIdx);
4318 int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
4319 int Threshold = 1 + Adj;
4320 return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;
4321 }
4322
4323 const MCInstrDesc &UseMCID = get(Opcode: UseNode->getMachineOpcode());
4324 auto *DefMN = cast<MachineSDNode>(Val: DefNode);
4325 unsigned DefAlign = !DefMN->memoperands_empty()
4326 ? (*DefMN->memoperands_begin())->getAlign().value()
4327 : 0;
4328 auto *UseMN = cast<MachineSDNode>(Val: UseNode);
4329 unsigned UseAlign = !UseMN->memoperands_empty()
4330 ? (*UseMN->memoperands_begin())->getAlign().value()
4331 : 0;
4332 std::optional<unsigned> Latency = getOperandLatency(
4333 ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);
4334 if (!Latency)
4335 return std::nullopt;
4336
4337 if (Latency > 1U &&
4338 (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
4339 Subtarget.isCortexA7())) {
4340 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
4341 // variants are one cycle cheaper.
4342 switch (DefMCID.getOpcode()) {
4343 default: break;
4344 case ARM::LDRrs:
4345 case ARM::LDRBrs: {
4346 unsigned ShOpVal = DefNode->getConstantOperandVal(Num: 2);
4347 unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal);
4348 if (ShImm == 0 ||
4349 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))
4350 Latency = *Latency - 1;
4351 break;
4352 }
4353 case ARM::t2LDRs:
4354 case ARM::t2LDRBs:
4355 case ARM::t2LDRHs:
4356 case ARM::t2LDRSHs: {
4357 // Thumb2 mode: lsl only.
4358 unsigned ShAmt = DefNode->getConstantOperandVal(Num: 2);
4359 if (ShAmt == 0 || ShAmt == 2)
4360 Latency = *Latency - 1;
4361 break;
4362 }
4363 }
4364 } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {
4365 // FIXME: Properly handle all of the latency adjustments for address
4366 // writeback.
4367 switch (DefMCID.getOpcode()) {
4368 default: break;
4369 case ARM::LDRrs:
4370 case ARM::LDRBrs: {
4371 unsigned ShOpVal = DefNode->getConstantOperandVal(Num: 2);
4372 unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal);
4373 if (ShImm == 0 ||
4374 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
4375 ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))
4376 Latency = *Latency - 2;
4377 else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsr)
4378 Latency = *Latency - 1;
4379 break;
4380 }
4381 case ARM::t2LDRs:
4382 case ARM::t2LDRBs:
4383 case ARM::t2LDRHs:
4384 case ARM::t2LDRSHs:
4385 // Thumb2 mode: lsl 0-3 only.
4386 Latency = *Latency - 2;
4387 break;
4388 }
4389 }
4390
4391 if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
4392 switch (DefMCID.getOpcode()) {
4393 default: break;
4394 case ARM::VLD1q8:
4395 case ARM::VLD1q16:
4396 case ARM::VLD1q32:
4397 case ARM::VLD1q64:
4398 case ARM::VLD1q8wb_register:
4399 case ARM::VLD1q16wb_register:
4400 case ARM::VLD1q32wb_register:
4401 case ARM::VLD1q64wb_register:
4402 case ARM::VLD1q8wb_fixed:
4403 case ARM::VLD1q16wb_fixed:
4404 case ARM::VLD1q32wb_fixed:
4405 case ARM::VLD1q64wb_fixed:
4406 case ARM::VLD2d8:
4407 case ARM::VLD2d16:
4408 case ARM::VLD2d32:
4409 case ARM::VLD2q8Pseudo:
4410 case ARM::VLD2q16Pseudo:
4411 case ARM::VLD2q32Pseudo:
4412 case ARM::VLD2d8wb_fixed:
4413 case ARM::VLD2d16wb_fixed:
4414 case ARM::VLD2d32wb_fixed:
4415 case ARM::VLD2q8PseudoWB_fixed:
4416 case ARM::VLD2q16PseudoWB_fixed:
4417 case ARM::VLD2q32PseudoWB_fixed:
4418 case ARM::VLD2d8wb_register:
4419 case ARM::VLD2d16wb_register:
4420 case ARM::VLD2d32wb_register:
4421 case ARM::VLD2q8PseudoWB_register:
4422 case ARM::VLD2q16PseudoWB_register:
4423 case ARM::VLD2q32PseudoWB_register:
4424 case ARM::VLD3d8Pseudo:
4425 case ARM::VLD3d16Pseudo:
4426 case ARM::VLD3d32Pseudo:
4427 case ARM::VLD1d8TPseudo:
4428 case ARM::VLD1d16TPseudo:
4429 case ARM::VLD1d32TPseudo:
4430 case ARM::VLD1d64TPseudo:
4431 case ARM::VLD1d64TPseudoWB_fixed:
4432 case ARM::VLD1d64TPseudoWB_register:
4433 case ARM::VLD3d8Pseudo_UPD:
4434 case ARM::VLD3d16Pseudo_UPD:
4435 case ARM::VLD3d32Pseudo_UPD:
4436 case ARM::VLD3q8Pseudo_UPD:
4437 case ARM::VLD3q16Pseudo_UPD:
4438 case ARM::VLD3q32Pseudo_UPD:
4439 case ARM::VLD3q8oddPseudo:
4440 case ARM::VLD3q16oddPseudo:
4441 case ARM::VLD3q32oddPseudo:
4442 case ARM::VLD3q8oddPseudo_UPD:
4443 case ARM::VLD3q16oddPseudo_UPD:
4444 case ARM::VLD3q32oddPseudo_UPD:
4445 case ARM::VLD4d8Pseudo:
4446 case ARM::VLD4d16Pseudo:
4447 case ARM::VLD4d32Pseudo:
4448 case ARM::VLD1d8QPseudo:
4449 case ARM::VLD1d16QPseudo:
4450 case ARM::VLD1d32QPseudo:
4451 case ARM::VLD1d64QPseudo:
4452 case ARM::VLD1d64QPseudoWB_fixed:
4453 case ARM::VLD1d64QPseudoWB_register:
4454 case ARM::VLD1q8HighQPseudo:
4455 case ARM::VLD1q8LowQPseudo_UPD:
4456 case ARM::VLD1q8HighTPseudo:
4457 case ARM::VLD1q8LowTPseudo_UPD:
4458 case ARM::VLD1q16HighQPseudo:
4459 case ARM::VLD1q16LowQPseudo_UPD:
4460 case ARM::VLD1q16HighTPseudo:
4461 case ARM::VLD1q16LowTPseudo_UPD:
4462 case ARM::VLD1q32HighQPseudo:
4463 case ARM::VLD1q32LowQPseudo_UPD:
4464 case ARM::VLD1q32HighTPseudo:
4465 case ARM::VLD1q32LowTPseudo_UPD:
4466 case ARM::VLD1q64HighQPseudo:
4467 case ARM::VLD1q64LowQPseudo_UPD:
4468 case ARM::VLD1q64HighTPseudo:
4469 case ARM::VLD1q64LowTPseudo_UPD:
4470 case ARM::VLD4d8Pseudo_UPD:
4471 case ARM::VLD4d16Pseudo_UPD:
4472 case ARM::VLD4d32Pseudo_UPD:
4473 case ARM::VLD4q8Pseudo_UPD:
4474 case ARM::VLD4q16Pseudo_UPD:
4475 case ARM::VLD4q32Pseudo_UPD:
4476 case ARM::VLD4q8oddPseudo:
4477 case ARM::VLD4q16oddPseudo:
4478 case ARM::VLD4q32oddPseudo:
4479 case ARM::VLD4q8oddPseudo_UPD:
4480 case ARM::VLD4q16oddPseudo_UPD:
4481 case ARM::VLD4q32oddPseudo_UPD:
4482 case ARM::VLD1DUPq8:
4483 case ARM::VLD1DUPq16:
4484 case ARM::VLD1DUPq32:
4485 case ARM::VLD1DUPq8wb_fixed:
4486 case ARM::VLD1DUPq16wb_fixed:
4487 case ARM::VLD1DUPq32wb_fixed:
4488 case ARM::VLD1DUPq8wb_register:
4489 case ARM::VLD1DUPq16wb_register:
4490 case ARM::VLD1DUPq32wb_register:
4491 case ARM::VLD2DUPd8:
4492 case ARM::VLD2DUPd16:
4493 case ARM::VLD2DUPd32:
4494 case ARM::VLD2DUPd8wb_fixed:
4495 case ARM::VLD2DUPd16wb_fixed:
4496 case ARM::VLD2DUPd32wb_fixed:
4497 case ARM::VLD2DUPd8wb_register:
4498 case ARM::VLD2DUPd16wb_register:
4499 case ARM::VLD2DUPd32wb_register:
4500 case ARM::VLD2DUPq8EvenPseudo:
4501 case ARM::VLD2DUPq8OddPseudo:
4502 case ARM::VLD2DUPq16EvenPseudo:
4503 case ARM::VLD2DUPq16OddPseudo:
4504 case ARM::VLD2DUPq32EvenPseudo:
4505 case ARM::VLD2DUPq32OddPseudo:
4506 case ARM::VLD3DUPq8EvenPseudo:
4507 case ARM::VLD3DUPq8OddPseudo:
4508 case ARM::VLD3DUPq16EvenPseudo:
4509 case ARM::VLD3DUPq16OddPseudo:
4510 case ARM::VLD3DUPq32EvenPseudo:
4511 case ARM::VLD3DUPq32OddPseudo:
4512 case ARM::VLD4DUPd8Pseudo:
4513 case ARM::VLD4DUPd16Pseudo:
4514 case ARM::VLD4DUPd32Pseudo:
4515 case ARM::VLD4DUPd8Pseudo_UPD:
4516 case ARM::VLD4DUPd16Pseudo_UPD:
4517 case ARM::VLD4DUPd32Pseudo_UPD:
4518 case ARM::VLD4DUPq8EvenPseudo:
4519 case ARM::VLD4DUPq8OddPseudo:
4520 case ARM::VLD4DUPq16EvenPseudo:
4521 case ARM::VLD4DUPq16OddPseudo:
4522 case ARM::VLD4DUPq32EvenPseudo:
4523 case ARM::VLD4DUPq32OddPseudo:
4524 case ARM::VLD1LNq8Pseudo:
4525 case ARM::VLD1LNq16Pseudo:
4526 case ARM::VLD1LNq32Pseudo:
4527 case ARM::VLD1LNq8Pseudo_UPD:
4528 case ARM::VLD1LNq16Pseudo_UPD:
4529 case ARM::VLD1LNq32Pseudo_UPD:
4530 case ARM::VLD2LNd8Pseudo:
4531 case ARM::VLD2LNd16Pseudo:
4532 case ARM::VLD2LNd32Pseudo:
4533 case ARM::VLD2LNq16Pseudo:
4534 case ARM::VLD2LNq32Pseudo:
4535 case ARM::VLD2LNd8Pseudo_UPD:
4536 case ARM::VLD2LNd16Pseudo_UPD:
4537 case ARM::VLD2LNd32Pseudo_UPD:
4538 case ARM::VLD2LNq16Pseudo_UPD:
4539 case ARM::VLD2LNq32Pseudo_UPD:
4540 case ARM::VLD4LNd8Pseudo:
4541 case ARM::VLD4LNd16Pseudo:
4542 case ARM::VLD4LNd32Pseudo:
4543 case ARM::VLD4LNq16Pseudo:
4544 case ARM::VLD4LNq32Pseudo:
4545 case ARM::VLD4LNd8Pseudo_UPD:
4546 case ARM::VLD4LNd16Pseudo_UPD:
4547 case ARM::VLD4LNd32Pseudo_UPD:
4548 case ARM::VLD4LNq16Pseudo_UPD:
4549 case ARM::VLD4LNq32Pseudo_UPD:
4550 // If the address is not 64-bit aligned, the latencies of these
4551       // instructions increase by one.
4552 Latency = *Latency + 1;
4553 break;
4554 }
4555
4556 return Latency;
4557}
4558
4559unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
4560 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4561 MI.isImplicitDef())
4562 return 0;
4563
4564 if (MI.isBundle())
4565 return 0;
4566
4567 const MCInstrDesc &MCID = MI.getDesc();
4568
4569 if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(Reg: ARM::CPSR) &&
4570 !Subtarget.cheapPredicableCPSRDef())) {
4571 // When predicated, CPSR is an additional source operand for CPSR updating
4572     // instructions, which apparently increases their latencies.
4573 return 1;
4574 }
4575 return 0;
4576}
4577
4578unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4579 const MachineInstr &MI,
4580 unsigned *PredCost) const {
4581 if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
4582 MI.isImplicitDef())
4583 return 1;
4584
4585   // An instruction scheduler typically runs on unbundled instructions; however,
4586 // other passes may query the latency of a bundled instruction.
4587 if (MI.isBundle()) {
4588 unsigned Latency = 0;
4589 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
4590 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
4591 while (++I != E && I->isInsideBundle()) {
4592 if (I->getOpcode() != ARM::t2IT)
4593 Latency += getInstrLatency(ItinData, MI: *I, PredCost);
4594 }
4595 return Latency;
4596 }
4597
4598 const MCInstrDesc &MCID = MI.getDesc();
4599 if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(Reg: ARM::CPSR) &&
4600 !Subtarget.cheapPredicableCPSRDef()))) {
4601 // When predicated, CPSR is an additional source operand for CPSR updating
4602     // instructions, which apparently increases their latencies.
4603 *PredCost = 1;
4604 }
4605 // Be sure to call getStageLatency for an empty itinerary in case it has a
4606 // valid MinLatency property.
4607 if (!ItinData)
4608 return MI.mayLoad() ? 3 : 1;
4609
4610 unsigned Class = MCID.getSchedClass();
4611
4612 // For instructions with variable uops, use uops as latency.
4613 if (!ItinData->isEmpty() && ItinData->getNumMicroOps(ItinClassIndx: Class) < 0)
4614 return getNumMicroOps(ItinData, MI);
4615
4616 // For the common case, fall back on the itinerary's latency.
4617 unsigned Latency = ItinData->getStageLatency(ItinClassIndx: Class);
4618
4619 // Adjust for dynamic def-side opcode variants not captured by the itinerary.
4620 unsigned DefAlign =
4621 MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
4622 int Adj = adjustDefLatency(Subtarget, DefMI: MI, DefMCID: MCID, DefAlign);
4623 if (Adj >= 0 || (int)Latency > -Adj) {
4624 return Latency + Adj;
4625 }
4626 return Latency;
4627}
4628
4629unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
4630 SDNode *Node) const {
4631 if (!Node->isMachineOpcode())
4632 return 1;
4633
4634 if (!ItinData || ItinData->isEmpty())
4635 return 1;
4636
4637 unsigned Opcode = Node->getMachineOpcode();
4638 switch (Opcode) {
4639 default:
4640 return ItinData->getStageLatency(ItinClassIndx: get(Opcode).getSchedClass());
4641 case ARM::VLDMQIA:
4642 case ARM::VSTMQIA:
4643 return 2;
4644 }
4645}
4646
4647bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
4648 const MachineRegisterInfo *MRI,
4649 const MachineInstr &DefMI,
4650 unsigned DefIdx,
4651 const MachineInstr &UseMI,
4652 unsigned UseIdx) const {
4653 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4654 unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
4655 if (Subtarget.nonpipelinedVFP() &&
4656 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
4657 return true;
4658
4659 // Hoist VFP / NEON instructions with 4 or higher latency.
4660 unsigned Latency =
4661 SchedModel.computeOperandLatency(DefMI: &DefMI, DefOperIdx: DefIdx, UseMI: &UseMI, UseOperIdx: UseIdx);
4662 if (Latency <= 3)
4663 return false;
4664 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
4665 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
4666}
4667
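// A def is considered "low latency" when the itinerary reports the result of
// a general (integer) domain instruction as available within the first two
// cycles.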
4668bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
4669 const MachineInstr &DefMI,
4670 unsigned DefIdx) const {
4671 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
4672 if (!ItinData || ItinData->isEmpty())
4673 return false;
4674
4675 unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
4676 if (DDomain == ARMII::DomainGeneral) {
4677 unsigned DefClass = DefMI.getDesc().getSchedClass();
4678 std::optional<unsigned> DefCycle =
4679 ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx);
4680 return DefCycle && DefCycle <= 2U;
4681 }
4682 return false;
4683}
4684
4685bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
4686 StringRef &ErrInfo) const {
4687 if (convertAddSubFlagsOpcode(OldOpc: MI.getOpcode())) {
4688 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4689 return false;
4690 }
4691 if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
4692 // Make sure we don't generate a lo-lo mov that isn't supported.
4693 if (!ARM::hGPRRegClass.contains(Reg: MI.getOperand(i: 0).getReg()) &&
4694 !ARM::hGPRRegClass.contains(Reg: MI.getOperand(i: 1).getReg())) {
4695 ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4696 return false;
4697 }
4698 }
4699 if (MI.getOpcode() == ARM::tPUSH ||
4700 MI.getOpcode() == ARM::tPOP ||
4701 MI.getOpcode() == ARM::tPOP_RET) {
4702 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands(), N: 2)) {
4703 if (MO.isImplicit() || !MO.isReg())
4704 continue;
4705 Register Reg = MO.getReg();
4706 if (Reg < ARM::R0 || Reg > ARM::R7) {
4707 if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
4708 !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
4709 ErrInfo = "Unsupported register in Thumb1 push/pop";
4710 return false;
4711 }
4712 }
4713 }
4714 }
4715 if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4716 assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
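    // Only the lane-index pairs (2, 0) and (3, 1) are accepted here.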
4717 if ((MI.getOperand(i: 4).getImm() != 2 && MI.getOperand(i: 4).getImm() != 3) ||
4718 MI.getOperand(i: 4).getImm() != MI.getOperand(i: 5).getImm() + 2) {
4719 ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4720 return false;
4721 }
4722 }
4723
4724 // Check the addressing mode by taking the first Imm operand and checking
4725 // that it is legal for that addressing mode.
4726 ARMII::AddrMode AddrMode =
4727 (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);
4728 switch (AddrMode) {
4729 default:
4730 break;
4731 case ARMII::AddrModeT2_i7:
4732 case ARMII::AddrModeT2_i7s2:
4733 case ARMII::AddrModeT2_i7s4:
4734 case ARMII::AddrModeT2_i8:
4735 case ARMII::AddrModeT2_i8pos:
4736 case ARMII::AddrModeT2_i8neg:
4737 case ARMII::AddrModeT2_i8s4:
4738 case ARMII::AddrModeT2_i12: {
4739 uint32_t Imm = 0;
4740 for (auto Op : MI.operands()) {
4741 if (Op.isImm()) {
4742 Imm = Op.getImm();
4743 break;
4744 }
4745 }
4746 if (!isLegalAddressImm(Opcode: MI.getOpcode(), Imm, TII: this)) {
4747 ErrInfo = "Incorrect AddrMode Imm for instruction";
4748 return false;
4749 }
4750 break;
4751 }
4752 }
4753 return true;
4754}
4755
4756void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
4757 unsigned LoadImmOpc,
4758 unsigned LoadOpc) const {
4759 assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
4760 "ROPI/RWPI not currently supported with stack guard");
4761
4762 MachineBasicBlock &MBB = *MI->getParent();
4763 DebugLoc DL = MI->getDebugLoc();
4764 Register Reg = MI->getOperand(i: 0).getReg();
4765 MachineInstrBuilder MIB;
4766 unsigned int Offset = 0;
4767
4768 if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
4769 assert(!Subtarget.isReadTPSoft() &&
4770 "TLS stack protector requires hardware TLS register");
4771
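    // The MRC operands below encode coprocessor 15, opc1 0, CRn c13, CRm c0,
    // opc2 3, i.e. a read of the user read-only thread ID register (TPIDRURO),
    // which is used as the base for the TLS-relative stack guard.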
4772 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: LoadImmOpc), DestReg: Reg)
4773 .addImm(Val: 15)
4774 .addImm(Val: 0)
4775 .addImm(Val: 13)
4776 .addImm(Val: 0)
4777 .addImm(Val: 3)
4778 .add(MOs: predOps(Pred: ARMCC::AL));
4779
4780 Module &M = *MBB.getParent()->getFunction().getParent();
4781 Offset = M.getStackProtectorGuardOffset();
4782 if (Offset & ~0xfffU) {
4783 // The offset won't fit in the LDR's 12-bit immediate field, so emit an
4784 // extra ADD to cover the delta. This gives us a guaranteed 8 additional
4785 // bits, resulting in a range of 0 to +1 MiB for the guard offset.
4786 unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
4787 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: AddOpc), DestReg: Reg)
4788 .addReg(RegNo: Reg, flags: RegState::Kill)
4789 .addImm(Val: Offset & ~0xfffU)
4790 .add(MOs: predOps(Pred: ARMCC::AL))
4791 .addReg(RegNo: 0);
4792 Offset &= 0xfffU;
4793 }
4794 } else {
4795 const GlobalValue *GV =
4796 cast<GlobalValue>(Val: (*MI->memoperands_begin())->getValue());
4797 bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
4798
4799 unsigned TargetFlags = ARMII::MO_NO_FLAG;
4800 if (Subtarget.isTargetMachO()) {
4801 TargetFlags |= ARMII::MO_NONLAZY;
4802 } else if (Subtarget.isTargetCOFF()) {
4803 if (GV->hasDLLImportStorageClass())
4804 TargetFlags |= ARMII::MO_DLLIMPORT;
4805 else if (IsIndirect)
4806 TargetFlags |= ARMII::MO_COFFSTUB;
4807 } else if (IsIndirect) {
4808 TargetFlags |= ARMII::MO_GOT;
4809 }
4810
4811 if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only
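      // The Thumb-1 execute-only materialization of the address is expected
      // to use flag-setting instructions, so preserve the condition flags
      // (APSR_nzcvq) in R12 across it.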
4812 Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register
4813 auto APSREncoding =
4814 ARMSysReg::lookupMClassSysRegByName(Name: "apsr_nzcvq")->Encoding;
4815 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: ARM::t2MRS_M), DestReg: CPSRSaveReg)
4816 .addImm(Val: APSREncoding)
4817 .add(MOs: predOps(Pred: ARMCC::AL));
4818 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: LoadImmOpc), DestReg: Reg)
4819 .addGlobalAddress(GV, Offset: 0, TargetFlags);
4820 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: ARM::t2MSR_M))
4821 .addImm(Val: APSREncoding)
4822 .addReg(RegNo: CPSRSaveReg, flags: RegState::Kill)
4823 .add(MOs: predOps(Pred: ARMCC::AL));
4824 } else {
4825 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: LoadImmOpc), DestReg: Reg)
4826 .addGlobalAddress(GV, Offset: 0, TargetFlags);
4827 }
4828
4829 if (IsIndirect) {
4830 MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: LoadOpc), DestReg: Reg);
4831 MIB.addReg(RegNo: Reg, flags: RegState::Kill).addImm(Val: 0);
4832 auto Flags = MachineMemOperand::MOLoad |
4833 MachineMemOperand::MODereferenceable |
4834 MachineMemOperand::MOInvariant;
4835 MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4836 PtrInfo: MachinePointerInfo::getGOT(MF&: *MBB.getParent()), F: Flags, Size: 4, BaseAlignment: Align(4));
4837 MIB.addMemOperand(MMO).add(MOs: predOps(Pred: ARMCC::AL));
4838 }
4839 }
4840
4841 MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: LoadOpc), DestReg: Reg);
4842 MIB.addReg(RegNo: Reg, flags: RegState::Kill)
4843 .addImm(Val: Offset)
4844 .cloneMemRefs(OtherMI: *MI)
4845 .add(MOs: predOps(Pred: ARMCC::AL));
4846}
4847
4848bool
4849ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
4850 unsigned &AddSubOpc,
4851 bool &NegAcc, bool &HasLane) const {
4852 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Val: Opcode);
4853 if (I == MLxEntryMap.end())
4854 return false;
4855
4856 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
4857 MulOpc = Entry.MulOpc;
4858 AddSubOpc = Entry.AddSubOpc;
4859 NegAcc = Entry.NegAcc;
4860 HasLane = Entry.HasLane;
4861 return true;
4862}
4863
4864//===----------------------------------------------------------------------===//
4865// Execution domains.
4866//===----------------------------------------------------------------------===//
4867//
4868// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
4869// and some can go down both. The vmov instructions go down the VFP pipeline,
4870// but they can be changed to vorr equivalents that are executed by the NEON
4871// pipeline.
4872//
4873// We use the following execution domain numbering:
4874//
4875enum ARMExeDomain {
4876 ExeGeneric = 0,
4877 ExeVFP = 1,
4878 ExeNEON = 2
4879};
4880
4881//
4882// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
4883//
4884std::pair<uint16_t, uint16_t>
4885ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
4886 // If we don't have access to NEON instructions then we won't be able
4887 // to swizzle anything to the NEON domain. Check to make sure.
4888 if (Subtarget.hasNEON()) {
4889 // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
4890 // if they are not predicated.
4891 if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
4892 return std::make_pair(x: ExeVFP, y: (1 << ExeVFP) | (1 << ExeNEON));
4893
4894 // CortexA9 is particularly picky about mixing the two and wants these
4895 // converted.
4896 if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
4897 (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
4898 MI.getOpcode() == ARM::VMOVS))
4899 return std::make_pair(x: ExeVFP, y: (1 << ExeVFP) | (1 << ExeNEON));
4900 }
4901 // No other instructions can be swizzled, so just determine their domain.
4902 unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
4903
4904 if (Domain & ARMII::DomainNEON)
4905 return std::make_pair(x: ExeNEON, y: 0);
4906
4907 // Certain instructions can go either way on Cortex-A8.
4908 // Treat them as NEON instructions.
4909 if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
4910 return std::make_pair(x: ExeNEON, y: 0);
4911
4912 if (Domain & ARMII::DomainVFP)
4913 return std::make_pair(x: ExeVFP, y: 0);
4914
4915 return std::make_pair(x: ExeGeneric, y: 0);
4916}
4917
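// Return the D register containing SReg, setting Lane to 0 when SReg is the
// low half (ssub_0) of that D register and to 1 when it is the high half
// (ssub_1).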
4918static MCRegister getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
4919 unsigned SReg, unsigned &Lane) {
4920 MCRegister DReg =
4921 TRI->getMatchingSuperReg(Reg: SReg, SubIdx: ARM::ssub_0, RC: &ARM::DPRRegClass);
4922 Lane = 0;
4923
4924 if (DReg)
4925 return DReg;
4926
4927 Lane = 1;
4928 DReg = TRI->getMatchingSuperReg(Reg: SReg, SubIdx: ARM::ssub_1, RC: &ARM::DPRRegClass);
4929
4930 assert(DReg && "S-register with no D super-register?");
4931 return DReg;
4932}
4933
4934/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
4935/// set ImplicitSReg to a register that must be marked as implicit-use, or
4936/// zero if no register needs to be marked as implicit-use.
4937///
4938/// If the function cannot determine if an SPR should be marked implicit use or
4939/// not, it returns false.
4940///
4941/// This function handles cases where an instruction is being modified from taking
4942/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
4943/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4944/// lane of the DPR).
4945///
4946/// If the other SPR is defined, an implicit-use of it should be added. Else,
4947/// (including the case where the DPR itself is defined), it should not.
4948///
4949static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
4950 MachineInstr &MI, MCRegister DReg,
4951 unsigned Lane,
4952 MCRegister &ImplicitSReg) {
4953 // If the DPR is defined or used already, the other SPR lane will be chained
4954 // correctly, so there is nothing to be done.
4955 if (MI.definesRegister(Reg: DReg, TRI) || MI.readsRegister(Reg: DReg, TRI)) {
4956 ImplicitSReg = MCRegister();
4957 return true;
4958 }
4959
4960 // Otherwise we need to go searching to see if the SPR is set explicitly.
4961 ImplicitSReg = TRI->getSubReg(Reg: DReg,
4962 Idx: (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4963 MachineBasicBlock::LivenessQueryResult LQR =
4964 MI.getParent()->computeRegisterLiveness(TRI, Reg: ImplicitSReg, Before: MI);
4965
4966 if (LQR == MachineBasicBlock::LQR_Live)
4967 return true;
4968 else if (LQR == MachineBasicBlock::LQR_Unknown)
4969 return false;
4970
4971 // If the register is known not to be live, there is no need to add an
4972 // implicit-use.
4973 ImplicitSReg = MCRegister();
4974 return true;
4975}
4976
4977void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
4978 unsigned Domain) const {
4979 unsigned DstReg, SrcReg;
4980 MCRegister DReg;
4981 unsigned Lane;
4982 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4983 const TargetRegisterInfo *TRI = &getRegisterInfo();
4984 switch (MI.getOpcode()) {
4985 default:
4986 llvm_unreachable("cannot handle opcode!");
4987 break;
4988 case ARM::VMOVD:
4989 if (Domain != ExeNEON)
4990 break;
4991
4992 // Zap the predicate operands.
4993 assert(!isPredicated(MI) && "Cannot predicate a VORRd");
4994
4995 // Make sure we've got NEON instructions.
4996 assert(Subtarget.hasNEON() && "VORRd requires NEON");
4997
4998 // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
4999 DstReg = MI.getOperand(i: 0).getReg();
5000 SrcReg = MI.getOperand(i: 1).getReg();
5001
5002 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5003 MI.removeOperand(OpNo: i - 1);
5004
5005 // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
5006 MI.setDesc(get(Opcode: ARM::VORRd));
5007 MIB.addReg(RegNo: DstReg, flags: RegState::Define)
5008 .addReg(RegNo: SrcReg)
5009 .addReg(RegNo: SrcReg)
5010 .add(MOs: predOps(Pred: ARMCC::AL));
5011 break;
5012 case ARM::VMOVRS:
5013 if (Domain != ExeNEON)
5014 break;
5015 assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
5016
5017 // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
5018 DstReg = MI.getOperand(i: 0).getReg();
5019 SrcReg = MI.getOperand(i: 1).getReg();
5020
5021 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5022 MI.removeOperand(OpNo: i - 1);
5023
5024 DReg = getCorrespondingDRegAndLane(TRI, SReg: SrcReg, Lane);
5025
5026 // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
5027 // Note that DSrc has been widened and the other lane may be undef, which
5028 // contaminates the entire register.
5029 MI.setDesc(get(Opcode: ARM::VGETLNi32));
5030 MIB.addReg(RegNo: DstReg, flags: RegState::Define)
5031 .addReg(RegNo: DReg, flags: RegState::Undef)
5032 .addImm(Val: Lane)
5033 .add(MOs: predOps(Pred: ARMCC::AL));
5034
5035 // The old source should be an implicit use; otherwise we might think it
5036 // was dead before here.
5037 MIB.addReg(RegNo: SrcReg, flags: RegState::Implicit);
5038 break;
5039 case ARM::VMOVSR: {
5040 if (Domain != ExeNEON)
5041 break;
5042 assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
5043
5044 // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
5045 DstReg = MI.getOperand(i: 0).getReg();
5046 SrcReg = MI.getOperand(i: 1).getReg();
5047
5048 DReg = getCorrespondingDRegAndLane(TRI, SReg: DstReg, Lane);
5049
5050 MCRegister ImplicitSReg;
5051 if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
5052 break;
5053
5054 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5055 MI.removeOperand(OpNo: i - 1);
5056
5057 // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
5058 // Again DDst may be undefined at the beginning of this instruction.
5059 MI.setDesc(get(Opcode: ARM::VSETLNi32));
5060 MIB.addReg(RegNo: DReg, flags: RegState::Define)
5061 .addReg(RegNo: DReg, flags: getUndefRegState(B: !MI.readsRegister(Reg: DReg, TRI)))
5062 .addReg(RegNo: SrcReg)
5063 .addImm(Val: Lane)
5064 .add(MOs: predOps(Pred: ARMCC::AL));
5065
5066 // The narrower destination must be marked as set to keep previous chains
5067 // in place.
5068 MIB.addReg(RegNo: DstReg, flags: RegState::Define | RegState::Implicit);
5069 if (ImplicitSReg)
5070 MIB.addReg(RegNo: ImplicitSReg, flags: RegState::Implicit);
5071 break;
5072 }
5073 case ARM::VMOVS: {
5074 if (Domain != ExeNEON)
5075 break;
5076
5077 // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
5078 DstReg = MI.getOperand(i: 0).getReg();
5079 SrcReg = MI.getOperand(i: 1).getReg();
5080
5081 unsigned DstLane = 0, SrcLane = 0;
5082 MCRegister DDst, DSrc;
5083 DDst = getCorrespondingDRegAndLane(TRI, SReg: DstReg, Lane&: DstLane);
5084 DSrc = getCorrespondingDRegAndLane(TRI, SReg: SrcReg, Lane&: SrcLane);
5085
5086 MCRegister ImplicitSReg;
5087 if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg: DSrc, Lane: SrcLane, ImplicitSReg))
5088 break;
5089
5090 for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
5091 MI.removeOperand(OpNo: i - 1);
5092
5093 if (DSrc == DDst) {
5094 // Destination can be:
5095 // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
5096 MI.setDesc(get(Opcode: ARM::VDUPLN32d));
5097 MIB.addReg(RegNo: DDst, flags: RegState::Define)
5098 .addReg(RegNo: DDst, flags: getUndefRegState(B: !MI.readsRegister(Reg: DDst, TRI)))
5099 .addImm(Val: SrcLane)
5100 .add(MOs: predOps(Pred: ARMCC::AL));
5101
5102 // Neither the source nor the destination is naturally represented any
5103 // more, so add them in manually.
5104 MIB.addReg(RegNo: DstReg, flags: RegState::Implicit | RegState::Define);
5105 MIB.addReg(RegNo: SrcReg, flags: RegState::Implicit);
5106 if (ImplicitSReg)
5107 MIB.addReg(RegNo: ImplicitSReg, flags: RegState::Implicit);
5108 break;
5109 }
5110
5111 // In general there's no single instruction that can perform an S <-> S
5112 // move in NEON space, but a pair of VEXT instructions *can* do the
5113 // job. It turns out that the VEXTs needed will only use DSrc once, with
5114 // the position based purely on the combination of lane-0 and lane-1
5115 // involved. For example
5116 // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
5117 // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
5118 // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
5119 // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
5120 //
5121 // Pattern of the MachineInstrs is:
5122 // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
5123 MachineInstrBuilder NewMIB;
5124 NewMIB = BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: ARM::VEXTd32),
5125 DestReg: DDst);
5126
5127 // On the first instruction, both DSrc and DDst may be undef if present.
5128 // Specifically when the original instruction didn't have them as an
5129 // <imp-use>.
5130 MCRegister CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
5131 bool CurUndef = !MI.readsRegister(Reg: CurReg, TRI);
5132 NewMIB.addReg(RegNo: CurReg, flags: getUndefRegState(B: CurUndef));
5133
5134 CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
5135 CurUndef = !MI.readsRegister(Reg: CurReg, TRI);
5136 NewMIB.addReg(RegNo: CurReg, flags: getUndefRegState(B: CurUndef))
5137 .addImm(Val: 1)
5138 .add(MOs: predOps(Pred: ARMCC::AL));
5139
5140 if (SrcLane == DstLane)
5141 NewMIB.addReg(RegNo: SrcReg, flags: RegState::Implicit);
5142
5143 MI.setDesc(get(Opcode: ARM::VEXTd32));
5144 MIB.addReg(RegNo: DDst, flags: RegState::Define);
5145
5146 // On the second instruction, DDst has definitely been defined above, so
5147 // it is not undef. DSrc, if present, can be undef as above.
5148 CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
5149 CurUndef = CurReg == DSrc && !MI.readsRegister(Reg: CurReg, TRI);
5150 MIB.addReg(RegNo: CurReg, flags: getUndefRegState(B: CurUndef));
5151
5152 CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
5153 CurUndef = CurReg == DSrc && !MI.readsRegister(Reg: CurReg, TRI);
5154 MIB.addReg(RegNo: CurReg, flags: getUndefRegState(B: CurUndef))
5155 .addImm(Val: 1)
5156 .add(MOs: predOps(Pred: ARMCC::AL));
5157
5158 if (SrcLane != DstLane)
5159 MIB.addReg(RegNo: SrcReg, flags: RegState::Implicit);
5160
5161 // As before, the original destination is no longer represented, add it
5162 // implicitly.
5163 MIB.addReg(RegNo: DstReg, flags: RegState::Define | RegState::Implicit);
5164 if (ImplicitSReg != 0)
5165 MIB.addReg(RegNo: ImplicitSReg, flags: RegState::Implicit);
5166 break;
5167 }
5168 }
5169}
5170
5171//===----------------------------------------------------------------------===//
5172// Partial register updates
5173//===----------------------------------------------------------------------===//
5174//
5175// Swift renames NEON registers with 64-bit granularity. That means any
5176// instruction writing an S-reg implicitly reads the containing D-reg. The
5177// problem is mostly avoided by translating f32 operations to v2f32 operations
5178// on D-registers, but f32 loads are still a problem.
5179//
5180// These instructions can load an f32 into a NEON register:
5181//
5182// VLDRS - Only writes S, partial D update.
5183// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
5184// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
5185//
5186// FCONSTD can be used as a dependency-breaking instruction.
5187unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
5188 const MachineInstr &MI, unsigned OpNum,
5189 const TargetRegisterInfo *TRI) const {
5190 auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
5191 if (!PartialUpdateClearance)
5192 return 0;
5193
5194 assert(TRI && "Need TRI instance");
5195
5196 const MachineOperand &MO = MI.getOperand(i: OpNum);
5197 if (MO.readsReg())
5198 return 0;
5199 Register Reg = MO.getReg();
5200 int UseOp = -1;
5201
5202 switch (MI.getOpcode()) {
5203 // Normal instructions writing only an S-register.
5204 case ARM::VLDRS:
5205 case ARM::FCONSTS:
5206 case ARM::VMOVSR:
5207 case ARM::VMOVv8i8:
5208 case ARM::VMOVv4i16:
5209 case ARM::VMOVv2i32:
5210 case ARM::VMOVv2f32:
5211 case ARM::VMOVv1i64:
5212 UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, isKill: false);
5213 break;
5214
5215 // Explicitly reads the dependency.
5216 case ARM::VLD1LNd32:
5217 UseOp = 3;
5218 break;
5219 default:
5220 return 0;
5221 }
5222
5223 // If this instruction actually reads a value from Reg, there is no unwanted
5224 // dependency.
5225 if (UseOp != -1 && MI.getOperand(i: UseOp).readsReg())
5226 return 0;
5227
5228 // We must be able to clobber the whole D-reg.
5229 if (Reg.isVirtual()) {
5230 // Virtual register must be a def undef foo:ssub_0 operand.
5231 if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
5232 return 0;
5233 } else if (ARM::SPRRegClass.contains(Reg)) {
5234 // Physical register: MI must define the full D-reg.
5235 MCRegister DReg =
5236 TRI->getMatchingSuperReg(Reg, SubIdx: ARM::ssub_0, RC: &ARM::DPRRegClass);
5237 if (!DReg || !MI.definesRegister(Reg: DReg, TRI))
5238 return 0;
5239 }
5240
5241 // MI has an unwanted D-register dependency.
5242 // Avoid defs in the previous N instructions.
5243 return PartialUpdateClearance;
5244}
5245
5246// Break a partial register dependency after getPartialRegUpdateClearance
5247// returned non-zero.
5248void ARMBaseInstrInfo::breakPartialRegDependency(
5249 MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
5250 assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
5251 assert(TRI && "Need TRI instance");
5252
5253 const MachineOperand &MO = MI.getOperand(i: OpNum);
5254 Register Reg = MO.getReg();
5255 assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
5256 unsigned DReg = Reg;
5257
5258 // If MI defines an S-reg, find the corresponding D super-register.
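  // (S0 and S1 both map to D0, S2 and S3 to D1, and so on.)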
5259 if (ARM::SPRRegClass.contains(Reg)) {
5260 DReg = ARM::D0 + (Reg - ARM::S0) / 2;
5261 assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
5262 }
5263
5264 assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
5265 assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
5266
5267 // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
5268 // the full D-register by loading the same value to both lanes. The
5269 // instruction is micro-coded with 2 uops, so don't do this until we can
5270 // properly schedule micro-coded instructions. The dispatcher stalls cause
5271 // too big regressions.
5272
5273 // Insert the dependency-breaking FCONSTD before MI.
5274 // 96 is the encoding of 0.5, but the actual value doesn't matter here.
5275 BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: get(Opcode: ARM::FCONSTD), DestReg: DReg)
5276 .addImm(Val: 96)
5277 .add(MOs: predOps(Pred: ARMCC::AL));
5278 MI.addRegisterKilled(IncomingReg: DReg, RegInfo: TRI, AddIfNotFound: true);
5279}
5280
5281bool ARMBaseInstrInfo::hasNOP() const {
5282 return Subtarget.hasFeature(Feature: ARM::HasV6KOps);
5283}
5284
5285bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
5286 if (MI->getNumOperands() < 4)
5287 return true;
5288 unsigned ShOpVal = MI->getOperand(i: 3).getImm();
5289 unsigned ShImm = ARM_AM::getSORegOffset(Op: ShOpVal);
5290 // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
5291 if ((ShImm == 1 && ARM_AM::getSORegShOp(Op: ShOpVal) == ARM_AM::lsr) ||
5292 ((ShImm == 1 || ShImm == 2) &&
5293 ARM_AM::getSORegShOp(Op: ShOpVal) == ARM_AM::lsl))
5294 return true;
5295
5296 return false;
5297}
5298
5299bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
5300 const MachineInstr &MI, unsigned DefIdx,
5301 SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
5302 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5303 assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
5304
5305 switch (MI.getOpcode()) {
5306 case ARM::VMOVDRR:
5307 // dX = VMOVDRR rY, rZ
5308 // is the same as:
5309 // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
5310 // Populate the InputRegs accordingly.
5311 // rY
5312 const MachineOperand *MOReg = &MI.getOperand(i: 1);
5313 if (!MOReg->isUndef())
5314 InputRegs.push_back(Elt: RegSubRegPairAndIdx(MOReg->getReg(),
5315 MOReg->getSubReg(), ARM::ssub_0));
5316 // rZ
5317 MOReg = &MI.getOperand(i: 2);
5318 if (!MOReg->isUndef())
5319 InputRegs.push_back(Elt: RegSubRegPairAndIdx(MOReg->getReg(),
5320 MOReg->getSubReg(), ARM::ssub_1));
5321 return true;
5322 }
5323 llvm_unreachable("Target dependent opcode missing");
5324}
5325
5326bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
5327 const MachineInstr &MI, unsigned DefIdx,
5328 RegSubRegPairAndIdx &InputReg) const {
5329 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5330 assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5331
5332 switch (MI.getOpcode()) {
5333 case ARM::VMOVRRD:
5334 // rX, rY = VMOVRRD dZ
5335 // is the same as:
5336 // rX = EXTRACT_SUBREG dZ, ssub_0
5337 // rY = EXTRACT_SUBREG dZ, ssub_1
5338 const MachineOperand &MOReg = MI.getOperand(i: 2);
5339 if (MOReg.isUndef())
5340 return false;
5341 InputReg.Reg = MOReg.getReg();
5342 InputReg.SubReg = MOReg.getSubReg();
5343 InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
5344 return true;
5345 }
5346 llvm_unreachable("Target dependent opcode missing");
5347}
5348
5349bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
5350 const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
5351 RegSubRegPairAndIdx &InsertedReg) const {
5352 assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
5353 assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5354
5355 switch (MI.getOpcode()) {
5356 case ARM::VSETLNi32:
5357 case ARM::MVE_VMOV_to_lane_32:
5358 // dX = VSETLNi32 dY, rZ, imm
5359 // qX = MVE_VMOV_to_lane_32 qY, rZ, imm
5360 const MachineOperand &MOBaseReg = MI.getOperand(i: 1);
5361 const MachineOperand &MOInsertedReg = MI.getOperand(i: 2);
5362 if (MOInsertedReg.isUndef())
5363 return false;
5364 const MachineOperand &MOIndex = MI.getOperand(i: 3);
5365 BaseReg.Reg = MOBaseReg.getReg();
5366 BaseReg.SubReg = MOBaseReg.getSubReg();
5367
5368 InsertedReg.Reg = MOInsertedReg.getReg();
5369 InsertedReg.SubReg = MOInsertedReg.getSubReg();
5370 InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm();
5371 return true;
5372 }
5373 llvm_unreachable("Target dependent opcode missing");
5374}
5375
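// Split the flags into the "direct" part selected by ARMII::MO_OPTION_MASK
// and the remaining bitmask flags; the two tables below list their
// serializable names.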
5376std::pair<unsigned, unsigned>
5377ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5378 const unsigned Mask = ARMII::MO_OPTION_MASK;
5379 return std::make_pair(x: TF & Mask, y: TF & ~Mask);
5380}
5381
5382ArrayRef<std::pair<unsigned, const char *>>
5383ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5384 using namespace ARMII;
5385
5386 static const std::pair<unsigned, const char *> TargetFlags[] = {
5387 {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"},
5388 {MO_LO_0_7, "arm-lo-0-7"}, {MO_HI_0_7, "arm-hi-0-7"},
5389 {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
5390 };
5391 return ArrayRef(TargetFlags);
5392}
5393
5394ArrayRef<std::pair<unsigned, const char *>>
5395ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5396 using namespace ARMII;
5397
5398 static const std::pair<unsigned, const char *> TargetFlags[] = {
5399 {MO_COFFSTUB, "arm-coffstub"},
5400 {MO_GOT, "arm-got"},
5401 {MO_SBREL, "arm-sbrel"},
5402 {MO_DLLIMPORT, "arm-dllimport"},
5403 {MO_SECREL, "arm-secrel"},
5404 {MO_NONLAZY, "arm-nonlazy"}};
5405 return ArrayRef(TargetFlags);
5406}
5407
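// Describe MI as a base register plus immediate when it is an ADDri or SUBri
// defining Reg; e.g. an ADDri of r1 and #8 into Reg yields {r1, 8}, and the
// SUBri form yields a negated offset. Anything else returns std::nullopt.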
5408std::optional<RegImmPair>
5409ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
5410 int Sign = 1;
5411 unsigned Opcode = MI.getOpcode();
5412 int64_t Offset = 0;
5413
5414 // TODO: Handle cases where Reg is a super- or sub-register of the
5415 // destination register.
5416 const MachineOperand &Op0 = MI.getOperand(i: 0);
5417 if (!Op0.isReg() || Reg != Op0.getReg())
5418 return std::nullopt;
5419
5420 // We describe SUBri or ADDri instructions.
5421 if (Opcode == ARM::SUBri)
5422 Sign = -1;
5423 else if (Opcode != ARM::ADDri)
5424 return std::nullopt;
5425
5426 // TODO: Third operand can be global address (usually some string). Since
5427 // strings can be relocated we cannot calculate their offsets for
5428 // now.
5429 if (!MI.getOperand(i: 1).isReg() || !MI.getOperand(i: 2).isImm())
5430 return std::nullopt;
5431
5432 Offset = MI.getOperand(i: 2).getImm() * Sign;
5433 return RegImmPair{MI.getOperand(i: 1).getReg(), Offset};
5434}
5435
5436bool llvm::registerDefinedBetween(unsigned Reg,
5437 MachineBasicBlock::iterator From,
5438 MachineBasicBlock::iterator To,
5439 const TargetRegisterInfo *TRI) {
5440 for (auto I = From; I != To; ++I)
5441 if (I->modifiesRegister(Reg, TRI))
5442 return true;
5443 return false;
5444}
5445
5446MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
5447 const TargetRegisterInfo *TRI) {
5448 // Search backwards to the instruction that defines CPSR. This may or may
5449 // not be a CMP; we check that after this loop. If we find another
5450 // instruction that reads CPSR, we return nullptr.
5451 MachineBasicBlock::iterator CmpMI = Br;
5452 while (CmpMI != Br->getParent()->begin()) {
5453 --CmpMI;
5454 if (CmpMI->modifiesRegister(Reg: ARM::CPSR, TRI))
5455 break;
5456 if (CmpMI->readsRegister(Reg: ARM::CPSR, TRI))
5457 break;
5458 }
5459
5460 // Check that this inst is a CMP r[0-7], #0 and that the register
5461 // is not redefined between the cmp and the br.
5462 if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
5463 return nullptr;
5464 Register Reg = CmpMI->getOperand(i: 0).getReg();
5465 Register PredReg;
5466 ARMCC::CondCodes Pred = getInstrPredicate(MI: *CmpMI, PredReg);
5467 if (Pred != ARMCC::AL || CmpMI->getOperand(i: 1).getImm() != 0)
5468 return nullptr;
5469 if (!isARMLowRegister(Reg))
5470 return nullptr;
5471 if (registerDefinedBetween(Reg, From: CmpMI->getNextNode(), To: Br, TRI))
5472 return nullptr;
5473
5474 return &*CmpMI;
5475}
5476
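// Cost of materializing the 32-bit constant Val, in bytes when ForCodesize is
// set and in (approximate) instruction count otherwise. For example, a value
// like 0x12345678 matches none of the cheap patterns below and falls through
// to MOVW+MOVT (8 bytes / 2 instructions) when movt is usable, or to a
// literal-pool load otherwise.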
5477unsigned llvm::ConstantMaterializationCost(unsigned Val,
5478 const ARMSubtarget *Subtarget,
5479 bool ForCodesize) {
5480 if (Subtarget->isThumb()) {
5481 if (Val <= 255) // MOV
5482 return ForCodesize ? 2 : 1;
5483 if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV
5484 ARM_AM::getT2SOImmVal(Arg: Val) != -1 || // MOVW
5485 ARM_AM::getT2SOImmVal(Arg: ~Val) != -1)) // MVN
5486 return ForCodesize ? 4 : 1;
5487 if (Val <= 510) // MOV + ADDi8
5488 return ForCodesize ? 4 : 2;
5489 if (~Val <= 255) // MOV + MVN
5490 return ForCodesize ? 4 : 2;
5491 if (ARM_AM::isThumbImmShiftedVal(V: Val)) // MOV + LSL
5492 return ForCodesize ? 4 : 2;
5493 } else {
5494 if (ARM_AM::getSOImmVal(Arg: Val) != -1) // MOV
5495 return ForCodesize ? 4 : 1;
5496 if (ARM_AM::getSOImmVal(Arg: ~Val) != -1) // MVN
5497 return ForCodesize ? 4 : 1;
5498 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
5499 return ForCodesize ? 4 : 1;
5500 if (ARM_AM::isSOImmTwoPartVal(V: Val)) // two instrs
5501 return ForCodesize ? 8 : 2;
5502 if (ARM_AM::isSOImmTwoPartValNeg(V: Val)) // two instrs
5503 return ForCodesize ? 8 : 2;
5504 }
5505 if (Subtarget->useMovt()) // MOVW + MOVT
5506 return ForCodesize ? 8 : 2;
5507 return ForCodesize ? 8 : 3; // Literal pool load
5508}
5509
5510bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
5511 const ARMSubtarget *Subtarget,
5512 bool ForCodesize) {
5513 // Check with ForCodesize
5514 unsigned Cost1 = ConstantMaterializationCost(Val: Val1, Subtarget, ForCodesize);
5515 unsigned Cost2 = ConstantMaterializationCost(Val: Val2, Subtarget, ForCodesize);
5516 if (Cost1 < Cost2)
5517 return true;
5518 if (Cost1 > Cost2)
5519 return false;
5520
5521 // If they are equal, try with !ForCodesize
5522 return ConstantMaterializationCost(Val: Val1, Subtarget, ForCodesize: !ForCodesize) <
5523 ConstantMaterializationCost(Val: Val2, Subtarget, ForCodesize: !ForCodesize);
5524}
5525
5526/// Constants defining how certain sequences should be outlined.
5527/// This encompasses how an outlined function should be called, and what kind of
5528/// frame should be emitted for that outlined function.
5529///
5530/// \p MachineOutlinerTailCall implies that the function is being created from
5531/// a sequence of instructions ending in a return.
5532///
5533/// That is,
5534///
5535/// I1 OUTLINED_FUNCTION:
5536/// I2 --> B OUTLINED_FUNCTION I1
5537/// BX LR I2
5538/// BX LR
5539///
5540/// +-------------------------+--------+-----+
5541/// | | Thumb2 | ARM |
5542/// +-------------------------+--------+-----+
5543/// | Call overhead in Bytes | 4 | 4 |
5544/// | Frame overhead in Bytes | 0 | 0 |
5545/// | Stack fixup required | No | No |
5546/// +-------------------------+--------+-----+
5547///
5548/// \p MachineOutlinerThunk implies that the function is being created from
5549/// a sequence of instructions ending in a call. The outlined function is
5550/// called with a BL instruction, and the outlined function tail-calls the
5551/// original call destination.
5552///
5553/// That is,
5554///
5555/// I1 OUTLINED_FUNCTION:
5556/// I2 --> BL OUTLINED_FUNCTION I1
5557/// BL f I2
5558/// B f
5559///
5560/// +-------------------------+--------+-----+
5561/// | | Thumb2 | ARM |
5562/// +-------------------------+--------+-----+
5563/// | Call overhead in Bytes | 4 | 4 |
5564/// | Frame overhead in Bytes | 0 | 0 |
5565/// | Stack fixup required | No | No |
5566/// +-------------------------+--------+-----+
5567///
5568/// \p MachineOutlinerNoLRSave implies that the function should be called using
5569/// a BL instruction, but doesn't require LR to be saved and restored. This
5570/// happens when LR is known to be dead.
5571///
5572/// That is,
5573///
5574/// I1 OUTLINED_FUNCTION:
5575/// I2 --> BL OUTLINED_FUNCTION I1
5576/// I3 I2
5577/// I3
5578/// BX LR
5579///
5580/// +-------------------------+--------+-----+
5581/// | | Thumb2 | ARM |
5582/// +-------------------------+--------+-----+
5583/// | Call overhead in Bytes | 4 | 4 |
5584/// | Frame overhead in Bytes | 2 | 4 |
5585/// | Stack fixup required | No | No |
5586/// +-------------------------+--------+-----+
5587///
5588/// \p MachineOutlinerRegSave implies that the function should be called with a
5589/// save and restore of LR to an available register. This allows us to avoid
5590/// stack fixups. Note that this outlining variant is compatible with the
5591/// NoLRSave case.
5592///
5593/// That is,
5594///
5595/// I1 Save LR OUTLINED_FUNCTION:
5596/// I2 --> BL OUTLINED_FUNCTION I1
5597/// I3 Restore LR I2
5598/// I3
5599/// BX LR
5600///
5601/// +-------------------------+--------+-----+
5602/// | | Thumb2 | ARM |
5603/// +-------------------------+--------+-----+
5604/// | Call overhead in Bytes | 8 | 12 |
5605/// | Frame overhead in Bytes | 2 | 4 |
5606/// | Stack fixup required | No | No |
5607/// +-------------------------+--------+-----+
5608///
5609/// \p MachineOutlinerDefault implies that the function should be called with
5610/// a save and restore of LR to the stack.
5611///
5612/// That is,
5613///
5614/// I1 Save LR OUTLINED_FUNCTION:
5615/// I2 --> BL OUTLINED_FUNCTION I1
5616/// I3 Restore LR I2
5617/// I3
5618/// BX LR
5619///
5620/// +-------------------------+--------+-----+
5621/// | | Thumb2 | ARM |
5622/// +-------------------------+--------+-----+
5623/// | Call overhead in Bytes | 8 | 12 |
5624/// | Frame overhead in Bytes | 2 | 4 |
5625/// | Stack fixup required | Yes | Yes |
5626/// +-------------------------+--------+-----+
5627
5628enum MachineOutlinerClass {
5629 MachineOutlinerTailCall,
5630 MachineOutlinerThunk,
5631 MachineOutlinerNoLRSave,
5632 MachineOutlinerRegSave,
5633 MachineOutlinerDefault
5634};
5635
5636enum MachineOutlinerMBBFlags {
5637 LRUnavailableSomewhere = 0x2,
5638 HasCalls = 0x4,
5639 UnsafeRegsDead = 0x8
5640};
5641
5642struct OutlinerCosts {
5643 int CallTailCall;
5644 int FrameTailCall;
5645 int CallThunk;
5646 int FrameThunk;
5647 int CallNoLRSave;
5648 int FrameNoLRSave;
5649 int CallRegSave;
5650 int FrameRegSave;
5651 int CallDefault;
5652 int FrameDefault;
5653 int SaveRestoreLROnStack;
5654
5655 OutlinerCosts(const ARMSubtarget &target)
5656 : CallTailCall(target.isThumb() ? 4 : 4),
5657 FrameTailCall(target.isThumb() ? 0 : 0),
5658 CallThunk(target.isThumb() ? 4 : 4),
5659 FrameThunk(target.isThumb() ? 0 : 0),
5660 CallNoLRSave(target.isThumb() ? 4 : 4),
5661 FrameNoLRSave(target.isThumb() ? 2 : 4),
5662 CallRegSave(target.isThumb() ? 8 : 12),
5663 FrameRegSave(target.isThumb() ? 2 : 4),
5664 CallDefault(target.isThumb() ? 8 : 12),
5665 FrameDefault(target.isThumb() ? 2 : 4),
5666 SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
5667};
5668
5669Register
5670ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
5671 MachineFunction *MF = C.getMF();
5672 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
5673 const ARMBaseRegisterInfo *ARI =
5674 static_cast<const ARMBaseRegisterInfo *>(&TRI);
5675
5676 BitVector regsReserved = ARI->getReservedRegs(MF: *MF);
5677 // Check if there is an available register across the sequence that we can
5678 // use.
5679 for (Register Reg : ARM::rGPRRegClass) {
5680 if (!(Reg < regsReserved.size() && regsReserved.test(Idx: Reg)) &&
5681 Reg != ARM::LR && // LR is not reserved, but don't use it.
5682 Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
5683 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
5684 C.isAvailableInsideSeq(Reg, TRI))
5685 return Reg;
5686 }
5687 return Register();
5688}
5689
5690// Compute liveness of LR at the point after the interval [I, E), which
5691// denotes a *backward* iteration through instructions. Used only for return
5692// basic blocks, which do not end with a tail call.
5693static bool isLRAvailable(const TargetRegisterInfo &TRI,
5694 MachineBasicBlock::reverse_iterator I,
5695 MachineBasicBlock::reverse_iterator E) {
5696 // At the end of the function, LR is dead.
5697 bool Live = false;
5698 for (; I != E; ++I) {
5699 const MachineInstr &MI = *I;
5700
5701 // Check defs of LR.
5702 if (MI.modifiesRegister(Reg: ARM::LR, TRI: &TRI))
5703 Live = false;
5704
5705 // Check uses of LR.
5706 unsigned Opcode = MI.getOpcode();
5707 if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
5708 Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
5709 Opcode == ARM::tBXNS_RET) {
5710 // These instructions use LR, but it's not an (explicit or implicit)
5711 // operand.
5712 Live = true;
5713 continue;
5714 }
5715 if (MI.readsRegister(Reg: ARM::LR, TRI: &TRI))
5716 Live = true;
5717 }
5718 return !Live;
5719}
5720
5721std::optional<std::unique_ptr<outliner::OutlinedFunction>>
5722ARMBaseInstrInfo::getOutliningCandidateInfo(
5723 const MachineModuleInfo &MMI,
5724 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
5725 unsigned MinRepeats) const {
5726 unsigned SequenceSize = 0;
5727 for (auto &MI : RepeatedSequenceLocs[0])
5728 SequenceSize += getInstSizeInBytes(MI);
5729
5730 // Properties about candidate MBBs that hold for all of them.
5731 unsigned FlagsSetInAll = 0xF;
5732
5733 // Compute liveness information for each candidate, and set FlagsSetInAll.
5734 const TargetRegisterInfo &TRI = getRegisterInfo();
5735 for (outliner::Candidate &C : RepeatedSequenceLocs)
5736 FlagsSetInAll &= C.Flags;
5737
5738 // According to the ARM Procedure Call Standard, the following are
5739 // undefined on entry/exit from a function call:
5740 //
5741 // * Register R12(IP),
5742 // * Condition codes (and thus the CPSR register)
5743 //
5744 // Since we control the instructions which are part of the outlined regions
5745 // we don't need to be fully compliant with the AAPCS, but we have to
5746 // guarantee that if a veneer is inserted at link time the code is still
5747 // correct. Because of this, we can't outline any sequence of instructions
5748 // where one of these registers is live into/across it. Thus, we need to
5749 // delete those candidates.
5750 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5751 // If the unsafe registers in this block are all dead, then we don't need
5752 // to compute liveness here.
5753 if (C.Flags & UnsafeRegsDead)
5754 return false;
5755 return C.isAnyUnavailableAcrossOrOutOfSeq(Regs: {ARM::R12, ARM::CPSR}, TRI);
5756 };
5757
5758 // Are there any candidates where those registers are live?
5759 if (!(FlagsSetInAll & UnsafeRegsDead)) {
5760 // Erase every candidate that violates the restrictions above. (It could be
5761 // true that we have viable candidates, so it's not worth bailing out in
5762 // the case that, say, 1 out of 20 candidates violates the restrictions.)
5763 llvm::erase_if(C&: RepeatedSequenceLocs, P: CantGuaranteeValueAcrossCall);
5764
5765 // If the sequence doesn't have enough candidates left, then we're done.
5766 if (RepeatedSequenceLocs.size() < MinRepeats)
5767 return std::nullopt;
5768 }
5769
5770 // We expect the majority of the outlining candidates to be in consensus with
5771 // regard to return address signing and authentication, and branch target
5772 // enforcement; in other words, partitioning according to all four
5773 // possible combinations of PAC-RET and BTI is going to yield one big subset
5774 // and three small (likely empty) subsets. That allows us to cull incompatible
5775 // candidates separately for PAC-RET and BTI.
5776
5777 // Partition the candidates into two sets: one with BTI enabled and one with
5778 // BTI disabled. Remove the candidates from the smaller set. If the two sets
5779 // are the same size, prefer the non-BTI ones for outlining, since they have
5780 // less overhead.
5781 auto NoBTI =
5782 llvm::partition(Range&: RepeatedSequenceLocs, P: [](const outliner::Candidate &C) {
5783 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5784 return AFI.branchTargetEnforcement();
5785 });
5786 if (std::distance(first: RepeatedSequenceLocs.begin(), last: NoBTI) >
5787 std::distance(first: NoBTI, last: RepeatedSequenceLocs.end()))
5788 RepeatedSequenceLocs.erase(first: NoBTI, last: RepeatedSequenceLocs.end());
5789 else
5790 RepeatedSequenceLocs.erase(first: RepeatedSequenceLocs.begin(), last: NoBTI);
5791
5792 if (RepeatedSequenceLocs.size() < MinRepeats)
5793 return std::nullopt;
5794
5795 // Likewise, partition the candidates according to PAC-RET enablement.
5796 auto NoPAC =
5797 llvm::partition(Range&: RepeatedSequenceLocs, P: [](const outliner::Candidate &C) {
5798 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
5799 // If the function happens to not spill the LR, do not disqualify it
5800 // from the outlining.
5801 return AFI.shouldSignReturnAddress(SpillsLR: true);
5802 });
5803 if (std::distance(first: RepeatedSequenceLocs.begin(), last: NoPAC) >
5804 std::distance(first: NoPAC, last: RepeatedSequenceLocs.end()))
5805 RepeatedSequenceLocs.erase(first: NoPAC, last: RepeatedSequenceLocs.end());
5806 else
5807 RepeatedSequenceLocs.erase(first: RepeatedSequenceLocs.begin(), last: NoPAC);
5808
5809 if (RepeatedSequenceLocs.size() < MinRepeats)
5810 return std::nullopt;
5811
5812 // At this point, we have only "safe" candidates to outline. Figure out
5813 // frame + call instruction information.
5814
5815 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
5816
5817 // Helper lambda which sets call information for every candidate.
5818 auto SetCandidateCallInfo =
5819 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5820 for (outliner::Candidate &C : RepeatedSequenceLocs)
5821 C.setCallInfo(CID: CallID, CO: NumBytesForCall);
5822 };
5823
5824 OutlinerCosts Costs(Subtarget);
5825
5826 const auto &SomeMFI =
5827 *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
5828 // Adjust costs to account for the BTI instructions.
5829 if (SomeMFI.branchTargetEnforcement()) {
5830 Costs.FrameDefault += 4;
5831 Costs.FrameNoLRSave += 4;
5832 Costs.FrameRegSave += 4;
5833 Costs.FrameTailCall += 4;
5834 Costs.FrameThunk += 4;
5835 }
5836
5837 // Adjust costs to account for sign and authentication instructions.
5838 if (SomeMFI.shouldSignReturnAddress(SpillsLR: true)) {
5839 Costs.CallDefault += 8; // +PAC instr, +AUT instr
5840 Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr
5841 }
5842
5843 unsigned FrameID = MachineOutlinerDefault;
5844 unsigned NumBytesToCreateFrame = Costs.FrameDefault;
5845
5846 // If the last instruction in any candidate is a terminator, then we should
5847 // tail call all of the candidates.
5848 if (RepeatedSequenceLocs[0].back().isTerminator()) {
5849 FrameID = MachineOutlinerTailCall;
5850 NumBytesToCreateFrame = Costs.FrameTailCall;
5851 SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
5852 } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
5853 LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
5854 LastInstrOpcode == ARM::tBLXr ||
5855 LastInstrOpcode == ARM::tBLXr_noip ||
5856 LastInstrOpcode == ARM::tBLXi) {
5857 FrameID = MachineOutlinerThunk;
5858 NumBytesToCreateFrame = Costs.FrameThunk;
5859 SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
5860 } else {
5861 // We need to decide how to emit calls + frames. We can always emit the same
5862 // frame if we don't need to save to the stack. If we have to save to the
5863 // stack, then we need a different frame.
5864 unsigned NumBytesNoStackCalls = 0;
5865 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
5866
5867 for (outliner::Candidate &C : RepeatedSequenceLocs) {
5868 // LR liveness is overestimated in return blocks, unless they end with a
5869 // tail call.
5870 const auto Last = C.getMBB()->rbegin();
5871 const bool LRIsAvailable =
5872 C.getMBB()->isReturnBlock() && !Last->isCall()
5873 ? isLRAvailable(TRI, I: Last,
5874 E: (MachineBasicBlock::reverse_iterator)C.begin())
5875 : C.isAvailableAcrossAndOutOfSeq(Reg: ARM::LR, TRI);
5876 if (LRIsAvailable) {
5877 FrameID = MachineOutlinerNoLRSave;
5878 NumBytesNoStackCalls += Costs.CallNoLRSave;
5879 C.setCallInfo(CID: MachineOutlinerNoLRSave, CO: Costs.CallNoLRSave);
5880 CandidatesWithoutStackFixups.push_back(x: C);
5881 }
5882
5883 // Is an unused register available? If so, we won't modify the stack, so
5884 // we can outline with the same frame type as those that don't save LR.
5885 else if (findRegisterToSaveLRTo(C)) {
5886 FrameID = MachineOutlinerRegSave;
5887 NumBytesNoStackCalls += Costs.CallRegSave;
5888 C.setCallInfo(CID: MachineOutlinerRegSave, CO: Costs.CallRegSave);
5889 CandidatesWithoutStackFixups.push_back(x: C);
5890 }
5891
5892 // Is SP used in the sequence at all? If not, we don't have to modify
5893 // the stack, so we are guaranteed to get the same frame.
5894 else if (C.isAvailableInsideSeq(Reg: ARM::SP, TRI)) {
5895 NumBytesNoStackCalls += Costs.CallDefault;
5896 C.setCallInfo(CID: MachineOutlinerDefault, CO: Costs.CallDefault);
5897 CandidatesWithoutStackFixups.push_back(x: C);
5898 }
5899
5900 // If we outline this, we need to modify the stack. Pretend we don't
5901 // outline this by saving all of its bytes.
5902 else
5903 NumBytesNoStackCalls += SequenceSize;
5904 }
5905
5906 // If there are no places where we have to save LR, then note that we don't
5907 // have to update the stack. Otherwise, give every candidate the default
5908 // call type.
5909 if (NumBytesNoStackCalls <=
5910 RepeatedSequenceLocs.size() * Costs.CallDefault) {
5911 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
5912 FrameID = MachineOutlinerNoLRSave;
5913 if (RepeatedSequenceLocs.size() < MinRepeats)
5914 return std::nullopt;
5915 } else
5916 SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
5917 }
5918
5919 // Does every candidate's MBB contain a call? If so, then we might have a
5920 // call in the range.
5921 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
5922 // Check if the range contains a call. These require a save + restore of
5923 // the link register.
5924 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
5925 if (any_of(Range: drop_end(RangeOrContainer&: FirstCand),
5926 P: [](const MachineInstr &MI) { return MI.isCall(); }))
5927 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
5928
5929 // Handle the last instruction separately. If it is a tail call, then the
5930 // last instruction is a call and we don't want to save + restore in this
5931 // case. However, it is possible that the last instruction is a
5932 // call without it being valid to tail call this sequence. We should
5933 // consider this as well.
5934 else if (FrameID != MachineOutlinerThunk &&
5935 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
5936 NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
5937 }
5938
5939 return std::make_unique<outliner::OutlinedFunction>(
5940 args&: RepeatedSequenceLocs, args&: SequenceSize, args&: NumBytesToCreateFrame, args&: FrameID);
5941}
5942
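// Check whether the SP-relative immediate of MI can absorb an extra Fixup
// bytes (e.g. the stack adjustment introduced when the outliner spills LR);
// when Updt is set, the immediate is rewritten in place. Returns false when
// the offset cannot be adjusted safely.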
5943bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
5944 int64_t Fixup,
5945 bool Updt) const {
5946 int SPIdx = MI->findRegisterUseOperandIdx(Reg: ARM::SP, /*TRI=*/nullptr);
5947 unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
5948 if (SPIdx < 0)
5949 // No SP operand
5950 return true;
5951 else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
5952 // If SP is not the base register we can't do much
5953 return false;
5954
5955 // Stack might be involved but addressing mode doesn't handle any offset.
5956 // Note: AddrModeT1_[1|2|4] don't operate on SP.
5957 if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions
5958 AddrMode == ARMII::AddrMode4 || // Load/Store Multiple
5959 AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple
5960 AddrMode == ARMII::AddrModeT2_so || // SP can't be used as base register
5961 AddrMode == ARMII::AddrModeT2_pc || // PCrel access
5962 AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST
5963 AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE
5964 AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
5965 AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
5966 AddrMode == ARMII::AddrModeNone ||
5967 AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions
5968 AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm
5969 return false;
5970
5971 unsigned NumOps = MI->getDesc().getNumOperands();
5972 unsigned ImmIdx = NumOps - 3;
5973
5974 const MachineOperand &Offset = MI->getOperand(i: ImmIdx);
5975 assert(Offset.isImm() && "Is not an immediate");
5976 int64_t OffVal = Offset.getImm();
5977
5978 if (OffVal < 0)
5979 // Don't override data if they are below SP.
5980 return false;
5981
5982 unsigned NumBits = 0;
5983 unsigned Scale = 1;
5984
5985 switch (AddrMode) {
5986 case ARMII::AddrMode3:
5987 if (ARM_AM::getAM3Op(AM3Opc: OffVal) == ARM_AM::sub)
5988 return false;
5989 OffVal = ARM_AM::getAM3Offset(AM3Opc: OffVal);
5990 NumBits = 8;
5991 break;
5992 case ARMII::AddrMode5:
5993 if (ARM_AM::getAM5Op(AM5Opc: OffVal) == ARM_AM::sub)
5994 return false;
5995 OffVal = ARM_AM::getAM5Offset(AM5Opc: OffVal);
5996 NumBits = 8;
5997 Scale = 4;
5998 break;
5999 case ARMII::AddrMode5FP16:
6000 if (ARM_AM::getAM5FP16Op(AM5Opc: OffVal) == ARM_AM::sub)
6001 return false;
6002 OffVal = ARM_AM::getAM5FP16Offset(AM5Opc: OffVal);
6003 NumBits = 8;
6004 Scale = 2;
6005 break;
6006 case ARMII::AddrModeT2_i8pos:
6007 NumBits = 8;
6008 break;
6009 case ARMII::AddrModeT2_i8s4:
6010 // FIXME: Values are already scaled in this addressing mode.
6011 assert((Fixup & 3) == 0 && "Can't encode this offset!");
6012 NumBits = 10;
6013 break;
6014 case ARMII::AddrModeT2_ldrex:
6015 NumBits = 8;
6016 Scale = 4;
6017 break;
6018 case ARMII::AddrModeT2_i12:
6019 case ARMII::AddrMode_i12:
6020 NumBits = 12;
6021 break;
6022 case ARMII::AddrModeT1_s: // SP-relative LD/ST
6023 NumBits = 8;
6024 Scale = 4;
6025 break;
6026 default:
6027 llvm_unreachable("Unsupported addressing mode!");
6028 }
6029 // Make sure the offset is encodable for instructions that scale the
6030 // immediate.
6031 assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
6032 "Can't encode this offset!");
6033 OffVal += Fixup / Scale;
6034
6035 unsigned Mask = (1 << NumBits) - 1;
6036
6037 if (OffVal <= Mask) {
6038 if (Updt)
6039 MI->getOperand(i: ImmIdx).setImm(OffVal);
6040 return true;
6041 }
6042
6043 return false;
6044}
6045
6046void ARMBaseInstrInfo::mergeOutliningCandidateAttributes(
6047 Function &F, std::vector<outliner::Candidate> &Candidates) const {
6048 outliner::Candidate &C = Candidates.front();
6049 // branch-target-enforcement is guaranteed to be consistent between all
6050 // candidates, so we only need to look at one.
6051 const Function &CFn = C.getMF()->getFunction();
6052 if (CFn.hasFnAttribute(Kind: "branch-target-enforcement"))
6053 F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "branch-target-enforcement"));
6054
6055 if (CFn.hasFnAttribute(Kind: "sign-return-address"))
6056 F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "sign-return-address"));
6057
6058 ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
6059}
6060
6061bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
6062 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
6063 const Function &F = MF.getFunction();
6064
6065 // Can F be deduplicated by the linker? If it can, don't outline from it.
6066 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
6067 return false;
6068
6069 // Don't outline from functions with section markings; the program could
6070 // expect that all the code is in the named section.
6071 // FIXME: Allow outlining from multiple functions with the same section
6072 // marking.
6073 if (F.hasSection())
6074 return false;
6075
6076 // FIXME: Thumb1 outlining is not handled
6077 if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction())
6078 return false;
6079
6080 // It's safe to outline from MF.
6081 return true;
6082}
6083
6084bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
6085 unsigned &Flags) const {
6086 // Check if LR is available through all of the MBB. If it's not, then set
6087 // a flag.
6088 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
6089 "Suitable Machine Function for outlining must track liveness");
6090
6091 LiveRegUnits LRU(getRegisterInfo());
6092
6093 for (MachineInstr &MI : llvm::reverse(C&: MBB))
6094 LRU.accumulate(MI);
6095
6096 // Check if each of the unsafe registers is available...
6097 bool R12AvailableInBlock = LRU.available(Reg: ARM::R12);
6098 bool CPSRAvailableInBlock = LRU.available(Reg: ARM::CPSR);
6099
6100 // If all of these are dead (and not live out), we know we don't have to check
6101 // them later.
6102 if (R12AvailableInBlock && CPSRAvailableInBlock)
6103 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
6104
6105 // Now, add the live outs to the set.
6106 LRU.addLiveOuts(MBB);
6107
6108 // If any of these registers is available in the MBB, but also live out of
6109 // the block, then we know outlining is unsafe.
6110 if (R12AvailableInBlock && !LRU.available(Reg: ARM::R12))
6111 return false;
6112 if (CPSRAvailableInBlock && !LRU.available(Reg: ARM::CPSR))
6113 return false;
6114
6115 // Check if there's a call inside this MachineBasicBlock. If there is, then
6116 // set a flag.
6117 if (any_of(Range&: MBB, P: [](MachineInstr &MI) { return MI.isCall(); }))
6118 Flags |= MachineOutlinerMBBFlags::HasCalls;
6119
6120 // LR liveness is overestimated in return blocks.
6121
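  // In a return block that does not end in a call, scan the instructions
  // directly instead of relying on the liveness accumulated above.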
6122 bool LRIsAvailable =
6123 MBB.isReturnBlock() && !MBB.back().isCall()
6124 ? isLRAvailable(TRI: getRegisterInfo(), I: MBB.rbegin(), E: MBB.rend())
6125 : LRU.available(Reg: ARM::LR);
6126 if (!LRIsAvailable)
6127 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
6128
6129 return true;
6130}
6131
6132outliner::InstrType
6133ARMBaseInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
6134 MachineBasicBlock::iterator &MIT,
6135 unsigned Flags) const {
6136 MachineInstr &MI = *MIT;
6137 const TargetRegisterInfo *TRI = &getRegisterInfo();
6138
6139  // PIC instructions contain labels; outlining them would break offset
6140  // computation.
6141  unsigned Opc = MI.getOpcode();
6142 if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
6143 Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
6144 Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
6145 Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
6146 Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
6147 Opc == ARM::t2MOV_ga_pcrel)
6148 return outliner::InstrType::Illegal;
6149
6150 // Be conservative with ARMv8.1 MVE instructions.
6151 if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
6152 Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
6153 Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP ||
6154 Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
6155 Opc == ARM::t2LoopEndDec)
6156 return outliner::InstrType::Illegal;
6157
6158 const MCInstrDesc &MCID = MI.getDesc();
6159 uint64_t MIFlags = MCID.TSFlags;
6160 if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)
6161 return outliner::InstrType::Illegal;
6162
6163 // Is this a terminator for a basic block?
6164 if (MI.isTerminator())
6165 // TargetInstrInfo::getOutliningType has already filtered out anything
6166 // that would break this, so we can allow it here.
6167 return outliner::InstrType::Legal;
6168
6169  // Don't outline if the link register or the program counter value is used.
6170 if (MI.readsRegister(Reg: ARM::LR, TRI) || MI.readsRegister(Reg: ARM::PC, TRI))
6171 return outliner::InstrType::Illegal;
6172
6173 if (MI.isCall()) {
6174 // Get the function associated with the call. Look at each operand and find
6175    // the one that represents the callee and get its name.
6176 const Function *Callee = nullptr;
6177 for (const MachineOperand &MOP : MI.operands()) {
6178 if (MOP.isGlobal()) {
6179 Callee = dyn_cast<Function>(Val: MOP.getGlobal());
6180 break;
6181 }
6182 }
6183
6184    // Don't outline calls to "mcount"-like functions; in particular, Linux
6185    // kernel function tracing relies on them.
6186 if (Callee &&
6187 (Callee->getName() == "\01__gnu_mcount_nc" ||
6188 Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
6189 return outliner::InstrType::Illegal;
6190
6191 // If we don't know anything about the callee, assume it depends on the
6192 // stack layout of the caller. In that case, it's only legal to outline
6193 // as a tail-call. Explicitly list the call instructions we know about so
6194 // we don't get unexpected results with call pseudo-instructions.
6195 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6196 if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
6197 Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
6198 Opc == ARM::tBLXi)
6199 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6200
6201 if (!Callee)
6202 return UnknownCallOutlineType;
6203
6204 // We have a function we have information about. Check if it's something we
6205 // can safely outline.
6206 MachineFunction *CalleeMF = MMI.getMachineFunction(F: *Callee);
6207
6208 // We don't know what's going on with the callee at all. Don't touch it.
6209 if (!CalleeMF)
6210 return UnknownCallOutlineType;
6211
6212 // Check if we know anything about the callee saves on the function. If we
6213 // don't, then don't touch it, since that implies that we haven't computed
6214 // anything about its stack frame yet.
6215 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6216 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6217 MFI.getNumObjects() > 0)
6218 return UnknownCallOutlineType;
6219
6220    // At this point, we can say that CalleeMF ought not to pass anything on
6221    // the stack. Therefore, we can outline it.
6222 return outliner::InstrType::Legal;
6223 }
6224
6225 // Since calls are handled, don't touch LR or PC
6226 if (MI.modifiesRegister(Reg: ARM::LR, TRI) || MI.modifiesRegister(Reg: ARM::PC, TRI))
6227 return outliner::InstrType::Illegal;
6228
6229 // Does this use the stack?
6230 if (MI.modifiesRegister(Reg: ARM::SP, TRI) || MI.readsRegister(Reg: ARM::SP, TRI)) {
6231 // True if there is no chance that any outlined candidate from this range
6232 // could require stack fixups. That is, both
6233 // * LR is available in the range (No save/restore around call)
6234 // * The range doesn't include calls (No save/restore in outlined frame)
6235 // are true.
6236 // These conditions also ensure correctness of the return address
6237 // authentication - we insert sign and authentication instructions only if
6238 // we save/restore LR on stack, but then this condition ensures that the
6239 // outlined range does not modify the SP, therefore the SP value used for
6240 // signing is the same as the one used for authentication.
6241 // FIXME: This is very restrictive; the flags check the whole block,
6242 // not just the bit we will try to outline.
6243 bool MightNeedStackFixUp =
6244 (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
6245 MachineOutlinerMBBFlags::HasCalls));
6246
6247 if (!MightNeedStackFixUp)
6248 return outliner::InstrType::Legal;
6249
6250 // Any modification of SP will break our code to save/restore LR.
6251 // FIXME: We could handle some instructions which add a constant offset to
6252 // SP, with a bit more work.
6253 if (MI.modifiesRegister(Reg: ARM::SP, TRI))
6254 return outliner::InstrType::Illegal;
6255
6256    // At this point, we have a stack instruction that we might need to fix
6257    // up. We'll handle it if it's a load or store.
6258 if (checkAndUpdateStackOffset(MI: &MI, Fixup: Subtarget.getStackAlignment().value(),
6259 Updt: false))
6260 return outliner::InstrType::Legal;
6261
6262 // We can't fix it up, so don't outline it.
6263 return outliner::InstrType::Illegal;
6264 }
6265
6266 // Be conservative with IT blocks.
6267 if (MI.readsRegister(Reg: ARM::ITSTATE, TRI) ||
6268 MI.modifiesRegister(Reg: ARM::ITSTATE, TRI))
6269 return outliner::InstrType::Illegal;
6270
6271 // Don't outline CFI instructions.
6272 if (MI.isCFIInstruction())
6273 return outliner::InstrType::Illegal;
6274
6275 return outliner::InstrType::Legal;
6276}
6277
6278void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6279 for (MachineInstr &MI : MBB) {
6280 checkAndUpdateStackOffset(MI: &MI, Fixup: Subtarget.getStackAlignment().value(), Updt: true);
6281 }
6282}
6283
6284void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
6285 MachineBasicBlock::iterator It, bool CFI,
6286 bool Auth) const {
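  // The save slot must hold LR and, when authenticating, the PAC in R12 as
  // well, so reserve at least 8 bytes while respecting the stack alignment.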
6287 int Align = std::max(a: Subtarget.getStackAlignment().value(), b: uint64_t(8));
6288 unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0;
6289 assert(Align >= 8 && Align <= 256);
6290 if (Auth) {
6291 assert(Subtarget.isThumb2());
6292 // Compute PAC in R12. Outlining ensures R12 is dead across the outlined
6293 // sequence.
6294 BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2PAC)).setMIFlags(MIFlags);
6295 BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2STRD_PRE), DestReg: ARM::SP)
6296 .addReg(RegNo: ARM::R12, flags: RegState::Kill)
6297 .addReg(RegNo: ARM::LR, flags: RegState::Kill)
6298 .addReg(RegNo: ARM::SP)
6299 .addImm(Val: -Align)
6300 .add(MOs: predOps(Pred: ARMCC::AL))
6301 .setMIFlags(MIFlags);
6302 } else {
6303 unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
6304 BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: Opc), DestReg: ARM::SP)
6305 .addReg(RegNo: ARM::LR, flags: RegState::Kill)
6306 .addReg(RegNo: ARM::SP)
6307 .addImm(Val: -Align)
6308 .add(MOs: predOps(Pred: ARMCC::AL))
6309 .setMIFlags(MIFlags);
6310 }
6311
6312 if (!CFI)
6313 return;
6314
6315 // Add a CFI, saying CFA is offset by Align bytes from SP.
6316 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
6317 CFIBuilder.buildDefCFAOffset(Offset: Align);
6318
6319 // Add a CFI saying that the LR that we want to find is now higher than
6320 // before.
6321 int LROffset = Auth ? Align - 4 : Align;
6322 CFIBuilder.buildOffset(Reg: ARM::LR, Offset: -LROffset);
6323 if (Auth) {
6324    // Add a CFI for the location of the return address PAC.
6325 CFIBuilder.buildOffset(Reg: ARM::RA_AUTH_CODE, Offset: -Align);
6326 }
6327}
6328
6329void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
6330 MachineBasicBlock::iterator It,
6331 bool CFI, bool Auth) const {
6332 int Align = Subtarget.getStackAlignment().value();
6333 unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0;
6334 if (Auth) {
6335 assert(Subtarget.isThumb2());
6336 // Restore return address PAC and LR.
6337 BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2LDRD_POST))
6338 .addReg(RegNo: ARM::R12, flags: RegState::Define)
6339 .addReg(RegNo: ARM::LR, flags: RegState::Define)
6340 .addReg(RegNo: ARM::SP, flags: RegState::Define)
6341 .addReg(RegNo: ARM::SP)
6342 .addImm(Val: Align)
6343 .add(MOs: predOps(Pred: ARMCC::AL))
6344 .setMIFlags(MIFlags);
6345 // LR authentication is after the CFI instructions, below.
6346 } else {
6347 unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
6348 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: Opc), DestReg: ARM::LR)
6349 .addReg(RegNo: ARM::SP, flags: RegState::Define)
6350 .addReg(RegNo: ARM::SP);
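    // The ARM post-indexed form carries an extra offset-register operand; pass
    // register 0 to mean 'no register'.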
6351 if (!Subtarget.isThumb())
6352 MIB.addReg(RegNo: 0);
6353 MIB.addImm(Val: Subtarget.getStackAlignment().value())
6354 .add(MOs: predOps(Pred: ARMCC::AL))
6355 .setMIFlags(MIFlags);
6356 }
6357
6358 if (CFI) {
6359 // Now stack has moved back up and we have restored LR.
6360 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameDestroy);
6361 CFIBuilder.buildDefCFAOffset(Offset: 0);
6362 CFIBuilder.buildRestore(Reg: ARM::LR);
6363 if (Auth)
6364 CFIBuilder.buildUndefined(Reg: ARM::RA_AUTH_CODE);
6365 }
6366
6367 if (Auth)
6368 BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2AUT));
6369}
6370
6371void ARMBaseInstrInfo::buildOutlinedFrame(
6372 MachineBasicBlock &MBB, MachineFunction &MF,
6373 const outliner::OutlinedFunction &OF) const {
6374 // For thunk outlining, rewrite the last instruction from a call to a
6375 // tail-call.
6376 if (OF.FrameConstructionID == MachineOutlinerThunk) {
6377 MachineInstr *Call = &*--MBB.instr_end();
6378 bool isThumb = Subtarget.isThumb();
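    // For Thumb calls the callee operand follows the two predicate operands;
    // for ARM calls it is the first operand. Pick a matching tail-jump opcode:
    // register vs. immediate target, and for Thumb immediate jumps, tTAILJMPd
    // on MachO vs. tTAILJMPdND elsewhere.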
6379 unsigned FuncOp = isThumb ? 2 : 0;
6380 unsigned Opc = Call->getOperand(i: FuncOp).isReg()
6381 ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
6382 : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
6383 : ARM::tTAILJMPdND
6384 : ARM::TAILJMPd;
6385 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: get(Opcode: Opc))
6386 .add(MO: Call->getOperand(i: FuncOp));
6387 if (isThumb && !Call->getOperand(i: FuncOp).isReg())
6388 MIB.add(MOs: predOps(Pred: ARMCC::AL));
6389 Call->eraseFromParent();
6390 }
6391
6392 // Is there a call in the outlined range?
6393 auto IsNonTailCall = [](MachineInstr &MI) {
6394 return MI.isCall() && !MI.isReturn();
6395 };
6396 if (llvm::any_of(Range: MBB.instrs(), P: IsNonTailCall)) {
6397 MachineBasicBlock::iterator It = MBB.begin();
6398 MachineBasicBlock::iterator Et = MBB.end();
6399
6400 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6401 OF.FrameConstructionID == MachineOutlinerThunk)
6402 Et = std::prev(x: MBB.end());
6403
6404    // We have to save and restore LR, so we need to add it to the liveins
6405    // if it is not already part of the set. This is sufficient since
6406    // outlined functions only have one block.
6407 if (!MBB.isLiveIn(Reg: ARM::LR))
6408 MBB.addLiveIn(PhysReg: ARM::LR);
6409
6410 // Insert a save before the outlined region
6411 bool Auth = MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(SpillsLR: true);
6412 saveLROnStack(MBB, It, CFI: true, Auth);
6413
6414 // Fix up the instructions in the range, since we're going to modify the
6415 // stack.
6416 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
6417 "Can only fix up stack references once");
6418 fixupPostOutline(MBB);
6419
6420 // Insert a restore before the terminator for the function. Restore LR.
6421 restoreLRFromStack(MBB, It: Et, CFI: true, Auth);
6422 }
6423
6424 // If this is a tail call outlined function, then there's already a return.
6425 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6426 OF.FrameConstructionID == MachineOutlinerThunk)
6427 return;
6428
6429 // Here we have to insert the return ourselves. Get the correct opcode from
6430  // the current feature set.
6431 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: get(Opcode: Subtarget.getReturnOpcode()))
6432 .add(MOs: predOps(Pred: ARMCC::AL));
6433
6434 // Did we have to modify the stack by saving the link register?
6435 if (OF.FrameConstructionID != MachineOutlinerDefault &&
6436 OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
6437 return;
6438
6439 // We modified the stack.
6440 // Walk over the basic block and fix up all the stack accesses.
6441 fixupPostOutline(MBB);
6442}
6443
6444MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
6445 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
6446 MachineFunction &MF, outliner::Candidate &C) const {
6447 MachineInstrBuilder MIB;
6448 MachineBasicBlock::iterator CallPt;
6449 unsigned Opc;
6450 bool isThumb = Subtarget.isThumb();
6451
6452 // Are we tail calling?
6453 if (C.CallConstructionID == MachineOutlinerTailCall) {
6454 // If yes, then we can just branch to the label.
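    // Thumb direct tail jumps use tTAILJMPd on MachO targets and tTAILJMPdND
    // elsewhere; ARM uses TAILJMPd in either case.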
6455 Opc = isThumb
6456 ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
6457 : ARM::TAILJMPd;
6458 MIB = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: Opc))
6459 .addGlobalAddress(GV: M.getNamedValue(Name: MF.getName()));
6460 if (isThumb)
6461 MIB.add(MOs: predOps(Pred: ARMCC::AL));
6462 It = MBB.insert(I: It, MI: MIB);
6463 return It;
6464 }
6465
6466 // Create the call instruction.
6467 Opc = isThumb ? ARM::tBL : ARM::BL;
6468 MachineInstrBuilder CallMIB = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: Opc));
6469 if (isThumb)
6470 CallMIB.add(MOs: predOps(Pred: ARMCC::AL));
6471 CallMIB.addGlobalAddress(GV: M.getNamedValue(Name: MF.getName()));
6472
6473 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
6474 C.CallConstructionID == MachineOutlinerThunk) {
6475 // No, so just insert the call.
6476 It = MBB.insert(I: It, MI: CallMIB);
6477 return It;
6478 }
6479
6480 const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
6481 // Can we save to a register?
6482 if (C.CallConstructionID == MachineOutlinerRegSave) {
6483 Register Reg = findRegisterToSaveLRTo(C);
6484 assert(Reg != 0 && "No callee-saved register available?");
6485
6486 // Save and restore LR from that register.
6487 copyPhysReg(MBB, I: It, DL: DebugLoc(), DestReg: Reg, SrcReg: ARM::LR, KillSrc: true);
6488 if (!AFI.isLRSpilled())
6489 CFIInstBuilder(MBB, It, MachineInstr::FrameSetup)
6490 .buildRegister(Reg1: ARM::LR, Reg2: Reg);
6491 CallPt = MBB.insert(I: It, MI: CallMIB);
6492 copyPhysReg(MBB, I: It, DL: DebugLoc(), DestReg: ARM::LR, SrcReg: Reg, KillSrc: true);
6493 if (!AFI.isLRSpilled())
6494 CFIInstBuilder(MBB, It, MachineInstr::FrameDestroy).buildRestore(Reg: ARM::LR);
6495 It--;
6496 return CallPt;
6497 }
6498 // We have the default case. Save and restore from SP.
6499 if (!MBB.isLiveIn(Reg: ARM::LR))
6500 MBB.addLiveIn(PhysReg: ARM::LR);
6501 bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(SpillsLR: true);
6502 saveLROnStack(MBB, It, CFI: !AFI.isLRSpilled(), Auth);
6503 CallPt = MBB.insert(I: It, MI: CallMIB);
6504 restoreLRFromStack(MBB, It, CFI: !AFI.isLRSpilled(), Auth);
6505 It--;
6506 return CallPt;
6507}
6508
6509bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault(
6510 MachineFunction &MF) const {
6511 return Subtarget.isMClass() && MF.getFunction().hasMinSize();
6512}
6513
6514bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(
6515 const MachineInstr &MI) const {
6516 // Try hard to rematerialize any VCTPs because if we spill P0, it will block
6517 // the tail predication conversion. This means that the element count
6518 // register has to be live for longer, but that has to be better than
6519 // spill/restore and VPT predication.
6520 return (isVCTP(MI: &MI) && !isPredicated(MI)) ||
6521 TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
6522}
6523
6524unsigned llvm::getBLXOpcode(const MachineFunction &MF) {
6525 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
6526 : ARM::BLX;
6527}
6528
6529unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) {
6530 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
6531 : ARM::tBLXr;
6532}
6533
6534unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
6535 return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
6536 : ARM::BLX_pred;
6537}
6538
6539namespace {
6540class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
6541 MachineInstr *EndLoop, *LoopCount;
6542 MachineFunction *MF;
6543 const TargetInstrInfo *TII;
6544
6545 // Bitset[0 .. MAX_STAGES-1] ... iterations needed
6546 // [LAST_IS_USE] : last reference to register in schedule is a use
6547 // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live
6548 static int constexpr MAX_STAGES = 30;
6549 static int constexpr LAST_IS_USE = MAX_STAGES;
6550 static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
6551 typedef std::bitset<MAX_STAGES + 2> IterNeed;
6552 typedef std::map<unsigned, IterNeed> IterNeeds;
6553
6554 void bumpCrossIterationPressure(RegPressureTracker &RPT,
6555 const IterNeeds &CIN);
6556 bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS);
6557
6558  // Meanings of EndLoop and LoopCount for the various loop types:
6559 // t2Bcc:
6560 // EndLoop = branch at end of original BB that will become a kernel
6561 // LoopCount = CC setter live into branch
6562 // t2LoopEnd:
6563 // EndLoop = branch at end of original BB
6564 // LoopCount = t2LoopDec
6565public:
6566 ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
6567 : EndLoop(EndLoop), LoopCount(LoopCount),
6568 MF(EndLoop->getParent()->getParent()),
6569 TII(MF->getSubtarget().getInstrInfo()) {}
6570
6571 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
6572    // Ignore only the terminator and the loop-count instruction.
6573 return MI == EndLoop || MI == LoopCount;
6574 }
6575
6576 bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
6577 if (tooMuchRegisterPressure(SSD, SMS))
6578 return false;
6579
6580 return true;
6581 }
6582
6583 std::optional<bool> createTripCountGreaterCondition(
6584 int TC, MachineBasicBlock &MBB,
6585 SmallVectorImpl<MachineOperand> &Cond) override {
6586
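    // For a conditional-branch loop, reuse the branch's own condition
    // operands, inverting them when the branch targets the loop block itself.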
6587 if (isCondBranchOpcode(Opc: EndLoop->getOpcode())) {
6588 Cond.push_back(Elt: EndLoop->getOperand(i: 1));
6589 Cond.push_back(Elt: EndLoop->getOperand(i: 2));
6590 if (EndLoop->getOperand(i: 0).getMBB() == EndLoop->getParent()) {
6591 TII->reverseBranchCondition(Cond);
6592 }
6593 return {};
6594 } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
6595      // The general case lets the unrolled t2LoopDec do the subtraction, so
6596      // we only need to check whether zero has been reached.
6597 MachineInstr *LoopDec = nullptr;
6598 for (auto &I : MBB.instrs())
6599 if (I.getOpcode() == ARM::t2LoopDec)
6600 LoopDec = &I;
6601 assert(LoopDec && "Unable to find copied LoopDec");
6602 // Check if we're done with the loop.
6603 BuildMI(BB: &MBB, MIMD: LoopDec->getDebugLoc(), MCID: TII->get(Opcode: ARM::t2CMPri))
6604 .addReg(RegNo: LoopDec->getOperand(i: 0).getReg())
6605 .addImm(Val: 0)
6606 .addImm(Val: ARMCC::AL)
6607 .addReg(RegNo: ARM::NoRegister);
6608 Cond.push_back(Elt: MachineOperand::CreateImm(Val: ARMCC::EQ));
6609 Cond.push_back(Elt: MachineOperand::CreateReg(Reg: ARM::CPSR, isDef: false));
6610 return {};
6611 } else
6612 llvm_unreachable("Unknown EndLoop");
6613 }
6614
6615 void setPreheader(MachineBasicBlock *NewPreheader) override {}
6616
6617 void adjustTripCount(int TripCountAdjust) override {}
6618};
6619
6620void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
6621 const IterNeeds &CIN) {
6622 // Increase pressure by the amounts in CrossIterationNeeds
6623 for (const auto &N : CIN) {
6624 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6625 for (int I = 0; I < Cnt; ++I)
6626 RPT.increaseRegPressure(RegUnit: Register(N.first), PreviousMask: LaneBitmask::getNone(),
6627 NewMask: LaneBitmask::getAll());
6628 }
6629 // Decrease pressure by the amounts in CrossIterationNeeds
6630 for (const auto &N : CIN) {
6631 int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
6632 for (int I = 0; I < Cnt; ++I)
6633 RPT.decreaseRegPressure(RegUnit: Register(N.first), PreviousMask: LaneBitmask::getAll(),
6634 NewMask: LaneBitmask::getNone());
6635 }
6636}
6637
6638bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
6639 SMSchedule &SMS) {
6640 IterNeeds CrossIterationNeeds;
6641
6642 // Determine which values will be loop-carried after the schedule is
6643 // applied
6644
6645 for (auto &SU : SSD.SUnits) {
6646 const MachineInstr *MI = SU.getInstr();
6647 int Stg = SMS.stageScheduled(SU: const_cast<SUnit *>(&SU));
6648 for (auto &S : SU.Succs)
6649 if (MI->isPHI() && S.getKind() == SDep::Anti) {
6650 Register Reg = S.getReg();
6651 if (Reg.isVirtual())
6652 CrossIterationNeeds[Reg.id()].set(position: 0);
6653 } else if (S.isAssignedRegDep()) {
6654 int OStg = SMS.stageScheduled(SU: S.getSUnit());
6655 if (OStg >= 0 && OStg != Stg) {
6656 Register Reg = S.getReg();
6657 if (Reg.isVirtual())
6658 CrossIterationNeeds[Reg.id()] |= ((1 << (OStg - Stg)) - 1);
6659 }
6660 }
6661 }
6662
6663 // Determine more-or-less what the proposed schedule (reversed) is going to
6664 // be; it might not be quite the same because the within-cycle ordering
6665 // created by SMSchedule depends upon changes to help with address offsets and
6666 // the like.
6667 std::vector<SUnit *> ProposedSchedule;
6668 for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
6669 for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd;
6670 ++Stage) {
6671 std::deque<SUnit *> Instrs =
6672 SMS.getInstructions(cycle: Cycle + Stage * SMS.getInitiationInterval());
6673 std::sort(first: Instrs.begin(), last: Instrs.end(),
6674 comp: [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
6675 llvm::append_range(C&: ProposedSchedule, R&: Instrs);
6676 }
6677
6678 // Learn whether the last use/def of each cross-iteration register is a use or
6679 // def. If it is a def, RegisterPressure will implicitly increase max pressure
6680 // and we do not have to add the pressure.
6681 for (auto *SU : ProposedSchedule)
6682 for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
6683 ++OperI) {
6684 auto MO = *OperI;
6685 if (!MO.isReg() || !MO.getReg())
6686 continue;
6687 Register Reg = MO.getReg();
6688 auto CIter = CrossIterationNeeds.find(x: Reg.id());
6689 if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
6690 CIter->second[SEEN_AS_LIVE])
6691 continue;
6692 if (MO.isDef() && !MO.isDead())
6693 CIter->second.set(position: SEEN_AS_LIVE);
6694 else if (MO.isUse())
6695 CIter->second.set(position: LAST_IS_USE);
6696 }
6697 for (auto &CI : CrossIterationNeeds)
6698 CI.second.reset(position: LAST_IS_USE);
6699
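  // Walk the proposed schedule in reverse with a pressure tracker, seeded with
  // the cross-iteration registers computed above.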
6700 RegionPressure RecRegPressure;
6701 RegPressureTracker RPTracker(RecRegPressure);
6702 RegisterClassInfo RegClassInfo;
6703 RegClassInfo.runOnMachineFunction(MF: *MF);
6704 RPTracker.init(mf: MF, rci: &RegClassInfo, lis: nullptr, mbb: EndLoop->getParent(),
6705 pos: EndLoop->getParent()->end(), TrackLaneMasks: false, TrackUntiedDefs: false);
6706
6707 bumpCrossIterationPressure(RPT&: RPTracker, CIN: CrossIterationNeeds);
6708
6709 for (auto *SU : ProposedSchedule) {
6710 MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
6711 RPTracker.setPos(std::next(x: CurInstI));
6712 RPTracker.recede();
6713
6714 // Track what cross-iteration registers would be seen as live
6715 for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) {
6716 auto MO = *OperI;
6717 if (!MO.isReg() || !MO.getReg())
6718 continue;
6719 Register Reg = MO.getReg();
6720 if (MO.isDef() && !MO.isDead()) {
6721 auto CIter = CrossIterationNeeds.find(x: Reg.id());
6722 if (CIter != CrossIterationNeeds.end()) {
6723 CIter->second.reset(position: 0);
6724 CIter->second.reset(position: SEEN_AS_LIVE);
6725 }
6726 }
6727 }
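    // For values flowing in from earlier stages, record the stage distance and
    // mark them as live across the iteration boundary.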
6728 for (auto &S : SU->Preds) {
6729 auto Stg = SMS.stageScheduled(SU);
6730 if (S.isAssignedRegDep()) {
6731 Register Reg = S.getReg();
6732 auto CIter = CrossIterationNeeds.find(x: Reg.id());
6733 if (CIter != CrossIterationNeeds.end()) {
6734 auto Stg2 = SMS.stageScheduled(SU: const_cast<SUnit *>(S.getSUnit()));
6735 assert(Stg2 <= Stg && "Data dependence upon earlier stage");
6736 if (Stg - Stg2 < MAX_STAGES)
6737 CIter->second.set(position: Stg - Stg2);
6738 CIter->second.set(position: SEEN_AS_LIVE);
6739 }
6740 }
6741 }
6742
6743 bumpCrossIterationPressure(RPT&: RPTracker, CIN: CrossIterationNeeds);
6744 }
6745
6746 auto &P = RPTracker.getPressure().MaxSetPressure;
6747 for (unsigned I = 0, E = P.size(); I < E; ++I) {
6748 // Exclude some Neon register classes.
6749 if (I == ARM::DQuad_with_ssub_0 || I == ARM::DTripleSpc_with_ssub_0 ||
6750 I == ARM::DTriple_with_qsub_0_in_QPR)
6751 continue;
6752
6753 if (P[I] > RegClassInfo.getRegPressureSetLimit(Idx: I)) {
6754 return true;
6755 }
6756 }
6757 return false;
6758}
6759
6760} // namespace
6761
6762std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
6763ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
6764 MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
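  // The preheader is whichever predecessor is not the loop block itself.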
6765 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
6766 if (Preheader == LoopBB)
6767 Preheader = *std::next(x: LoopBB->pred_begin());
6768
6769 if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
6770 // If the branch is a Bcc, then the CPSR should be set somewhere within the
6771 // block. We need to determine the reaching definition of CPSR so that
6772 // it can be marked as non-pipelineable, allowing the pipeliner to force
6773 // it into stage 0 or give up if it cannot or will not do so.
6774 MachineInstr *CCSetter = nullptr;
6775 for (auto &L : LoopBB->instrs()) {
6776 if (L.isCall())
6777 return nullptr;
6778 if (isCPSRDefined(MI: L))
6779 CCSetter = &L;
6780 }
6781 if (CCSetter)
6782 return std::make_unique<ARMPipelinerLoopInfo>(args: &*I, args&: CCSetter);
6783 else
6784 return nullptr; // Unable to find the CC setter, so unable to guarantee
6785                      // that pipelining will work
6786 }
6787
6788 // Recognize:
6789 // preheader:
6790  //     %1 = t2DoLoopStart %0
6791 // loop:
6792 // %2 = phi %1, <not loop>, %..., %loop
6793 // %3 = t2LoopDec %2, <imm>
6794 // t2LoopEnd %3, %loop
6795
6796 if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
6797 for (auto &L : LoopBB->instrs())
6798 if (L.isCall())
6799 return nullptr;
6800 else if (isVCTP(MI: &L))
6801 return nullptr;
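    // t2LoopEnd consumes the counter produced by t2LoopDec; find that def and
    // make sure the preheader contains the matching t2DoLoopStart.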
6802 Register LoopDecResult = I->getOperand(i: 0).getReg();
6803 MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
6804 MachineInstr *LoopDec = MRI.getUniqueVRegDef(Reg: LoopDecResult);
6805 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
6806 return nullptr;
6807 MachineInstr *LoopStart = nullptr;
6808 for (auto &J : Preheader->instrs())
6809 if (J.getOpcode() == ARM::t2DoLoopStart)
6810 LoopStart = &J;
6811 if (!LoopStart)
6812 return nullptr;
6813 return std::make_unique<ARMPipelinerLoopInfo>(args: &*I, args&: LoopDec);
6814 }
6815 return nullptr;
6816}
6817