1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
15#include "AArch64MachineFunctionInfo.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
18#include "MCTargetDesc/AArch64AddressingModes.h"
19#include "MCTargetDesc/AArch64MCTargetDesc.h"
20#include "Utils/AArch64BaseInfo.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/CodeGen/CFIInstBuilder.h"
25#include "llvm/CodeGen/LivePhysRegs.h"
26#include "llvm/CodeGen/MachineBasicBlock.h"
27#include "llvm/CodeGen/MachineCombinerPattern.h"
28#include "llvm/CodeGen/MachineFrameInfo.h"
29#include "llvm/CodeGen/MachineFunction.h"
30#include "llvm/CodeGen/MachineInstr.h"
31#include "llvm/CodeGen/MachineInstrBuilder.h"
32#include "llvm/CodeGen/MachineMemOperand.h"
33#include "llvm/CodeGen/MachineModuleInfo.h"
34#include "llvm/CodeGen/MachineOperand.h"
35#include "llvm/CodeGen/MachineRegisterInfo.h"
36#include "llvm/CodeGen/RegisterScavenging.h"
37#include "llvm/CodeGen/StackMaps.h"
38#include "llvm/CodeGen/TargetRegisterInfo.h"
39#include "llvm/CodeGen/TargetSubtargetInfo.h"
40#include "llvm/IR/DebugInfoMetadata.h"
41#include "llvm/IR/DebugLoc.h"
42#include "llvm/IR/GlobalValue.h"
43#include "llvm/IR/Module.h"
44#include "llvm/MC/MCAsmInfo.h"
45#include "llvm/MC/MCInst.h"
46#include "llvm/MC/MCInstBuilder.h"
47#include "llvm/MC/MCInstrDesc.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/CodeGen.h"
50#include "llvm/Support/CommandLine.h"
51#include "llvm/Support/ErrorHandling.h"
52#include "llvm/Support/LEB128.h"
53#include "llvm/Support/MathExtras.h"
54#include "llvm/Target/TargetMachine.h"
55#include "llvm/Target/TargetOptions.h"
56#include <cassert>
57#include <cstdint>
58#include <iterator>
59#include <utility>
60
61using namespace llvm;
62
63#define GET_INSTRINFO_CTOR_DTOR
64#include "AArch64GenInstrInfo.inc"
65
66static cl::opt<unsigned>
67 CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(Val: 9),
68 cl::desc("Restrict range of CB instructions (DEBUG)"));
69
70static cl::opt<unsigned> TBZDisplacementBits(
71 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(Val: 14),
72 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
73
74static cl::opt<unsigned> CBZDisplacementBits(
75 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(Val: 19),
76 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
77
78static cl::opt<unsigned>
79 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(Val: 19),
80 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
81
82static cl::opt<unsigned>
83 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(Val: 26),
84 cl::desc("Restrict range of B instructions (DEBUG)"));
85
86AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
87 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
88 AArch64::CATCHRET),
89 RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
90
/// Return the number of bytes of code the specified instruction may occupy.
/// This returns the maximum number of bytes.
93unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
94 const MachineBasicBlock &MBB = *MI.getParent();
95 const MachineFunction *MF = MBB.getParent();
96 const Function &F = MF->getFunction();
97 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
98
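  // Inline assembly can expand to a variable number of instructions; derive
  // a conservative upper bound from the assembly string.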
99 {
100 auto Op = MI.getOpcode();
101 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
102 return getInlineAsmLength(Str: MI.getOperand(i: 0).getSymbolName(), MAI: *MAI);
103 }
104
105 // Meta-instructions emit no code.
106 if (MI.isMetaInstruction())
107 return 0;
108
109 // FIXME: We currently only handle pseudoinstructions that don't get expanded
110 // before the assembly printer.
111 unsigned NumBytes = 0;
112 const MCInstrDesc &Desc = MI.getDesc();
113
114 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
115 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
116
117 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
118 if (!MFI->shouldSignReturnAddress(SpillsLR: MF))
119 return NumBytes;
120
121 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
122 auto Method = STI.getAuthenticatedLRCheckMethod(MF: *MF);
123 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
124 return NumBytes;
125 }
126
  // The size should preferably be set in
  // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
  // The specific cases below handle instructions of variable size.
130 switch (Desc.getOpcode()) {
131 default:
132 if (Desc.getSize())
133 return Desc.getSize();
134
135 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
136 // with fixed constant size but not specified in .td file) is a normal
137 // 4-byte insn.
138 NumBytes = 4;
139 break;
140 case TargetOpcode::STACKMAP:
141 // The upper bound for a stackmap intrinsic is the full length of its shadow
142 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
143 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
144 break;
145 case TargetOpcode::PATCHPOINT:
146 // The size of the patchpoint intrinsic is the number of bytes requested
147 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
148 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
149 break;
150 case TargetOpcode::STATEPOINT:
151 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
152 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
153 // No patch bytes means a normal call inst is emitted
154 if (NumBytes == 0)
155 NumBytes = 4;
156 break;
157 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
158 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
159 // instructions are expanded to the specified number of NOPs. Otherwise,
160 // they are expanded to 36-byte XRay sleds.
161 NumBytes =
162 F.getFnAttributeAsParsedInteger(Kind: "patchable-function-entry", Default: 9) * 4;
163 break;
164 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
165 case TargetOpcode::PATCHABLE_TAIL_CALL:
166 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
167 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
168 NumBytes = 36;
169 break;
170 case TargetOpcode::PATCHABLE_EVENT_CALL:
171 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
172 NumBytes = 24;
173 break;
174
175 case AArch64::SPACE:
176 NumBytes = MI.getOperand(i: 1).getImm();
177 break;
178 case TargetOpcode::BUNDLE:
179 NumBytes = getInstBundleLength(MI);
180 break;
181 }
182
183 return NumBytes;
184}
185
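/// Return the total size in bytes of the instructions inside a bundle,
/// excluding the BUNDLE marker itself.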
186unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
187 unsigned Size = 0;
188 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
189 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
190 while (++I != E && I->isInsideBundle()) {
191 assert(!I->isBundle() && "No nested bundle!");
192 Size += getInstSizeInBytes(MI: *I);
193 }
194 return Size;
195}
196
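// Populate Target and Cond from a conditional branch. Cond is encoded as:
//   Bcc:             { CondCode }
//   CB(N)Z[WX]:      { -1, Opcode, Reg }
//   TB(N)Z[WX]:      { -1, Opcode, Reg, BitNumber }
//   CB[WX]P{ri,rr}:  { -1, Opcode, CondCode, Op0, Op1 }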
197static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
198 SmallVectorImpl<MachineOperand> &Cond) {
199 // Block ends with fall-through condbranch.
200 switch (LastInst->getOpcode()) {
201 default:
202 llvm_unreachable("Unknown branch instruction?");
203 case AArch64::Bcc:
204 Target = LastInst->getOperand(i: 1).getMBB();
205 Cond.push_back(Elt: LastInst->getOperand(i: 0));
206 break;
207 case AArch64::CBZW:
208 case AArch64::CBZX:
209 case AArch64::CBNZW:
210 case AArch64::CBNZX:
211 Target = LastInst->getOperand(i: 1).getMBB();
212 Cond.push_back(Elt: MachineOperand::CreateImm(Val: -1));
213 Cond.push_back(Elt: MachineOperand::CreateImm(Val: LastInst->getOpcode()));
214 Cond.push_back(Elt: LastInst->getOperand(i: 0));
215 break;
216 case AArch64::TBZW:
217 case AArch64::TBZX:
218 case AArch64::TBNZW:
219 case AArch64::TBNZX:
220 Target = LastInst->getOperand(i: 2).getMBB();
221 Cond.push_back(Elt: MachineOperand::CreateImm(Val: -1));
222 Cond.push_back(Elt: MachineOperand::CreateImm(Val: LastInst->getOpcode()));
223 Cond.push_back(Elt: LastInst->getOperand(i: 0));
224 Cond.push_back(Elt: LastInst->getOperand(i: 1));
225 break;
226 case AArch64::CBWPri:
227 case AArch64::CBXPri:
228 case AArch64::CBWPrr:
229 case AArch64::CBXPrr:
230 Target = LastInst->getOperand(i: 3).getMBB();
231 Cond.push_back(Elt: MachineOperand::CreateImm(Val: -1));
232 Cond.push_back(Elt: MachineOperand::CreateImm(Val: LastInst->getOpcode()));
233 Cond.push_back(Elt: LastInst->getOperand(i: 0));
234 Cond.push_back(Elt: LastInst->getOperand(i: 1));
235 Cond.push_back(Elt: LastInst->getOperand(i: 2));
236 break;
237 }
238}
239
240static unsigned getBranchDisplacementBits(unsigned Opc) {
241 switch (Opc) {
242 default:
243 llvm_unreachable("unexpected opcode!");
244 case AArch64::B:
245 return BDisplacementBits;
246 case AArch64::TBNZW:
247 case AArch64::TBZW:
248 case AArch64::TBNZX:
249 case AArch64::TBZX:
250 return TBZDisplacementBits;
251 case AArch64::CBNZW:
252 case AArch64::CBZW:
253 case AArch64::CBNZX:
254 case AArch64::CBZX:
255 return CBZDisplacementBits;
256 case AArch64::Bcc:
257 return BCCDisplacementBits;
258 case AArch64::CBWPri:
259 case AArch64::CBXPri:
260 case AArch64::CBWPrr:
261 case AArch64::CBXPrr:
262 return CBDisplacementBits;
263 }
264}
265
266bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
267 int64_t BrOffset) const {
268 unsigned Bits = getBranchDisplacementBits(Opc: BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
271 return isIntN(N: Bits, x: BrOffset / 4);
272}
273
274MachineBasicBlock *
275AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
276 switch (MI.getOpcode()) {
277 default:
278 llvm_unreachable("unexpected opcode!");
279 case AArch64::B:
280 return MI.getOperand(i: 0).getMBB();
281 case AArch64::TBZW:
282 case AArch64::TBNZW:
283 case AArch64::TBZX:
284 case AArch64::TBNZX:
285 return MI.getOperand(i: 2).getMBB();
286 case AArch64::CBZW:
287 case AArch64::CBNZW:
288 case AArch64::CBZX:
289 case AArch64::CBNZX:
290 case AArch64::Bcc:
291 return MI.getOperand(i: 1).getMBB();
292 case AArch64::CBWPri:
293 case AArch64::CBXPri:
294 case AArch64::CBWPrr:
295 case AArch64::CBXPrr:
296 return MI.getOperand(i: 3).getMBB();
297 }
298}
299
300void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
301 MachineBasicBlock &NewDestBB,
302 MachineBasicBlock &RestoreBB,
303 const DebugLoc &DL,
304 int64_t BrOffset,
305 RegScavenger *RS) const {
306 assert(RS && "RegScavenger required for long branching");
307 assert(MBB.empty() &&
308 "new block should be inserted for expanding unconditional branch");
309 assert(MBB.pred_size() == 1);
310 assert(RestoreBB.empty() &&
311 "restore block should be inserted for restoring clobbered registers");
312
313 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
314 // Offsets outside of the signed 33-bit range are not supported for ADRP +
315 // ADD.
316 if (!isInt<33>(x: BrOffset))
317 report_fatal_error(
318 reason: "Branch offsets outside of the signed 33-bit range not supported");
319
320 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DL, MCID: get(Opcode: AArch64::ADRP), DestReg: Reg)
321 .addSym(Sym: DestBB.getSymbol(), TargetFlags: AArch64II::MO_PAGE);
322 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DL, MCID: get(Opcode: AArch64::ADDXri), DestReg: Reg)
323 .addReg(RegNo: Reg)
324 .addSym(Sym: DestBB.getSymbol(), TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
325 .addImm(Val: 0);
326 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DL, MCID: get(Opcode: AArch64::BR)).addReg(RegNo: Reg);
327 };
328
329 RS->enterBasicBlockEnd(MBB);
330 // If X16 is unused, we can rely on the linker to insert a range extension
331 // thunk if NewDestBB is out of range of a single B instruction.
332 constexpr Register Reg = AArch64::X16;
333 if (!RS->isRegUsed(Reg)) {
334 insertUnconditionalBranch(MBB, DestBB: &NewDestBB, DL);
335 RS->setRegUsed(Reg);
336 return;
337 }
338
339 // If there's a free register and it's worth inflating the code size,
340 // manually insert the indirect branch.
341 Register Scavenged = RS->FindUnusedReg(RC: &AArch64::GPR64RegClass);
342 if (Scavenged != AArch64::NoRegister &&
343 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
344 buildIndirectBranch(Scavenged, NewDestBB);
345 RS->setRegUsed(Reg: Scavenged);
346 return;
347 }
348
349 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
350 // with red zones.
351 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
352 if (!AFI || AFI->hasRedZone().value_or(u: true))
353 report_fatal_error(
354 reason: "Unable to insert indirect branch inside function that has red zone");
355
356 // Otherwise, spill X16 and defer range extension to the linker.
357 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DL, MCID: get(Opcode: AArch64::STRXpre))
358 .addReg(RegNo: AArch64::SP, flags: RegState::Define)
359 .addReg(RegNo: Reg)
360 .addReg(RegNo: AArch64::SP)
361 .addImm(Val: -16);
362
363 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DL, MCID: get(Opcode: AArch64::B)).addMBB(MBB: &RestoreBB);
364
365 BuildMI(BB&: RestoreBB, I: RestoreBB.end(), MIMD: DL, MCID: get(Opcode: AArch64::LDRXpost))
366 .addReg(RegNo: AArch64::SP, flags: RegState::Define)
367 .addReg(RegNo: Reg, flags: RegState::Define)
368 .addReg(RegNo: AArch64::SP)
369 .addImm(Val: 16);
370}
371
372// Branch analysis.
373bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
374 MachineBasicBlock *&TBB,
375 MachineBasicBlock *&FBB,
376 SmallVectorImpl<MachineOperand> &Cond,
377 bool AllowModify) const {
378 // If the block has no terminators, it just falls into the block after it.
379 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
380 if (I == MBB.end())
381 return false;
382
383 // Skip over SpeculationBarrierEndBB terminators
384 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
385 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
386 --I;
387 }
388
389 if (!isUnpredicatedTerminator(MI: *I))
390 return false;
391
392 // Get the last instruction in the block.
393 MachineInstr *LastInst = &*I;
394
395 // If there is only one terminator instruction, process it.
396 unsigned LastOpc = LastInst->getOpcode();
397 if (I == MBB.begin() || !isUnpredicatedTerminator(MI: *--I)) {
398 if (isUncondBranchOpcode(Opc: LastOpc)) {
399 TBB = LastInst->getOperand(i: 0).getMBB();
400 return false;
401 }
402 if (isCondBranchOpcode(Opc: LastOpc)) {
403 // Block ends with fall-through condbranch.
404 parseCondBranch(LastInst, Target&: TBB, Cond);
405 return false;
406 }
407 return true; // Can't handle indirect branch.
408 }
409
410 // Get the instruction before it if it is a terminator.
411 MachineInstr *SecondLastInst = &*I;
412 unsigned SecondLastOpc = SecondLastInst->getOpcode();
413
414 // If AllowModify is true and the block ends with two or more unconditional
415 // branches, delete all but the first unconditional branch.
416 if (AllowModify && isUncondBranchOpcode(Opc: LastOpc)) {
417 while (isUncondBranchOpcode(Opc: SecondLastOpc)) {
418 LastInst->eraseFromParent();
419 LastInst = SecondLastInst;
420 LastOpc = LastInst->getOpcode();
421 if (I == MBB.begin() || !isUnpredicatedTerminator(MI: *--I)) {
        // Return now; the only terminator is an unconditional branch.
423 TBB = LastInst->getOperand(i: 0).getMBB();
424 return false;
425 }
426 SecondLastInst = &*I;
427 SecondLastOpc = SecondLastInst->getOpcode();
428 }
429 }
430
  // If we're allowed to modify and the block ends in an unconditional branch
  // that could simply fall through, remove the branch. (Note: This case only
433 // matters when we can't understand the whole sequence, otherwise it's also
434 // handled by BranchFolding.cpp.)
435 if (AllowModify && isUncondBranchOpcode(Opc: LastOpc) &&
436 MBB.isLayoutSuccessor(MBB: getBranchDestBlock(MI: *LastInst))) {
437 LastInst->eraseFromParent();
438 LastInst = SecondLastInst;
439 LastOpc = LastInst->getOpcode();
440 if (I == MBB.begin() || !isUnpredicatedTerminator(MI: *--I)) {
441 assert(!isUncondBranchOpcode(LastOpc) &&
442 "unreachable unconditional branches removed above");
443
444 if (isCondBranchOpcode(Opc: LastOpc)) {
445 // Block ends with fall-through condbranch.
446 parseCondBranch(LastInst, Target&: TBB, Cond);
447 return false;
448 }
449 return true; // Can't handle indirect branch.
450 }
451 SecondLastInst = &*I;
452 SecondLastOpc = SecondLastInst->getOpcode();
453 }
454
455 // If there are three terminators, we don't know what sort of block this is.
456 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(MI: *--I))
457 return true;
458
459 // If the block ends with a B and a Bcc, handle it.
460 if (isCondBranchOpcode(Opc: SecondLastOpc) && isUncondBranchOpcode(Opc: LastOpc)) {
461 parseCondBranch(LastInst: SecondLastInst, Target&: TBB, Cond);
462 FBB = LastInst->getOperand(i: 0).getMBB();
463 return false;
464 }
465
466 // If the block ends with two unconditional branches, handle it. The second
467 // one is not executed, so remove it.
468 if (isUncondBranchOpcode(Opc: SecondLastOpc) && isUncondBranchOpcode(Opc: LastOpc)) {
469 TBB = SecondLastInst->getOperand(i: 0).getMBB();
470 I = LastInst;
471 if (AllowModify)
472 I->eraseFromParent();
473 return false;
474 }
475
476 // ...likewise if it ends with an indirect branch followed by an unconditional
477 // branch.
478 if (isIndirectBranchOpcode(Opc: SecondLastOpc) && isUncondBranchOpcode(Opc: LastOpc)) {
479 I = LastInst;
480 if (AllowModify)
481 I->eraseFromParent();
482 return true;
483 }
484
485 // Otherwise, can't handle this.
486 return true;
487}
488
489bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
490 MachineBranchPredicate &MBP,
491 bool AllowModify) const {
492 // For the moment, handle only a block which ends with a cb(n)zx followed by
493 // a fallthrough. Why this? Because it is a common form.
494 // TODO: Should we handle b.cc?
495
496 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
497 if (I == MBB.end())
498 return true;
499
500 // Skip over SpeculationBarrierEndBB terminators
501 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
502 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
503 --I;
504 }
505
506 if (!isUnpredicatedTerminator(MI: *I))
507 return true;
508
509 // Get the last instruction in the block.
510 MachineInstr *LastInst = &*I;
511 unsigned LastOpc = LastInst->getOpcode();
512 if (!isCondBranchOpcode(Opc: LastOpc))
513 return true;
514
515 switch (LastOpc) {
516 default:
517 return true;
518 case AArch64::CBZW:
519 case AArch64::CBZX:
520 case AArch64::CBNZW:
521 case AArch64::CBNZX:
522 break;
523 };
524
525 MBP.TrueDest = LastInst->getOperand(i: 1).getMBB();
526 assert(MBP.TrueDest && "expected!");
527 MBP.FalseDest = MBB.getNextNode();
528
529 MBP.ConditionDef = nullptr;
530 MBP.SingleUseCondition = false;
531
532 MBP.LHS = LastInst->getOperand(i: 0);
533 MBP.RHS = MachineOperand::CreateImm(Val: 0);
534 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
535 : MachineBranchPredicate::PRED_EQ;
536 return false;
537}
538
539bool AArch64InstrInfo::reverseBranchCondition(
540 SmallVectorImpl<MachineOperand> &Cond) const {
541 if (Cond[0].getImm() != -1) {
542 // Regular Bcc
543 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
544 Cond[0].setImm(AArch64CC::getInvertedCondCode(Code: CC));
545 } else {
546 // Folded compare-and-branch
547 switch (Cond[1].getImm()) {
548 default:
549 llvm_unreachable("Unknown conditional branch!");
550 case AArch64::CBZW:
551 Cond[1].setImm(AArch64::CBNZW);
552 break;
553 case AArch64::CBNZW:
554 Cond[1].setImm(AArch64::CBZW);
555 break;
556 case AArch64::CBZX:
557 Cond[1].setImm(AArch64::CBNZX);
558 break;
559 case AArch64::CBNZX:
560 Cond[1].setImm(AArch64::CBZX);
561 break;
562 case AArch64::TBZW:
563 Cond[1].setImm(AArch64::TBNZW);
564 break;
565 case AArch64::TBNZW:
566 Cond[1].setImm(AArch64::TBZW);
567 break;
568 case AArch64::TBZX:
569 Cond[1].setImm(AArch64::TBNZX);
570 break;
571 case AArch64::TBNZX:
572 Cond[1].setImm(AArch64::TBZX);
573 break;
574
575 // Cond is { -1, Opcode, CC, Op0, Op1 }
576 case AArch64::CBWPri:
577 case AArch64::CBXPri:
578 case AArch64::CBWPrr:
579 case AArch64::CBXPrr: {
      // Pseudos using the standard 4-bit Arm condition codes.
581 AArch64CC::CondCode CC =
582 static_cast<AArch64CC::CondCode>(Cond[2].getImm());
583 Cond[2].setImm(AArch64CC::getInvertedCondCode(Code: CC));
584 }
585 }
586 }
587
588 return false;
589}
590
591unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
592 int *BytesRemoved) const {
593 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
594 if (I == MBB.end())
595 return 0;
596
597 if (!isUncondBranchOpcode(Opc: I->getOpcode()) &&
598 !isCondBranchOpcode(Opc: I->getOpcode()))
599 return 0;
600
601 // Remove the branch.
602 I->eraseFromParent();
603
604 I = MBB.end();
605
606 if (I == MBB.begin()) {
607 if (BytesRemoved)
608 *BytesRemoved = 4;
609 return 1;
610 }
611 --I;
612 if (!isCondBranchOpcode(Opc: I->getOpcode())) {
613 if (BytesRemoved)
614 *BytesRemoved = 4;
615 return 1;
616 }
617
618 // Remove the branch.
619 I->eraseFromParent();
620 if (BytesRemoved)
621 *BytesRemoved = 8;
622
623 return 2;
624}
625
626void AArch64InstrInfo::instantiateCondBranch(
627 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
628 ArrayRef<MachineOperand> Cond) const {
629 if (Cond[0].getImm() != -1) {
630 // Regular Bcc
631 BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: AArch64::Bcc)).addImm(Val: Cond[0].getImm()).addMBB(MBB: TBB);
632 } else {
633 // Folded compare-and-branch
    // Note that we use .add() instead of addReg() to keep the operand flags.
635
636 // cbz, cbnz
637 const MachineInstrBuilder MIB =
638 BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: Cond[1].getImm())).add(MO: Cond[2]);
639
640 // tbz/tbnz
641 if (Cond.size() > 3)
642 MIB.add(MO: Cond[3]);
643
644 // cb
645 if (Cond.size() > 4)
646 MIB.add(MO: Cond[4]);
647
648 MIB.addMBB(MBB: TBB);
649 }
650}
651
652unsigned AArch64InstrInfo::insertBranch(
653 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
654 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
655 // Shouldn't be a fall through.
656 assert(TBB && "insertBranch must not be told to insert a fallthrough");
657
658 if (!FBB) {
659 if (Cond.empty()) // Unconditional branch?
660 BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: AArch64::B)).addMBB(MBB: TBB);
661 else
662 instantiateCondBranch(MBB, DL, TBB, Cond);
663
664 if (BytesAdded)
665 *BytesAdded = 4;
666
667 return 1;
668 }
669
670 // Two-way conditional branch.
671 instantiateCondBranch(MBB, DL, TBB, Cond);
672 BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: AArch64::B)).addMBB(MBB: FBB);
673
674 if (BytesAdded)
675 *BytesAdded = 8;
676
677 return 2;
678}
679
680// Find the original register that VReg is copied from.
681static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
682 while (Register::isVirtualRegister(Reg: VReg)) {
683 const MachineInstr *DefMI = MRI.getVRegDef(Reg: VReg);
684 if (!DefMI->isFullCopy())
685 return VReg;
686 VReg = DefMI->getOperand(i: 1).getReg();
687 }
688 return VReg;
689}
690
691// Determine if VReg is defined by an instruction that can be folded into a
692// csel instruction. If so, return the folded opcode, and the replacement
693// register.
694static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
695 unsigned *NewVReg = nullptr) {
696 VReg = removeCopies(MRI, VReg);
697 if (!Register::isVirtualRegister(Reg: VReg))
698 return 0;
699
700 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(RC: MRI.getRegClass(Reg: VReg));
701 const MachineInstr *DefMI = MRI.getVRegDef(Reg: VReg);
702 unsigned Opc = 0;
703 unsigned SrcOpNum = 0;
704 switch (DefMI->getOpcode()) {
705 case AArch64::ADDSXri:
706 case AArch64::ADDSWri:
707 // if NZCV is used, do not fold.
708 if (DefMI->findRegisterDefOperandIdx(Reg: AArch64::NZCV, /*TRI=*/nullptr,
709 isDead: true) == -1)
710 return 0;
711 // fall-through to ADDXri and ADDWri.
712 [[fallthrough]];
713 case AArch64::ADDXri:
714 case AArch64::ADDWri:
715 // add x, 1 -> csinc.
716 if (!DefMI->getOperand(i: 2).isImm() || DefMI->getOperand(i: 2).getImm() != 1 ||
717 DefMI->getOperand(i: 3).getImm() != 0)
718 return 0;
719 SrcOpNum = 1;
720 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
721 break;
722
723 case AArch64::ORNXrr:
724 case AArch64::ORNWrr: {
725 // not x -> csinv, represented as orn dst, xzr, src.
726 unsigned ZReg = removeCopies(MRI, VReg: DefMI->getOperand(i: 1).getReg());
727 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
728 return 0;
729 SrcOpNum = 2;
730 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
731 break;
732 }
733
734 case AArch64::SUBSXrr:
735 case AArch64::SUBSWrr:
736 // if NZCV is used, do not fold.
737 if (DefMI->findRegisterDefOperandIdx(Reg: AArch64::NZCV, /*TRI=*/nullptr,
738 isDead: true) == -1)
739 return 0;
740 // fall-through to SUBXrr and SUBWrr.
741 [[fallthrough]];
742 case AArch64::SUBXrr:
743 case AArch64::SUBWrr: {
744 // neg x -> csneg, represented as sub dst, xzr, src.
745 unsigned ZReg = removeCopies(MRI, VReg: DefMI->getOperand(i: 1).getReg());
746 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
747 return 0;
748 SrcOpNum = 2;
749 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
750 break;
751 }
752 default:
753 return 0;
754 }
755 assert(Opc && SrcOpNum && "Missing parameters");
756
757 if (NewVReg)
758 *NewVReg = DefMI->getOperand(i: SrcOpNum).getReg();
759 return Opc;
760}
761
762bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
763 ArrayRef<MachineOperand> Cond,
764 Register DstReg, Register TrueReg,
765 Register FalseReg, int &CondCycles,
766 int &TrueCycles,
767 int &FalseCycles) const {
768 // Check register classes.
769 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
770 const TargetRegisterClass *RC =
771 RI.getCommonSubClass(A: MRI.getRegClass(Reg: TrueReg), B: MRI.getRegClass(Reg: FalseReg));
772 if (!RC)
773 return false;
774
775 // Also need to check the dest regclass, in case we're trying to optimize
776 // something like:
  //   %1(gpr) = PHI %2(fpr), bb1, %3(fpr), bb2
778 if (!RI.getCommonSubClass(A: RC, B: MRI.getRegClass(Reg: DstReg)))
779 return false;
780
781 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
782 unsigned ExtraCondLat = Cond.size() != 1;
783
784 // GPRs are handled by csel.
785 // FIXME: Fold in x+1, -x, and ~x when applicable.
786 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
787 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
788 // Single-cycle csel, csinc, csinv, and csneg.
789 CondCycles = 1 + ExtraCondLat;
790 TrueCycles = FalseCycles = 1;
791 if (canFoldIntoCSel(MRI, VReg: TrueReg))
792 TrueCycles = 0;
793 else if (canFoldIntoCSel(MRI, VReg: FalseReg))
794 FalseCycles = 0;
795 return true;
796 }
797
798 // Scalar floating point is handled by fcsel.
799 // FIXME: Form fabs, fmin, and fmax when applicable.
800 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
801 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
802 CondCycles = 5 + ExtraCondLat;
803 TrueCycles = FalseCycles = 2;
804 return true;
805 }
806
807 // Can't do vectors.
808 return false;
809}
810
811void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
812 MachineBasicBlock::iterator I,
813 const DebugLoc &DL, Register DstReg,
814 ArrayRef<MachineOperand> Cond,
815 Register TrueReg, Register FalseReg) const {
816 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
817
818 // Parse the condition code, see parseCondBranch() above.
819 AArch64CC::CondCode CC;
820 switch (Cond.size()) {
821 default:
822 llvm_unreachable("Unknown condition opcode in Cond");
823 case 1: // b.cc
824 CC = AArch64CC::CondCode(Cond[0].getImm());
825 break;
826 case 3: { // cbz/cbnz
827 // We must insert a compare against 0.
828 bool Is64Bit;
829 switch (Cond[1].getImm()) {
830 default:
831 llvm_unreachable("Unknown branch opcode in Cond");
832 case AArch64::CBZW:
833 Is64Bit = false;
834 CC = AArch64CC::EQ;
835 break;
836 case AArch64::CBZX:
837 Is64Bit = true;
838 CC = AArch64CC::EQ;
839 break;
840 case AArch64::CBNZW:
841 Is64Bit = false;
842 CC = AArch64CC::NE;
843 break;
844 case AArch64::CBNZX:
845 Is64Bit = true;
846 CC = AArch64CC::NE;
847 break;
848 }
849 Register SrcReg = Cond[2].getReg();
850 if (Is64Bit) {
851 // cmp reg, #0 is actually subs xzr, reg, #0.
852 MRI.constrainRegClass(Reg: SrcReg, RC: &AArch64::GPR64spRegClass);
853 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::SUBSXri), DestReg: AArch64::XZR)
854 .addReg(RegNo: SrcReg)
855 .addImm(Val: 0)
856 .addImm(Val: 0);
857 } else {
858 MRI.constrainRegClass(Reg: SrcReg, RC: &AArch64::GPR32spRegClass);
859 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::SUBSWri), DestReg: AArch64::WZR)
860 .addReg(RegNo: SrcReg)
861 .addImm(Val: 0)
862 .addImm(Val: 0);
863 }
864 break;
865 }
866 case 4: { // tbz/tbnz
867 // We must insert a tst instruction.
868 switch (Cond[1].getImm()) {
869 default:
870 llvm_unreachable("Unknown branch opcode in Cond");
871 case AArch64::TBZW:
872 case AArch64::TBZX:
873 CC = AArch64CC::EQ;
874 break;
875 case AArch64::TBNZW:
876 case AArch64::TBNZX:
877 CC = AArch64CC::NE;
878 break;
879 }
880 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
881 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
882 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ANDSWri), DestReg: AArch64::WZR)
883 .addReg(RegNo: Cond[2].getReg())
884 .addImm(
885 Val: AArch64_AM::encodeLogicalImmediate(imm: 1ull << Cond[3].getImm(), regSize: 32));
886 else
887 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ANDSXri), DestReg: AArch64::XZR)
888 .addReg(RegNo: Cond[2].getReg())
889 .addImm(
890 Val: AArch64_AM::encodeLogicalImmediate(imm: 1ull << Cond[3].getImm(), regSize: 64));
891 break;
892 }
893 case 5: { // cb
    // We must insert a cmp, that is, a subs.
895 // 0 1 2 3 4
896 // Cond is { -1, Opcode, CC, Op0, Op1 }
897 unsigned SUBSOpC, SUBSDestReg;
898 bool IsImm = false;
899 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
900 switch (Cond[1].getImm()) {
901 default:
902 llvm_unreachable("Unknown branch opcode in Cond");
903 case AArch64::CBWPri:
904 SUBSOpC = AArch64::SUBSWri;
905 SUBSDestReg = AArch64::WZR;
906 IsImm = true;
907 break;
908 case AArch64::CBXPri:
909 SUBSOpC = AArch64::SUBSXri;
910 SUBSDestReg = AArch64::XZR;
911 IsImm = true;
912 break;
913 case AArch64::CBWPrr:
914 SUBSOpC = AArch64::SUBSWrr;
915 SUBSDestReg = AArch64::WZR;
916 IsImm = false;
917 break;
918 case AArch64::CBXPrr:
919 SUBSOpC = AArch64::SUBSXrr;
920 SUBSDestReg = AArch64::XZR;
921 IsImm = false;
922 break;
923 }
924
925 if (IsImm)
926 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: SUBSOpC), DestReg: SUBSDestReg)
927 .addReg(RegNo: Cond[3].getReg())
928 .addImm(Val: Cond[4].getImm())
929 .addImm(Val: 0);
930 else
931 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: SUBSOpC), DestReg: SUBSDestReg)
932 .addReg(RegNo: Cond[3].getReg())
933 .addReg(RegNo: Cond[4].getReg());
934 }
935 }
936
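  // Pick the select opcode and register class matching the destination:
  // CSEL for general-purpose registers, FCSEL for scalar floating point.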
937 unsigned Opc = 0;
938 const TargetRegisterClass *RC = nullptr;
939 bool TryFold = false;
940 if (MRI.constrainRegClass(Reg: DstReg, RC: &AArch64::GPR64RegClass)) {
941 RC = &AArch64::GPR64RegClass;
942 Opc = AArch64::CSELXr;
943 TryFold = true;
944 } else if (MRI.constrainRegClass(Reg: DstReg, RC: &AArch64::GPR32RegClass)) {
945 RC = &AArch64::GPR32RegClass;
946 Opc = AArch64::CSELWr;
947 TryFold = true;
948 } else if (MRI.constrainRegClass(Reg: DstReg, RC: &AArch64::FPR64RegClass)) {
949 RC = &AArch64::FPR64RegClass;
950 Opc = AArch64::FCSELDrrr;
951 } else if (MRI.constrainRegClass(Reg: DstReg, RC: &AArch64::FPR32RegClass)) {
952 RC = &AArch64::FPR32RegClass;
953 Opc = AArch64::FCSELSrrr;
954 }
955 assert(RC && "Unsupported regclass");
956
957 // Try folding simple instructions into the csel.
958 if (TryFold) {
959 unsigned NewVReg = 0;
960 unsigned FoldedOpc = canFoldIntoCSel(MRI, VReg: TrueReg, NewVReg: &NewVReg);
961 if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
964 CC = AArch64CC::getInvertedCondCode(Code: CC);
965 TrueReg = FalseReg;
966 } else
967 FoldedOpc = canFoldIntoCSel(MRI, VReg: FalseReg, NewVReg: &NewVReg);
968
969 // Fold the operation. Leave any dead instructions for DCE to clean up.
970 if (FoldedOpc) {
971 FalseReg = NewVReg;
972 Opc = FoldedOpc;
      // This extends the live range of NewVReg.
974 MRI.clearKillFlags(Reg: NewVReg);
975 }
976 }
977
  // Pull all virtual registers into the appropriate class.
979 MRI.constrainRegClass(Reg: TrueReg, RC);
980 MRI.constrainRegClass(Reg: FalseReg, RC);
981
982 // Insert the csel.
983 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: Opc), DestReg: DstReg)
984 .addReg(RegNo: TrueReg)
985 .addReg(RegNo: FalseReg)
986 .addImm(Val: CC);
987}
988
989// Return true if Imm can be loaded into a register by a "cheap" sequence of
990// instructions. For now, "cheap" means at most two instructions.
991static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
992 if (BitSize == 32)
993 return true;
994
995 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
996 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(i: 1).getImm());
997 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
998 AArch64_IMM::expandMOVImm(Imm, BitSize, Insn&: Is);
999
1000 return Is.size() <= 2;
1001}
1002
1003// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in the future.
1005bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1006 if (Subtarget.hasExynosCheapAsMoveHandling()) {
1007 if (isExynosCheapAsMove(MI))
1008 return true;
1009 return MI.isAsCheapAsAMove();
1010 }
1011
1012 switch (MI.getOpcode()) {
1013 default:
1014 return MI.isAsCheapAsAMove();
1015
1016 case AArch64::ADDWrs:
1017 case AArch64::ADDXrs:
1018 case AArch64::SUBWrs:
1019 case AArch64::SUBXrs:
1020 return Subtarget.hasALULSLFast() && MI.getOperand(i: 3).getImm() <= 4;
1021
1022 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1023 // ORRXri, it is as cheap as MOV.
1024 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1025 case AArch64::MOVi32imm:
1026 return isCheapImmediate(MI, BitSize: 32);
1027 case AArch64::MOVi64imm:
1028 return isCheapImmediate(MI, BitSize: 64);
1029 }
1030}
1031
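// Return true if MI uses a shifted/extended register or register-offset
// addressing form that is considered fast on Falkor.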
1032bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
1033 switch (MI.getOpcode()) {
1034 default:
1035 return false;
1036
1037 case AArch64::ADDWrs:
1038 case AArch64::ADDXrs:
1039 case AArch64::ADDSWrs:
1040 case AArch64::ADDSXrs: {
1041 unsigned Imm = MI.getOperand(i: 3).getImm();
1042 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1043 if (ShiftVal == 0)
1044 return true;
1045 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
1046 }
1047
1048 case AArch64::ADDWrx:
1049 case AArch64::ADDXrx:
1050 case AArch64::ADDXrx64:
1051 case AArch64::ADDSWrx:
1052 case AArch64::ADDSXrx:
1053 case AArch64::ADDSXrx64: {
1054 unsigned Imm = MI.getOperand(i: 3).getImm();
1055 switch (AArch64_AM::getArithExtendType(Imm)) {
1056 default:
1057 return false;
1058 case AArch64_AM::UXTB:
1059 case AArch64_AM::UXTH:
1060 case AArch64_AM::UXTW:
1061 case AArch64_AM::UXTX:
1062 return AArch64_AM::getArithShiftValue(Imm) <= 4;
1063 }
1064 }
1065
1066 case AArch64::SUBWrs:
1067 case AArch64::SUBSWrs: {
1068 unsigned Imm = MI.getOperand(i: 3).getImm();
1069 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1070 return ShiftVal == 0 ||
1071 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
1072 }
1073
1074 case AArch64::SUBXrs:
1075 case AArch64::SUBSXrs: {
1076 unsigned Imm = MI.getOperand(i: 3).getImm();
1077 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1078 return ShiftVal == 0 ||
1079 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
1080 }
1081
1082 case AArch64::SUBWrx:
1083 case AArch64::SUBXrx:
1084 case AArch64::SUBXrx64:
1085 case AArch64::SUBSWrx:
1086 case AArch64::SUBSXrx:
1087 case AArch64::SUBSXrx64: {
1088 unsigned Imm = MI.getOperand(i: 3).getImm();
1089 switch (AArch64_AM::getArithExtendType(Imm)) {
1090 default:
1091 return false;
1092 case AArch64_AM::UXTB:
1093 case AArch64_AM::UXTH:
1094 case AArch64_AM::UXTW:
1095 case AArch64_AM::UXTX:
1096 return AArch64_AM::getArithShiftValue(Imm) == 0;
1097 }
1098 }
1099
1100 case AArch64::LDRBBroW:
1101 case AArch64::LDRBBroX:
1102 case AArch64::LDRBroW:
1103 case AArch64::LDRBroX:
1104 case AArch64::LDRDroW:
1105 case AArch64::LDRDroX:
1106 case AArch64::LDRHHroW:
1107 case AArch64::LDRHHroX:
1108 case AArch64::LDRHroW:
1109 case AArch64::LDRHroX:
1110 case AArch64::LDRQroW:
1111 case AArch64::LDRQroX:
1112 case AArch64::LDRSBWroW:
1113 case AArch64::LDRSBWroX:
1114 case AArch64::LDRSBXroW:
1115 case AArch64::LDRSBXroX:
1116 case AArch64::LDRSHWroW:
1117 case AArch64::LDRSHWroX:
1118 case AArch64::LDRSHXroW:
1119 case AArch64::LDRSHXroX:
1120 case AArch64::LDRSWroW:
1121 case AArch64::LDRSWroX:
1122 case AArch64::LDRSroW:
1123 case AArch64::LDRSroX:
1124 case AArch64::LDRWroW:
1125 case AArch64::LDRWroX:
1126 case AArch64::LDRXroW:
1127 case AArch64::LDRXroX:
1128 case AArch64::PRFMroW:
1129 case AArch64::PRFMroX:
1130 case AArch64::STRBBroW:
1131 case AArch64::STRBBroX:
1132 case AArch64::STRBroW:
1133 case AArch64::STRBroX:
1134 case AArch64::STRDroW:
1135 case AArch64::STRDroX:
1136 case AArch64::STRHHroW:
1137 case AArch64::STRHHroX:
1138 case AArch64::STRHroW:
1139 case AArch64::STRHroX:
1140 case AArch64::STRQroW:
1141 case AArch64::STRQroX:
1142 case AArch64::STRSroW:
1143 case AArch64::STRSroX:
1144 case AArch64::STRWroW:
1145 case AArch64::STRWroX:
1146 case AArch64::STRXroW:
1147 case AArch64::STRXroX: {
1148 unsigned IsSigned = MI.getOperand(i: 3).getImm();
1149 return !IsSigned;
1150 }
1151 }
1152}
1153
1154bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1155 unsigned Opc = MI.getOpcode();
1156 switch (Opc) {
1157 default:
1158 return false;
1159 case AArch64::SEH_StackAlloc:
1160 case AArch64::SEH_SaveFPLR:
1161 case AArch64::SEH_SaveFPLR_X:
1162 case AArch64::SEH_SaveReg:
1163 case AArch64::SEH_SaveReg_X:
1164 case AArch64::SEH_SaveRegP:
1165 case AArch64::SEH_SaveRegP_X:
1166 case AArch64::SEH_SaveFReg:
1167 case AArch64::SEH_SaveFReg_X:
1168 case AArch64::SEH_SaveFRegP:
1169 case AArch64::SEH_SaveFRegP_X:
1170 case AArch64::SEH_SetFP:
1171 case AArch64::SEH_AddFP:
1172 case AArch64::SEH_Nop:
1173 case AArch64::SEH_PrologEnd:
1174 case AArch64::SEH_EpilogStart:
1175 case AArch64::SEH_EpilogEnd:
1176 case AArch64::SEH_PACSignLR:
1177 case AArch64::SEH_SaveAnyRegQP:
1178 case AArch64::SEH_SaveAnyRegQPX:
1179 case AArch64::SEH_AllocZ:
1180 case AArch64::SEH_SaveZReg:
1181 case AArch64::SEH_SavePReg:
1182 return true;
1183 }
1184}
1185
1186bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1187 Register &SrcReg, Register &DstReg,
1188 unsigned &SubIdx) const {
1189 switch (MI.getOpcode()) {
1190 default:
1191 return false;
1192 case AArch64::SBFMXri: // aka sxtw
1193 case AArch64::UBFMXri: // aka uxtw
1194 // Check for the 32 -> 64 bit extension case, these instructions can do
1195 // much more.
1196 if (MI.getOperand(i: 2).getImm() != 0 || MI.getOperand(i: 3).getImm() != 31)
1197 return false;
1198 // This is a signed or unsigned 32 -> 64 bit extension.
1199 SrcReg = MI.getOperand(i: 1).getReg();
1200 DstReg = MI.getOperand(i: 0).getReg();
1201 SubIdx = AArch64::sub_32;
1202 return true;
1203 }
1204}
1205
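// Return true if the two memory accesses provably do not overlap: they share
// an identical base operand and the lower access's offset plus its width does
// not reach the higher access's offset.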
1206bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1207 const MachineInstr &MIa, const MachineInstr &MIb) const {
1208 const TargetRegisterInfo *TRI = &getRegisterInfo();
1209 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1210 int64_t OffsetA = 0, OffsetB = 0;
1211 TypeSize WidthA(0, false), WidthB(0, false);
1212 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1213
1214 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1215 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1216
1217 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1218 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1219 return false;
1220
  // Retrieve the base, the offset from the base, and the width. Width is
  // the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8 bytes).
  // If the bases are identical and the offset of the lower memory access
  // plus its width does not overlap the offset of the higher memory access,
  // then the memory accesses are disjoint.
1226 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1227 // are assumed to have the same scale (vscale).
1228 if (getMemOperandWithOffsetWidth(MI: MIa, BaseOp&: BaseOpA, Offset&: OffsetA, OffsetIsScalable&: OffsetAIsScalable,
1229 Width&: WidthA, TRI) &&
1230 getMemOperandWithOffsetWidth(MI: MIb, BaseOp&: BaseOpB, Offset&: OffsetB, OffsetIsScalable&: OffsetBIsScalable,
1231 Width&: WidthB, TRI)) {
1232 if (BaseOpA->isIdenticalTo(Other: *BaseOpB) &&
1233 OffsetAIsScalable == OffsetBIsScalable) {
1234 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1235 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1236 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1237 if (LowWidth.isScalable() == OffsetAIsScalable &&
1238 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1239 return true;
1240 }
1241 }
1242 return false;
1243}
1244
1245bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1246 const MachineBasicBlock *MBB,
1247 const MachineFunction &MF) const {
1248 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1249 return true;
1250
1251 // Do not move an instruction that can be recognized as a branch target.
1252 if (hasBTISemantics(MI))
1253 return true;
1254
1255 switch (MI.getOpcode()) {
1256 case AArch64::HINT:
1257 // CSDB hints are scheduling barriers.
1258 if (MI.getOperand(i: 0).getImm() == 0x14)
1259 return true;
1260 break;
1261 case AArch64::DSB:
1262 case AArch64::ISB:
1263 // DSB and ISB also are scheduling barriers.
1264 return true;
1265 case AArch64::MSRpstatesvcrImm1:
1266 // SMSTART and SMSTOP are also scheduling barriers.
1267 return true;
1268 default:;
1269 }
1270 if (isSEHInstruction(MI))
1271 return true;
1272 auto Next = std::next(x: MI.getIterator());
1273 return Next != MBB->end() && Next->isCFIInstruction();
1274}
1275
1276/// analyzeCompare - For a comparison instruction, return the source registers
1277/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1278/// Return true if the comparison instruction can be analyzed.
1279bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1280 Register &SrcReg2, int64_t &CmpMask,
1281 int64_t &CmpValue) const {
1282 // The first operand can be a frame index where we'd normally expect a
1283 // register.
1284 // FIXME: Pass subregisters out of analyzeCompare
1285 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1286 if (!MI.getOperand(i: 1).isReg() || MI.getOperand(i: 1).getSubReg())
1287 return false;
1288
1289 switch (MI.getOpcode()) {
1290 default:
1291 break;
1292 case AArch64::PTEST_PP:
1293 case AArch64::PTEST_PP_ANY:
1294 SrcReg = MI.getOperand(i: 0).getReg();
1295 SrcReg2 = MI.getOperand(i: 1).getReg();
1296 if (MI.getOperand(i: 2).getSubReg())
1297 return false;
1298
1299 // Not sure about the mask and value for now...
1300 CmpMask = ~0;
1301 CmpValue = 0;
1302 return true;
1303 case AArch64::SUBSWrr:
1304 case AArch64::SUBSWrs:
1305 case AArch64::SUBSWrx:
1306 case AArch64::SUBSXrr:
1307 case AArch64::SUBSXrs:
1308 case AArch64::SUBSXrx:
1309 case AArch64::ADDSWrr:
1310 case AArch64::ADDSWrs:
1311 case AArch64::ADDSWrx:
1312 case AArch64::ADDSXrr:
1313 case AArch64::ADDSXrs:
1314 case AArch64::ADDSXrx:
1315 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1316 SrcReg = MI.getOperand(i: 1).getReg();
1317 SrcReg2 = MI.getOperand(i: 2).getReg();
1318
1319 // FIXME: Pass subregisters out of analyzeCompare
1320 if (MI.getOperand(i: 2).getSubReg())
1321 return false;
1322
1323 CmpMask = ~0;
1324 CmpValue = 0;
1325 return true;
1326 case AArch64::SUBSWri:
1327 case AArch64::ADDSWri:
1328 case AArch64::SUBSXri:
1329 case AArch64::ADDSXri:
1330 SrcReg = MI.getOperand(i: 1).getReg();
1331 SrcReg2 = 0;
1332 CmpMask = ~0;
1333 CmpValue = MI.getOperand(i: 2).getImm();
1334 return true;
1335 case AArch64::ANDSWri:
1336 case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
1339 SrcReg = MI.getOperand(i: 1).getReg();
1340 SrcReg2 = 0;
1341 CmpMask = ~0;
1342 CmpValue = AArch64_AM::decodeLogicalImmediate(
1343 val: MI.getOperand(i: 2).getImm(),
1344 regSize: MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1345 return true;
1346 }
1347
1348 return false;
1349}
1350
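// Constrain each register operand of Instr to the register class required by
// its instruction description, e.g. after the opcode has been mutated.
// Returns false if some operand cannot be constrained.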
1351static bool UpdateOperandRegClass(MachineInstr &Instr) {
1352 MachineBasicBlock *MBB = Instr.getParent();
1353 assert(MBB && "Can't get MachineBasicBlock here");
1354 MachineFunction *MF = MBB->getParent();
1355 assert(MF && "Can't get MachineFunction here");
1356 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1357 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1358 MachineRegisterInfo *MRI = &MF->getRegInfo();
1359
1360 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1361 ++OpIdx) {
1362 MachineOperand &MO = Instr.getOperand(i: OpIdx);
1363 const TargetRegisterClass *OpRegCstraints =
1364 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1365
1366 // If there's no constraint, there's nothing to do.
1367 if (!OpRegCstraints)
1368 continue;
1369 // If the operand is a frame index, there's nothing to do here.
1370 // A frame index operand will resolve correctly during PEI.
1371 if (MO.isFI())
1372 continue;
1373
1374 assert(MO.isReg() &&
1375 "Operand has register constraints without being a register!");
1376
1377 Register Reg = MO.getReg();
1378 if (Reg.isPhysical()) {
1379 if (!OpRegCstraints->contains(Reg))
1380 return false;
1381 } else if (!OpRegCstraints->hasSubClassEq(RC: MRI->getRegClass(Reg)) &&
1382 !MRI->constrainRegClass(Reg, RC: OpRegCstraints))
1383 return false;
1384 }
1385
1386 return true;
1387}
1388
/// Return the opcode that does not set flags when possible; otherwise
/// return the original opcode. The caller is responsible for doing the
/// actual substitution and legality checking.
1392static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1393 // Don't convert all compare instructions, because for some the zero register
1394 // encoding becomes the sp register.
1395 bool MIDefinesZeroReg = false;
1396 if (MI.definesRegister(Reg: AArch64::WZR, /*TRI=*/nullptr) ||
1397 MI.definesRegister(Reg: AArch64::XZR, /*TRI=*/nullptr))
1398 MIDefinesZeroReg = true;
1399
1400 switch (MI.getOpcode()) {
1401 default:
1402 return MI.getOpcode();
1403 case AArch64::ADDSWrr:
1404 return AArch64::ADDWrr;
1405 case AArch64::ADDSWri:
1406 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1407 case AArch64::ADDSWrs:
1408 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1409 case AArch64::ADDSWrx:
1410 return AArch64::ADDWrx;
1411 case AArch64::ADDSXrr:
1412 return AArch64::ADDXrr;
1413 case AArch64::ADDSXri:
1414 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1415 case AArch64::ADDSXrs:
1416 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1417 case AArch64::ADDSXrx:
1418 return AArch64::ADDXrx;
1419 case AArch64::SUBSWrr:
1420 return AArch64::SUBWrr;
1421 case AArch64::SUBSWri:
1422 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1423 case AArch64::SUBSWrs:
1424 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1425 case AArch64::SUBSWrx:
1426 return AArch64::SUBWrx;
1427 case AArch64::SUBSXrr:
1428 return AArch64::SUBXrr;
1429 case AArch64::SUBSXri:
1430 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1431 case AArch64::SUBSXrs:
1432 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1433 case AArch64::SUBSXrx:
1434 return AArch64::SUBXrx;
1435 }
1436}
1437
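// Kinds of NZCV access to look for when scanning a range of instructions.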
1438enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1439
1440/// True when condition flags are accessed (either by writing or reading)
1441/// on the instruction trace starting at From and ending at To.
1442///
/// Note: If From and To are from different blocks it's assumed the condition
/// flags are accessed on the path.
1445static bool areCFlagsAccessedBetweenInstrs(
1446 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1447 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1448 // Early exit if To is at the beginning of the BB.
1449 if (To == To->getParent()->begin())
1450 return true;
1451
1452 // Check whether the instructions are in the same basic block
1453 // If not, assume the condition flags might get modified somewhere.
1454 if (To->getParent() != From->getParent())
1455 return true;
1456
1457 // From must be above To.
1458 assert(std::any_of(
1459 ++To.getReverse(), To->getParent()->rend(),
1460 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1461
1462 // We iterate backward starting at \p To until we hit \p From.
1463 for (const MachineInstr &Instr :
1464 instructionsWithoutDebug(It: ++To.getReverse(), End: From.getReverse())) {
1465 if (((AccessToCheck & AK_Write) &&
1466 Instr.modifiesRegister(Reg: AArch64::NZCV, TRI)) ||
1467 ((AccessToCheck & AK_Read) && Instr.readsRegister(Reg: AArch64::NZCV, TRI)))
1468 return true;
1469 }
1470 return false;
1471}
1472
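/// Check whether the PTEST instruction \p PTest, with governing predicate
/// \p Mask and tested predicate defined by \p Pred, is redundant. If it is,
/// return the opcode \p Pred should have so that it sets the flags itself
/// (possibly its current opcode); otherwise return std::nullopt.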
1473std::optional<unsigned>
1474AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1475 MachineInstr *Pred,
1476 const MachineRegisterInfo *MRI) const {
1477 unsigned MaskOpcode = Mask->getOpcode();
1478 unsigned PredOpcode = Pred->getOpcode();
1479 bool PredIsPTestLike = isPTestLikeOpcode(Opc: PredOpcode);
1480 bool PredIsWhileLike = isWhileOpcode(Opc: PredOpcode);
1481
1482 if (PredIsWhileLike) {
1483 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
    // instruction and the condition is "any" since WHILEcc does an implicit
1485 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1486 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1487 return PredOpcode;
1488
1489 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1490 // redundant since WHILE performs an implicit PTEST with an all active
1491 // mask.
1492 if (isPTrueOpcode(Opc: MaskOpcode) && Mask->getOperand(i: 1).getImm() == 31 &&
1493 getElementSizeForOpcode(Opc: MaskOpcode) ==
1494 getElementSizeForOpcode(Opc: PredOpcode))
1495 return PredOpcode;
1496
1497 return {};
1498 }
1499
1500 if (PredIsPTestLike) {
1501 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1502 // instruction that sets the flags as PTEST would and the condition is
1503 // "any" since PG is always a subset of the governing predicate of the
1504 // ptest-like instruction.
1505 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1506 return PredOpcode;
1507
1508 auto PTestLikeMask = MRI->getUniqueVRegDef(Reg: Pred->getOperand(i: 1).getReg());
1509
1510 // If the PTEST like instruction's general predicate is not `Mask`, attempt
1511 // to look through a copy and try again. This is because some instructions
1512 // take a predicate whose register class is a subset of its result class.
1513 if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1514 PTestLikeMask->getOperand(i: 1).getReg().isVirtual())
1515 PTestLikeMask =
1516 MRI->getUniqueVRegDef(Reg: PTestLikeMask->getOperand(i: 1).getReg());
1517
1518 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
    // element size matches and either the PTEST_LIKE instruction uses
1520 // the same all active mask or the condition is "any".
1521 if (isPTrueOpcode(Opc: MaskOpcode) && Mask->getOperand(i: 1).getImm() == 31 &&
1522 getElementSizeForOpcode(Opc: MaskOpcode) ==
1523 getElementSizeForOpcode(Opc: PredOpcode)) {
1524 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1525 return PredOpcode;
1526 }
1527
1528 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1529 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1530 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1531 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1532 // performed by the compare could consider fewer lanes for these element
1533 // sizes.
1534 //
1535 // For example, consider
1536 //
1537 // ptrue p0.b ; P0=1111-1111-1111-1111
1538 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1539 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1540 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1541 // ; ^ last active
1542 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1543 // ; ^ last active
1544 //
1545 // where the compare generates a canonical all active 32-bit predicate
1546 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1547 // active flag, whereas the PTEST instruction with the same mask doesn't.
1548 // For PTEST_ANY this doesn't apply as the flags in this case would be
1549 // identical regardless of element size.
1550 uint64_t PredElementSize = getElementSizeForOpcode(Opc: PredOpcode);
1551 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1552 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1553 return PredOpcode;
1554
1555 return {};
1556 }
1557
1558 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1559 // opcode so the PTEST becomes redundant.
1560 switch (PredOpcode) {
1561 case AArch64::AND_PPzPP:
1562 case AArch64::BIC_PPzPP:
1563 case AArch64::EOR_PPzPP:
1564 case AArch64::NAND_PPzPP:
1565 case AArch64::NOR_PPzPP:
1566 case AArch64::ORN_PPzPP:
1567 case AArch64::ORR_PPzPP:
1568 case AArch64::BRKA_PPzP:
1569 case AArch64::BRKPA_PPzPP:
1570 case AArch64::BRKB_PPzP:
1571 case AArch64::BRKPB_PPzPP:
1572 case AArch64::RDFFR_PPz: {
    // Check to see if our mask is the same. If not, the resulting flag bits
1574 // may be different and we can't remove the ptest.
1575 auto *PredMask = MRI->getUniqueVRegDef(Reg: Pred->getOperand(i: 1).getReg());
1576 if (Mask != PredMask)
1577 return {};
1578 break;
1579 }
1580 case AArch64::BRKN_PPzP: {
1581 // BRKN uses an all active implicit mask to set flags unlike the other
1582 // flag-setting instructions.
1583 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1584 if ((MaskOpcode != AArch64::PTRUE_B) ||
1585 (Mask->getOperand(i: 1).getImm() != 31))
1586 return {};
1587 break;
1588 }
1589 case AArch64::PTRUE_B:
1590 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1591 break;
1592 default:
1593 // Bail out if we don't recognize the input
1594 return {};
1595 }
1596
1597 return convertToFlagSettingOpc(Opc: PredOpcode);
1598}
1599
1600/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
/// operation that could set the flags in an identical manner.
1602bool AArch64InstrInfo::optimizePTestInstr(
1603 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1604 const MachineRegisterInfo *MRI) const {
1605 auto *Mask = MRI->getUniqueVRegDef(Reg: MaskReg);
1606 auto *Pred = MRI->getUniqueVRegDef(Reg: PredReg);
1607 unsigned PredOpcode = Pred->getOpcode();
1608 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1609 if (!NewOp)
1610 return false;
1611
1612 const TargetRegisterInfo *TRI = &getRegisterInfo();
1613
1614 // If another instruction between Pred and PTest accesses flags, don't remove
1615 // the ptest or update the earlier instruction to modify them.
1616 if (areCFlagsAccessedBetweenInstrs(From: Pred, To: PTest, TRI))
1617 return false;
1618
1619 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1620 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1621 // operand to be replaced with an equivalent instruction that also sets the
1622 // flags.
1623 PTest->eraseFromParent();
1624 if (*NewOp != PredOpcode) {
1625 Pred->setDesc(get(Opcode: *NewOp));
1626 bool succeeded = UpdateOperandRegClass(Instr&: *Pred);
1627 (void)succeeded;
1628 assert(succeeded && "Operands have incompatible register classes!");
1629 Pred->addRegisterDefined(Reg: AArch64::NZCV, RegInfo: TRI);
1630 }
1631
1632 // Ensure that the flags def is live.
1633 if (Pred->registerDefIsDead(Reg: AArch64::NZCV, TRI)) {
1634 unsigned i = 0, e = Pred->getNumOperands();
1635 for (; i != e; ++i) {
1636 MachineOperand &MO = Pred->getOperand(i);
1637 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1638 MO.setIsDead(false);
1639 break;
1640 }
1641 }
1642 }
1643 return true;
1644}
1645
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be a true compare
/// instruction when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into a non-flag-setting version if NZCV is not used.
/// 2. Remove CmpInstr if there is an earlier instruction producing a needed
///    condition code, or an instruction which can be converted into such an
///    instruction.
///    Only comparison with zero is supported.
1657bool AArch64InstrInfo::optimizeCompareInstr(
1658 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1659 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1660 assert(CmpInstr.getParent());
1661 assert(MRI);
1662
1663 // Replace SUBSWrr with SUBWrr if NZCV is not used.
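  // For instance (illustrative MIR; virtual register numbers assumed):
  //   %2:gpr32 = SUBSWrr %0, %1, implicit-def dead $nzcv
  // can simply become
  //   %2:gpr32 = SUBWrr %0, %1
  // because the flag result is never read.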
1664 int DeadNZCVIdx =
1665 CmpInstr.findRegisterDefOperandIdx(Reg: AArch64::NZCV, /*TRI=*/nullptr, isDead: true);
1666 if (DeadNZCVIdx != -1) {
1667 if (CmpInstr.definesRegister(Reg: AArch64::WZR, /*TRI=*/nullptr) ||
1668 CmpInstr.definesRegister(Reg: AArch64::XZR, /*TRI=*/nullptr)) {
1669 CmpInstr.eraseFromParent();
1670 return true;
1671 }
1672 unsigned Opc = CmpInstr.getOpcode();
1673 unsigned NewOpc = convertToNonFlagSettingOpc(MI: CmpInstr);
1674 if (NewOpc == Opc)
1675 return false;
1676 const MCInstrDesc &MCID = get(Opcode: NewOpc);
1677 CmpInstr.setDesc(MCID);
1678 CmpInstr.removeOperand(OpNo: DeadNZCVIdx);
1679 bool succeeded = UpdateOperandRegClass(Instr&: CmpInstr);
1680 (void)succeeded;
1681 assert(succeeded && "Some operands reg class are incompatible!");
1682 return true;
1683 }
1684
1685 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1686 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1687 return optimizePTestInstr(PTest: &CmpInstr, MaskReg: SrcReg, PredReg: SrcReg2, MRI);
1688
1689 if (SrcReg2 != 0)
1690 return false;
1691
  // CmpInstr is a compare instruction if its destination register is not used.
1693 if (!MRI->use_nodbg_empty(RegNo: CmpInstr.getOperand(i: 0).getReg()))
1694 return false;
1695
1696 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, MRI: *MRI))
1697 return true;
1698 return (CmpValue == 0 || CmpValue == 1) &&
1699 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, MRI: *MRI);
1700}
1701
/// Get the opcode of the S (flag-setting) version of Instr.
/// If Instr is already an S version, its own opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in it.
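/// For example (illustrative): ADDWrr maps to ADDSWrr and SUBXri maps to
/// SUBSXri, while opcodes that are already flag-setting map to themselves.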
1706static unsigned sForm(MachineInstr &Instr) {
1707 switch (Instr.getOpcode()) {
1708 default:
1709 return AArch64::INSTRUCTION_LIST_END;
1710
1711 case AArch64::ADDSWrr:
1712 case AArch64::ADDSWri:
1713 case AArch64::ADDSXrr:
1714 case AArch64::ADDSXri:
1715 case AArch64::SUBSWrr:
1716 case AArch64::SUBSWri:
1717 case AArch64::SUBSXrr:
1718 case AArch64::SUBSXri:
1719 return Instr.getOpcode();
1720
1721 case AArch64::ADDWrr:
1722 return AArch64::ADDSWrr;
1723 case AArch64::ADDWri:
1724 return AArch64::ADDSWri;
1725 case AArch64::ADDXrr:
1726 return AArch64::ADDSXrr;
1727 case AArch64::ADDXri:
1728 return AArch64::ADDSXri;
1729 case AArch64::ADCWr:
1730 return AArch64::ADCSWr;
1731 case AArch64::ADCXr:
1732 return AArch64::ADCSXr;
1733 case AArch64::SUBWrr:
1734 return AArch64::SUBSWrr;
1735 case AArch64::SUBWri:
1736 return AArch64::SUBSWri;
1737 case AArch64::SUBXrr:
1738 return AArch64::SUBSXrr;
1739 case AArch64::SUBXri:
1740 return AArch64::SUBSXri;
1741 case AArch64::SBCWr:
1742 return AArch64::SBCSWr;
1743 case AArch64::SBCXr:
1744 return AArch64::SBCSXr;
1745 case AArch64::ANDWri:
1746 return AArch64::ANDSWri;
1747 case AArch64::ANDXri:
1748 return AArch64::ANDSXri;
1749 }
1750}
1751
1752/// Check if AArch64::NZCV should be alive in successors of MBB.
1753static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1754 for (auto *BB : MBB->successors())
1755 if (BB->isLiveIn(Reg: AArch64::NZCV))
1756 return true;
1757 return false;
1758}
1759
1760/// \returns The condition code operand index for \p Instr if it is a branch
1761/// or select and -1 otherwise.
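///
/// For example (illustrative): in "Bcc <cc>, <target>, implicit $nzcv" the
/// NZCV use is operand 2 and the condition code is operand 0, hence the
/// "Idx - 2" below; for the CSEL-style selects the condition code operand
/// immediately precedes the implicit NZCV use, hence "Idx - 1".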
1762static int
1763findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1764 switch (Instr.getOpcode()) {
1765 default:
1766 return -1;
1767
1768 case AArch64::Bcc: {
1769 int Idx = Instr.findRegisterUseOperandIdx(Reg: AArch64::NZCV, /*TRI=*/nullptr);
1770 assert(Idx >= 2);
1771 return Idx - 2;
1772 }
1773
1774 case AArch64::CSINVWr:
1775 case AArch64::CSINVXr:
1776 case AArch64::CSINCWr:
1777 case AArch64::CSINCXr:
1778 case AArch64::CSELWr:
1779 case AArch64::CSELXr:
1780 case AArch64::CSNEGWr:
1781 case AArch64::CSNEGXr:
1782 case AArch64::FCSELSrrr:
1783 case AArch64::FCSELDrrr: {
1784 int Idx = Instr.findRegisterUseOperandIdx(Reg: AArch64::NZCV, /*TRI=*/nullptr);
1785 assert(Idx >= 1);
1786 return Idx - 1;
1787 }
1788 }
1789}
1790
1791/// Find a condition code used by the instruction.
1792/// Returns AArch64CC::Invalid if either the instruction does not use condition
1793/// codes or we don't optimize CmpInstr in the presence of such instructions.
1794static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1795 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1796 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1797 Instr.getOperand(i: CCIdx).getImm())
1798 : AArch64CC::Invalid;
1799}
1800
1801static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1802 assert(CC != AArch64CC::Invalid);
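  // Illustrative example: for AArch64CC::HI ("Z clear and C set") both the Z
  // and C bits are reported as used, which the fallthrough below implements.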
1803 UsedNZCV UsedFlags;
1804 switch (CC) {
1805 default:
1806 break;
1807
1808 case AArch64CC::EQ: // Z set
1809 case AArch64CC::NE: // Z clear
1810 UsedFlags.Z = true;
1811 break;
1812
1813 case AArch64CC::HI: // Z clear and C set
1814 case AArch64CC::LS: // Z set or C clear
1815 UsedFlags.Z = true;
1816 [[fallthrough]];
1817 case AArch64CC::HS: // C set
1818 case AArch64CC::LO: // C clear
1819 UsedFlags.C = true;
1820 break;
1821
1822 case AArch64CC::MI: // N set
1823 case AArch64CC::PL: // N clear
1824 UsedFlags.N = true;
1825 break;
1826
1827 case AArch64CC::VS: // V set
1828 case AArch64CC::VC: // V clear
1829 UsedFlags.V = true;
1830 break;
1831
1832 case AArch64CC::GT: // Z clear, N and V the same
1833 case AArch64CC::LE: // Z set, N and V differ
1834 UsedFlags.Z = true;
1835 [[fallthrough]];
1836 case AArch64CC::GE: // N and V the same
1837 case AArch64CC::LT: // N and V differ
1838 UsedFlags.N = true;
1839 UsedFlags.V = true;
1840 break;
1841 }
1842 return UsedFlags;
1843}
1844
/// \returns The condition flags used after \p CmpInstr in its MachineBB if the
/// NZCV flags are not alive in the successors of the basic block containing
/// both \p CmpInstr and \p MI.
/// \returns std::nullopt otherwise.
///
/// Collects the instructions using those flags in \p CCUseInstrs if provided.
1850std::optional<UsedNZCV>
1851llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1852 const TargetRegisterInfo &TRI,
1853 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1854 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1855 if (MI.getParent() != CmpParent)
1856 return std::nullopt;
1857
1858 if (areCFlagsAliveInSuccessors(MBB: CmpParent))
1859 return std::nullopt;
1860
1861 UsedNZCV NZCVUsedAfterCmp;
1862 for (MachineInstr &Instr : instructionsWithoutDebug(
1863 It: std::next(x: CmpInstr.getIterator()), End: CmpParent->instr_end())) {
1864 if (Instr.readsRegister(Reg: AArch64::NZCV, TRI: &TRI)) {
1865 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1866 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1867 return std::nullopt;
1868 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1869 if (CCUseInstrs)
1870 CCUseInstrs->push_back(Elt: &Instr);
1871 }
1872 if (Instr.modifiesRegister(Reg: AArch64::NZCV, TRI: &TRI))
1873 break;
1874 }
1875 return NZCVUsedAfterCmp;
1876}
1877
1878static bool isADDSRegImm(unsigned Opcode) {
1879 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1880}
1881
1882static bool isSUBSRegImm(unsigned Opcode) {
1883 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1884}
1885
1886/// Check if CmpInstr can be substituted by MI.
1887///
1888/// CmpInstr can be substituted:
1889/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1890/// - and, MI and CmpInstr are from the same MachineBB
1891/// - and, condition flags are not alive in successors of the CmpInstr parent
1892/// - and, if MI opcode is the S form there must be no defs of flags between
1893/// MI and CmpInstr
1894/// or if MI opcode is not the S form there must be neither defs of flags
1895/// nor uses of flags between MI and CmpInstr.
/// - and, the C flag is not used after CmpInstr,
///        and either the V flag is not used after CmpInstr or MI produces a
///        poison value if signed overflow occurs (no-signed-wrap).
1899static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1900 const TargetRegisterInfo &TRI) {
  // NOTE: this assertion guarantees that MI.getOpcode() is an add or subtract
  // that may or may not set flags.
1903 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1904
1905 const unsigned CmpOpcode = CmpInstr.getOpcode();
1906 if (!isADDSRegImm(Opcode: CmpOpcode) && !isSUBSRegImm(Opcode: CmpOpcode))
1907 return false;
1908
1909 assert((CmpInstr.getOperand(2).isImm() &&
1910 CmpInstr.getOperand(2).getImm() == 0) &&
1911 "Caller guarantees that CmpInstr compares with constant 0");
1912
1913 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1914 if (!NZVCUsed || NZVCUsed->C)
1915 return false;
1916
1917 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1918 // '%vreg = add ...' or '%vreg = sub ...'.
1919 // Condition flag V is used to indicate signed overflow.
1920 // 1) MI and CmpInstr set N and V to the same value.
1921 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1922 // signed overflow occurs, so CmpInstr could still be simplified away.
1923 if (NZVCUsed->V && !MI.getFlag(Flag: MachineInstr::NoSWrap))
1924 return false;
1925
1926 AccessKind AccessToCheck = AK_Write;
1927 if (sForm(Instr&: MI) != MI.getOpcode())
1928 AccessToCheck = AK_All;
1929 return !areCFlagsAccessedBetweenInstrs(From: &MI, To: &CmpInstr, TRI: &TRI, AccessToCheck);
1930}
1931
1932/// Substitute an instruction comparing to zero with another instruction
1933/// which produces needed condition flags.
1934///
1935/// Return true on success.
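///
/// A minimal sketch of the transformation (illustrative MIR; register numbers
/// and the branch condition are assumed):
///   %3:gpr32 = SUBWrr %1, %2
///   %4:gpr32 = SUBSWri %3, 0, 0, implicit-def $nzcv   ; %4 is unused
///   Bcc <cc>, %bb.1, implicit $nzcv
/// becomes
///   %3:gpr32 = SUBSWrr %1, %2, implicit-def $nzcv
///   Bcc <cc>, %bb.1, implicit $nzcv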
1936bool AArch64InstrInfo::substituteCmpToZero(
1937 MachineInstr &CmpInstr, unsigned SrcReg,
1938 const MachineRegisterInfo &MRI) const {
1939 // Get the unique definition of SrcReg.
1940 MachineInstr *MI = MRI.getUniqueVRegDef(Reg: SrcReg);
1941 if (!MI)
1942 return false;
1943
1944 const TargetRegisterInfo &TRI = getRegisterInfo();
1945
1946 unsigned NewOpc = sForm(Instr&: *MI);
1947 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1948 return false;
1949
1950 if (!canInstrSubstituteCmpInstr(MI&: *MI, CmpInstr, TRI))
1951 return false;
1952
1953 // Update the instruction to set NZCV.
1954 MI->setDesc(get(Opcode: NewOpc));
1955 CmpInstr.eraseFromParent();
1956 bool succeeded = UpdateOperandRegClass(Instr&: *MI);
1957 (void)succeeded;
1958 assert(succeeded && "Some operands reg class are incompatible!");
1959 MI->addRegisterDefined(Reg: AArch64::NZCV, RegInfo: &TRI);
1960 return true;
1961}
1962
1963/// \returns True if \p CmpInstr can be removed.
1964///
1965/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1966/// codes used in \p CCUseInstrs must be inverted.
1967static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1968 int CmpValue, const TargetRegisterInfo &TRI,
1969 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1970 bool &IsInvertCC) {
1971 assert((CmpValue == 0 || CmpValue == 1) &&
1972 "Only comparisons to 0 or 1 considered for removal!");
1973
1974 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1975 unsigned MIOpc = MI.getOpcode();
1976 if (MIOpc == AArch64::CSINCWr) {
1977 if (MI.getOperand(i: 1).getReg() != AArch64::WZR ||
1978 MI.getOperand(i: 2).getReg() != AArch64::WZR)
1979 return false;
1980 } else if (MIOpc == AArch64::CSINCXr) {
1981 if (MI.getOperand(i: 1).getReg() != AArch64::XZR ||
1982 MI.getOperand(i: 2).getReg() != AArch64::XZR)
1983 return false;
1984 } else {
1985 return false;
1986 }
1987 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(Instr: MI);
1988 if (MICC == AArch64CC::Invalid)
1989 return false;
1990
1991 // NZCV needs to be defined
1992 if (MI.findRegisterDefOperandIdx(Reg: AArch64::NZCV, /*TRI=*/nullptr, isDead: true) != -1)
1993 return false;
1994
1995 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1996 const unsigned CmpOpcode = CmpInstr.getOpcode();
1997 bool IsSubsRegImm = isSUBSRegImm(Opcode: CmpOpcode);
1998 if (CmpValue && !IsSubsRegImm)
1999 return false;
2000 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(Opcode: CmpOpcode))
2001 return false;
2002
2003 // MI conditions allowed: eq, ne, mi, pl
2004 UsedNZCV MIUsedNZCV = getUsedNZCV(CC: MICC);
2005 if (MIUsedNZCV.C || MIUsedNZCV.V)
2006 return false;
2007
2008 std::optional<UsedNZCV> NZCVUsedAfterCmp =
2009 examineCFlagsUse(MI, CmpInstr, TRI, CCUseInstrs: &CCUseInstrs);
  // Condition flags must not be used in the successors of the CmpInstr basic
  // block, and only the Z or N flags are allowed to be used after CmpInstr
  // within its basic block.
2012 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
2013 return false;
2014 // Z or N flag used after CmpInstr must correspond to the flag used in MI
2015 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
2016 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
2017 return false;
2018 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
2019 if (MIUsedNZCV.N && !CmpValue)
2020 return false;
2021
2022 // There must be no defs of flags between MI and CmpInstr
2023 if (areCFlagsAccessedBetweenInstrs(From: &MI, To: &CmpInstr, TRI: &TRI, AccessToCheck: AK_Write))
2024 return false;
2025
2026 // Condition code is inverted in the following cases:
2027 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2028 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2029 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
2030 (!CmpValue && MICC == AArch64CC::NE);
2031 return true;
2032}
2033
2034/// Remove comparison in csinc-cmp sequence
2035///
2036/// Examples:
2037/// 1. \code
2038/// csinc w9, wzr, wzr, ne
2039/// cmp w9, #0
2040/// b.eq
2041/// \endcode
2042/// to
2043/// \code
2044/// csinc w9, wzr, wzr, ne
2045/// b.ne
2046/// \endcode
2047///
2048/// 2. \code
2049/// csinc x2, xzr, xzr, mi
2050/// cmp x2, #1
2051/// b.pl
2052/// \endcode
2053/// to
2054/// \code
2055/// csinc x2, xzr, xzr, mi
2056/// b.pl
2057/// \endcode
2058///
2059/// \param CmpInstr comparison instruction
2060/// \return True when comparison removed
2061bool AArch64InstrInfo::removeCmpToZeroOrOne(
2062 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2063 const MachineRegisterInfo &MRI) const {
2064 MachineInstr *MI = MRI.getUniqueVRegDef(Reg: SrcReg);
2065 if (!MI)
2066 return false;
2067 const TargetRegisterInfo &TRI = getRegisterInfo();
2068 SmallVector<MachineInstr *, 4> CCUseInstrs;
2069 bool IsInvertCC = false;
2070 if (!canCmpInstrBeRemoved(MI&: *MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2071 IsInvertCC))
2072 return false;
2073 // Make transformation
2074 CmpInstr.eraseFromParent();
2075 if (IsInvertCC) {
2076 // Invert condition codes in CmpInstr CC users
2077 for (MachineInstr *CCUseInstr : CCUseInstrs) {
2078 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(Instr: *CCUseInstr);
2079 assert(Idx >= 0 && "Unexpected instruction using CC.");
2080 MachineOperand &CCOperand = CCUseInstr->getOperand(i: Idx);
2081 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
2082 Code: static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2083 CCOperand.setImm(CCUse);
2084 }
2085 }
2086 return true;
2087}
2088
2089bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2090 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2091 MI.getOpcode() != AArch64::CATCHRET)
2092 return false;
2093
2094 MachineBasicBlock &MBB = *MI.getParent();
2095 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2096 auto TRI = Subtarget.getRegisterInfo();
2097 DebugLoc DL = MI.getDebugLoc();
2098
2099 if (MI.getOpcode() == AArch64::CATCHRET) {
2100 // Skip to the first instruction before the epilog.
2101 const TargetInstrInfo *TII =
2102 MBB.getParent()->getSubtarget().getInstrInfo();
2103 MachineBasicBlock *TargetMBB = MI.getOperand(i: 0).getMBB();
2104 auto MBBI = MachineBasicBlock::iterator(MI);
2105 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(x: MBBI);
2106 while (FirstEpilogSEH->getFlag(Flag: MachineInstr::FrameDestroy) &&
2107 FirstEpilogSEH != MBB.begin())
2108 FirstEpilogSEH = std::prev(x: FirstEpilogSEH);
2109 if (FirstEpilogSEH != MBB.begin())
2110 FirstEpilogSEH = std::next(x: FirstEpilogSEH);
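    // Materialize the address of the catchret target into x0 using an
    // ADRP/ADD pair, i.e. (illustrative):
    //   adrp x0, <target-bb>
    //   add  x0, x0, :lo12:<target-bb>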
2111 BuildMI(BB&: MBB, I: FirstEpilogSEH, MIMD: DL, MCID: TII->get(Opcode: AArch64::ADRP))
2112 .addReg(RegNo: AArch64::X0, flags: RegState::Define)
2113 .addMBB(MBB: TargetMBB);
2114 BuildMI(BB&: MBB, I: FirstEpilogSEH, MIMD: DL, MCID: TII->get(Opcode: AArch64::ADDXri))
2115 .addReg(RegNo: AArch64::X0, flags: RegState::Define)
2116 .addReg(RegNo: AArch64::X0)
2117 .addMBB(MBB: TargetMBB)
2118 .addImm(Val: 0);
2119 TargetMBB->setMachineBlockAddressTaken();
2120 return true;
2121 }
2122
2123 Register Reg = MI.getOperand(i: 0).getReg();
2124 Module &M = *MBB.getParent()->getFunction().getParent();
2125 if (M.getStackProtectorGuard() == "sysreg") {
2126 const AArch64SysReg::SysReg *SrcReg =
2127 AArch64SysReg::lookupSysRegByName(Name: M.getStackProtectorGuardReg());
2128 if (!SrcReg)
2129 report_fatal_error(reason: "Unknown SysReg for Stack Protector Guard Register");
2130
2131 // mrs xN, sysreg
2132 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::MRS))
2133 .addDef(RegNo: Reg, Flags: RegState::Renamable)
2134 .addImm(Val: SrcReg->Encoding);
2135 int Offset = M.getStackProtectorGuardOffset();
2136 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2137 // ldr xN, [xN, #offset]
2138 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDRXui))
2139 .addDef(RegNo: Reg)
2140 .addUse(RegNo: Reg, Flags: RegState::Kill)
2141 .addImm(Val: Offset / 8);
2142 } else if (Offset >= -256 && Offset <= 255) {
2143 // ldur xN, [xN, #offset]
2144 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDURXi))
2145 .addDef(RegNo: Reg)
2146 .addUse(RegNo: Reg, Flags: RegState::Kill)
2147 .addImm(Val: Offset);
2148 } else if (Offset >= -4095 && Offset <= 4095) {
2149 if (Offset > 0) {
2150 // add xN, xN, #offset
2151 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::ADDXri))
2152 .addDef(RegNo: Reg)
2153 .addUse(RegNo: Reg, Flags: RegState::Kill)
2154 .addImm(Val: Offset)
2155 .addImm(Val: 0);
2156 } else {
2157 // sub xN, xN, #offset
2158 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::SUBXri))
2159 .addDef(RegNo: Reg)
2160 .addUse(RegNo: Reg, Flags: RegState::Kill)
2161 .addImm(Val: -Offset)
2162 .addImm(Val: 0);
2163 }
2164 // ldr xN, [xN]
2165 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDRXui))
2166 .addDef(RegNo: Reg)
2167 .addUse(RegNo: Reg, Flags: RegState::Kill)
2168 .addImm(Val: 0);
2169 } else {
      // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
      // than 32760.
2172 // It might be nice to use AArch64::MOVi32imm here, which would get
2173 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2174 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2175 // AArch64FrameLowering might help us find such a scratch register
2176 // though. If we failed to find a scratch register, we could emit a
2177 // stream of add instructions to build up the immediate. Or, we could try
2178 // to insert a AArch64::MOVi32imm before register allocation so that we
2179 // didn't need to scavenge for a scratch register.
2180 report_fatal_error(reason: "Unable to encode Stack Protector Guard Offset");
2181 }
2182 MBB.erase(I: MI);
2183 return true;
2184 }
2185
2186 const GlobalValue *GV =
2187 cast<GlobalValue>(Val: (*MI.memoperands_begin())->getValue());
2188 const TargetMachine &TM = MBB.getParent()->getTarget();
2189 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2190 const unsigned char MO_NC = AArch64II::MO_NC;
2191
2192 if ((OpFlags & AArch64II::MO_GOT) != 0) {
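    // Illustrative expansion on ELF targets (guard symbol name assumed, e.g.
    // __stack_chk_guard; xN stands for the destination register):
    //   adrp xN, :got:__stack_chk_guard
    //   ldr  xN, [xN, :got_lo12:__stack_chk_guard]   ; LOADgot
    //   ldr  xN, [xN]                                ; load the guard value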
2193 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LOADgot), DestReg: Reg)
2194 .addGlobalAddress(GV, Offset: 0, TargetFlags: OpFlags);
2195 if (Subtarget.isTargetILP32()) {
2196 unsigned Reg32 = TRI->getSubReg(Reg, Idx: AArch64::sub_32);
2197 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDRWui))
2198 .addDef(RegNo: Reg32, Flags: RegState::Dead)
2199 .addUse(RegNo: Reg, Flags: RegState::Kill)
2200 .addImm(Val: 0)
2201 .addMemOperand(MMO: *MI.memoperands_begin())
2202 .addDef(RegNo: Reg, Flags: RegState::Implicit);
2203 } else {
2204 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDRXui), DestReg: Reg)
2205 .addReg(RegNo: Reg, flags: RegState::Kill)
2206 .addImm(Val: 0)
2207 .addMemOperand(MMO: *MI.memoperands_begin());
2208 }
2209 } else if (TM.getCodeModel() == CodeModel::Large) {
2210 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2211 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::MOVZXi), DestReg: Reg)
2212 .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_G0 | MO_NC)
2213 .addImm(Val: 0);
2214 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::MOVKXi), DestReg: Reg)
2215 .addReg(RegNo: Reg, flags: RegState::Kill)
2216 .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_G1 | MO_NC)
2217 .addImm(Val: 16);
2218 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::MOVKXi), DestReg: Reg)
2219 .addReg(RegNo: Reg, flags: RegState::Kill)
2220 .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_G2 | MO_NC)
2221 .addImm(Val: 32);
2222 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::MOVKXi), DestReg: Reg)
2223 .addReg(RegNo: Reg, flags: RegState::Kill)
2224 .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_G3)
2225 .addImm(Val: 48);
2226 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDRXui), DestReg: Reg)
2227 .addReg(RegNo: Reg, flags: RegState::Kill)
2228 .addImm(Val: 0)
2229 .addMemOperand(MMO: *MI.memoperands_begin());
2230 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2231 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::ADR), DestReg: Reg)
2232 .addGlobalAddress(GV, Offset: 0, TargetFlags: OpFlags);
2233 } else {
2234 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::ADRP), DestReg: Reg)
2235 .addGlobalAddress(GV, Offset: 0, TargetFlags: OpFlags | AArch64II::MO_PAGE);
2236 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2237 if (Subtarget.isTargetILP32()) {
2238 unsigned Reg32 = TRI->getSubReg(Reg, Idx: AArch64::sub_32);
2239 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDRWui))
2240 .addDef(RegNo: Reg32, Flags: RegState::Dead)
2241 .addUse(RegNo: Reg, Flags: RegState::Kill)
2242 .addGlobalAddress(GV, Offset: 0, TargetFlags: LoFlags)
2243 .addMemOperand(MMO: *MI.memoperands_begin())
2244 .addDef(RegNo: Reg, Flags: RegState::Implicit);
2245 } else {
2246 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDRXui), DestReg: Reg)
2247 .addReg(RegNo: Reg, flags: RegState::Kill)
2248 .addGlobalAddress(GV, Offset: 0, TargetFlags: LoFlags)
2249 .addMemOperand(MMO: *MI.memoperands_begin());
2250 }
2251 }
2252
2253 MBB.erase(I: MI);
2254
2255 return true;
2256}
2257
2258// Return true if this instruction simply sets its single destination register
2259// to zero. This is equivalent to a register rename of the zero-register.
2260bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2261 switch (MI.getOpcode()) {
2262 default:
2263 break;
2264 case AArch64::MOVZWi:
2265 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2266 if (MI.getOperand(i: 1).isImm() && MI.getOperand(i: 1).getImm() == 0) {
2267 assert(MI.getDesc().getNumOperands() == 3 &&
2268 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2269 return true;
2270 }
2271 break;
2272 case AArch64::ANDWri: // and Rd, Rzr, #imm
2273 return MI.getOperand(i: 1).getReg() == AArch64::WZR;
2274 case AArch64::ANDXri:
2275 return MI.getOperand(i: 1).getReg() == AArch64::XZR;
2276 case TargetOpcode::COPY:
2277 return MI.getOperand(i: 1).getReg() == AArch64::WZR;
2278 }
2279 return false;
2280}
2281
2282// Return true if this instruction simply renames a general register without
2283// modifying bits.
2284bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2285 switch (MI.getOpcode()) {
2286 default:
2287 break;
2288 case TargetOpcode::COPY: {
    // GPR32 copies will be lowered to ORRXrs
2290 Register DstReg = MI.getOperand(i: 0).getReg();
2291 return (AArch64::GPR32RegClass.contains(Reg: DstReg) ||
2292 AArch64::GPR64RegClass.contains(Reg: DstReg));
2293 }
2294 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2295 if (MI.getOperand(i: 1).getReg() == AArch64::XZR) {
2296 assert(MI.getDesc().getNumOperands() == 4 &&
2297 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2298 return true;
2299 }
2300 break;
2301 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2302 if (MI.getOperand(i: 2).getImm() == 0) {
2303 assert(MI.getDesc().getNumOperands() == 4 &&
2304 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2305 return true;
2306 }
2307 break;
2308 }
2309 return false;
2310}
2311
2312// Return true if this instruction simply renames a general register without
2313// modifying bits.
2314bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2315 switch (MI.getOpcode()) {
2316 default:
2317 break;
2318 case TargetOpcode::COPY: {
2319 Register DstReg = MI.getOperand(i: 0).getReg();
2320 return AArch64::FPR128RegClass.contains(Reg: DstReg);
2321 }
2322 case AArch64::ORRv16i8:
2323 if (MI.getOperand(i: 1).getReg() == MI.getOperand(i: 2).getReg()) {
2324 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2325 "invalid ORRv16i8 operands");
2326 return true;
2327 }
2328 break;
2329 }
2330 return false;
2331}
2332
2333Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2334 int &FrameIndex) const {
2335 switch (MI.getOpcode()) {
2336 default:
2337 break;
2338 case AArch64::LDRWui:
2339 case AArch64::LDRXui:
2340 case AArch64::LDRBui:
2341 case AArch64::LDRHui:
2342 case AArch64::LDRSui:
2343 case AArch64::LDRDui:
2344 case AArch64::LDRQui:
2345 case AArch64::LDR_PXI:
2346 if (MI.getOperand(i: 0).getSubReg() == 0 && MI.getOperand(i: 1).isFI() &&
2347 MI.getOperand(i: 2).isImm() && MI.getOperand(i: 2).getImm() == 0) {
2348 FrameIndex = MI.getOperand(i: 1).getIndex();
2349 return MI.getOperand(i: 0).getReg();
2350 }
2351 break;
2352 }
2353
2354 return 0;
2355}
2356
2357Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2358 int &FrameIndex) const {
2359 switch (MI.getOpcode()) {
2360 default:
2361 break;
2362 case AArch64::STRWui:
2363 case AArch64::STRXui:
2364 case AArch64::STRBui:
2365 case AArch64::STRHui:
2366 case AArch64::STRSui:
2367 case AArch64::STRDui:
2368 case AArch64::STRQui:
2369 case AArch64::STR_PXI:
2370 if (MI.getOperand(i: 0).getSubReg() == 0 && MI.getOperand(i: 1).isFI() &&
2371 MI.getOperand(i: 2).isImm() && MI.getOperand(i: 2).getImm() == 0) {
2372 FrameIndex = MI.getOperand(i: 1).getIndex();
2373 return MI.getOperand(i: 0).getReg();
2374 }
2375 break;
2376 }
2377 return 0;
2378}
2379
2380/// Check all MachineMemOperands for a hint to suppress pairing.
2381bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2382 return llvm::any_of(Range: MI.memoperands(), P: [](MachineMemOperand *MMO) {
2383 return MMO->getFlags() & MOSuppressPair;
2384 });
2385}
2386
2387/// Set a flag on the first MachineMemOperand to suppress pairing.
2388void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2389 if (MI.memoperands_empty())
2390 return;
2391 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2392}
2393
2394/// Check all MachineMemOperands for a hint that the load/store is strided.
2395bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2396 return llvm::any_of(Range: MI.memoperands(), P: [](MachineMemOperand *MMO) {
2397 return MMO->getFlags() & MOStridedAccess;
2398 });
2399}
2400
2401bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2402 switch (Opc) {
2403 default:
2404 return false;
2405 case AArch64::STURSi:
2406 case AArch64::STRSpre:
2407 case AArch64::STURDi:
2408 case AArch64::STRDpre:
2409 case AArch64::STURQi:
2410 case AArch64::STRQpre:
2411 case AArch64::STURBBi:
2412 case AArch64::STURHHi:
2413 case AArch64::STURWi:
2414 case AArch64::STRWpre:
2415 case AArch64::STURXi:
2416 case AArch64::STRXpre:
2417 case AArch64::LDURSi:
2418 case AArch64::LDRSpre:
2419 case AArch64::LDURDi:
2420 case AArch64::LDRDpre:
2421 case AArch64::LDURQi:
2422 case AArch64::LDRQpre:
2423 case AArch64::LDURWi:
2424 case AArch64::LDRWpre:
2425 case AArch64::LDURXi:
2426 case AArch64::LDRXpre:
2427 case AArch64::LDRSWpre:
2428 case AArch64::LDURSWi:
2429 case AArch64::LDURHHi:
2430 case AArch64::LDURBBi:
2431 case AArch64::LDURSBWi:
2432 case AArch64::LDURSHWi:
2433 return true;
2434 }
2435}
2436
2437std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2438 switch (Opc) {
2439 default: return {};
2440 case AArch64::PRFMui: return AArch64::PRFUMi;
2441 case AArch64::LDRXui: return AArch64::LDURXi;
2442 case AArch64::LDRWui: return AArch64::LDURWi;
2443 case AArch64::LDRBui: return AArch64::LDURBi;
2444 case AArch64::LDRHui: return AArch64::LDURHi;
2445 case AArch64::LDRSui: return AArch64::LDURSi;
2446 case AArch64::LDRDui: return AArch64::LDURDi;
2447 case AArch64::LDRQui: return AArch64::LDURQi;
2448 case AArch64::LDRBBui: return AArch64::LDURBBi;
2449 case AArch64::LDRHHui: return AArch64::LDURHHi;
2450 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2451 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2452 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2453 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2454 case AArch64::LDRSWui: return AArch64::LDURSWi;
2455 case AArch64::STRXui: return AArch64::STURXi;
2456 case AArch64::STRWui: return AArch64::STURWi;
2457 case AArch64::STRBui: return AArch64::STURBi;
2458 case AArch64::STRHui: return AArch64::STURHi;
2459 case AArch64::STRSui: return AArch64::STURSi;
2460 case AArch64::STRDui: return AArch64::STURDi;
2461 case AArch64::STRQui: return AArch64::STURQi;
2462 case AArch64::STRBBui: return AArch64::STURBBi;
2463 case AArch64::STRHHui: return AArch64::STURHHi;
2464 }
2465}
2466
2467unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
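  // The returned value is the operand index of the (scaled) immediate offset.
  // For example (illustrative): LDRXui has operands (Rt, Rn, imm), so the
  // immediate is operand 2, while a pre/post-indexed form such as LDRXpre also
  // defines the updated base register first, pushing the immediate to
  // operand 3.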
2468 switch (Opc) {
2469 default:
2470 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2471 case AArch64::ADDG:
2472 case AArch64::LDAPURBi:
2473 case AArch64::LDAPURHi:
2474 case AArch64::LDAPURi:
2475 case AArch64::LDAPURSBWi:
2476 case AArch64::LDAPURSBXi:
2477 case AArch64::LDAPURSHWi:
2478 case AArch64::LDAPURSHXi:
2479 case AArch64::LDAPURSWi:
2480 case AArch64::LDAPURXi:
2481 case AArch64::LDR_PPXI:
2482 case AArch64::LDR_PXI:
2483 case AArch64::LDR_ZXI:
2484 case AArch64::LDR_ZZXI:
2485 case AArch64::LDR_ZZZXI:
2486 case AArch64::LDR_ZZZZXI:
2487 case AArch64::LDRBBui:
2488 case AArch64::LDRBui:
2489 case AArch64::LDRDui:
2490 case AArch64::LDRHHui:
2491 case AArch64::LDRHui:
2492 case AArch64::LDRQui:
2493 case AArch64::LDRSBWui:
2494 case AArch64::LDRSBXui:
2495 case AArch64::LDRSHWui:
2496 case AArch64::LDRSHXui:
2497 case AArch64::LDRSui:
2498 case AArch64::LDRSWui:
2499 case AArch64::LDRWui:
2500 case AArch64::LDRXui:
2501 case AArch64::LDURBBi:
2502 case AArch64::LDURBi:
2503 case AArch64::LDURDi:
2504 case AArch64::LDURHHi:
2505 case AArch64::LDURHi:
2506 case AArch64::LDURQi:
2507 case AArch64::LDURSBWi:
2508 case AArch64::LDURSBXi:
2509 case AArch64::LDURSHWi:
2510 case AArch64::LDURSHXi:
2511 case AArch64::LDURSi:
2512 case AArch64::LDURSWi:
2513 case AArch64::LDURWi:
2514 case AArch64::LDURXi:
2515 case AArch64::PRFMui:
2516 case AArch64::PRFUMi:
2517 case AArch64::ST2Gi:
2518 case AArch64::STGi:
2519 case AArch64::STLURBi:
2520 case AArch64::STLURHi:
2521 case AArch64::STLURWi:
2522 case AArch64::STLURXi:
2523 case AArch64::StoreSwiftAsyncContext:
2524 case AArch64::STR_PPXI:
2525 case AArch64::STR_PXI:
2526 case AArch64::STR_ZXI:
2527 case AArch64::STR_ZZXI:
2528 case AArch64::STR_ZZZXI:
2529 case AArch64::STR_ZZZZXI:
2530 case AArch64::STRBBui:
2531 case AArch64::STRBui:
2532 case AArch64::STRDui:
2533 case AArch64::STRHHui:
2534 case AArch64::STRHui:
2535 case AArch64::STRQui:
2536 case AArch64::STRSui:
2537 case AArch64::STRWui:
2538 case AArch64::STRXui:
2539 case AArch64::STURBBi:
2540 case AArch64::STURBi:
2541 case AArch64::STURDi:
2542 case AArch64::STURHHi:
2543 case AArch64::STURHi:
2544 case AArch64::STURQi:
2545 case AArch64::STURSi:
2546 case AArch64::STURWi:
2547 case AArch64::STURXi:
2548 case AArch64::STZ2Gi:
2549 case AArch64::STZGi:
2550 case AArch64::TAGPstack:
2551 case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
2552 case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
2553 return 2;
2554 case AArch64::LD1B_D_IMM:
2555 case AArch64::LD1B_H_IMM:
2556 case AArch64::LD1B_IMM:
2557 case AArch64::LD1B_S_IMM:
2558 case AArch64::LD1D_IMM:
2559 case AArch64::LD1H_D_IMM:
2560 case AArch64::LD1H_IMM:
2561 case AArch64::LD1H_S_IMM:
2562 case AArch64::LD1RB_D_IMM:
2563 case AArch64::LD1RB_H_IMM:
2564 case AArch64::LD1RB_IMM:
2565 case AArch64::LD1RB_S_IMM:
2566 case AArch64::LD1RD_IMM:
2567 case AArch64::LD1RH_D_IMM:
2568 case AArch64::LD1RH_IMM:
2569 case AArch64::LD1RH_S_IMM:
2570 case AArch64::LD1RSB_D_IMM:
2571 case AArch64::LD1RSB_H_IMM:
2572 case AArch64::LD1RSB_S_IMM:
2573 case AArch64::LD1RSH_D_IMM:
2574 case AArch64::LD1RSH_S_IMM:
2575 case AArch64::LD1RSW_IMM:
2576 case AArch64::LD1RW_D_IMM:
2577 case AArch64::LD1RW_IMM:
2578 case AArch64::LD1SB_D_IMM:
2579 case AArch64::LD1SB_H_IMM:
2580 case AArch64::LD1SB_S_IMM:
2581 case AArch64::LD1SH_D_IMM:
2582 case AArch64::LD1SH_S_IMM:
2583 case AArch64::LD1SW_D_IMM:
2584 case AArch64::LD1W_D_IMM:
2585 case AArch64::LD1W_IMM:
2586 case AArch64::LD2B_IMM:
2587 case AArch64::LD2D_IMM:
2588 case AArch64::LD2H_IMM:
2589 case AArch64::LD2W_IMM:
2590 case AArch64::LD3B_IMM:
2591 case AArch64::LD3D_IMM:
2592 case AArch64::LD3H_IMM:
2593 case AArch64::LD3W_IMM:
2594 case AArch64::LD4B_IMM:
2595 case AArch64::LD4D_IMM:
2596 case AArch64::LD4H_IMM:
2597 case AArch64::LD4W_IMM:
2598 case AArch64::LDG:
2599 case AArch64::LDNF1B_D_IMM:
2600 case AArch64::LDNF1B_H_IMM:
2601 case AArch64::LDNF1B_IMM:
2602 case AArch64::LDNF1B_S_IMM:
2603 case AArch64::LDNF1D_IMM:
2604 case AArch64::LDNF1H_D_IMM:
2605 case AArch64::LDNF1H_IMM:
2606 case AArch64::LDNF1H_S_IMM:
2607 case AArch64::LDNF1SB_D_IMM:
2608 case AArch64::LDNF1SB_H_IMM:
2609 case AArch64::LDNF1SB_S_IMM:
2610 case AArch64::LDNF1SH_D_IMM:
2611 case AArch64::LDNF1SH_S_IMM:
2612 case AArch64::LDNF1SW_D_IMM:
2613 case AArch64::LDNF1W_D_IMM:
2614 case AArch64::LDNF1W_IMM:
2615 case AArch64::LDNPDi:
2616 case AArch64::LDNPQi:
2617 case AArch64::LDNPSi:
2618 case AArch64::LDNPWi:
2619 case AArch64::LDNPXi:
2620 case AArch64::LDNT1B_ZRI:
2621 case AArch64::LDNT1D_ZRI:
2622 case AArch64::LDNT1H_ZRI:
2623 case AArch64::LDNT1W_ZRI:
2624 case AArch64::LDPDi:
2625 case AArch64::LDPQi:
2626 case AArch64::LDPSi:
2627 case AArch64::LDPWi:
2628 case AArch64::LDPXi:
2629 case AArch64::LDRBBpost:
2630 case AArch64::LDRBBpre:
2631 case AArch64::LDRBpost:
2632 case AArch64::LDRBpre:
2633 case AArch64::LDRDpost:
2634 case AArch64::LDRDpre:
2635 case AArch64::LDRHHpost:
2636 case AArch64::LDRHHpre:
2637 case AArch64::LDRHpost:
2638 case AArch64::LDRHpre:
2639 case AArch64::LDRQpost:
2640 case AArch64::LDRQpre:
2641 case AArch64::LDRSpost:
2642 case AArch64::LDRSpre:
2643 case AArch64::LDRWpost:
2644 case AArch64::LDRWpre:
2645 case AArch64::LDRXpost:
2646 case AArch64::LDRXpre:
2647 case AArch64::ST1B_D_IMM:
2648 case AArch64::ST1B_H_IMM:
2649 case AArch64::ST1B_IMM:
2650 case AArch64::ST1B_S_IMM:
2651 case AArch64::ST1D_IMM:
2652 case AArch64::ST1H_D_IMM:
2653 case AArch64::ST1H_IMM:
2654 case AArch64::ST1H_S_IMM:
2655 case AArch64::ST1W_D_IMM:
2656 case AArch64::ST1W_IMM:
2657 case AArch64::ST2B_IMM:
2658 case AArch64::ST2D_IMM:
2659 case AArch64::ST2H_IMM:
2660 case AArch64::ST2W_IMM:
2661 case AArch64::ST3B_IMM:
2662 case AArch64::ST3D_IMM:
2663 case AArch64::ST3H_IMM:
2664 case AArch64::ST3W_IMM:
2665 case AArch64::ST4B_IMM:
2666 case AArch64::ST4D_IMM:
2667 case AArch64::ST4H_IMM:
2668 case AArch64::ST4W_IMM:
2669 case AArch64::STGPi:
2670 case AArch64::STGPreIndex:
2671 case AArch64::STZGPreIndex:
2672 case AArch64::ST2GPreIndex:
2673 case AArch64::STZ2GPreIndex:
2674 case AArch64::STGPostIndex:
2675 case AArch64::STZGPostIndex:
2676 case AArch64::ST2GPostIndex:
2677 case AArch64::STZ2GPostIndex:
2678 case AArch64::STNPDi:
2679 case AArch64::STNPQi:
2680 case AArch64::STNPSi:
2681 case AArch64::STNPWi:
2682 case AArch64::STNPXi:
2683 case AArch64::STNT1B_ZRI:
2684 case AArch64::STNT1D_ZRI:
2685 case AArch64::STNT1H_ZRI:
2686 case AArch64::STNT1W_ZRI:
2687 case AArch64::STPDi:
2688 case AArch64::STPQi:
2689 case AArch64::STPSi:
2690 case AArch64::STPWi:
2691 case AArch64::STPXi:
2692 case AArch64::STRBBpost:
2693 case AArch64::STRBBpre:
2694 case AArch64::STRBpost:
2695 case AArch64::STRBpre:
2696 case AArch64::STRDpost:
2697 case AArch64::STRDpre:
2698 case AArch64::STRHHpost:
2699 case AArch64::STRHHpre:
2700 case AArch64::STRHpost:
2701 case AArch64::STRHpre:
2702 case AArch64::STRQpost:
2703 case AArch64::STRQpre:
2704 case AArch64::STRSpost:
2705 case AArch64::STRSpre:
2706 case AArch64::STRWpost:
2707 case AArch64::STRWpre:
2708 case AArch64::STRXpost:
2709 case AArch64::STRXpre:
2710 return 3;
2711 case AArch64::LDPDpost:
2712 case AArch64::LDPDpre:
2713 case AArch64::LDPQpost:
2714 case AArch64::LDPQpre:
2715 case AArch64::LDPSpost:
2716 case AArch64::LDPSpre:
2717 case AArch64::LDPWpost:
2718 case AArch64::LDPWpre:
2719 case AArch64::LDPXpost:
2720 case AArch64::LDPXpre:
2721 case AArch64::STGPpre:
2722 case AArch64::STGPpost:
2723 case AArch64::STPDpost:
2724 case AArch64::STPDpre:
2725 case AArch64::STPQpost:
2726 case AArch64::STPQpre:
2727 case AArch64::STPSpost:
2728 case AArch64::STPSpre:
2729 case AArch64::STPWpost:
2730 case AArch64::STPWpre:
2731 case AArch64::STPXpost:
2732 case AArch64::STPXpre:
2733 return 4;
2734 }
2735}
2736
2737bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2738 switch (MI.getOpcode()) {
2739 default:
2740 return false;
2741 // Scaled instructions.
2742 case AArch64::STRSui:
2743 case AArch64::STRDui:
2744 case AArch64::STRQui:
2745 case AArch64::STRXui:
2746 case AArch64::STRWui:
2747 case AArch64::LDRSui:
2748 case AArch64::LDRDui:
2749 case AArch64::LDRQui:
2750 case AArch64::LDRXui:
2751 case AArch64::LDRWui:
2752 case AArch64::LDRSWui:
2753 // Unscaled instructions.
2754 case AArch64::STURSi:
2755 case AArch64::STRSpre:
2756 case AArch64::STURDi:
2757 case AArch64::STRDpre:
2758 case AArch64::STURQi:
2759 case AArch64::STRQpre:
2760 case AArch64::STURWi:
2761 case AArch64::STRWpre:
2762 case AArch64::STURXi:
2763 case AArch64::STRXpre:
2764 case AArch64::LDURSi:
2765 case AArch64::LDRSpre:
2766 case AArch64::LDURDi:
2767 case AArch64::LDRDpre:
2768 case AArch64::LDURQi:
2769 case AArch64::LDRQpre:
2770 case AArch64::LDURWi:
2771 case AArch64::LDRWpre:
2772 case AArch64::LDURXi:
2773 case AArch64::LDRXpre:
2774 case AArch64::LDURSWi:
2775 case AArch64::LDRSWpre:
2776 // SVE instructions.
2777 case AArch64::LDR_ZXI:
2778 case AArch64::STR_ZXI:
2779 return true;
2780 }
2781}
2782
2783bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2784 switch (MI.getOpcode()) {
2785 default:
2786 assert((!MI.isCall() || !MI.isReturn()) &&
2787 "Unexpected instruction - was a new tail call opcode introduced?");
2788 return false;
2789 case AArch64::TCRETURNdi:
2790 case AArch64::TCRETURNri:
2791 case AArch64::TCRETURNrix16x17:
2792 case AArch64::TCRETURNrix17:
2793 case AArch64::TCRETURNrinotx16:
2794 case AArch64::TCRETURNriALL:
2795 case AArch64::AUTH_TCRETURN:
2796 case AArch64::AUTH_TCRETURN_BTI:
2797 return true;
2798 }
2799}
2800
2801unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2802 switch (Opc) {
2803 default:
2804 llvm_unreachable("Opcode has no flag setting equivalent!");
2805 // 32-bit cases:
2806 case AArch64::ADDWri:
2807 return AArch64::ADDSWri;
2808 case AArch64::ADDWrr:
2809 return AArch64::ADDSWrr;
2810 case AArch64::ADDWrs:
2811 return AArch64::ADDSWrs;
2812 case AArch64::ADDWrx:
2813 return AArch64::ADDSWrx;
2814 case AArch64::ANDWri:
2815 return AArch64::ANDSWri;
2816 case AArch64::ANDWrr:
2817 return AArch64::ANDSWrr;
2818 case AArch64::ANDWrs:
2819 return AArch64::ANDSWrs;
2820 case AArch64::BICWrr:
2821 return AArch64::BICSWrr;
2822 case AArch64::BICWrs:
2823 return AArch64::BICSWrs;
2824 case AArch64::SUBWri:
2825 return AArch64::SUBSWri;
2826 case AArch64::SUBWrr:
2827 return AArch64::SUBSWrr;
2828 case AArch64::SUBWrs:
2829 return AArch64::SUBSWrs;
2830 case AArch64::SUBWrx:
2831 return AArch64::SUBSWrx;
2832 // 64-bit cases:
2833 case AArch64::ADDXri:
2834 return AArch64::ADDSXri;
2835 case AArch64::ADDXrr:
2836 return AArch64::ADDSXrr;
2837 case AArch64::ADDXrs:
2838 return AArch64::ADDSXrs;
2839 case AArch64::ADDXrx:
2840 return AArch64::ADDSXrx;
2841 case AArch64::ANDXri:
2842 return AArch64::ANDSXri;
2843 case AArch64::ANDXrr:
2844 return AArch64::ANDSXrr;
2845 case AArch64::ANDXrs:
2846 return AArch64::ANDSXrs;
2847 case AArch64::BICXrr:
2848 return AArch64::BICSXrr;
2849 case AArch64::BICXrs:
2850 return AArch64::BICSXrs;
2851 case AArch64::SUBXri:
2852 return AArch64::SUBSXri;
2853 case AArch64::SUBXrr:
2854 return AArch64::SUBSXrr;
2855 case AArch64::SUBXrs:
2856 return AArch64::SUBSXrs;
2857 case AArch64::SUBXrx:
2858 return AArch64::SUBSXrx;
2859 // SVE instructions:
2860 case AArch64::AND_PPzPP:
2861 return AArch64::ANDS_PPzPP;
2862 case AArch64::BIC_PPzPP:
2863 return AArch64::BICS_PPzPP;
2864 case AArch64::EOR_PPzPP:
2865 return AArch64::EORS_PPzPP;
2866 case AArch64::NAND_PPzPP:
2867 return AArch64::NANDS_PPzPP;
2868 case AArch64::NOR_PPzPP:
2869 return AArch64::NORS_PPzPP;
2870 case AArch64::ORN_PPzPP:
2871 return AArch64::ORNS_PPzPP;
2872 case AArch64::ORR_PPzPP:
2873 return AArch64::ORRS_PPzPP;
2874 case AArch64::BRKA_PPzP:
2875 return AArch64::BRKAS_PPzP;
2876 case AArch64::BRKPA_PPzPP:
2877 return AArch64::BRKPAS_PPzPP;
2878 case AArch64::BRKB_PPzP:
2879 return AArch64::BRKBS_PPzP;
2880 case AArch64::BRKPB_PPzPP:
2881 return AArch64::BRKPBS_PPzPP;
2882 case AArch64::BRKN_PPzP:
2883 return AArch64::BRKNS_PPzP;
2884 case AArch64::RDFFR_PPz:
2885 return AArch64::RDFFRS_PPz;
2886 case AArch64::PTRUE_B:
2887 return AArch64::PTRUES_B;
2888 }
2889}
2890
2891// Is this a candidate for ld/st merging or pairing? For example, we don't
2892// touch volatiles or load/stores that have a hint to avoid pair formation.
2893bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2894
2895 bool IsPreLdSt = isPreLdSt(MI);
2896
2897 // If this is a volatile load/store, don't mess with it.
2898 if (MI.hasOrderedMemoryRef())
2899 return false;
2900
2901 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2902 // For Pre-inc LD/ST, the operand is shifted by one.
2903 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2904 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2905 "Expected a reg or frame index operand.");
2906
2907 // For Pre-indexed addressing quadword instructions, the third operand is the
2908 // immediate value.
2909 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(i: 3).isImm();
2910
2911 if (!MI.getOperand(i: 2).isImm() && !IsImmPreLdSt)
2912 return false;
2913
2914 // Can't merge/pair if the instruction modifies the base register.
2915 // e.g., ldr x0, [x0]
2916 // This case will never occur with an FI base.
2917 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2918 // STR<S,D,Q,W,X>pre, it can be merged.
2919 // For example:
2920 // ldr q0, [x11, #32]!
2921 // ldr q1, [x11, #16]
2922 // to
2923 // ldp q0, q1, [x11, #32]!
2924 if (MI.getOperand(i: 1).isReg() && !IsPreLdSt) {
2925 Register BaseReg = MI.getOperand(i: 1).getReg();
2926 const TargetRegisterInfo *TRI = &getRegisterInfo();
2927 if (MI.modifiesRegister(Reg: BaseReg, TRI))
2928 return false;
2929 }
2930
2931 // Pairing SVE fills/spills is only valid for little-endian targets that
2932 // implement VLS 128.
2933 switch (MI.getOpcode()) {
2934 default:
2935 break;
2936 case AArch64::LDR_ZXI:
2937 case AArch64::STR_ZXI:
2938 if (!Subtarget.isLittleEndian() ||
2939 Subtarget.getSVEVectorSizeInBits() != 128)
2940 return false;
2941 }
2942
2943 // Check if this load/store has a hint to avoid pair formation.
2944 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2945 if (isLdStPairSuppressed(MI))
2946 return false;
2947
2948 // Do not pair any callee-save store/reload instructions in the
2949 // prologue/epilogue if the CFI information encoded the operations as separate
2950 // instructions, as that will cause the size of the actual prologue to mismatch
2951 // with the prologue size recorded in the Windows CFI.
2952 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2953 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2954 MI.getMF()->getFunction().needsUnwindTableEntry();
2955 if (NeedsWinCFI && (MI.getFlag(Flag: MachineInstr::FrameSetup) ||
2956 MI.getFlag(Flag: MachineInstr::FrameDestroy)))
2957 return false;
2958
2959 // On some CPUs quad load/store pairs are slower than two single load/stores.
2960 if (Subtarget.isPaired128Slow()) {
2961 switch (MI.getOpcode()) {
2962 default:
2963 break;
2964 case AArch64::LDURQi:
2965 case AArch64::STURQi:
2966 case AArch64::LDRQui:
2967 case AArch64::STRQui:
2968 return false;
2969 }
2970 }
2971
2972 return true;
2973}
2974
2975bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2976 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2977 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2978 const TargetRegisterInfo *TRI) const {
2979 if (!LdSt.mayLoadOrStore())
2980 return false;
2981
2982 const MachineOperand *BaseOp;
2983 TypeSize WidthN(0, false);
2984 if (!getMemOperandWithOffsetWidth(MI: LdSt, BaseOp, Offset, OffsetIsScalable,
2985 Width&: WidthN, TRI))
2986 return false;
  // The maximum vscale is 16 under AArch64; return the maximal extent for the
  // vector.
2989 Width = LocationSize::precise(Value: WidthN);
2990 BaseOps.push_back(Elt: BaseOp);
2991 return true;
2992}
2993
2994std::optional<ExtAddrMode>
2995AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2996 const TargetRegisterInfo *TRI) const {
2997 const MachineOperand *Base; // Filled with the base operand of MI.
2998 int64_t Offset; // Filled with the offset of MI.
2999 bool OffsetIsScalable;
3000 if (!getMemOperandWithOffset(MI: MemI, BaseOp&: Base, Offset, OffsetIsScalable, TRI))
3001 return std::nullopt;
3002
3003 if (!Base->isReg())
3004 return std::nullopt;
3005 ExtAddrMode AM;
3006 AM.BaseReg = Base->getReg();
3007 AM.Displacement = Offset;
3008 AM.ScaledReg = 0;
3009 AM.Scale = 0;
3010 return AM;
3011}
3012
3013bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
3014 Register Reg,
3015 const MachineInstr &AddrI,
3016 ExtAddrMode &AM) const {
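  // A minimal sketch of the kind of folding this enables (illustrative
  // register names):
  //   add x8, x0, #16
  //   ldr x9, [x8]
  // can be folded to
  //   ldr x9, [x0, #16]
  // provided the resulting addressing mode is legal and profitable.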
3017 // Filter out instructions into which we cannot fold.
3018 unsigned NumBytes;
3019 int64_t OffsetScale = 1;
3020 switch (MemI.getOpcode()) {
3021 default:
3022 return false;
3023
3024 case AArch64::LDURQi:
3025 case AArch64::STURQi:
3026 NumBytes = 16;
3027 break;
3028
3029 case AArch64::LDURDi:
3030 case AArch64::STURDi:
3031 case AArch64::LDURXi:
3032 case AArch64::STURXi:
3033 NumBytes = 8;
3034 break;
3035
3036 case AArch64::LDURWi:
3037 case AArch64::LDURSWi:
3038 case AArch64::STURWi:
3039 NumBytes = 4;
3040 break;
3041
3042 case AArch64::LDURHi:
3043 case AArch64::STURHi:
3044 case AArch64::LDURHHi:
3045 case AArch64::STURHHi:
3046 case AArch64::LDURSHXi:
3047 case AArch64::LDURSHWi:
3048 NumBytes = 2;
3049 break;
3050
3051 case AArch64::LDRBroX:
3052 case AArch64::LDRBBroX:
3053 case AArch64::LDRSBXroX:
3054 case AArch64::LDRSBWroX:
3055 case AArch64::STRBroX:
3056 case AArch64::STRBBroX:
3057 case AArch64::LDURBi:
3058 case AArch64::LDURBBi:
3059 case AArch64::LDURSBXi:
3060 case AArch64::LDURSBWi:
3061 case AArch64::STURBi:
3062 case AArch64::STURBBi:
3063 case AArch64::LDRBui:
3064 case AArch64::LDRBBui:
3065 case AArch64::LDRSBXui:
3066 case AArch64::LDRSBWui:
3067 case AArch64::STRBui:
3068 case AArch64::STRBBui:
3069 NumBytes = 1;
3070 break;
3071
3072 case AArch64::LDRQroX:
3073 case AArch64::STRQroX:
3074 case AArch64::LDRQui:
3075 case AArch64::STRQui:
3076 NumBytes = 16;
3077 OffsetScale = 16;
3078 break;
3079
3080 case AArch64::LDRDroX:
3081 case AArch64::STRDroX:
3082 case AArch64::LDRXroX:
3083 case AArch64::STRXroX:
3084 case AArch64::LDRDui:
3085 case AArch64::STRDui:
3086 case AArch64::LDRXui:
3087 case AArch64::STRXui:
3088 NumBytes = 8;
3089 OffsetScale = 8;
3090 break;
3091
3092 case AArch64::LDRWroX:
3093 case AArch64::LDRSWroX:
3094 case AArch64::STRWroX:
3095 case AArch64::LDRWui:
3096 case AArch64::LDRSWui:
3097 case AArch64::STRWui:
3098 NumBytes = 4;
3099 OffsetScale = 4;
3100 break;
3101
3102 case AArch64::LDRHroX:
3103 case AArch64::STRHroX:
3104 case AArch64::LDRHHroX:
3105 case AArch64::STRHHroX:
3106 case AArch64::LDRSHXroX:
3107 case AArch64::LDRSHWroX:
3108 case AArch64::LDRHui:
3109 case AArch64::STRHui:
3110 case AArch64::LDRHHui:
3111 case AArch64::STRHHui:
3112 case AArch64::LDRSHXui:
3113 case AArch64::LDRSHWui:
3114 NumBytes = 2;
3115 OffsetScale = 2;
3116 break;
3117 }
3118
3119 // Check the fold operand is not the loaded/stored value.
3120 const MachineOperand &BaseRegOp = MemI.getOperand(i: 0);
3121 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3122 return false;
3123
3124 // Handle memory instructions with a [Reg, Reg] addressing mode.
3125 if (MemI.getOperand(i: 2).isReg()) {
3126 // Bail if the addressing mode already includes extension of the offset
3127 // register.
3128 if (MemI.getOperand(i: 3).getImm())
3129 return false;
3130
3131 // Check if we actually have a scaled offset.
3132 if (MemI.getOperand(i: 4).getImm() == 0)
3133 OffsetScale = 1;
3134
    // If the address instruction is folded into the base register, then the
    // addressing mode must not have a scale; in that case we can swap the
    // base and the scaled registers.
3138 if (MemI.getOperand(i: 1).getReg() == Reg && OffsetScale != 1)
3139 return false;
3140
3141 switch (AddrI.getOpcode()) {
3142 default:
3143 return false;
3144
3145 case AArch64::SBFMXri:
3146 // sxtw Xa, Wm
3147 // ldr Xd, [Xn, Xa, lsl #N]
3148 // ->
3149 // ldr Xd, [Xn, Wm, sxtw #N]
3150 if (AddrI.getOperand(i: 2).getImm() != 0 ||
3151 AddrI.getOperand(i: 3).getImm() != 31)
3152 return false;
3153
3154 AM.BaseReg = MemI.getOperand(i: 1).getReg();
3155 if (AM.BaseReg == Reg)
3156 AM.BaseReg = MemI.getOperand(i: 2).getReg();
3157 AM.ScaledReg = AddrI.getOperand(i: 1).getReg();
3158 AM.Scale = OffsetScale;
3159 AM.Displacement = 0;
3160 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
3161 return true;
3162
3163 case TargetOpcode::SUBREG_TO_REG: {
3164 // mov Wa, Wm
3165 // ldr Xd, [Xn, Xa, lsl #N]
3166 // ->
3167 // ldr Xd, [Xn, Wm, uxtw #N]
3168
3169 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
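      // That is, the expected MIR shape is (illustrative; register names and
      // classes assumed):
      //   %w:gpr32 = ORRWrs $wzr, %src, 0
      //   %x:gpr64 = SUBREG_TO_REG 0, %w, %subreg.sub_32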
3170 if (AddrI.getOperand(i: 1).getImm() != 0 ||
3171 AddrI.getOperand(i: 3).getImm() != AArch64::sub_32)
3172 return false;
3173
3174 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3175 Register OffsetReg = AddrI.getOperand(i: 2).getReg();
3176 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(RegNo: OffsetReg))
3177 return false;
3178
3179 const MachineInstr &DefMI = *MRI.getVRegDef(Reg: OffsetReg);
3180 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3181 DefMI.getOperand(i: 1).getReg() != AArch64::WZR ||
3182 DefMI.getOperand(i: 3).getImm() != 0)
3183 return false;
3184
3185 AM.BaseReg = MemI.getOperand(i: 1).getReg();
3186 if (AM.BaseReg == Reg)
3187 AM.BaseReg = MemI.getOperand(i: 2).getReg();
3188 AM.ScaledReg = DefMI.getOperand(i: 2).getReg();
3189 AM.Scale = OffsetScale;
3190 AM.Displacement = 0;
3191 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
3192 return true;
3193 }
3194 }
3195 }
3196
3197 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3198
3199 // Check we are not breaking a potential conversion to an LDP.
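  // LDP immediates are 7-bit signed values scaled by the access size, so the
  // pairable ranges below follow from -64 * size .. 63 * size (e.g. for
  // 8-byte accesses: -512 .. 504).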
3200 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3201 int64_t NewOffset) -> bool {
3202 int64_t MinOffset, MaxOffset;
3203 switch (NumBytes) {
3204 default:
3205 return true;
3206 case 4:
3207 MinOffset = -256;
3208 MaxOffset = 252;
3209 break;
3210 case 8:
3211 MinOffset = -512;
3212 MaxOffset = 504;
3213 break;
3214 case 16:
3215 MinOffset = -1024;
3216 MaxOffset = 1008;
3217 break;
3218 }
3219 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3220 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3221 };
3222 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3223 int64_t OldOffset = MemI.getOperand(i: 2).getImm() * OffsetScale;
3224 int64_t NewOffset = OldOffset + Disp;
3225 if (!isLegalAddressingMode(NumBytes, Offset: NewOffset, /* Scale */ 0))
3226 return false;
3227 // If the old offset would fit into an LDP, but the new offset wouldn't,
3228 // bail out.
3229 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3230 return false;
3231 AM.BaseReg = AddrI.getOperand(i: 1).getReg();
3232 AM.ScaledReg = 0;
3233 AM.Scale = 0;
3234 AM.Displacement = NewOffset;
3235 AM.Form = ExtAddrMode::Formula::Basic;
3236 return true;
3237 };
3238
3239 auto canFoldAddRegIntoAddrMode =
3240 [&](int64_t Scale,
3241 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
3242 if (MemI.getOperand(i: 2).getImm() != 0)
3243 return false;
3244 if ((unsigned)Scale != Scale)
3245 return false;
3246 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3247 return false;
3248 AM.BaseReg = AddrI.getOperand(i: 1).getReg();
3249 AM.ScaledReg = AddrI.getOperand(i: 2).getReg();
3250 AM.Scale = Scale;
3251 AM.Displacement = 0;
3252 AM.Form = Form;
3253 return true;
3254 };
3255
3256 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3257 unsigned Opcode = MemI.getOpcode();
3258 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3259 Subtarget.isSTRQroSlow();
3260 };
3261
3262 int64_t Disp = 0;
3263 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3264 switch (AddrI.getOpcode()) {
3265 default:
3266 return false;
3267
3268 case AArch64::ADDXri:
3269 // add Xa, Xn, #N
3270 // ldr Xd, [Xa, #M]
3271 // ->
3272 // ldr Xd, [Xn, #N'+M]
3273 Disp = AddrI.getOperand(i: 2).getImm() << AddrI.getOperand(i: 3).getImm();
3274 return canFoldAddSubImmIntoAddrMode(Disp);
3275
3276 case AArch64::SUBXri:
3277 // sub Xa, Xn, #N
3278 // ldr Xd, [Xa, #M]
3279 // ->
3280 // ldr Xd, [Xn, #N'+M]
3281 Disp = AddrI.getOperand(i: 2).getImm() << AddrI.getOperand(i: 3).getImm();
3282 return canFoldAddSubImmIntoAddrMode(-Disp);
3283
3284 case AArch64::ADDXrs: {
3285 // add Xa, Xn, Xm, lsl #N
3286 // ldr Xd, [Xa]
3287 // ->
3288 // ldr Xd, [Xn, Xm, lsl #N]
3289
3290 // Don't fold the add if the result would be slower, unless optimising for
3291 // size.
3292 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(i: 3).getImm());
3293 if (AArch64_AM::getShiftType(Imm: Shift) != AArch64_AM::ShiftExtendType::LSL)
3294 return false;
3295 Shift = AArch64_AM::getShiftValue(Imm: Shift);
3296 if (!OptSize) {
3297 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3298 return false;
3299 if (avoidSlowSTRQ(MemI))
3300 return false;
3301 }
3302 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3303 }
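// For illustration: on subtargets where hasAddrLSLSlow14() is set, shift
// amounts other than #2/#3 (e.g. lsl #1 for a 2-byte access) are only
// folded when optimising for size.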
3304
3305 case AArch64::ADDXrr:
3306 // add Xa, Xn, Xm
3307 // ldr Xd, [Xa]
3308 // ->
3309 // ldr Xd, [Xn, Xm, lsl #0]
3310
3311 // Don't fold the add if the result would be slower, unless optimising for
3312 // size.
3313 if (!OptSize && avoidSlowSTRQ(MemI))
3314 return false;
3315 return canFoldAddRegIntoAddrMode(1);
3316
3317 case AArch64::ADDXrx:
3318 // add Xa, Xn, Wm, {s,u}xtw #N
3319 // ldr Xd, [Xa]
3320 // ->
3321 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3322
3323 // Don't fold the add if the result would be slower, unless optimising for
3324 // size.
3325 if (!OptSize && avoidSlowSTRQ(MemI))
3326 return false;
3327
3328 // Can fold only sign-/zero-extend of a word.
3329 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(i: 3).getImm());
3330 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3331 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3332 return false;
3333
3334 return canFoldAddRegIntoAddrMode(
3335 1ULL << AArch64_AM::getArithShiftValue(Imm),
3336 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3337 : ExtAddrMode::Formula::ZExtScaledReg);
3338 }
3339}
3340
3341// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3342// return the opcode of an instruction performing the same operation, but using
3343// the [Reg, Reg] addressing mode.
3344static unsigned regOffsetOpcode(unsigned Opcode) {
3345 switch (Opcode) {
3346 default:
3347 llvm_unreachable("Address folding not implemented for instruction");
3348
3349 case AArch64::LDURQi:
3350 case AArch64::LDRQui:
3351 return AArch64::LDRQroX;
3352 case AArch64::STURQi:
3353 case AArch64::STRQui:
3354 return AArch64::STRQroX;
3355 case AArch64::LDURDi:
3356 case AArch64::LDRDui:
3357 return AArch64::LDRDroX;
3358 case AArch64::STURDi:
3359 case AArch64::STRDui:
3360 return AArch64::STRDroX;
3361 case AArch64::LDURXi:
3362 case AArch64::LDRXui:
3363 return AArch64::LDRXroX;
3364 case AArch64::STURXi:
3365 case AArch64::STRXui:
3366 return AArch64::STRXroX;
3367 case AArch64::LDURWi:
3368 case AArch64::LDRWui:
3369 return AArch64::LDRWroX;
3370 case AArch64::LDURSWi:
3371 case AArch64::LDRSWui:
3372 return AArch64::LDRSWroX;
3373 case AArch64::STURWi:
3374 case AArch64::STRWui:
3375 return AArch64::STRWroX;
3376 case AArch64::LDURHi:
3377 case AArch64::LDRHui:
3378 return AArch64::LDRHroX;
3379 case AArch64::STURHi:
3380 case AArch64::STRHui:
3381 return AArch64::STRHroX;
3382 case AArch64::LDURHHi:
3383 case AArch64::LDRHHui:
3384 return AArch64::LDRHHroX;
3385 case AArch64::STURHHi:
3386 case AArch64::STRHHui:
3387 return AArch64::STRHHroX;
3388 case AArch64::LDURSHXi:
3389 case AArch64::LDRSHXui:
3390 return AArch64::LDRSHXroX;
3391 case AArch64::LDURSHWi:
3392 case AArch64::LDRSHWui:
3393 return AArch64::LDRSHWroX;
3394 case AArch64::LDURBi:
3395 case AArch64::LDRBui:
3396 return AArch64::LDRBroX;
3397 case AArch64::LDURBBi:
3398 case AArch64::LDRBBui:
3399 return AArch64::LDRBBroX;
3400 case AArch64::LDURSBXi:
3401 case AArch64::LDRSBXui:
3402 return AArch64::LDRSBXroX;
3403 case AArch64::LDURSBWi:
3404 case AArch64::LDRSBWui:
3405 return AArch64::LDRSBWroX;
3406 case AArch64::STURBi:
3407 case AArch64::STRBui:
3408 return AArch64::STRBroX;
3409 case AArch64::STURBBi:
3410 case AArch64::STRBBui:
3411 return AArch64::STRBBroX;
3412 }
3413}
3414
3415// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3416// the opcode of an instruction performing the same operation, but using the
3417// [Reg, #Imm] addressing mode with scaled offset.
3418 static unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3419 switch (Opcode) {
3420 default:
3421 llvm_unreachable("Address folding not implemented for instruction");
3422
3423 case AArch64::LDURQi:
3424 Scale = 16;
3425 return AArch64::LDRQui;
3426 case AArch64::STURQi:
3427 Scale = 16;
3428 return AArch64::STRQui;
3429 case AArch64::LDURDi:
3430 Scale = 8;
3431 return AArch64::LDRDui;
3432 case AArch64::STURDi:
3433 Scale = 8;
3434 return AArch64::STRDui;
3435 case AArch64::LDURXi:
3436 Scale = 8;
3437 return AArch64::LDRXui;
3438 case AArch64::STURXi:
3439 Scale = 8;
3440 return AArch64::STRXui;
3441 case AArch64::LDURWi:
3442 Scale = 4;
3443 return AArch64::LDRWui;
3444 case AArch64::LDURSWi:
3445 Scale = 4;
3446 return AArch64::LDRSWui;
3447 case AArch64::STURWi:
3448 Scale = 4;
3449 return AArch64::STRWui;
3450 case AArch64::LDURHi:
3451 Scale = 2;
3452 return AArch64::LDRHui;
3453 case AArch64::STURHi:
3454 Scale = 2;
3455 return AArch64::STRHui;
3456 case AArch64::LDURHHi:
3457 Scale = 2;
3458 return AArch64::LDRHHui;
3459 case AArch64::STURHHi:
3460 Scale = 2;
3461 return AArch64::STRHHui;
3462 case AArch64::LDURSHXi:
3463 Scale = 2;
3464 return AArch64::LDRSHXui;
3465 case AArch64::LDURSHWi:
3466 Scale = 2;
3467 return AArch64::LDRSHWui;
3468 case AArch64::LDURBi:
3469 Scale = 1;
3470 return AArch64::LDRBui;
3471 case AArch64::LDURBBi:
3472 Scale = 1;
3473 return AArch64::LDRBBui;
3474 case AArch64::LDURSBXi:
3475 Scale = 1;
3476 return AArch64::LDRSBXui;
3477 case AArch64::LDURSBWi:
3478 Scale = 1;
3479 return AArch64::LDRSBWui;
3480 case AArch64::STURBi:
3481 Scale = 1;
3482 return AArch64::STRBui;
3483 case AArch64::STURBBi:
3484 Scale = 1;
3485 return AArch64::STRBBui;
3486 case AArch64::LDRQui:
3487 case AArch64::STRQui:
3488 Scale = 16;
3489 return Opcode;
3490 case AArch64::LDRDui:
3491 case AArch64::STRDui:
3492 case AArch64::LDRXui:
3493 case AArch64::STRXui:
3494 Scale = 8;
3495 return Opcode;
3496 case AArch64::LDRWui:
3497 case AArch64::LDRSWui:
3498 case AArch64::STRWui:
3499 Scale = 4;
3500 return Opcode;
3501 case AArch64::LDRHui:
3502 case AArch64::STRHui:
3503 case AArch64::LDRHHui:
3504 case AArch64::STRHHui:
3505 case AArch64::LDRSHXui:
3506 case AArch64::LDRSHWui:
3507 Scale = 2;
3508 return Opcode;
3509 case AArch64::LDRBui:
3510 case AArch64::LDRBBui:
3511 case AArch64::LDRSBXui:
3512 case AArch64::LDRSBWui:
3513 case AArch64::STRBui:
3514 case AArch64::STRBBui:
3515 Scale = 1;
3516 return Opcode;
3517 }
3518}
3519
3520// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3521// the opcode of an instruction performing the same operation, but using the
3522// [Reg, #Imm] addressing mode with unscaled offset.
3523 static unsigned unscaledOffsetOpcode(unsigned Opcode) {
3524 switch (Opcode) {
3525 default:
3526 llvm_unreachable("Address folding not implemented for instruction");
3527
3528 case AArch64::LDURQi:
3529 case AArch64::STURQi:
3530 case AArch64::LDURDi:
3531 case AArch64::STURDi:
3532 case AArch64::LDURXi:
3533 case AArch64::STURXi:
3534 case AArch64::LDURWi:
3535 case AArch64::LDURSWi:
3536 case AArch64::STURWi:
3537 case AArch64::LDURHi:
3538 case AArch64::STURHi:
3539 case AArch64::LDURHHi:
3540 case AArch64::STURHHi:
3541 case AArch64::LDURSHXi:
3542 case AArch64::LDURSHWi:
3543 case AArch64::LDURBi:
3544 case AArch64::STURBi:
3545 case AArch64::LDURBBi:
3546 case AArch64::STURBBi:
3547 case AArch64::LDURSBWi:
3548 case AArch64::LDURSBXi:
3549 return Opcode;
3550 case AArch64::LDRQui:
3551 return AArch64::LDURQi;
3552 case AArch64::STRQui:
3553 return AArch64::STURQi;
3554 case AArch64::LDRDui:
3555 return AArch64::LDURDi;
3556 case AArch64::STRDui:
3557 return AArch64::STURDi;
3558 case AArch64::LDRXui:
3559 return AArch64::LDURXi;
3560 case AArch64::STRXui:
3561 return AArch64::STURXi;
3562 case AArch64::LDRWui:
3563 return AArch64::LDURWi;
3564 case AArch64::LDRSWui:
3565 return AArch64::LDURSWi;
3566 case AArch64::STRWui:
3567 return AArch64::STURWi;
3568 case AArch64::LDRHui:
3569 return AArch64::LDURHi;
3570 case AArch64::STRHui:
3571 return AArch64::STURHi;
3572 case AArch64::LDRHHui:
3573 return AArch64::LDURHHi;
3574 case AArch64::STRHHui:
3575 return AArch64::STURHHi;
3576 case AArch64::LDRSHXui:
3577 return AArch64::LDURSHXi;
3578 case AArch64::LDRSHWui:
3579 return AArch64::LDURSHWi;
3580 case AArch64::LDRBBui:
3581 return AArch64::LDURBBi;
3582 case AArch64::LDRBui:
3583 return AArch64::LDURBi;
3584 case AArch64::STRBBui:
3585 return AArch64::STURBBi;
3586 case AArch64::STRBui:
3587 return AArch64::STURBi;
3588 case AArch64::LDRSBWui:
3589 return AArch64::LDURSBWi;
3590 case AArch64::LDRSBXui:
3591 return AArch64::LDURSBXi;
3592 }
3593}
3594
3595// Given the opcode of a memory load/store instruction, return the opcode of an
3596// instruction performing the same operation, but using
3597// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3598// offset register.
3599static unsigned offsetExtendOpcode(unsigned Opcode) {
3600 switch (Opcode) {
3601 default:
3602 llvm_unreachable("Address folding not implemented for instruction");
3603
3604 case AArch64::LDRQroX:
3605 case AArch64::LDURQi:
3606 case AArch64::LDRQui:
3607 return AArch64::LDRQroW;
3608 case AArch64::STRQroX:
3609 case AArch64::STURQi:
3610 case AArch64::STRQui:
3611 return AArch64::STRQroW;
3612 case AArch64::LDRDroX:
3613 case AArch64::LDURDi:
3614 case AArch64::LDRDui:
3615 return AArch64::LDRDroW;
3616 case AArch64::STRDroX:
3617 case AArch64::STURDi:
3618 case AArch64::STRDui:
3619 return AArch64::STRDroW;
3620 case AArch64::LDRXroX:
3621 case AArch64::LDURXi:
3622 case AArch64::LDRXui:
3623 return AArch64::LDRXroW;
3624 case AArch64::STRXroX:
3625 case AArch64::STURXi:
3626 case AArch64::STRXui:
3627 return AArch64::STRXroW;
3628 case AArch64::LDRWroX:
3629 case AArch64::LDURWi:
3630 case AArch64::LDRWui:
3631 return AArch64::LDRWroW;
3632 case AArch64::LDRSWroX:
3633 case AArch64::LDURSWi:
3634 case AArch64::LDRSWui:
3635 return AArch64::LDRSWroW;
3636 case AArch64::STRWroX:
3637 case AArch64::STURWi:
3638 case AArch64::STRWui:
3639 return AArch64::STRWroW;
3640 case AArch64::LDRHroX:
3641 case AArch64::LDURHi:
3642 case AArch64::LDRHui:
3643 return AArch64::LDRHroW;
3644 case AArch64::STRHroX:
3645 case AArch64::STURHi:
3646 case AArch64::STRHui:
3647 return AArch64::STRHroW;
3648 case AArch64::LDRHHroX:
3649 case AArch64::LDURHHi:
3650 case AArch64::LDRHHui:
3651 return AArch64::LDRHHroW;
3652 case AArch64::STRHHroX:
3653 case AArch64::STURHHi:
3654 case AArch64::STRHHui:
3655 return AArch64::STRHHroW;
3656 case AArch64::LDRSHXroX:
3657 case AArch64::LDURSHXi:
3658 case AArch64::LDRSHXui:
3659 return AArch64::LDRSHXroW;
3660 case AArch64::LDRSHWroX:
3661 case AArch64::LDURSHWi:
3662 case AArch64::LDRSHWui:
3663 return AArch64::LDRSHWroW;
3664 case AArch64::LDRBroX:
3665 case AArch64::LDURBi:
3666 case AArch64::LDRBui:
3667 return AArch64::LDRBroW;
3668 case AArch64::LDRBBroX:
3669 case AArch64::LDURBBi:
3670 case AArch64::LDRBBui:
3671 return AArch64::LDRBBroW;
3672 case AArch64::LDRSBXroX:
3673 case AArch64::LDURSBXi:
3674 case AArch64::LDRSBXui:
3675 return AArch64::LDRSBXroW;
3676 case AArch64::LDRSBWroX:
3677 case AArch64::LDURSBWi:
3678 case AArch64::LDRSBWui:
3679 return AArch64::LDRSBWroW;
3680 case AArch64::STRBroX:
3681 case AArch64::STURBi:
3682 case AArch64::STRBui:
3683 return AArch64::STRBroW;
3684 case AArch64::STRBBroX:
3685 case AArch64::STURBBi:
3686 case AArch64::STRBBui:
3687 return AArch64::STRBBroW;
3688 }
3689}
3690
3691MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3692 const ExtAddrMode &AM) const {
3693
3694 const DebugLoc &DL = MemI.getDebugLoc();
3695 MachineBasicBlock &MBB = *MemI.getParent();
3696 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
3697
3698 if (AM.Form == ExtAddrMode::Formula::Basic) {
3699 if (AM.ScaledReg) {
3700 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3701 unsigned Opcode = regOffsetOpcode(Opcode: MemI.getOpcode());
3702 MRI.constrainRegClass(Reg: AM.BaseReg, RC: &AArch64::GPR64spRegClass);
3703 auto B = BuildMI(BB&: MBB, I&: MemI, MIMD: DL, MCID: get(Opcode))
3704 .addReg(RegNo: MemI.getOperand(i: 0).getReg(),
3705 flags: MemI.mayLoad() ? RegState::Define : 0)
3706 .addReg(RegNo: AM.BaseReg)
3707 .addReg(RegNo: AM.ScaledReg)
3708 .addImm(Val: 0)
3709 .addImm(Val: AM.Scale > 1)
3710 .setMemRefs(MemI.memoperands())
3711 .setMIFlags(MemI.getFlags());
3712 return B.getInstr();
3713 }
3714
3715 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3716 "Addressing mode not supported for folding");
3717
3718 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3719 unsigned Scale = 1;
3720 unsigned Opcode = MemI.getOpcode();
3721 if (isInt<9>(x: AM.Displacement))
3722 Opcode = unscaledOffsetOpcode(Opcode);
3723 else
3724 Opcode = scaledOffsetOpcode(Opcode, Scale);
3725
3726 auto B = BuildMI(BB&: MBB, I&: MemI, MIMD: DL, MCID: get(Opcode))
3727 .addReg(RegNo: MemI.getOperand(i: 0).getReg(),
3728 flags: MemI.mayLoad() ? RegState::Define : 0)
3729 .addReg(RegNo: AM.BaseReg)
3730 .addImm(Val: AM.Displacement / Scale)
3731 .setMemRefs(MemI.memoperands())
3732 .setMIFlags(MemI.getFlags());
3733 return B.getInstr();
3734 }
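// Illustration of the immediate path above, assuming an 8-byte access:
// a displacement of -8 fits the signed 9-bit range and selects the
// unscaled (LDUR/STUR) form, while a displacement of 1024 selects the
// scaled form and is emitted with immediate operand 1024 / 8 == 128.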
3735
3736 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3737 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3738 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3739 assert(AM.ScaledReg && !AM.Displacement &&
3740 "Address offset can be a register or an immediate, but not both");
3741 unsigned Opcode = offsetExtendOpcode(Opcode: MemI.getOpcode());
3742 MRI.constrainRegClass(Reg: AM.BaseReg, RC: &AArch64::GPR64spRegClass);
3743 // Make sure the offset register is in the correct register class.
3744 Register OffsetReg = AM.ScaledReg;
3745 const TargetRegisterClass *RC = MRI.getRegClass(Reg: OffsetReg);
3746 if (RC->hasSuperClassEq(RC: &AArch64::GPR64RegClass)) {
3747 OffsetReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3748 BuildMI(BB&: MBB, I&: MemI, MIMD: DL, MCID: get(Opcode: TargetOpcode::COPY), DestReg: OffsetReg)
3749 .addReg(RegNo: AM.ScaledReg, flags: 0, SubReg: AArch64::sub_32);
3750 }
3751 auto B = BuildMI(BB&: MBB, I&: MemI, MIMD: DL, MCID: get(Opcode))
3752 .addReg(RegNo: MemI.getOperand(i: 0).getReg(),
3753 flags: MemI.mayLoad() ? RegState::Define : 0)
3754 .addReg(RegNo: AM.BaseReg)
3755 .addReg(RegNo: OffsetReg)
3756 .addImm(Val: AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3757 .addImm(Val: AM.Scale != 1)
3758 .setMemRefs(MemI.memoperands())
3759 .setMIFlags(MemI.getFlags());
3760
3761 return B.getInstr();
3762 }
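// Note on the extended-register path above: a 64-bit ScaledReg is first
// copied into a fresh GPR32 virtual register through a sub_32 COPY so the
// {s,u}xtw operand has the expected 32-bit class; the trailing immediates
// encode the sign-extension flag and whether the offset is shifted
// (AM.Scale != 1).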
3763
3764 llvm_unreachable(
3765 "Function must not be called with an addressing mode it can't handle");
3766}
3767
3768 /// Return true if the opcode is a post-index ld/st instruction, which really
3769 /// loads from or stores to base+0.
3770static bool isPostIndexLdStOpcode(unsigned Opcode) {
3771 switch (Opcode) {
3772 default:
3773 return false;
3774 case AArch64::LD1Fourv16b_POST:
3775 case AArch64::LD1Fourv1d_POST:
3776 case AArch64::LD1Fourv2d_POST:
3777 case AArch64::LD1Fourv2s_POST:
3778 case AArch64::LD1Fourv4h_POST:
3779 case AArch64::LD1Fourv4s_POST:
3780 case AArch64::LD1Fourv8b_POST:
3781 case AArch64::LD1Fourv8h_POST:
3782 case AArch64::LD1Onev16b_POST:
3783 case AArch64::LD1Onev1d_POST:
3784 case AArch64::LD1Onev2d_POST:
3785 case AArch64::LD1Onev2s_POST:
3786 case AArch64::LD1Onev4h_POST:
3787 case AArch64::LD1Onev4s_POST:
3788 case AArch64::LD1Onev8b_POST:
3789 case AArch64::LD1Onev8h_POST:
3790 case AArch64::LD1Rv16b_POST:
3791 case AArch64::LD1Rv1d_POST:
3792 case AArch64::LD1Rv2d_POST:
3793 case AArch64::LD1Rv2s_POST:
3794 case AArch64::LD1Rv4h_POST:
3795 case AArch64::LD1Rv4s_POST:
3796 case AArch64::LD1Rv8b_POST:
3797 case AArch64::LD1Rv8h_POST:
3798 case AArch64::LD1Threev16b_POST:
3799 case AArch64::LD1Threev1d_POST:
3800 case AArch64::LD1Threev2d_POST:
3801 case AArch64::LD1Threev2s_POST:
3802 case AArch64::LD1Threev4h_POST:
3803 case AArch64::LD1Threev4s_POST:
3804 case AArch64::LD1Threev8b_POST:
3805 case AArch64::LD1Threev8h_POST:
3806 case AArch64::LD1Twov16b_POST:
3807 case AArch64::LD1Twov1d_POST:
3808 case AArch64::LD1Twov2d_POST:
3809 case AArch64::LD1Twov2s_POST:
3810 case AArch64::LD1Twov4h_POST:
3811 case AArch64::LD1Twov4s_POST:
3812 case AArch64::LD1Twov8b_POST:
3813 case AArch64::LD1Twov8h_POST:
3814 case AArch64::LD1i16_POST:
3815 case AArch64::LD1i32_POST:
3816 case AArch64::LD1i64_POST:
3817 case AArch64::LD1i8_POST:
3818 case AArch64::LD2Rv16b_POST:
3819 case AArch64::LD2Rv1d_POST:
3820 case AArch64::LD2Rv2d_POST:
3821 case AArch64::LD2Rv2s_POST:
3822 case AArch64::LD2Rv4h_POST:
3823 case AArch64::LD2Rv4s_POST:
3824 case AArch64::LD2Rv8b_POST:
3825 case AArch64::LD2Rv8h_POST:
3826 case AArch64::LD2Twov16b_POST:
3827 case AArch64::LD2Twov2d_POST:
3828 case AArch64::LD2Twov2s_POST:
3829 case AArch64::LD2Twov4h_POST:
3830 case AArch64::LD2Twov4s_POST:
3831 case AArch64::LD2Twov8b_POST:
3832 case AArch64::LD2Twov8h_POST:
3833 case AArch64::LD2i16_POST:
3834 case AArch64::LD2i32_POST:
3835 case AArch64::LD2i64_POST:
3836 case AArch64::LD2i8_POST:
3837 case AArch64::LD3Rv16b_POST:
3838 case AArch64::LD3Rv1d_POST:
3839 case AArch64::LD3Rv2d_POST:
3840 case AArch64::LD3Rv2s_POST:
3841 case AArch64::LD3Rv4h_POST:
3842 case AArch64::LD3Rv4s_POST:
3843 case AArch64::LD3Rv8b_POST:
3844 case AArch64::LD3Rv8h_POST:
3845 case AArch64::LD3Threev16b_POST:
3846 case AArch64::LD3Threev2d_POST:
3847 case AArch64::LD3Threev2s_POST:
3848 case AArch64::LD3Threev4h_POST:
3849 case AArch64::LD3Threev4s_POST:
3850 case AArch64::LD3Threev8b_POST:
3851 case AArch64::LD3Threev8h_POST:
3852 case AArch64::LD3i16_POST:
3853 case AArch64::LD3i32_POST:
3854 case AArch64::LD3i64_POST:
3855 case AArch64::LD3i8_POST:
3856 case AArch64::LD4Fourv16b_POST:
3857 case AArch64::LD4Fourv2d_POST:
3858 case AArch64::LD4Fourv2s_POST:
3859 case AArch64::LD4Fourv4h_POST:
3860 case AArch64::LD4Fourv4s_POST:
3861 case AArch64::LD4Fourv8b_POST:
3862 case AArch64::LD4Fourv8h_POST:
3863 case AArch64::LD4Rv16b_POST:
3864 case AArch64::LD4Rv1d_POST:
3865 case AArch64::LD4Rv2d_POST:
3866 case AArch64::LD4Rv2s_POST:
3867 case AArch64::LD4Rv4h_POST:
3868 case AArch64::LD4Rv4s_POST:
3869 case AArch64::LD4Rv8b_POST:
3870 case AArch64::LD4Rv8h_POST:
3871 case AArch64::LD4i16_POST:
3872 case AArch64::LD4i32_POST:
3873 case AArch64::LD4i64_POST:
3874 case AArch64::LD4i8_POST:
3875 case AArch64::LDAPRWpost:
3876 case AArch64::LDAPRXpost:
3877 case AArch64::LDIAPPWpost:
3878 case AArch64::LDIAPPXpost:
3879 case AArch64::LDPDpost:
3880 case AArch64::LDPQpost:
3881 case AArch64::LDPSWpost:
3882 case AArch64::LDPSpost:
3883 case AArch64::LDPWpost:
3884 case AArch64::LDPXpost:
3885 case AArch64::LDRBBpost:
3886 case AArch64::LDRBpost:
3887 case AArch64::LDRDpost:
3888 case AArch64::LDRHHpost:
3889 case AArch64::LDRHpost:
3890 case AArch64::LDRQpost:
3891 case AArch64::LDRSBWpost:
3892 case AArch64::LDRSBXpost:
3893 case AArch64::LDRSHWpost:
3894 case AArch64::LDRSHXpost:
3895 case AArch64::LDRSWpost:
3896 case AArch64::LDRSpost:
3897 case AArch64::LDRWpost:
3898 case AArch64::LDRXpost:
3899 case AArch64::ST1Fourv16b_POST:
3900 case AArch64::ST1Fourv1d_POST:
3901 case AArch64::ST1Fourv2d_POST:
3902 case AArch64::ST1Fourv2s_POST:
3903 case AArch64::ST1Fourv4h_POST:
3904 case AArch64::ST1Fourv4s_POST:
3905 case AArch64::ST1Fourv8b_POST:
3906 case AArch64::ST1Fourv8h_POST:
3907 case AArch64::ST1Onev16b_POST:
3908 case AArch64::ST1Onev1d_POST:
3909 case AArch64::ST1Onev2d_POST:
3910 case AArch64::ST1Onev2s_POST:
3911 case AArch64::ST1Onev4h_POST:
3912 case AArch64::ST1Onev4s_POST:
3913 case AArch64::ST1Onev8b_POST:
3914 case AArch64::ST1Onev8h_POST:
3915 case AArch64::ST1Threev16b_POST:
3916 case AArch64::ST1Threev1d_POST:
3917 case AArch64::ST1Threev2d_POST:
3918 case AArch64::ST1Threev2s_POST:
3919 case AArch64::ST1Threev4h_POST:
3920 case AArch64::ST1Threev4s_POST:
3921 case AArch64::ST1Threev8b_POST:
3922 case AArch64::ST1Threev8h_POST:
3923 case AArch64::ST1Twov16b_POST:
3924 case AArch64::ST1Twov1d_POST:
3925 case AArch64::ST1Twov2d_POST:
3926 case AArch64::ST1Twov2s_POST:
3927 case AArch64::ST1Twov4h_POST:
3928 case AArch64::ST1Twov4s_POST:
3929 case AArch64::ST1Twov8b_POST:
3930 case AArch64::ST1Twov8h_POST:
3931 case AArch64::ST1i16_POST:
3932 case AArch64::ST1i32_POST:
3933 case AArch64::ST1i64_POST:
3934 case AArch64::ST1i8_POST:
3935 case AArch64::ST2GPostIndex:
3936 case AArch64::ST2Twov16b_POST:
3937 case AArch64::ST2Twov2d_POST:
3938 case AArch64::ST2Twov2s_POST:
3939 case AArch64::ST2Twov4h_POST:
3940 case AArch64::ST2Twov4s_POST:
3941 case AArch64::ST2Twov8b_POST:
3942 case AArch64::ST2Twov8h_POST:
3943 case AArch64::ST2i16_POST:
3944 case AArch64::ST2i32_POST:
3945 case AArch64::ST2i64_POST:
3946 case AArch64::ST2i8_POST:
3947 case AArch64::ST3Threev16b_POST:
3948 case AArch64::ST3Threev2d_POST:
3949 case AArch64::ST3Threev2s_POST:
3950 case AArch64::ST3Threev4h_POST:
3951 case AArch64::ST3Threev4s_POST:
3952 case AArch64::ST3Threev8b_POST:
3953 case AArch64::ST3Threev8h_POST:
3954 case AArch64::ST3i16_POST:
3955 case AArch64::ST3i32_POST:
3956 case AArch64::ST3i64_POST:
3957 case AArch64::ST3i8_POST:
3958 case AArch64::ST4Fourv16b_POST:
3959 case AArch64::ST4Fourv2d_POST:
3960 case AArch64::ST4Fourv2s_POST:
3961 case AArch64::ST4Fourv4h_POST:
3962 case AArch64::ST4Fourv4s_POST:
3963 case AArch64::ST4Fourv8b_POST:
3964 case AArch64::ST4Fourv8h_POST:
3965 case AArch64::ST4i16_POST:
3966 case AArch64::ST4i32_POST:
3967 case AArch64::ST4i64_POST:
3968 case AArch64::ST4i8_POST:
3969 case AArch64::STGPostIndex:
3970 case AArch64::STGPpost:
3971 case AArch64::STPDpost:
3972 case AArch64::STPQpost:
3973 case AArch64::STPSpost:
3974 case AArch64::STPWpost:
3975 case AArch64::STPXpost:
3976 case AArch64::STRBBpost:
3977 case AArch64::STRBpost:
3978 case AArch64::STRDpost:
3979 case AArch64::STRHHpost:
3980 case AArch64::STRHpost:
3981 case AArch64::STRQpost:
3982 case AArch64::STRSpost:
3983 case AArch64::STRWpost:
3984 case AArch64::STRXpost:
3985 case AArch64::STZ2GPostIndex:
3986 case AArch64::STZGPostIndex:
3987 return true;
3988 }
3989}
3990
3991bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3992 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3993 bool &OffsetIsScalable, TypeSize &Width,
3994 const TargetRegisterInfo *TRI) const {
3995 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3996 // Handle only loads/stores with base register followed by immediate offset.
3997 if (LdSt.getNumExplicitOperands() == 3) {
3998 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3999 if ((!LdSt.getOperand(i: 1).isReg() && !LdSt.getOperand(i: 1).isFI()) ||
4000 !LdSt.getOperand(i: 2).isImm())
4001 return false;
4002 } else if (LdSt.getNumExplicitOperands() == 4) {
4003 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4004 if (!LdSt.getOperand(i: 1).isReg() ||
4005 (!LdSt.getOperand(i: 2).isReg() && !LdSt.getOperand(i: 2).isFI()) ||
4006 !LdSt.getOperand(i: 3).isImm())
4007 return false;
4008 } else
4009 return false;
4010
4011 // Get the scaling factor for the instruction and set the width for the
4012 // instruction.
4013 TypeSize Scale(0U, false);
4014 int64_t Dummy1, Dummy2;
4015
4016 // If this returns false, then it's an instruction we don't want to handle.
4017 if (!getMemOpInfo(Opcode: LdSt.getOpcode(), Scale, Width, MinOffset&: Dummy1, MaxOffset&: Dummy2))
4018 return false;
4019
4020 // Compute the offset. The offset is the immediate operand multiplied by the
4021 // scaling factor. Unscaled instructions have a scaling factor of 1. Post-index
4022 // instructions are a special case and always have an offset of 0.
4023 if (isPostIndexLdStOpcode(Opcode: LdSt.getOpcode())) {
4024 BaseOp = &LdSt.getOperand(i: 2);
4025 Offset = 0;
4026 } else if (LdSt.getNumExplicitOperands() == 3) {
4027 BaseOp = &LdSt.getOperand(i: 1);
4028 Offset = LdSt.getOperand(i: 2).getImm() * Scale.getKnownMinValue();
4029 } else {
4030 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
4031 BaseOp = &LdSt.getOperand(i: 2);
4032 Offset = LdSt.getOperand(i: 3).getImm() * Scale.getKnownMinValue();
4033 }
4034 OffsetIsScalable = Scale.isScalable();
4035
4036 return BaseOp->isReg() || BaseOp->isFI();
4037}
4038
4039MachineOperand &
4040AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
4041 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4042 MachineOperand &OfsOp = LdSt.getOperand(i: LdSt.getNumExplicitOperands() - 1);
4043 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4044 return OfsOp;
4045}
4046
4047bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4048 TypeSize &Width, int64_t &MinOffset,
4049 int64_t &MaxOffset) {
4050 switch (Opcode) {
4051 // Not a memory operation, or not an operation we want to handle.
4052 default:
4053 Scale = TypeSize::getFixed(ExactSize: 0);
4054 Width = TypeSize::getFixed(ExactSize: 0);
4055 MinOffset = MaxOffset = 0;
4056 return false;
4057 // LDR / STR
4058 case AArch64::LDRQui:
4059 case AArch64::STRQui:
4060 Scale = TypeSize::getFixed(ExactSize: 16);
4061 Width = TypeSize::getFixed(ExactSize: 16);
4062 MinOffset = 0;
4063 MaxOffset = 4095;
4064 break;
4065 case AArch64::LDRXui:
4066 case AArch64::LDRDui:
4067 case AArch64::STRXui:
4068 case AArch64::STRDui:
4069 case AArch64::PRFMui:
4070 Scale = TypeSize::getFixed(ExactSize: 8);
4071 Width = TypeSize::getFixed(ExactSize: 8);
4072 MinOffset = 0;
4073 MaxOffset = 4095;
4074 break;
4075 case AArch64::LDRWui:
4076 case AArch64::LDRSui:
4077 case AArch64::LDRSWui:
4078 case AArch64::STRWui:
4079 case AArch64::STRSui:
4080 Scale = TypeSize::getFixed(ExactSize: 4);
4081 Width = TypeSize::getFixed(ExactSize: 4);
4082 MinOffset = 0;
4083 MaxOffset = 4095;
4084 break;
4085 case AArch64::LDRHui:
4086 case AArch64::LDRHHui:
4087 case AArch64::LDRSHWui:
4088 case AArch64::LDRSHXui:
4089 case AArch64::STRHui:
4090 case AArch64::STRHHui:
4091 Scale = TypeSize::getFixed(ExactSize: 2);
4092 Width = TypeSize::getFixed(ExactSize: 2);
4093 MinOffset = 0;
4094 MaxOffset = 4095;
4095 break;
4096 case AArch64::LDRBui:
4097 case AArch64::LDRBBui:
4098 case AArch64::LDRSBWui:
4099 case AArch64::LDRSBXui:
4100 case AArch64::STRBui:
4101 case AArch64::STRBBui:
4102 Scale = TypeSize::getFixed(ExactSize: 1);
4103 Width = TypeSize::getFixed(ExactSize: 1);
4104 MinOffset = 0;
4105 MaxOffset = 4095;
4106 break;
4107 // post/pre inc
4108 case AArch64::STRQpre:
4109 case AArch64::LDRQpost:
4110 Scale = TypeSize::getFixed(ExactSize: 1);
4111 Width = TypeSize::getFixed(ExactSize: 16);
4112 MinOffset = -256;
4113 MaxOffset = 255;
4114 break;
4115 case AArch64::LDRDpost:
4116 case AArch64::LDRDpre:
4117 case AArch64::LDRXpost:
4118 case AArch64::LDRXpre:
4119 case AArch64::STRDpost:
4120 case AArch64::STRDpre:
4121 case AArch64::STRXpost:
4122 case AArch64::STRXpre:
4123 Scale = TypeSize::getFixed(ExactSize: 1);
4124 Width = TypeSize::getFixed(ExactSize: 8);
4125 MinOffset = -256;
4126 MaxOffset = 255;
4127 break;
4128 case AArch64::STRWpost:
4129 case AArch64::STRWpre:
4130 case AArch64::LDRWpost:
4131 case AArch64::LDRWpre:
4132 case AArch64::STRSpost:
4133 case AArch64::STRSpre:
4134 case AArch64::LDRSpost:
4135 case AArch64::LDRSpre:
4136 Scale = TypeSize::getFixed(ExactSize: 1);
4137 Width = TypeSize::getFixed(ExactSize: 4);
4138 MinOffset = -256;
4139 MaxOffset = 255;
4140 break;
4141 case AArch64::LDRHpost:
4142 case AArch64::LDRHpre:
4143 case AArch64::STRHpost:
4144 case AArch64::STRHpre:
4145 case AArch64::LDRHHpost:
4146 case AArch64::LDRHHpre:
4147 case AArch64::STRHHpost:
4148 case AArch64::STRHHpre:
4149 Scale = TypeSize::getFixed(ExactSize: 1);
4150 Width = TypeSize::getFixed(ExactSize: 2);
4151 MinOffset = -256;
4152 MaxOffset = 255;
4153 break;
4154 case AArch64::LDRBpost:
4155 case AArch64::LDRBpre:
4156 case AArch64::STRBpost:
4157 case AArch64::STRBpre:
4158 case AArch64::LDRBBpost:
4159 case AArch64::LDRBBpre:
4160 case AArch64::STRBBpost:
4161 case AArch64::STRBBpre:
4162 Scale = TypeSize::getFixed(ExactSize: 1);
4163 Width = TypeSize::getFixed(ExactSize: 1);
4164 MinOffset = -256;
4165 MaxOffset = 255;
4166 break;
4167 // Unscaled
4168 case AArch64::LDURQi:
4169 case AArch64::STURQi:
4170 Scale = TypeSize::getFixed(ExactSize: 1);
4171 Width = TypeSize::getFixed(ExactSize: 16);
4172 MinOffset = -256;
4173 MaxOffset = 255;
4174 break;
4175 case AArch64::LDURXi:
4176 case AArch64::LDURDi:
4177 case AArch64::LDAPURXi:
4178 case AArch64::STURXi:
4179 case AArch64::STURDi:
4180 case AArch64::STLURXi:
4181 case AArch64::PRFUMi:
4182 Scale = TypeSize::getFixed(ExactSize: 1);
4183 Width = TypeSize::getFixed(ExactSize: 8);
4184 MinOffset = -256;
4185 MaxOffset = 255;
4186 break;
4187 case AArch64::LDURWi:
4188 case AArch64::LDURSi:
4189 case AArch64::LDURSWi:
4190 case AArch64::LDAPURi:
4191 case AArch64::LDAPURSWi:
4192 case AArch64::STURWi:
4193 case AArch64::STURSi:
4194 case AArch64::STLURWi:
4195 Scale = TypeSize::getFixed(ExactSize: 1);
4196 Width = TypeSize::getFixed(ExactSize: 4);
4197 MinOffset = -256;
4198 MaxOffset = 255;
4199 break;
4200 case AArch64::LDURHi:
4201 case AArch64::LDURHHi:
4202 case AArch64::LDURSHXi:
4203 case AArch64::LDURSHWi:
4204 case AArch64::LDAPURHi:
4205 case AArch64::LDAPURSHWi:
4206 case AArch64::LDAPURSHXi:
4207 case AArch64::STURHi:
4208 case AArch64::STURHHi:
4209 case AArch64::STLURHi:
4210 Scale = TypeSize::getFixed(ExactSize: 1);
4211 Width = TypeSize::getFixed(ExactSize: 2);
4212 MinOffset = -256;
4213 MaxOffset = 255;
4214 break;
4215 case AArch64::LDURBi:
4216 case AArch64::LDURBBi:
4217 case AArch64::LDURSBXi:
4218 case AArch64::LDURSBWi:
4219 case AArch64::LDAPURBi:
4220 case AArch64::LDAPURSBWi:
4221 case AArch64::LDAPURSBXi:
4222 case AArch64::STURBi:
4223 case AArch64::STURBBi:
4224 case AArch64::STLURBi:
4225 Scale = TypeSize::getFixed(ExactSize: 1);
4226 Width = TypeSize::getFixed(ExactSize: 1);
4227 MinOffset = -256;
4228 MaxOffset = 255;
4229 break;
4230 // LDP / STP (including pre/post inc)
4231 case AArch64::LDPQi:
4232 case AArch64::LDNPQi:
4233 case AArch64::STPQi:
4234 case AArch64::STNPQi:
4235 case AArch64::LDPQpost:
4236 case AArch64::LDPQpre:
4237 case AArch64::STPQpost:
4238 case AArch64::STPQpre:
4239 Scale = TypeSize::getFixed(ExactSize: 16);
4240 Width = TypeSize::getFixed(ExactSize: 16 * 2);
4241 MinOffset = -64;
4242 MaxOffset = 63;
4243 break;
4244 case AArch64::LDPXi:
4245 case AArch64::LDPDi:
4246 case AArch64::LDNPXi:
4247 case AArch64::LDNPDi:
4248 case AArch64::STPXi:
4249 case AArch64::STPDi:
4250 case AArch64::STNPXi:
4251 case AArch64::STNPDi:
4252 case AArch64::LDPDpost:
4253 case AArch64::LDPDpre:
4254 case AArch64::LDPXpost:
4255 case AArch64::LDPXpre:
4256 case AArch64::STPDpost:
4257 case AArch64::STPDpre:
4258 case AArch64::STPXpost:
4259 case AArch64::STPXpre:
4260 Scale = TypeSize::getFixed(ExactSize: 8);
4261 Width = TypeSize::getFixed(ExactSize: 8 * 2);
4262 MinOffset = -64;
4263 MaxOffset = 63;
4264 break;
4265 case AArch64::LDPWi:
4266 case AArch64::LDPSi:
4267 case AArch64::LDNPWi:
4268 case AArch64::LDNPSi:
4269 case AArch64::STPWi:
4270 case AArch64::STPSi:
4271 case AArch64::STNPWi:
4272 case AArch64::STNPSi:
4273 case AArch64::LDPSpost:
4274 case AArch64::LDPSpre:
4275 case AArch64::LDPWpost:
4276 case AArch64::LDPWpre:
4277 case AArch64::STPSpost:
4278 case AArch64::STPSpre:
4279 case AArch64::STPWpost:
4280 case AArch64::STPWpre:
4281 Scale = TypeSize::getFixed(ExactSize: 4);
4282 Width = TypeSize::getFixed(ExactSize: 4 * 2);
4283 MinOffset = -64;
4284 MaxOffset = 63;
4285 break;
4286 case AArch64::StoreSwiftAsyncContext:
4287 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4288 Scale = TypeSize::getFixed(ExactSize: 1);
4289 Width = TypeSize::getFixed(ExactSize: 8);
4290 MinOffset = 0;
4291 MaxOffset = 4095;
4292 break;
4293 case AArch64::ADDG:
4294 Scale = TypeSize::getFixed(ExactSize: 16);
4295 Width = TypeSize::getFixed(ExactSize: 0);
4296 MinOffset = 0;
4297 MaxOffset = 63;
4298 break;
4299 case AArch64::TAGPstack:
4300 Scale = TypeSize::getFixed(ExactSize: 16);
4301 Width = TypeSize::getFixed(ExactSize: 0);
4302 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4303 // of 63 (not 64!).
4304 MinOffset = -63;
4305 MaxOffset = 63;
4306 break;
4307 case AArch64::LDG:
4308 case AArch64::STGi:
4309 case AArch64::STGPreIndex:
4310 case AArch64::STGPostIndex:
4311 case AArch64::STZGi:
4312 case AArch64::STZGPreIndex:
4313 case AArch64::STZGPostIndex:
4314 Scale = TypeSize::getFixed(ExactSize: 16);
4315 Width = TypeSize::getFixed(ExactSize: 16);
4316 MinOffset = -256;
4317 MaxOffset = 255;
4318 break;
4319 // SVE
4320 case AArch64::STR_ZZZZXI:
4321 case AArch64::LDR_ZZZZXI:
4322 Scale = TypeSize::getScalable(MinimumSize: 16);
4323 Width = TypeSize::getScalable(MinimumSize: 16 * 4);
4324 MinOffset = -256;
4325 MaxOffset = 252;
4326 break;
4327 case AArch64::STR_ZZZXI:
4328 case AArch64::LDR_ZZZXI:
4329 Scale = TypeSize::getScalable(MinimumSize: 16);
4330 Width = TypeSize::getScalable(MinimumSize: 16 * 3);
4331 MinOffset = -256;
4332 MaxOffset = 253;
4333 break;
4334 case AArch64::STR_ZZXI:
4335 case AArch64::LDR_ZZXI:
4336 Scale = TypeSize::getScalable(MinimumSize: 16);
4337 Width = TypeSize::getScalable(MinimumSize: 16 * 2);
4338 MinOffset = -256;
4339 MaxOffset = 254;
4340 break;
4341 case AArch64::LDR_PXI:
4342 case AArch64::STR_PXI:
4343 Scale = TypeSize::getScalable(MinimumSize: 2);
4344 Width = TypeSize::getScalable(MinimumSize: 2);
4345 MinOffset = -256;
4346 MaxOffset = 255;
4347 break;
4348 case AArch64::LDR_PPXI:
4349 case AArch64::STR_PPXI:
4350 Scale = TypeSize::getScalable(MinimumSize: 2);
4351 Width = TypeSize::getScalable(MinimumSize: 2 * 2);
4352 MinOffset = -256;
4353 MaxOffset = 254;
4354 break;
4355 case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
4356 case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
4357 case AArch64::LDR_ZXI:
4358 case AArch64::STR_ZXI:
4359 Scale = TypeSize::getScalable(MinimumSize: 16);
4360 Width = TypeSize::getScalable(MinimumSize: 16);
4361 MinOffset = -256;
4362 MaxOffset = 255;
4363 break;
4364 case AArch64::LD1B_IMM:
4365 case AArch64::LD1H_IMM:
4366 case AArch64::LD1W_IMM:
4367 case AArch64::LD1D_IMM:
4368 case AArch64::LDNT1B_ZRI:
4369 case AArch64::LDNT1H_ZRI:
4370 case AArch64::LDNT1W_ZRI:
4371 case AArch64::LDNT1D_ZRI:
4372 case AArch64::ST1B_IMM:
4373 case AArch64::ST1H_IMM:
4374 case AArch64::ST1W_IMM:
4375 case AArch64::ST1D_IMM:
4376 case AArch64::STNT1B_ZRI:
4377 case AArch64::STNT1H_ZRI:
4378 case AArch64::STNT1W_ZRI:
4379 case AArch64::STNT1D_ZRI:
4380 case AArch64::LDNF1B_IMM:
4381 case AArch64::LDNF1H_IMM:
4382 case AArch64::LDNF1W_IMM:
4383 case AArch64::LDNF1D_IMM:
4384 // A full vector's worth of data
4385 // Width = mbytes * elements
4386 Scale = TypeSize::getScalable(MinimumSize: 16);
4387 Width = TypeSize::getScalable(MinimumSize: 16);
4388 MinOffset = -8;
4389 MaxOffset = 7;
4390 break;
4391 case AArch64::LD2B_IMM:
4392 case AArch64::LD2H_IMM:
4393 case AArch64::LD2W_IMM:
4394 case AArch64::LD2D_IMM:
4395 case AArch64::ST2B_IMM:
4396 case AArch64::ST2H_IMM:
4397 case AArch64::ST2W_IMM:
4398 case AArch64::ST2D_IMM:
4399 Scale = TypeSize::getScalable(MinimumSize: 32);
4400 Width = TypeSize::getScalable(MinimumSize: 16 * 2);
4401 MinOffset = -8;
4402 MaxOffset = 7;
4403 break;
4404 case AArch64::LD3B_IMM:
4405 case AArch64::LD3H_IMM:
4406 case AArch64::LD3W_IMM:
4407 case AArch64::LD3D_IMM:
4408 case AArch64::ST3B_IMM:
4409 case AArch64::ST3H_IMM:
4410 case AArch64::ST3W_IMM:
4411 case AArch64::ST3D_IMM:
4412 Scale = TypeSize::getScalable(MinimumSize: 48);
4413 Width = TypeSize::getScalable(MinimumSize: 16 * 3);
4414 MinOffset = -8;
4415 MaxOffset = 7;
4416 break;
4417 case AArch64::LD4B_IMM:
4418 case AArch64::LD4H_IMM:
4419 case AArch64::LD4W_IMM:
4420 case AArch64::LD4D_IMM:
4421 case AArch64::ST4B_IMM:
4422 case AArch64::ST4H_IMM:
4423 case AArch64::ST4W_IMM:
4424 case AArch64::ST4D_IMM:
4425 Scale = TypeSize::getScalable(MinimumSize: 64);
4426 Width = TypeSize::getScalable(MinimumSize: 16 * 4);
4427 MinOffset = -8;
4428 MaxOffset = 7;
4429 break;
4430 case AArch64::LD1B_H_IMM:
4431 case AArch64::LD1SB_H_IMM:
4432 case AArch64::LD1H_S_IMM:
4433 case AArch64::LD1SH_S_IMM:
4434 case AArch64::LD1W_D_IMM:
4435 case AArch64::LD1SW_D_IMM:
4436 case AArch64::ST1B_H_IMM:
4437 case AArch64::ST1H_S_IMM:
4438 case AArch64::ST1W_D_IMM:
4439 case AArch64::LDNF1B_H_IMM:
4440 case AArch64::LDNF1SB_H_IMM:
4441 case AArch64::LDNF1H_S_IMM:
4442 case AArch64::LDNF1SH_S_IMM:
4443 case AArch64::LDNF1W_D_IMM:
4444 case AArch64::LDNF1SW_D_IMM:
4445 // A half vector's worth of data
4446 // Width = mbytes * elements
4447 Scale = TypeSize::getScalable(MinimumSize: 8);
4448 Width = TypeSize::getScalable(MinimumSize: 8);
4449 MinOffset = -8;
4450 MaxOffset = 7;
4451 break;
4452 case AArch64::LD1B_S_IMM:
4453 case AArch64::LD1SB_S_IMM:
4454 case AArch64::LD1H_D_IMM:
4455 case AArch64::LD1SH_D_IMM:
4456 case AArch64::ST1B_S_IMM:
4457 case AArch64::ST1H_D_IMM:
4458 case AArch64::LDNF1B_S_IMM:
4459 case AArch64::LDNF1SB_S_IMM:
4460 case AArch64::LDNF1H_D_IMM:
4461 case AArch64::LDNF1SH_D_IMM:
4462 // A quarter vector's worth of data
4463 // Width = mbytes * elements
4464 Scale = TypeSize::getScalable(MinimumSize: 4);
4465 Width = TypeSize::getScalable(MinimumSize: 4);
4466 MinOffset = -8;
4467 MaxOffset = 7;
4468 break;
4469 case AArch64::LD1B_D_IMM:
4470 case AArch64::LD1SB_D_IMM:
4471 case AArch64::ST1B_D_IMM:
4472 case AArch64::LDNF1B_D_IMM:
4473 case AArch64::LDNF1SB_D_IMM:
4474 // An eighth of a vector's worth of data
4475 // Width = mbytes * elements
4476 Scale = TypeSize::getScalable(MinimumSize: 2);
4477 Width = TypeSize::getScalable(MinimumSize: 2);
4478 MinOffset = -8;
4479 MaxOffset = 7;
4480 break;
4481 case AArch64::ST2Gi:
4482 case AArch64::ST2GPreIndex:
4483 case AArch64::ST2GPostIndex:
4484 case AArch64::STZ2Gi:
4485 case AArch64::STZ2GPreIndex:
4486 case AArch64::STZ2GPostIndex:
4487 Scale = TypeSize::getFixed(ExactSize: 16);
4488 Width = TypeSize::getFixed(ExactSize: 32);
4489 MinOffset = -256;
4490 MaxOffset = 255;
4491 break;
4492 case AArch64::STGPi:
4493 case AArch64::STGPpost:
4494 case AArch64::STGPpre:
4495 Scale = TypeSize::getFixed(ExactSize: 16);
4496 Width = TypeSize::getFixed(ExactSize: 16);
4497 MinOffset = -64;
4498 MaxOffset = 63;
4499 break;
4500 case AArch64::LD1RB_IMM:
4501 case AArch64::LD1RB_H_IMM:
4502 case AArch64::LD1RB_S_IMM:
4503 case AArch64::LD1RB_D_IMM:
4504 case AArch64::LD1RSB_H_IMM:
4505 case AArch64::LD1RSB_S_IMM:
4506 case AArch64::LD1RSB_D_IMM:
4507 Scale = TypeSize::getFixed(ExactSize: 1);
4508 Width = TypeSize::getFixed(ExactSize: 1);
4509 MinOffset = 0;
4510 MaxOffset = 63;
4511 break;
4512 case AArch64::LD1RH_IMM:
4513 case AArch64::LD1RH_S_IMM:
4514 case AArch64::LD1RH_D_IMM:
4515 case AArch64::LD1RSH_S_IMM:
4516 case AArch64::LD1RSH_D_IMM:
4517 Scale = TypeSize::getFixed(ExactSize: 2);
4518 Width = TypeSize::getFixed(ExactSize: 2);
4519 MinOffset = 0;
4520 MaxOffset = 63;
4521 break;
4522 case AArch64::LD1RW_IMM:
4523 case AArch64::LD1RW_D_IMM:
4524 case AArch64::LD1RSW_IMM:
4525 Scale = TypeSize::getFixed(ExactSize: 4);
4526 Width = TypeSize::getFixed(ExactSize: 4);
4527 MinOffset = 0;
4528 MaxOffset = 63;
4529 break;
4530 case AArch64::LD1RD_IMM:
4531 Scale = TypeSize::getFixed(ExactSize: 8);
4532 Width = TypeSize::getFixed(ExactSize: 8);
4533 MinOffset = 0;
4534 MaxOffset = 63;
4535 break;
4536 }
4537
4538 return true;
4539}
4540
4541 // Scaling factor (i.e. the memory access size in bytes) for a load or store.
4542int AArch64InstrInfo::getMemScale(unsigned Opc) {
4543 switch (Opc) {
4544 default:
4545 llvm_unreachable("Opcode has unknown scale!");
4546 case AArch64::LDRBBui:
4547 case AArch64::LDURBBi:
4548 case AArch64::LDRSBWui:
4549 case AArch64::LDURSBWi:
4550 case AArch64::STRBBui:
4551 case AArch64::STURBBi:
4552 return 1;
4553 case AArch64::LDRHHui:
4554 case AArch64::LDURHHi:
4555 case AArch64::LDRSHWui:
4556 case AArch64::LDURSHWi:
4557 case AArch64::STRHHui:
4558 case AArch64::STURHHi:
4559 return 2;
4560 case AArch64::LDRSui:
4561 case AArch64::LDURSi:
4562 case AArch64::LDRSpre:
4563 case AArch64::LDRSWui:
4564 case AArch64::LDURSWi:
4565 case AArch64::LDRSWpre:
4566 case AArch64::LDRWpre:
4567 case AArch64::LDRWui:
4568 case AArch64::LDURWi:
4569 case AArch64::STRSui:
4570 case AArch64::STURSi:
4571 case AArch64::STRSpre:
4572 case AArch64::STRWui:
4573 case AArch64::STURWi:
4574 case AArch64::STRWpre:
4575 case AArch64::LDPSi:
4576 case AArch64::LDPSWi:
4577 case AArch64::LDPWi:
4578 case AArch64::STPSi:
4579 case AArch64::STPWi:
4580 return 4;
4581 case AArch64::LDRDui:
4582 case AArch64::LDURDi:
4583 case AArch64::LDRDpre:
4584 case AArch64::LDRXui:
4585 case AArch64::LDURXi:
4586 case AArch64::LDRXpre:
4587 case AArch64::STRDui:
4588 case AArch64::STURDi:
4589 case AArch64::STRDpre:
4590 case AArch64::STRXui:
4591 case AArch64::STURXi:
4592 case AArch64::STRXpre:
4593 case AArch64::LDPDi:
4594 case AArch64::LDPXi:
4595 case AArch64::STPDi:
4596 case AArch64::STPXi:
4597 return 8;
4598 case AArch64::LDRQui:
4599 case AArch64::LDURQi:
4600 case AArch64::STRQui:
4601 case AArch64::STURQi:
4602 case AArch64::STRQpre:
4603 case AArch64::LDPQi:
4604 case AArch64::LDRQpre:
4605 case AArch64::STPQi:
4606 case AArch64::STGi:
4607 case AArch64::STZGi:
4608 case AArch64::ST2Gi:
4609 case AArch64::STZ2Gi:
4610 case AArch64::STGPi:
4611 return 16;
4612 }
4613}
4614
4615bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4616 switch (MI.getOpcode()) {
4617 default:
4618 return false;
4619 case AArch64::LDRWpre:
4620 case AArch64::LDRXpre:
4621 case AArch64::LDRSWpre:
4622 case AArch64::LDRSpre:
4623 case AArch64::LDRDpre:
4624 case AArch64::LDRQpre:
4625 return true;
4626 }
4627}
4628
4629bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4630 switch (MI.getOpcode()) {
4631 default:
4632 return false;
4633 case AArch64::STRWpre:
4634 case AArch64::STRXpre:
4635 case AArch64::STRSpre:
4636 case AArch64::STRDpre:
4637 case AArch64::STRQpre:
4638 return true;
4639 }
4640}
4641
4642bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4643 return isPreLd(MI) || isPreSt(MI);
4644}
4645
4646bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4647 switch (MI.getOpcode()) {
4648 default:
4649 return false;
4650 case AArch64::LDPSi:
4651 case AArch64::LDPSWi:
4652 case AArch64::LDPDi:
4653 case AArch64::LDPQi:
4654 case AArch64::LDPWi:
4655 case AArch64::LDPXi:
4656 case AArch64::STPSi:
4657 case AArch64::STPDi:
4658 case AArch64::STPQi:
4659 case AArch64::STPWi:
4660 case AArch64::STPXi:
4661 case AArch64::STGPi:
4662 return true;
4663 }
4664}
4665
4666const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4667 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4668 unsigned Idx =
4669 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
4670 : 1;
4671 return MI.getOperand(i: Idx);
4672}
4673
4674const MachineOperand &
4675AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4676 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4677 unsigned Idx =
4678 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
4679 : 2;
4680 return MI.getOperand(i: Idx);
4681}
4682
4683const MachineOperand &
4684AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
4685 switch (MI.getOpcode()) {
4686 default:
4687 llvm_unreachable("Unexpected opcode");
4688 case AArch64::LDRBroX:
4689 case AArch64::LDRBBroX:
4690 case AArch64::LDRSBXroX:
4691 case AArch64::LDRSBWroX:
4692 case AArch64::LDRHroX:
4693 case AArch64::LDRHHroX:
4694 case AArch64::LDRSHXroX:
4695 case AArch64::LDRSHWroX:
4696 case AArch64::LDRWroX:
4697 case AArch64::LDRSroX:
4698 case AArch64::LDRSWroX:
4699 case AArch64::LDRDroX:
4700 case AArch64::LDRXroX:
4701 case AArch64::LDRQroX:
4702 return MI.getOperand(i: 4);
4703 }
4704}
4705
4706static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4707 Register Reg) {
4708 if (MI.getParent() == nullptr)
4709 return nullptr;
4710 const MachineFunction *MF = MI.getParent()->getParent();
4711 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4712}
4713
4714bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
4715 auto IsHFPR = [&](const MachineOperand &Op) {
4716 if (!Op.isReg())
4717 return false;
4718 auto Reg = Op.getReg();
4719 if (Reg.isPhysical())
4720 return AArch64::FPR16RegClass.contains(Reg);
4721 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4722 return TRC == &AArch64::FPR16RegClass ||
4723 TRC == &AArch64::FPR16_loRegClass;
4724 };
4725 return llvm::any_of(Range: MI.operands(), P: IsHFPR);
4726}
4727
4728bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
4729 auto IsQFPR = [&](const MachineOperand &Op) {
4730 if (!Op.isReg())
4731 return false;
4732 auto Reg = Op.getReg();
4733 if (Reg.isPhysical())
4734 return AArch64::FPR128RegClass.contains(Reg);
4735 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4736 return TRC == &AArch64::FPR128RegClass ||
4737 TRC == &AArch64::FPR128_loRegClass;
4738 };
4739 return llvm::any_of(Range: MI.operands(), P: IsQFPR);
4740}
4741
4742bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
4743 switch (MI.getOpcode()) {
4744 case AArch64::BRK:
4745 case AArch64::HLT:
4746 case AArch64::PACIASP:
4747 case AArch64::PACIBSP:
4748 // Implicit BTI behavior.
4749 return true;
4750 case AArch64::PAUTH_PROLOGUE:
4751 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4752 return true;
4753 case AArch64::HINT: {
4754 unsigned Imm = MI.getOperand(i: 0).getImm();
4755 // Explicit BTI instruction.
4756 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4757 return true;
4758 // PACI(A|B)SP instructions.
4759 if (Imm == 25 || Imm == 27)
4760 return true;
4761 return false;
4762 }
4763 default:
4764 return false;
4765 }
4766}
4767
4768bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
4769 if (Reg == 0)
4770 return false;
4771 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4772 return AArch64::FPR128RegClass.contains(Reg) ||
4773 AArch64::FPR64RegClass.contains(Reg) ||
4774 AArch64::FPR32RegClass.contains(Reg) ||
4775 AArch64::FPR16RegClass.contains(Reg) ||
4776 AArch64::FPR8RegClass.contains(Reg);
4777}
4778
4779bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
4780 auto IsFPR = [&](const MachineOperand &Op) {
4781 if (!Op.isReg())
4782 return false;
4783 auto Reg = Op.getReg();
4784 if (Reg.isPhysical())
4785 return isFpOrNEON(Reg);
4786
4787 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4788 return TRC == &AArch64::FPR128RegClass ||
4789 TRC == &AArch64::FPR128_loRegClass ||
4790 TRC == &AArch64::FPR64RegClass ||
4791 TRC == &AArch64::FPR64_loRegClass ||
4792 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4793 TRC == &AArch64::FPR8RegClass;
4794 };
4795 return llvm::any_of(Range: MI.operands(), P: IsFPR);
4796}
4797
4798 // Scale the unscaled offset. Returns false if the byte offset is not a
4799 // multiple of the access size and therefore can't be scaled.
4800static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4801 int Scale = AArch64InstrInfo::getMemScale(Opc);
4802
4803 // If the byte-offset isn't a multiple of the stride, we can't scale this
4804 // offset.
4805 if (Offset % Scale != 0)
4806 return false;
4807
4808 // Convert the byte-offset used by unscaled into an "element" offset used
4809 // by the scaled pair load/store instructions.
4810 Offset /= Scale;
4811 return true;
4812}
4813
4814static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4815 if (FirstOpc == SecondOpc)
4816 return true;
4817 // We can also pair sign-ext and zero-ext instructions.
4818 switch (FirstOpc) {
4819 default:
4820 return false;
4821 case AArch64::STRSui:
4822 case AArch64::STURSi:
4823 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4824 case AArch64::STRDui:
4825 case AArch64::STURDi:
4826 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4827 case AArch64::STRQui:
4828 case AArch64::STURQi:
4829 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4830 case AArch64::STRWui:
4831 case AArch64::STURWi:
4832 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4833 case AArch64::STRXui:
4834 case AArch64::STURXi:
4835 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4836 case AArch64::LDRSui:
4837 case AArch64::LDURSi:
4838 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4839 case AArch64::LDRDui:
4840 case AArch64::LDURDi:
4841 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4842 case AArch64::LDRQui:
4843 case AArch64::LDURQi:
4844 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4845 case AArch64::LDRWui:
4846 case AArch64::LDURWi:
4847 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4848 case AArch64::LDRSWui:
4849 case AArch64::LDURSWi:
4850 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4851 case AArch64::LDRXui:
4852 case AArch64::LDURXi:
4853 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4854 }
4855 // These instructions can't be paired based on their opcodes.
4856 return false;
4857}
4858
4859static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4860 int64_t Offset1, unsigned Opcode1, int FI2,
4861 int64_t Offset2, unsigned Opcode2) {
4862 // Accesses through fixed stack object frame indices may access a different
4863 // fixed stack slot. Check that the combined object and instruction offsets are adjacent.
4864 if (MFI.isFixedObjectIndex(ObjectIdx: FI1) && MFI.isFixedObjectIndex(ObjectIdx: FI2)) {
4865 int64_t ObjectOffset1 = MFI.getObjectOffset(ObjectIdx: FI1);
4866 int64_t ObjectOffset2 = MFI.getObjectOffset(ObjectIdx: FI2);
4867 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4868 // Convert to scaled object offsets.
4869 int Scale1 = AArch64InstrInfo::getMemScale(Opc: Opcode1);
4870 if (ObjectOffset1 % Scale1 != 0)
4871 return false;
4872 ObjectOffset1 /= Scale1;
4873 int Scale2 = AArch64InstrInfo::getMemScale(Opc: Opcode2);
4874 if (ObjectOffset2 % Scale2 != 0)
4875 return false;
4876 ObjectOffset2 /= Scale2;
4877 ObjectOffset1 += Offset1;
4878 ObjectOffset2 += Offset2;
4879 return ObjectOffset1 + 1 == ObjectOffset2;
4880 }
4881
4882 return FI1 == FI2;
4883}
4884
4885/// Detect opportunities for ldp/stp formation.
4886///
4887/// Only called for LdSt for which getMemOperandWithOffset returns true.
4888bool AArch64InstrInfo::shouldClusterMemOps(
4889 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4890 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4891 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4892 unsigned NumBytes) const {
4893 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4894 const MachineOperand &BaseOp1 = *BaseOps1.front();
4895 const MachineOperand &BaseOp2 = *BaseOps2.front();
4896 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4897 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4898 if (BaseOp1.getType() != BaseOp2.getType())
4899 return false;
4900
4901 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4902 "Only base registers and frame indices are supported.");
4903
4904 // Check for both base regs and base FI.
4905 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4906 return false;
4907
4908 // Only cluster up to a single pair.
4909 if (ClusterSize > 2)
4910 return false;
4911
4912 if (!isPairableLdStInst(MI: FirstLdSt) || !isPairableLdStInst(MI: SecondLdSt))
4913 return false;
4914
4915 // Can we pair these instructions based on their opcodes?
4916 unsigned FirstOpc = FirstLdSt.getOpcode();
4917 unsigned SecondOpc = SecondLdSt.getOpcode();
4918 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4919 return false;
4920
4921 // Can't merge volatiles or load/stores that have a hint to avoid pair
4922 // formation, for example.
4923 if (!isCandidateToMergeOrPair(MI: FirstLdSt) ||
4924 !isCandidateToMergeOrPair(MI: SecondLdSt))
4925 return false;
4926
4927 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4928 int64_t Offset1 = FirstLdSt.getOperand(i: 2).getImm();
4929 if (hasUnscaledLdStOffset(Opc: FirstOpc) && !scaleOffset(Opc: FirstOpc, Offset&: Offset1))
4930 return false;
4931
4932 int64_t Offset2 = SecondLdSt.getOperand(i: 2).getImm();
4933 if (hasUnscaledLdStOffset(Opc: SecondOpc) && !scaleOffset(Opc: SecondOpc, Offset&: Offset2))
4934 return false;
4935
4936 // Pairwise instructions have a 7-bit signed offset field.
4937 if (Offset1 > 63 || Offset1 < -64)
4938 return false;
4939
4940 // The caller should already have ordered First/SecondLdSt by offset.
4941 // Note: this does not hold when the base operands are different frame indices.
4942 if (BaseOp1.isFI()) {
4943 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4944 "Caller should have ordered offsets.");
4945
4946 const MachineFrameInfo &MFI =
4947 FirstLdSt.getParent()->getParent()->getFrameInfo();
4948 return shouldClusterFI(MFI, FI1: BaseOp1.getIndex(), Offset1, Opcode1: FirstOpc,
4949 FI2: BaseOp2.getIndex(), Offset2, Opcode2: SecondOpc);
4950 }
4951
4952 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4953
4954 return Offset1 + 1 == Offset2;
4955}
4956
4957static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
4958 MCRegister Reg, unsigned SubIdx,
4959 unsigned State,
4960 const TargetRegisterInfo *TRI) {
4961 if (!SubIdx)
4962 return MIB.addReg(RegNo: Reg, flags: State);
4963
4964 if (Reg.isPhysical())
4965 return MIB.addReg(RegNo: TRI->getSubReg(Reg, Idx: SubIdx), flags: State);
4966 return MIB.addReg(RegNo: Reg, flags: State, SubReg: SubIdx);
4967}
4968
4969static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4970 unsigned NumRegs) {
4971 // We really want the positive remainder mod 32 here, which happens to be
4972 // easily obtainable with a mask.
4973 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4974}
4975
4976void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
4977 MachineBasicBlock::iterator I,
4978 const DebugLoc &DL, MCRegister DestReg,
4979 MCRegister SrcReg, bool KillSrc,
4980 unsigned Opcode,
4981 ArrayRef<unsigned> Indices) const {
4982 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4983 const TargetRegisterInfo *TRI = &getRegisterInfo();
4984 uint16_t DestEncoding = TRI->getEncodingValue(Reg: DestReg);
4985 uint16_t SrcEncoding = TRI->getEncodingValue(Reg: SrcReg);
4986 unsigned NumRegs = Indices.size();
4987
4988 int SubReg = 0, End = NumRegs, Incr = 1;
4989 if (forwardCopyWillClobberTuple(DestReg: DestEncoding, SrcReg: SrcEncoding, NumRegs)) {
4990 SubReg = NumRegs - 1;
4991 End = -1;
4992 Incr = -1;
4993 }
4994
4995 for (; SubReg != End; SubReg += Incr) {
4996 const MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode));
4997 AddSubReg(MIB, Reg: DestReg, SubIdx: Indices[SubReg], State: RegState::Define, TRI);
4998 AddSubReg(MIB, Reg: SrcReg, SubIdx: Indices[SubReg], State: 0, TRI);
4999 AddSubReg(MIB, Reg: SrcReg, SubIdx: Indices[SubReg], State: getKillRegState(B: KillSrc), TRI);
5000 }
5001}
5002
5003void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
5004 MachineBasicBlock::iterator I,
5005 const DebugLoc &DL, MCRegister DestReg,
5006 MCRegister SrcReg, bool KillSrc,
5007 unsigned Opcode, unsigned ZeroReg,
5008 llvm::ArrayRef<unsigned> Indices) const {
5009 const TargetRegisterInfo *TRI = &getRegisterInfo();
5010 unsigned NumRegs = Indices.size();
5011
5012#ifndef NDEBUG
5013 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5014 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5015 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
5016 "GPR reg sequences should not be able to overlap");
5017#endif
5018
5019 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
5020 const MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode));
5021 AddSubReg(MIB, Reg: DestReg, SubIdx: Indices[SubReg], State: RegState::Define, TRI);
5022 MIB.addReg(RegNo: ZeroReg);
5023 AddSubReg(MIB, Reg: SrcReg, SubIdx: Indices[SubReg], State: getKillRegState(B: KillSrc), TRI);
5024 MIB.addImm(Val: 0);
5025 }
5026}
5027
5028void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
5029 MachineBasicBlock::iterator I,
5030 const DebugLoc &DL, Register DestReg,
5031 Register SrcReg, bool KillSrc,
5032 bool RenamableDest,
5033 bool RenamableSrc) const {
5034 if (AArch64::GPR32spRegClass.contains(Reg: DestReg) &&
5035 (AArch64::GPR32spRegClass.contains(Reg: SrcReg) || SrcReg == AArch64::WZR)) {
5036 const TargetRegisterInfo *TRI = &getRegisterInfo();
5037
5038 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
5039 // If either operand is WSP, expand to ADD #0.
5040 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5041 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5042 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5043 MCRegister DestRegX = TRI->getMatchingSuperReg(
5044 Reg: DestReg, SubIdx: AArch64::sub_32, RC: &AArch64::GPR64spRegClass);
5045 MCRegister SrcRegX = TRI->getMatchingSuperReg(
5046 Reg: SrcReg, SubIdx: AArch64::sub_32, RC: &AArch64::GPR64spRegClass);
5047 // This instruction is reading and writing X registers. This may upset
5048 // the register scavenger and machine verifier, so we need to indicate
5049 // that we are reading an undefined value from SrcRegX, but a proper
5050 // value from SrcReg.
5051 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ADDXri), DestReg: DestRegX)
5052 .addReg(RegNo: SrcRegX, flags: RegState::Undef)
5053 .addImm(Val: 0)
5054 .addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0))
5055 .addReg(RegNo: SrcReg, flags: RegState::Implicit | getKillRegState(B: KillSrc));
5056 } else {
5057 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ADDWri), DestReg)
5058 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc))
5059 .addImm(Val: 0)
5060 .addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0));
5061 }
5062 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
5063 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::MOVZWi), DestReg)
5064 .addImm(Val: 0)
5065 .addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0));
5066 } else {
5067 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5068 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5069 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5070 MCRegister DestRegX = TRI->getMatchingSuperReg(
5071 Reg: DestReg, SubIdx: AArch64::sub_32, RC: &AArch64::GPR64spRegClass);
5072 MCRegister SrcRegX = TRI->getMatchingSuperReg(
5073 Reg: SrcReg, SubIdx: AArch64::sub_32, RC: &AArch64::GPR64spRegClass);
5074 // This instruction is reading and writing X registers. This may upset
5075 // the register scavenger and machine verifier, so we need to indicate
5076 // that we are reading an undefined value from SrcRegX, but a proper
5077 // value from SrcReg.
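        // Illustrative example (added, not from the original source): a
        // w1 -> w0 copy on such a subtarget is emitted as
        //   orr x0, xzr, x1
        // reading x1 as undef and w1 implicitly, instead of
        // "orr w0, wzr, w1".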
5078 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORRXrr), DestReg: DestRegX)
5079 .addReg(RegNo: AArch64::XZR)
5080 .addReg(RegNo: SrcRegX, flags: RegState::Undef)
5081 .addReg(RegNo: SrcReg, flags: RegState::Implicit | getKillRegState(B: KillSrc));
5082 } else {
5083 // Otherwise, expand to ORR WZR.
5084 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORRWrr), DestReg)
5085 .addReg(RegNo: AArch64::WZR)
5086 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5087 }
5088 }
5089 return;
5090 }
5091
5092 // Copy a Predicate register by ORRing with itself.
5093 if (AArch64::PPRRegClass.contains(Reg: DestReg) &&
5094 AArch64::PPRRegClass.contains(Reg: SrcReg)) {
5095 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5096 "Unexpected SVE register.");
5097 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORR_PPzPP), DestReg)
5098 .addReg(RegNo: SrcReg) // Pg
5099 .addReg(RegNo: SrcReg)
5100 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5101 return;
5102 }
5103
5104 // Copy a predicate-as-counter register by ORRing with itself as if it
5105 // were a regular predicate (mask) register.
5106 bool DestIsPNR = AArch64::PNRRegClass.contains(Reg: DestReg);
5107 bool SrcIsPNR = AArch64::PNRRegClass.contains(Reg: SrcReg);
5108 if (DestIsPNR || SrcIsPNR) {
5109 auto ToPPR = [](MCRegister R) -> MCRegister {
5110 return (R - AArch64::PN0) + AArch64::P0;
5111 };
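    // Illustrative example (added, not from the original source):
    // ToPPR(AArch64::PN3) yields AArch64::P3, so the copy below is emitted on
    // the underlying predicate (mask) registers.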
5112 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg.asMCReg();
5113 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg();
5114
5115 if (PPRSrcReg != PPRDestReg) {
5116 auto NewMI = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORR_PPzPP), DestReg: PPRDestReg)
5117 .addReg(RegNo: PPRSrcReg) // Pg
5118 .addReg(RegNo: PPRSrcReg)
5119 .addReg(RegNo: PPRSrcReg, flags: getKillRegState(B: KillSrc));
5120 if (DestIsPNR)
5121 NewMI.addDef(RegNo: DestReg, Flags: RegState::Implicit);
5122 }
5123 return;
5124 }
5125
5126 // Copy a Z register by ORRing with itself.
5127 if (AArch64::ZPRRegClass.contains(Reg: DestReg) &&
5128 AArch64::ZPRRegClass.contains(Reg: SrcReg)) {
5129 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5130 "Unexpected SVE register.");
5131 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORR_ZZZ), DestReg)
5132 .addReg(RegNo: SrcReg)
5133 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5134 return;
5135 }
5136
5137 // Copy a Z register pair by copying the individual sub-registers.
5138 if ((AArch64::ZPR2RegClass.contains(Reg: DestReg) ||
5139 AArch64::ZPR2StridedOrContiguousRegClass.contains(Reg: DestReg)) &&
5140 (AArch64::ZPR2RegClass.contains(Reg: SrcReg) ||
5141 AArch64::ZPR2StridedOrContiguousRegClass.contains(Reg: SrcReg))) {
5142 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5143 "Unexpected SVE register.");
5144 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5145 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORR_ZZZ,
5146 Indices);
5147 return;
5148 }
5149
5150 // Copy a Z register triple by copying the individual sub-registers.
5151 if (AArch64::ZPR3RegClass.contains(Reg: DestReg) &&
5152 AArch64::ZPR3RegClass.contains(Reg: SrcReg)) {
5153 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5154 "Unexpected SVE register.");
5155 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5156 AArch64::zsub2};
5157 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORR_ZZZ,
5158 Indices);
5159 return;
5160 }
5161
5162 // Copy a Z register quad by copying the individual sub-registers.
5163 if ((AArch64::ZPR4RegClass.contains(Reg: DestReg) ||
5164 AArch64::ZPR4StridedOrContiguousRegClass.contains(Reg: DestReg)) &&
5165 (AArch64::ZPR4RegClass.contains(Reg: SrcReg) ||
5166 AArch64::ZPR4StridedOrContiguousRegClass.contains(Reg: SrcReg))) {
5167 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5168 "Unexpected SVE register.");
5169 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5170 AArch64::zsub2, AArch64::zsub3};
5171 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORR_ZZZ,
5172 Indices);
5173 return;
5174 }
5175
5176 if (AArch64::GPR64spRegClass.contains(Reg: DestReg) &&
5177 (AArch64::GPR64spRegClass.contains(Reg: SrcReg) || SrcReg == AArch64::XZR)) {
5178 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5179 // If either operand is SP, expand to ADD #0.
5180 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ADDXri), DestReg)
5181 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc))
5182 .addImm(Val: 0)
5183 .addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0));
5184 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
5185 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::MOVZXi), DestReg)
5186 .addImm(Val: 0)
5187 .addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0));
5188 } else {
5189 // Otherwise, expand to ORR XZR.
5190 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORRXrr), DestReg)
5191 .addReg(RegNo: AArch64::XZR)
5192 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5193 }
5194 return;
5195 }
5196
5197 // Copy a DDDD register quad by copying the individual sub-registers.
5198 if (AArch64::DDDDRegClass.contains(Reg: DestReg) &&
5199 AArch64::DDDDRegClass.contains(Reg: SrcReg)) {
5200 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5201 AArch64::dsub2, AArch64::dsub3};
5202 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRv8i8,
5203 Indices);
5204 return;
5205 }
5206
5207 // Copy a DDD register triple by copying the individual sub-registers.
5208 if (AArch64::DDDRegClass.contains(Reg: DestReg) &&
5209 AArch64::DDDRegClass.contains(Reg: SrcReg)) {
5210 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5211 AArch64::dsub2};
5212 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRv8i8,
5213 Indices);
5214 return;
5215 }
5216
5217 // Copy a DD register pair by copying the individual sub-registers.
5218 if (AArch64::DDRegClass.contains(Reg: DestReg) &&
5219 AArch64::DDRegClass.contains(Reg: SrcReg)) {
5220 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5221 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRv8i8,
5222 Indices);
5223 return;
5224 }
5225
5226 // Copy a QQQQ register quad by copying the individual sub-registers.
5227 if (AArch64::QQQQRegClass.contains(Reg: DestReg) &&
5228 AArch64::QQQQRegClass.contains(Reg: SrcReg)) {
5229 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5230 AArch64::qsub2, AArch64::qsub3};
5231 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRv16i8,
5232 Indices);
5233 return;
5234 }
5235
5236 // Copy a QQQ register triple by copying the individual sub-registers.
5237 if (AArch64::QQQRegClass.contains(Reg: DestReg) &&
5238 AArch64::QQQRegClass.contains(Reg: SrcReg)) {
5239 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5240 AArch64::qsub2};
5241 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRv16i8,
5242 Indices);
5243 return;
5244 }
5245
5246 // Copy a QQ register pair by copying the individual sub-registers.
5247 if (AArch64::QQRegClass.contains(Reg: DestReg) &&
5248 AArch64::QQRegClass.contains(Reg: SrcReg)) {
5249 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5250 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRv16i8,
5251 Indices);
5252 return;
5253 }
5254
5255 if (AArch64::XSeqPairsClassRegClass.contains(Reg: DestReg) &&
5256 AArch64::XSeqPairsClassRegClass.contains(Reg: SrcReg)) {
5257 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5258 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRXrs,
5259 ZeroReg: AArch64::XZR, Indices);
5260 return;
5261 }
5262
5263 if (AArch64::WSeqPairsClassRegClass.contains(Reg: DestReg) &&
5264 AArch64::WSeqPairsClassRegClass.contains(Reg: SrcReg)) {
5265 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5266 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRWrs,
5267 ZeroReg: AArch64::WZR, Indices);
5268 return;
5269 }
5270
5271 if (AArch64::FPR128RegClass.contains(Reg: DestReg) &&
5272 AArch64::FPR128RegClass.contains(Reg: SrcReg)) {
5273 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5274 !Subtarget.isNeonAvailable())
5275 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORR_ZZZ))
5276 .addReg(RegNo: AArch64::Z0 + (DestReg - AArch64::Q0), flags: RegState::Define)
5277 .addReg(RegNo: AArch64::Z0 + (SrcReg - AArch64::Q0))
5278 .addReg(RegNo: AArch64::Z0 + (SrcReg - AArch64::Q0));
5279 else if (Subtarget.isNeonAvailable())
5280 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORRv16i8), DestReg)
5281 .addReg(RegNo: SrcReg)
5282 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5283 else {
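      // Neither NEON nor SVE is available here, so bounce the value through
      // the stack. Illustrative rendering of the two instructions built
      // below (added comment; operand names are placeholders):
      //   str qSrc, [sp, #-16]!
      //   ldr qDst, [sp], #16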
5284 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::STRQpre))
5285 .addReg(RegNo: AArch64::SP, flags: RegState::Define)
5286 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc))
5287 .addReg(RegNo: AArch64::SP)
5288 .addImm(Val: -16);
5289 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::LDRQpost))
5290 .addReg(RegNo: AArch64::SP, flags: RegState::Define)
5291 .addReg(RegNo: DestReg, flags: RegState::Define)
5292 .addReg(RegNo: AArch64::SP)
5293 .addImm(Val: 16);
5294 }
5295 return;
5296 }
5297
5298 if (AArch64::FPR64RegClass.contains(Reg: DestReg) &&
5299 AArch64::FPR64RegClass.contains(Reg: SrcReg)) {
5300 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVDr), DestReg)
5301 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5302 return;
5303 }
5304
5305 if (AArch64::FPR32RegClass.contains(Reg: DestReg) &&
5306 AArch64::FPR32RegClass.contains(Reg: SrcReg)) {
5307 if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5308 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5309 const TargetRegisterInfo *TRI = &getRegisterInfo();
5310 MCRegister DestRegD = TRI->getMatchingSuperReg(Reg: DestReg, SubIdx: AArch64::ssub,
5311 RC: &AArch64::FPR64RegClass);
5312 MCRegister SrcRegD = TRI->getMatchingSuperReg(Reg: SrcReg, SubIdx: AArch64::ssub,
5313 RC: &AArch64::FPR64RegClass);
5314 // This instruction is reading and writing D registers. This may upset
5315 // the register scavenger and machine verifier, so we need to indicate
5316 // that we are reading an undefined value from SrcRegD, but a proper
5317 // value from SrcReg.
5318 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVDr), DestReg: DestRegD)
5319 .addReg(RegNo: SrcRegD, flags: RegState::Undef)
5320 .addReg(RegNo: SrcReg, flags: RegState::Implicit | getKillRegState(B: KillSrc));
5321 } else {
5322 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVSr), DestReg)
5323 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5324 }
5325 return;
5326 }
5327
5328 if (AArch64::FPR16RegClass.contains(Reg: DestReg) &&
5329 AArch64::FPR16RegClass.contains(Reg: SrcReg)) {
5330 if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5331 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5332 const TargetRegisterInfo *TRI = &getRegisterInfo();
5333 MCRegister DestRegD = TRI->getMatchingSuperReg(Reg: DestReg, SubIdx: AArch64::hsub,
5334 RC: &AArch64::FPR64RegClass);
5335 MCRegister SrcRegD = TRI->getMatchingSuperReg(Reg: SrcReg, SubIdx: AArch64::hsub,
5336 RC: &AArch64::FPR64RegClass);
5337 // This instruction is reading and writing D registers. This may upset
5338 // the register scavenger and machine verifier, so we need to indicate
5339 // that we are reading an undefined value from SrcRegD, but a proper
5340 // value from SrcReg.
5341 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVDr), DestReg: DestRegD)
5342 .addReg(RegNo: SrcRegD, flags: RegState::Undef)
5343 .addReg(RegNo: SrcReg, flags: RegState::Implicit | getKillRegState(B: KillSrc));
5344 } else {
5345 DestReg = RI.getMatchingSuperReg(Reg: DestReg, SubIdx: AArch64::hsub,
5346 RC: &AArch64::FPR32RegClass);
5347 SrcReg = RI.getMatchingSuperReg(Reg: SrcReg, SubIdx: AArch64::hsub,
5348 RC: &AArch64::FPR32RegClass);
5349 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVSr), DestReg)
5350 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5351 }
5352 return;
5353 }
5354
5355 if (AArch64::FPR8RegClass.contains(Reg: DestReg) &&
5356 AArch64::FPR8RegClass.contains(Reg: SrcReg)) {
5357 if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5358 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5359 const TargetRegisterInfo *TRI = &getRegisterInfo();
5360 MCRegister DestRegD = TRI->getMatchingSuperReg(Reg: DestReg, SubIdx: AArch64::bsub,
5361 RC: &AArch64::FPR64RegClass);
5362 MCRegister SrcRegD = TRI->getMatchingSuperReg(Reg: SrcReg, SubIdx: AArch64::bsub,
5363 RC: &AArch64::FPR64RegClass);
5364 // This instruction is reading and writing D registers. This may upset
5365 // the register scavenger and machine verifier, so we need to indicate
5366 // that we are reading an undefined value from SrcRegD, but a proper
5367 // value from SrcReg.
5368 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVDr), DestReg: DestRegD)
5369 .addReg(RegNo: SrcRegD, flags: RegState::Undef)
5370 .addReg(RegNo: SrcReg, flags: RegState::Implicit | getKillRegState(B: KillSrc));
5371 } else {
5372 DestReg = RI.getMatchingSuperReg(Reg: DestReg, SubIdx: AArch64::bsub,
5373 RC: &AArch64::FPR32RegClass);
5374 SrcReg = RI.getMatchingSuperReg(Reg: SrcReg, SubIdx: AArch64::bsub,
5375 RC: &AArch64::FPR32RegClass);
5376 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVSr), DestReg)
5377 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5378 }
5379 return;
5380 }
5381
5382 // Copies between GPR64 and FPR64.
5383 if (AArch64::FPR64RegClass.contains(Reg: DestReg) &&
5384 AArch64::GPR64RegClass.contains(Reg: SrcReg)) {
5385 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVXDr), DestReg)
5386 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5387 return;
5388 }
5389 if (AArch64::GPR64RegClass.contains(Reg: DestReg) &&
5390 AArch64::FPR64RegClass.contains(Reg: SrcReg)) {
5391 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVDXr), DestReg)
5392 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5393 return;
5394 }
5395 // Copies between GPR32 and FPR32.
5396 if (AArch64::FPR32RegClass.contains(Reg: DestReg) &&
5397 AArch64::GPR32RegClass.contains(Reg: SrcReg)) {
5398 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVWSr), DestReg)
5399 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5400 return;
5401 }
5402 if (AArch64::GPR32RegClass.contains(Reg: DestReg) &&
5403 AArch64::FPR32RegClass.contains(Reg: SrcReg)) {
5404 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVSWr), DestReg)
5405 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5406 return;
5407 }
5408
5409 if (DestReg == AArch64::NZCV) {
5410 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5411 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::MSR))
5412 .addImm(Val: AArch64SysReg::NZCV)
5413 .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc))
5414 .addReg(RegNo: AArch64::NZCV, flags: RegState::Implicit | RegState::Define);
5415 return;
5416 }
5417
5418 if (SrcReg == AArch64::NZCV) {
5419 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5420 BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::MRS), DestReg)
5421 .addImm(Val: AArch64SysReg::NZCV)
5422 .addReg(RegNo: AArch64::NZCV, flags: RegState::Implicit | getKillRegState(B: KillSrc));
5423 return;
5424 }
5425
5426#ifndef NDEBUG
5427 const TargetRegisterInfo &TRI = getRegisterInfo();
5428 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
5429 << TRI.getRegAsmName(SrcReg) << "\n";
5430#endif
5431 llvm_unreachable("unimplemented reg-to-reg copy");
5432}
5433
5434static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
5435 MachineBasicBlock &MBB,
5436 MachineBasicBlock::iterator InsertBefore,
5437 const MCInstrDesc &MCID,
5438 Register SrcReg, bool IsKill,
5439 unsigned SubIdx0, unsigned SubIdx1, int FI,
5440 MachineMemOperand *MMO) {
5441 Register SrcReg0 = SrcReg;
5442 Register SrcReg1 = SrcReg;
5443 if (SrcReg.isPhysical()) {
5444 SrcReg0 = TRI.getSubReg(Reg: SrcReg, Idx: SubIdx0);
5445 SubIdx0 = 0;
5446 SrcReg1 = TRI.getSubReg(Reg: SrcReg, Idx: SubIdx1);
5447 SubIdx1 = 0;
5448 }
5449 BuildMI(BB&: MBB, I: InsertBefore, MIMD: DebugLoc(), MCID)
5450 .addReg(RegNo: SrcReg0, flags: getKillRegState(B: IsKill), SubReg: SubIdx0)
5451 .addReg(RegNo: SrcReg1, flags: getKillRegState(B: IsKill), SubReg: SubIdx1)
5452 .addFrameIndex(Idx: FI)
5453 .addImm(Val: 0)
5454 .addMemOperand(MMO);
5455}
5456
5457void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
5458 MachineBasicBlock::iterator MBBI,
5459 Register SrcReg, bool isKill, int FI,
5460 const TargetRegisterClass *RC,
5461 const TargetRegisterInfo *TRI,
5462 Register VReg,
5463 MachineInstr::MIFlag Flags) const {
5464 MachineFunction &MF = *MBB.getParent();
5465 MachineFrameInfo &MFI = MF.getFrameInfo();
5466
5467 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
5468 MachineMemOperand *MMO =
5469 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOStore,
5470 Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
5471 unsigned Opc = 0;
5472 bool Offset = true;
5473 MCRegister PNRReg = MCRegister::NoRegister;
5474 unsigned StackID = TargetStackID::Default;
5475 switch (TRI->getSpillSize(RC: *RC)) {
5476 case 1:
5477 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5478 Opc = AArch64::STRBui;
5479 break;
5480 case 2: {
5481 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5482 Opc = AArch64::STRHui;
5483 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
5484 AArch64::PPRRegClass.hasSubClassEq(RC)) {
5485 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5486 "Unexpected register store without SVE store instructions");
5487 Opc = AArch64::STR_PXI;
5488 StackID = TargetStackID::ScalableVector;
5489 }
5490 break;
5491 }
5492 case 4:
5493 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5494 Opc = AArch64::STRWui;
5495 if (SrcReg.isVirtual())
5496 MF.getRegInfo().constrainRegClass(Reg: SrcReg, RC: &AArch64::GPR32RegClass);
5497 else
5498 assert(SrcReg != AArch64::WSP);
5499 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5500 Opc = AArch64::STRSui;
5501 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5502 Opc = AArch64::STR_PPXI;
5503 StackID = TargetStackID::ScalableVector;
5504 }
5505 break;
5506 case 8:
5507 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5508 Opc = AArch64::STRXui;
5509 if (SrcReg.isVirtual())
5510 MF.getRegInfo().constrainRegClass(Reg: SrcReg, RC: &AArch64::GPR64RegClass);
5511 else
5512 assert(SrcReg != AArch64::SP);
5513 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5514 Opc = AArch64::STRDui;
5515 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5516 storeRegPairToStackSlot(TRI: getRegisterInfo(), MBB, InsertBefore: MBBI,
5517 MCID: get(Opcode: AArch64::STPWi), SrcReg, IsKill: isKill,
5518 SubIdx0: AArch64::sube32, SubIdx1: AArch64::subo32, FI, MMO);
5519 return;
5520 }
5521 break;
5522 case 16:
5523 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5524 Opc = AArch64::STRQui;
5525 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5526 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5527 Opc = AArch64::ST1Twov1d;
5528 Offset = false;
5529 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5530 storeRegPairToStackSlot(TRI: getRegisterInfo(), MBB, InsertBefore: MBBI,
5531 MCID: get(Opcode: AArch64::STPXi), SrcReg, IsKill: isKill,
5532 SubIdx0: AArch64::sube64, SubIdx1: AArch64::subo64, FI, MMO);
5533 return;
5534 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5535 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5536 "Unexpected register store without SVE store instructions");
5537 Opc = AArch64::STR_ZXI;
5538 StackID = TargetStackID::ScalableVector;
5539 } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5540 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5541 "Unexpected predicate store without SVE store instructions");
5542 Opc = AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO;
5543 StackID = TargetStackID::ScalableVector;
5544 }
5545 break;
5546 case 24:
5547 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5548 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5549 Opc = AArch64::ST1Threev1d;
5550 Offset = false;
5551 }
5552 break;
5553 case 32:
5554 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5555 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5556 Opc = AArch64::ST1Fourv1d;
5557 Offset = false;
5558 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5559 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5560 Opc = AArch64::ST1Twov2d;
5561 Offset = false;
5562 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5563 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5564 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5565 "Unexpected register store without SVE store instructions");
5566 Opc = AArch64::STR_ZZXI;
5567 StackID = TargetStackID::ScalableVector;
5568 }
5569 break;
5570 case 48:
5571 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5572 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5573 Opc = AArch64::ST1Threev2d;
5574 Offset = false;
5575 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5576 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5577 "Unexpected register store without SVE store instructions");
5578 Opc = AArch64::STR_ZZZXI;
5579 StackID = TargetStackID::ScalableVector;
5580 }
5581 break;
5582 case 64:
5583 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5584 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5585 Opc = AArch64::ST1Fourv2d;
5586 Offset = false;
5587 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5588 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5589 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5590 "Unexpected register store without SVE store instructions");
5591 Opc = AArch64::STR_ZZZZXI;
5592 StackID = TargetStackID::ScalableVector;
5593 }
5594 break;
5595 }
5596 assert(Opc && "Unknown register class");
5597 MFI.setStackID(ObjectIdx: FI, ID: StackID);
5598
5599 const MachineInstrBuilder MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DebugLoc(), MCID: get(Opcode: Opc))
5600 .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
5601 .addFrameIndex(Idx: FI);
5602
5603 if (Offset)
5604 MI.addImm(Val: 0);
5605 if (PNRReg.isValid())
5606 MI.addDef(RegNo: PNRReg, Flags: RegState::Implicit);
5607 MI.addMemOperand(MMO);
5608}
5609
5610static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
5611 MachineBasicBlock &MBB,
5612 MachineBasicBlock::iterator InsertBefore,
5613 const MCInstrDesc &MCID,
5614 Register DestReg, unsigned SubIdx0,
5615 unsigned SubIdx1, int FI,
5616 MachineMemOperand *MMO) {
5617 Register DestReg0 = DestReg;
5618 Register DestReg1 = DestReg;
5619 bool IsUndef = true;
5620 if (DestReg.isPhysical()) {
5621 DestReg0 = TRI.getSubReg(Reg: DestReg, Idx: SubIdx0);
5622 SubIdx0 = 0;
5623 DestReg1 = TRI.getSubReg(Reg: DestReg, Idx: SubIdx1);
5624 SubIdx1 = 0;
5625 IsUndef = false;
5626 }
5627 BuildMI(BB&: MBB, I: InsertBefore, MIMD: DebugLoc(), MCID)
5628 .addReg(RegNo: DestReg0, flags: RegState::Define | getUndefRegState(B: IsUndef), SubReg: SubIdx0)
5629 .addReg(RegNo: DestReg1, flags: RegState::Define | getUndefRegState(B: IsUndef), SubReg: SubIdx1)
5630 .addFrameIndex(Idx: FI)
5631 .addImm(Val: 0)
5632 .addMemOperand(MMO);
5633}
5634
5635void AArch64InstrInfo::loadRegFromStackSlot(
5636 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
5637 int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
5638 Register VReg, MachineInstr::MIFlag Flags) const {
5639 MachineFunction &MF = *MBB.getParent();
5640 MachineFrameInfo &MFI = MF.getFrameInfo();
5641 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
5642 MachineMemOperand *MMO =
5643 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOLoad,
5644 Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
5645
5646 unsigned Opc = 0;
5647 bool Offset = true;
5648 unsigned StackID = TargetStackID::Default;
5649 Register PNRReg = MCRegister::NoRegister;
5650 switch (TRI->getSpillSize(RC: *RC)) {
5651 case 1:
5652 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5653 Opc = AArch64::LDRBui;
5654 break;
5655 case 2: {
5656 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5657 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5658 Opc = AArch64::LDRHui;
5659 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
5660 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5661 "Unexpected register load without SVE load instructions");
5662 if (IsPNR)
5663 PNRReg = DestReg;
5664 Opc = AArch64::LDR_PXI;
5665 StackID = TargetStackID::ScalableVector;
5666 }
5667 break;
5668 }
5669 case 4:
5670 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5671 Opc = AArch64::LDRWui;
5672 if (DestReg.isVirtual())
5673 MF.getRegInfo().constrainRegClass(Reg: DestReg, RC: &AArch64::GPR32RegClass);
5674 else
5675 assert(DestReg != AArch64::WSP);
5676 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5677 Opc = AArch64::LDRSui;
5678 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5679 Opc = AArch64::LDR_PPXI;
5680 StackID = TargetStackID::ScalableVector;
5681 }
5682 break;
5683 case 8:
5684 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5685 Opc = AArch64::LDRXui;
5686 if (DestReg.isVirtual())
5687 MF.getRegInfo().constrainRegClass(Reg: DestReg, RC: &AArch64::GPR64RegClass);
5688 else
5689 assert(DestReg != AArch64::SP);
5690 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5691 Opc = AArch64::LDRDui;
5692 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5693 loadRegPairFromStackSlot(TRI: getRegisterInfo(), MBB, InsertBefore: MBBI,
5694 MCID: get(Opcode: AArch64::LDPWi), DestReg, SubIdx0: AArch64::sube32,
5695 SubIdx1: AArch64::subo32, FI, MMO);
5696 return;
5697 }
5698 break;
5699 case 16:
5700 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5701 Opc = AArch64::LDRQui;
5702 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5703 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5704 Opc = AArch64::LD1Twov1d;
5705 Offset = false;
5706 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5707 loadRegPairFromStackSlot(TRI: getRegisterInfo(), MBB, InsertBefore: MBBI,
5708 MCID: get(Opcode: AArch64::LDPXi), DestReg, SubIdx0: AArch64::sube64,
5709 SubIdx1: AArch64::subo64, FI, MMO);
5710 return;
5711 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5712 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5713 "Unexpected register load without SVE load instructions");
5714 Opc = AArch64::LDR_ZXI;
5715 StackID = TargetStackID::ScalableVector;
5716 } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5717 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5718 "Unexpected predicate load without SVE load instructions");
5719 Opc = AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO;
5720 StackID = TargetStackID::ScalableVector;
5721 }
5722 break;
5723 case 24:
5724 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5725 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5726 Opc = AArch64::LD1Threev1d;
5727 Offset = false;
5728 }
5729 break;
5730 case 32:
5731 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5732 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5733 Opc = AArch64::LD1Fourv1d;
5734 Offset = false;
5735 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5736 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5737 Opc = AArch64::LD1Twov2d;
5738 Offset = false;
5739 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5740 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5741 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5742 "Unexpected register load without SVE load instructions");
5743 Opc = AArch64::LDR_ZZXI;
5744 StackID = TargetStackID::ScalableVector;
5745 }
5746 break;
5747 case 48:
5748 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5749 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5750 Opc = AArch64::LD1Threev2d;
5751 Offset = false;
5752 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5753 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5754 "Unexpected register load without SVE load instructions");
5755 Opc = AArch64::LDR_ZZZXI;
5756 StackID = TargetStackID::ScalableVector;
5757 }
5758 break;
5759 case 64:
5760 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5761 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5762 Opc = AArch64::LD1Fourv2d;
5763 Offset = false;
5764 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5765 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5766 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5767 "Unexpected register load without SVE load instructions");
5768 Opc = AArch64::LDR_ZZZZXI;
5769 StackID = TargetStackID::ScalableVector;
5770 }
5771 break;
5772 }
5773
5774 assert(Opc && "Unknown register class");
5775 MFI.setStackID(ObjectIdx: FI, ID: StackID);
5776
5777 const MachineInstrBuilder MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DebugLoc(), MCID: get(Opcode: Opc))
5778 .addReg(RegNo: DestReg, flags: getDefRegState(B: true))
5779 .addFrameIndex(Idx: FI);
5780 if (Offset)
5781 MI.addImm(Val: 0);
5782 if (PNRReg.isValid() && !PNRReg.isVirtual())
5783 MI.addDef(RegNo: PNRReg, Flags: RegState::Implicit);
5784 MI.addMemOperand(MMO);
5785}
5786
5787bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
5788 const MachineInstr &UseMI,
5789 const TargetRegisterInfo *TRI) {
5790 return any_of(Range: instructionsWithoutDebug(It: std::next(x: DefMI.getIterator()),
5791 End: UseMI.getIterator()),
5792 P: [TRI](const MachineInstr &I) {
5793 return I.modifiesRegister(Reg: AArch64::NZCV, TRI) ||
5794 I.readsRegister(Reg: AArch64::NZCV, TRI);
5795 });
5796}
5797
5798void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5799 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5800  // The smallest scalable element supported by scaled SVE addressing modes
5801  // is a predicate, which is 2 scalable bytes in size. So the scalable byte
5802  // offset must always be a multiple of 2.
5803 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5804
5805  // VGSized offsets are divided by '2', because the VG register is the
5806  // number of 64-bit granules as opposed to 128-bit vector chunks,
5807 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5808 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5809 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
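  // Worked example (added for clarity, not from the original source): a
  // StackOffset of (fixed: 8, scalable: 16), i.e. one SVE data vector plus
  // 8 bytes, decomposes into ByteSized = 8 and VGSized = 8 (8 * VG bytes).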
5810 ByteSized = Offset.getFixed();
5811 VGSized = Offset.getScalable() / 2;
5812}
5813
5814/// Returns the offset in parts (bytes, predicate vectors and data vectors)
5815/// into which this frame offset can be decomposed when describing a frame
5816/// offset. For non-scalable offsets this is simply its byte size.
5817void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5818 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5819 int64_t &NumDataVectors) {
5820  // The smallest scalable element supported by scaled SVE addressing modes
5821  // is a predicate, which is 2 scalable bytes in size. So the scalable byte
5822  // offset must always be a multiple of 2.
5823 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5824
5825 NumBytes = Offset.getFixed();
5826 NumDataVectors = 0;
5827 NumPredicateVectors = Offset.getScalable() / 2;
5828 // This method is used to get the offsets to adjust the frame offset.
5829 // If the function requires ADDPL to be used and needs more than two ADDPL
5830 // instructions, part of the offset is folded into NumDataVectors so that it
5831 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
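  // Worked example (added for clarity, not from the original source): a
  // scalable offset of 144 bytes starts as NumPredicateVectors = 72; since
  // 72 % 8 == 0 it is folded into NumDataVectors = 9 and
  // NumPredicateVectors = 0, so a single ADDVL can be used.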
5832 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5833 NumPredicateVectors > 62) {
5834 NumDataVectors = NumPredicateVectors / 8;
5835 NumPredicateVectors -= NumDataVectors * 8;
5836 }
5837}
5838
5839// Convenience function to create a DWARF expression for
5840// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
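// Illustrative example (added, not from the original source): with
// NumBytes = 16 and NumVGScaledBytes = 8 the appended operations are
//   DW_OP_consts 16, DW_OP_plus,
//   DW_OP_consts 8, DW_OP_bregx <VG> 0, DW_OP_mul, DW_OP_plus
// and the comment stream receives " + 16 + 8 * VG".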
5841static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
5842 int NumVGScaledBytes, unsigned VG,
5843 llvm::raw_string_ostream &Comment) {
5844 uint8_t buffer[16];
5845
5846 if (NumBytes) {
5847 Expr.push_back(Elt: dwarf::DW_OP_consts);
5848 Expr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: NumBytes, p: buffer));
5849 Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_plus);
5850 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(x: NumBytes);
5851 }
5852
5853 if (NumVGScaledBytes) {
5854 Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_consts);
5855 Expr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: NumVGScaledBytes, p: buffer));
5856
5857 Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_bregx);
5858 Expr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: VG, p: buffer));
5859 Expr.push_back(Elt: 0);
5860
5861 Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_mul);
5862 Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_plus);
5863
5864 Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
5865 << std::abs(x: NumVGScaledBytes) << " * VG";
5866 }
5867}
5868
5869// Creates an MCCFIInstruction:
5870// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
5871static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
5872 unsigned Reg,
5873 const StackOffset &Offset) {
5874 int64_t NumBytes, NumVGScaledBytes;
5875 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, ByteSized&: NumBytes,
5876 VGSized&: NumVGScaledBytes);
5877 std::string CommentBuffer;
5878 llvm::raw_string_ostream Comment(CommentBuffer);
5879
5880 if (Reg == AArch64::SP)
5881 Comment << "sp";
5882 else if (Reg == AArch64::FP)
5883 Comment << "fp";
5884 else
5885 Comment << printReg(Reg, TRI: &TRI);
5886
5887 // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
5888 SmallString<64> Expr;
5889 unsigned DwarfReg = TRI.getDwarfRegNum(RegNum: Reg, isEH: true);
5890 Expr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
5891 Expr.push_back(Elt: 0);
5892 appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
5893 VG: TRI.getDwarfRegNum(RegNum: AArch64::VG, isEH: true), Comment);
5894
5895 // Wrap this into DW_CFA_def_cfa.
5896 SmallString<64> DefCfaExpr;
5897 DefCfaExpr.push_back(Elt: dwarf::DW_CFA_def_cfa_expression);
5898 uint8_t buffer[16];
5899 DefCfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: Expr.size(), p: buffer));
5900 DefCfaExpr.append(RHS: Expr.str());
5901 return MCCFIInstruction::createEscape(L: nullptr, Vals: DefCfaExpr.str(), Loc: SMLoc(),
5902 Comment: Comment.str());
5903}
5904
5905MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI,
5906 unsigned FrameReg, unsigned Reg,
5907 const StackOffset &Offset,
5908 bool LastAdjustmentWasScalable) {
5909 if (Offset.getScalable())
5910 return createDefCFAExpression(TRI, Reg, Offset);
5911
5912 if (FrameReg == Reg && !LastAdjustmentWasScalable)
5913 return MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: int(Offset.getFixed()));
5914
5915 unsigned DwarfReg = TRI.getDwarfRegNum(RegNum: Reg, isEH: true);
5916 return MCCFIInstruction::cfiDefCfa(L: nullptr, Register: DwarfReg, Offset: (int)Offset.getFixed());
5917}
5918
5919MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
5920 unsigned Reg,
5921 const StackOffset &OffsetFromDefCFA) {
5922 int64_t NumBytes, NumVGScaledBytes;
5923 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5924 Offset: OffsetFromDefCFA, ByteSized&: NumBytes, VGSized&: NumVGScaledBytes);
5925
5926 unsigned DwarfReg = TRI.getDwarfRegNum(RegNum: Reg, isEH: true);
5927
5928 // Non-scalable offsets can use DW_CFA_offset directly.
5929 if (!NumVGScaledBytes)
5930 return MCCFIInstruction::createOffset(L: nullptr, Register: DwarfReg, Offset: NumBytes);
5931
5932 std::string CommentBuffer;
5933 llvm::raw_string_ostream Comment(CommentBuffer);
5934 Comment << printReg(Reg, TRI: &TRI) << " @ cfa";
5935
5936 // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
5937 SmallString<64> OffsetExpr;
5938 appendVGScaledOffsetExpr(Expr&: OffsetExpr, NumBytes, NumVGScaledBytes,
5939 VG: TRI.getDwarfRegNum(RegNum: AArch64::VG, isEH: true), Comment);
5940
5941 // Wrap this into DW_CFA_expression
5942 SmallString<64> CfaExpr;
5943 CfaExpr.push_back(Elt: dwarf::DW_CFA_expression);
5944 uint8_t buffer[16];
5945 CfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: DwarfReg, p: buffer));
5946 CfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: OffsetExpr.size(), p: buffer));
5947 CfaExpr.append(RHS: OffsetExpr.str());
5948
5949 return MCCFIInstruction::createEscape(L: nullptr, Vals: CfaExpr.str(), Loc: SMLoc(),
5950 Comment: Comment.str());
5951}
5952
5953// Helper function to emit a frame offset adjustment from a given
5954// pointer (SrcReg), storing the result into DestReg. The caller must
5955// supply the opcode to use explicitly.
5956static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
5957 MachineBasicBlock::iterator MBBI,
5958 const DebugLoc &DL, unsigned DestReg,
5959 unsigned SrcReg, int64_t Offset, unsigned Opc,
5960 const TargetInstrInfo *TII,
5961 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
5962 bool *HasWinCFI, bool EmitCFAOffset,
5963 StackOffset CFAOffset, unsigned FrameReg) {
5964 int Sign = 1;
5965 unsigned MaxEncoding, ShiftSize;
5966 switch (Opc) {
5967 case AArch64::ADDXri:
5968 case AArch64::ADDSXri:
5969 case AArch64::SUBXri:
5970 case AArch64::SUBSXri:
5971 MaxEncoding = 0xfff;
5972 ShiftSize = 12;
5973 break;
5974 case AArch64::ADDVL_XXI:
5975 case AArch64::ADDPL_XXI:
5976 case AArch64::ADDSVL_XXI:
5977 case AArch64::ADDSPL_XXI:
5978 MaxEncoding = 31;
5979 ShiftSize = 0;
5980 if (Offset < 0) {
5981 MaxEncoding = 32;
5982 Sign = -1;
5983 Offset = -Offset;
5984 }
5985 break;
5986 default:
5987 llvm_unreachable("Unsupported opcode");
5988 }
5989
5990 // `Offset` can be in bytes or in "scalable bytes".
5991 int VScale = 1;
5992 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
5993 VScale = 16;
5994 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
5995 VScale = 2;
5996
5997 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
5998 // scratch register. If DestReg is a virtual register, use it as the
5999 // scratch register; otherwise, create a new virtual register (to be
6000 // replaced by the scavenger at the end of PEI). That case can be optimized
6001 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6002 // register can be loaded with offset%8 and the add/sub can use an extending
6003 // instruction with LSL#3.
6004 // Currently the function handles any offsets but generates a poor sequence
6005 // of code.
6006 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
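  // Illustrative example (added, not from the original source): with
  // Opc == ADDXri and Offset == 0x123456, and assuming DestReg is not XZR,
  // the loop below emits
  //   add dst, src, #0x123, lsl #12
  //   add dst, dst, #0x456
  // splitting the offset into shifted 12-bit chunks.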
6007
6008 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6009 Register TmpReg = DestReg;
6010 if (TmpReg == AArch64::XZR)
6011 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
6012 RegClass: &AArch64::GPR64RegClass);
6013 do {
6014 uint64_t ThisVal = std::min<uint64_t>(a: Offset, b: MaxEncodableValue);
6015 unsigned LocalShiftSize = 0;
6016 if (ThisVal > MaxEncoding) {
6017 ThisVal = ThisVal >> ShiftSize;
6018 LocalShiftSize = ShiftSize;
6019 }
6020 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6021 "Encoding cannot handle value that big");
6022
6023 Offset -= ThisVal << LocalShiftSize;
6024 if (Offset == 0)
6025 TmpReg = DestReg;
6026 auto MBI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc), DestReg: TmpReg)
6027 .addReg(RegNo: SrcReg)
6028 .addImm(Val: Sign * (int)ThisVal);
6029 if (ShiftSize)
6030 MBI = MBI.addImm(
6031 Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: LocalShiftSize));
6032 MBI = MBI.setMIFlag(Flag);
6033
6034 auto Change =
6035 VScale == 1
6036 ? StackOffset::getFixed(Fixed: ThisVal << LocalShiftSize)
6037 : StackOffset::getScalable(Scalable: VScale * (ThisVal << LocalShiftSize));
6038 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
6039 CFAOffset += Change;
6040 else
6041 CFAOffset -= Change;
6042 if (EmitCFAOffset && DestReg == TmpReg) {
6043 MachineFunction &MF = *MBB.getParent();
6044 const TargetSubtargetInfo &STI = MF.getSubtarget();
6045 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6046
6047 unsigned CFIIndex = MF.addFrameInst(
6048 Inst: createDefCFA(TRI, FrameReg, Reg: DestReg, Offset: CFAOffset, LastAdjustmentWasScalable: VScale != 1));
6049 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::CFI_INSTRUCTION))
6050 .addCFIIndex(CFIIndex)
6051 .setMIFlags(Flag);
6052 }
6053
6054 if (NeedsWinCFI) {
6055 int Imm = (int)(ThisVal << LocalShiftSize);
6056 if (VScale != 1 && DestReg == AArch64::SP) {
6057 if (HasWinCFI)
6058 *HasWinCFI = true;
6059 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::SEH_AllocZ))
6060 .addImm(Val: ThisVal)
6061 .setMIFlag(Flag);
6062 } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
6063 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6064 assert(VScale == 1 && "Expected non-scalable operation");
6065 if (HasWinCFI)
6066 *HasWinCFI = true;
6067 if (Imm == 0)
6068 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::SEH_SetFP)).setMIFlag(Flag);
6069 else
6070 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::SEH_AddFP))
6071 .addImm(Val: Imm)
6072 .setMIFlag(Flag);
6073 assert(Offset == 0 && "Expected remaining offset to be zero to "
6074 "emit a single SEH directive");
6075 } else if (DestReg == AArch64::SP) {
6076 assert(VScale == 1 && "Expected non-scalable operation");
6077 if (HasWinCFI)
6078 *HasWinCFI = true;
6079 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6080 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::SEH_StackAlloc))
6081 .addImm(Val: Imm)
6082 .setMIFlag(Flag);
6083 }
6084 }
6085
6086 SrcReg = TmpReg;
6087 } while (Offset);
6088}
6089
6090void llvm::emitFrameOffset(MachineBasicBlock &MBB,
6091 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
6092 unsigned DestReg, unsigned SrcReg,
6093 StackOffset Offset, const TargetInstrInfo *TII,
6094 MachineInstr::MIFlag Flag, bool SetNZCV,
6095 bool NeedsWinCFI, bool *HasWinCFI,
6096 bool EmitCFAOffset, StackOffset CFAOffset,
6097 unsigned FrameReg) {
6098  // If a function is marked as arm_locally_streaming, then the runtime value
6099  // of vscale in the prologue/epilogue is different from the runtime value of
6100  // vscale in the function's body. To avoid having to consider multiple vscales,
6101 // we can use `addsvl` to allocate any scalable stack-slots, which under
6102 // most circumstances will be only locals, not callee-save slots.
6103 const Function &F = MBB.getParent()->getFunction();
6104 bool UseSVL = F.hasFnAttribute(Kind: "aarch64_pstate_sm_body");
6105
6106 int64_t Bytes, NumPredicateVectors, NumDataVectors;
6107 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6108 Offset, NumBytes&: Bytes, NumPredicateVectors, NumDataVectors);
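  // Illustrative example (added, not from the original source): an Offset of
  // 32 fixed bytes plus two SVE data vectors decomposes into Bytes = 32,
  // NumDataVectors = 2 and NumPredicateVectors = 0, and is emitted below as
  // "add dst, src, #32" followed by "addvl dst, dst, #2" (or "addsvl" in a
  // locally-streaming function body).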
6109
6110 // First emit non-scalable frame offsets, or a simple 'mov'.
6111 if (Bytes || (!Offset && SrcReg != DestReg)) {
6112 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
6113 "SP increment/decrement not 8-byte aligned");
6114 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
6115 if (Bytes < 0) {
6116 Bytes = -Bytes;
6117 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
6118 }
6119 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Offset: Bytes, Opc, TII, Flag,
6120 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6121 FrameReg);
6122 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
6123 ? StackOffset::getFixed(Fixed: -Bytes)
6124 : StackOffset::getFixed(Fixed: Bytes);
6125 SrcReg = DestReg;
6126 FrameReg = DestReg;
6127 }
6128
6129 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
6130 "SetNZCV not supported with SVE vectors");
6131 assert(!(NeedsWinCFI && NumPredicateVectors) &&
6132 "WinCFI can't allocate fractions of an SVE data vector");
6133
6134 if (NumDataVectors) {
6135 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Offset: NumDataVectors,
6136 Opc: UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
6137 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6138 FrameReg);
6139 CFAOffset += StackOffset::getScalable(Scalable: -NumDataVectors * 16);
6140 SrcReg = DestReg;
6141 }
6142
6143 if (NumPredicateVectors) {
6144 assert(DestReg != AArch64::SP && "Unaligned access to SP");
6145 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Offset: NumPredicateVectors,
6146 Opc: UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
6147 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6148 FrameReg);
6149 }
6150}
6151
6152MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
6153 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
6154 MachineBasicBlock::iterator InsertPt, int FrameIndex,
6155 LiveIntervals *LIS, VirtRegMap *VRM) const {
6156 // This is a bit of a hack. Consider this instruction:
6157 //
6158 // %0 = COPY %sp; GPR64all:%0
6159 //
6160 // We explicitly chose GPR64all for the virtual register so such a copy might
6161 // be eliminated by RegisterCoalescer. However, that may not be possible, and
6162 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
6163 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
6164 //
6165 // To prevent that, we are going to constrain the %0 register class here.
6166 if (MI.isFullCopy()) {
6167 Register DstReg = MI.getOperand(i: 0).getReg();
6168 Register SrcReg = MI.getOperand(i: 1).getReg();
6169 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
6170 MF.getRegInfo().constrainRegClass(Reg: DstReg, RC: &AArch64::GPR64RegClass);
6171 return nullptr;
6172 }
6173 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
6174 MF.getRegInfo().constrainRegClass(Reg: SrcReg, RC: &AArch64::GPR64RegClass);
6175 return nullptr;
6176 }
6177    // Nothing can be folded with a copy from/to NZCV.
6178 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
6179 return nullptr;
6180 }
6181
6182 // Handle the case where a copy is being spilled or filled but the source
6183 // and destination register class don't match. For example:
6184 //
6185 // %0 = COPY %xzr; GPR64common:%0
6186 //
6187 // In this case we can still safely fold away the COPY and generate the
6188 // following spill code:
6189 //
6190 // STRXui %xzr, %stack.0
6191 //
6192 // This also eliminates spilled cross register class COPYs (e.g. between x and
6193 // d regs) of the same size. For example:
6194 //
6195 // %0 = COPY %1; GPR64:%0, FPR64:%1
6196 //
6197 // will be filled as
6198 //
6199 // LDRDui %0, fi<#0>
6200 //
6201 // instead of
6202 //
6203 // LDRXui %Temp, fi<#0>
6204 // %0 = FMOV %Temp
6205 //
6206 if (MI.isCopy() && Ops.size() == 1 &&
6207 // Make sure we're only folding the explicit COPY defs/uses.
6208 (Ops[0] == 0 || Ops[0] == 1)) {
6209 bool IsSpill = Ops[0] == 0;
6210 bool IsFill = !IsSpill;
6211 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
6212 const MachineRegisterInfo &MRI = MF.getRegInfo();
6213 MachineBasicBlock &MBB = *MI.getParent();
6214 const MachineOperand &DstMO = MI.getOperand(i: 0);
6215 const MachineOperand &SrcMO = MI.getOperand(i: 1);
6216 Register DstReg = DstMO.getReg();
6217 Register SrcReg = SrcMO.getReg();
6218 // This is slightly expensive to compute for physical regs since
6219 // getMinimalPhysRegClass is slow.
6220 auto getRegClass = [&](unsigned Reg) {
6221 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6222 : TRI.getMinimalPhysRegClass(Reg);
6223 };
6224
6225 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
6226 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6227 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6228 "Mismatched register size in non subreg COPY");
6229 if (IsSpill)
6230 storeRegToStackSlot(MBB, MBBI: InsertPt, SrcReg, isKill: SrcMO.isKill(), FI: FrameIndex,
6231 RC: getRegClass(SrcReg), TRI: &TRI, VReg: Register());
6232 else
6233 loadRegFromStackSlot(MBB, MBBI: InsertPt, DestReg: DstReg, FI: FrameIndex,
6234 RC: getRegClass(DstReg), TRI: &TRI, VReg: Register());
6235 return &*--InsertPt;
6236 }
6237
6238 // Handle cases like spilling def of:
6239 //
6240 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6241 //
6242 // where the physical register source can be widened and stored to the full
6243 // virtual reg destination stack slot, in this case producing:
6244 //
6245 // STRXui %xzr, %stack.0
6246 //
6247 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6248 TRI.getRegSizeInBits(RC: *getRegClass(DstReg)) == 64) {
6249 assert(SrcMO.getSubReg() == 0 &&
6250 "Unexpected subreg on physical register");
6251 storeRegToStackSlot(MBB, MBBI: InsertPt, SrcReg: AArch64::XZR, isKill: SrcMO.isKill(),
6252 FI: FrameIndex, RC: &AArch64::GPR64RegClass, TRI: &TRI,
6253 VReg: Register());
6254 return &*--InsertPt;
6255 }
6256
6257 // Handle cases like filling use of:
6258 //
6259 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6260 //
6261 // where we can load the full virtual reg source stack slot, into the subreg
6262 // destination, in this case producing:
6263 //
6264 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6265 //
6266 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6267 const TargetRegisterClass *FillRC;
6268 switch (DstMO.getSubReg()) {
6269 default:
6270 FillRC = nullptr;
6271 break;
6272 case AArch64::sub_32:
6273 FillRC = &AArch64::GPR32RegClass;
6274 break;
6275 case AArch64::ssub:
6276 FillRC = &AArch64::FPR32RegClass;
6277 break;
6278 case AArch64::dsub:
6279 FillRC = &AArch64::FPR64RegClass;
6280 break;
6281 }
6282
6283 if (FillRC) {
6284 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6285 TRI.getRegSizeInBits(*FillRC) &&
6286 "Mismatched regclass size on folded subreg COPY");
6287 loadRegFromStackSlot(MBB, MBBI: InsertPt, DestReg: DstReg, FI: FrameIndex, RC: FillRC, TRI: &TRI,
6288 VReg: Register());
6289 MachineInstr &LoadMI = *--InsertPt;
6290 MachineOperand &LoadDst = LoadMI.getOperand(i: 0);
6291 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6292 LoadDst.setSubReg(DstMO.getSubReg());
6293 LoadDst.setIsUndef();
6294 return &LoadMI;
6295 }
6296 }
6297 }
6298
6299 // Cannot fold.
6300 return nullptr;
6301}
6302
6303int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
6304 StackOffset &SOffset,
6305 bool *OutUseUnscaledOp,
6306 unsigned *OutUnscaledOp,
6307 int64_t *EmittableOffset) {
6308 // Set output values in case of early exit.
6309 if (EmittableOffset)
6310 *EmittableOffset = 0;
6311 if (OutUseUnscaledOp)
6312 *OutUseUnscaledOp = false;
6313 if (OutUnscaledOp)
6314 *OutUnscaledOp = 0;
6315
6316 // Exit early for structured vector spills/fills as they can't take an
6317 // immediate offset.
6318 switch (MI.getOpcode()) {
6319 default:
6320 break;
6321 case AArch64::LD1Rv1d:
6322 case AArch64::LD1Rv2s:
6323 case AArch64::LD1Rv2d:
6324 case AArch64::LD1Rv4h:
6325 case AArch64::LD1Rv4s:
6326 case AArch64::LD1Rv8b:
6327 case AArch64::LD1Rv8h:
6328 case AArch64::LD1Rv16b:
6329 case AArch64::LD1Twov2d:
6330 case AArch64::LD1Threev2d:
6331 case AArch64::LD1Fourv2d:
6332 case AArch64::LD1Twov1d:
6333 case AArch64::LD1Threev1d:
6334 case AArch64::LD1Fourv1d:
6335 case AArch64::ST1Twov2d:
6336 case AArch64::ST1Threev2d:
6337 case AArch64::ST1Fourv2d:
6338 case AArch64::ST1Twov1d:
6339 case AArch64::ST1Threev1d:
6340 case AArch64::ST1Fourv1d:
6341 case AArch64::ST1i8:
6342 case AArch64::ST1i16:
6343 case AArch64::ST1i32:
6344 case AArch64::ST1i64:
6345 case AArch64::IRG:
6346 case AArch64::IRGstack:
6347 case AArch64::STGloop:
6348 case AArch64::STZGloop:
6349 return AArch64FrameOffsetCannotUpdate;
6350 }
6351
6352 // Get the min/max offset and the scale.
6353 TypeSize ScaleValue(0U, false), Width(0U, false);
6354 int64_t MinOff, MaxOff;
6355 if (!AArch64InstrInfo::getMemOpInfo(Opcode: MI.getOpcode(), Scale&: ScaleValue, Width, MinOffset&: MinOff,
6356 MaxOffset&: MaxOff))
6357 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6358
6359 // Construct the complete offset.
6360 bool IsMulVL = ScaleValue.isScalable();
6361 unsigned Scale = ScaleValue.getKnownMinValue();
6362 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6363
6364 const MachineOperand &ImmOpnd =
6365 MI.getOperand(i: AArch64InstrInfo::getLoadStoreImmIdx(Opc: MI.getOpcode()));
6366 Offset += ImmOpnd.getImm() * Scale;
6367
6368  // If an unscaled op is available and the offset either doesn't match the
6369  // scale or is negative, we rewrite the instruction to use the unscaled
6370  // instruction instead.
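  // Illustrative example (added, not from the original source): a scaled
  // STRXui (scale 8) whose total byte offset works out to 4 cannot encode
  // that offset directly, but its unscaled counterpart STURXi (scale 1,
  // range [-256, 255]) can, so the instruction is rewritten to use it.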
6371 std::optional<unsigned> UnscaledOp =
6372 AArch64InstrInfo::getUnscaledLdSt(Opc: MI.getOpcode());
6373 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6374 if (useUnscaledOp &&
6375 !AArch64InstrInfo::getMemOpInfo(Opcode: *UnscaledOp, Scale&: ScaleValue, Width, MinOffset&: MinOff,
6376 MaxOffset&: MaxOff))
6377 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6378
6379 Scale = ScaleValue.getKnownMinValue();
6380 assert(IsMulVL == ScaleValue.isScalable() &&
6381 "Unscaled opcode has different value for scalable");
6382
6383 int64_t Remainder = Offset % Scale;
6384 assert(!(Remainder && useUnscaledOp) &&
6385 "Cannot have remainder when using unscaled op");
6386
6387 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6388 int64_t NewOffset = Offset / Scale;
6389 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6390 Offset = Remainder;
6391 else {
6392 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6393 Offset = Offset - (NewOffset * Scale);
6394 }
6395
6396 if (EmittableOffset)
6397 *EmittableOffset = NewOffset;
6398 if (OutUseUnscaledOp)
6399 *OutUseUnscaledOp = useUnscaledOp;
6400 if (OutUnscaledOp && UnscaledOp)
6401 *OutUnscaledOp = *UnscaledOp;
6402
6403 if (IsMulVL)
6404 SOffset = StackOffset::get(Fixed: SOffset.getFixed(), Scalable: Offset);
6405 else
6406 SOffset = StackOffset::get(Fixed: Offset, Scalable: SOffset.getScalable());
6407 return AArch64FrameOffsetCanUpdate |
6408 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
6409}
6410
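// A minimal sketch of how a frame-index elimination pass might drive
// rewriteAArch64FrameIndex() (illustrative only; FIOperandNum, ScratchReg and
// Off are placeholder names, not the actual caller's variables):
//
//   if (!rewriteAArch64FrameIndex(MI, FIOperandNum, AArch64::SP, Off, TII)) {
//     // The offset could not be folded completely; materialize the residual
//     // left in Off into a scratch register and use it as the new base.
//     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), ScratchReg,
//                     AArch64::SP, Off, TII);
//     MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false);
//   }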
6411bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
6412 unsigned FrameReg, StackOffset &Offset,
6413 const AArch64InstrInfo *TII) {
6414 unsigned Opcode = MI.getOpcode();
6415 unsigned ImmIdx = FrameRegIdx + 1;
6416
6417 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6418 Offset += StackOffset::getFixed(Fixed: MI.getOperand(i: ImmIdx).getImm());
6419 emitFrameOffset(MBB&: *MI.getParent(), MBBI: MI, DL: MI.getDebugLoc(),
6420 DestReg: MI.getOperand(i: 0).getReg(), SrcReg: FrameReg, Offset, TII,
6421 Flag: MachineInstr::NoFlags, SetNZCV: (Opcode == AArch64::ADDSXri));
6422 MI.eraseFromParent();
6423 Offset = StackOffset();
6424 return true;
6425 }
6426
6427 int64_t NewOffset;
6428 unsigned UnscaledOp;
6429 bool UseUnscaledOp;
6430 int Status = isAArch64FrameOffsetLegal(MI, SOffset&: Offset, OutUseUnscaledOp: &UseUnscaledOp,
6431 OutUnscaledOp: &UnscaledOp, EmittableOffset: &NewOffset);
6432 if (Status & AArch64FrameOffsetCanUpdate) {
6433 if (Status & AArch64FrameOffsetIsLegal)
6434 // Replace the FrameIndex with FrameReg.
6435 MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
6436 if (UseUnscaledOp)
6437 MI.setDesc(TII->get(Opcode: UnscaledOp));
6438
6439 MI.getOperand(i: ImmIdx).ChangeToImmediate(ImmVal: NewOffset);
6440 return !Offset;
6441 }
6442
6443 return false;
6444}
6445
6446void AArch64InstrInfo::insertNoop(MachineBasicBlock &MBB,
6447 MachineBasicBlock::iterator MI) const {
6448 DebugLoc DL;
6449 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: AArch64::HINT)).addImm(Val: 0);
6450}
6451
6452MCInst AArch64InstrInfo::getNop() const {
6453 return MCInstBuilder(AArch64::HINT).addImm(Val: 0);
6454}
6455
6456// AArch64 supports MachineCombiner.
6457bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6458
6459 // True when Opc sets the NZCV flags.
6460static bool isCombineInstrSettingFlag(unsigned Opc) {
6461 switch (Opc) {
6462 case AArch64::ADDSWrr:
6463 case AArch64::ADDSWri:
6464 case AArch64::ADDSXrr:
6465 case AArch64::ADDSXri:
6466 case AArch64::SUBSWrr:
6467 case AArch64::SUBSXrr:
6468 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6469 case AArch64::SUBSWri:
6470 case AArch64::SUBSXri:
6471 return true;
6472 default:
6473 break;
6474 }
6475 return false;
6476}
6477
6478// 32b Opcodes that can be combined with a MUL
6479static bool isCombineInstrCandidate32(unsigned Opc) {
6480 switch (Opc) {
6481 case AArch64::ADDWrr:
6482 case AArch64::ADDWri:
6483 case AArch64::SUBWrr:
6484 case AArch64::ADDSWrr:
6485 case AArch64::ADDSWri:
6486 case AArch64::SUBSWrr:
6487 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6488 case AArch64::SUBWri:
6489 case AArch64::SUBSWri:
6490 return true;
6491 default:
6492 break;
6493 }
6494 return false;
6495}
6496
6497// 64b Opcodes that can be combined with a MUL
6498static bool isCombineInstrCandidate64(unsigned Opc) {
6499 switch (Opc) {
6500 case AArch64::ADDXrr:
6501 case AArch64::ADDXri:
6502 case AArch64::SUBXrr:
6503 case AArch64::ADDSXrr:
6504 case AArch64::ADDSXri:
6505 case AArch64::SUBSXrr:
6506 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6507 case AArch64::SUBXri:
6508 case AArch64::SUBSXri:
6509 case AArch64::ADDv8i8:
6510 case AArch64::ADDv16i8:
6511 case AArch64::ADDv4i16:
6512 case AArch64::ADDv8i16:
6513 case AArch64::ADDv2i32:
6514 case AArch64::ADDv4i32:
6515 case AArch64::SUBv8i8:
6516 case AArch64::SUBv16i8:
6517 case AArch64::SUBv4i16:
6518 case AArch64::SUBv8i16:
6519 case AArch64::SUBv2i32:
6520 case AArch64::SUBv4i32:
6521 return true;
6522 default:
6523 break;
6524 }
6525 return false;
6526}
6527
6528// FP Opcodes that can be combined with a FMUL.
6529static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
6530 switch (Inst.getOpcode()) {
6531 default:
6532 break;
6533 case AArch64::FADDHrr:
6534 case AArch64::FADDSrr:
6535 case AArch64::FADDDrr:
6536 case AArch64::FADDv4f16:
6537 case AArch64::FADDv8f16:
6538 case AArch64::FADDv2f32:
6539 case AArch64::FADDv2f64:
6540 case AArch64::FADDv4f32:
6541 case AArch64::FSUBHrr:
6542 case AArch64::FSUBSrr:
6543 case AArch64::FSUBDrr:
6544 case AArch64::FSUBv4f16:
6545 case AArch64::FSUBv8f16:
6546 case AArch64::FSUBv2f32:
6547 case AArch64::FSUBv2f64:
6548 case AArch64::FSUBv4f32:
6549 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
6550 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
6551 // the target options or if FADD/FSUB has the contract fast-math flag.
6552 return Options.UnsafeFPMath ||
6553 Options.AllowFPOpFusion == FPOpFusion::Fast ||
6554 Inst.getFlag(Flag: MachineInstr::FmContract);
6556 }
6557 return false;
6558}
6559
6560// Opcodes that can be combined with a MUL
6561static bool isCombineInstrCandidate(unsigned Opc) {
6562 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
6563}
6564
6565//
6566 // Utility routine that checks if \p MO is defined by a
6567 // \p CombineOpc instruction in the basic block \p MBB.
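// Illustrative shape of the pair accepted when CheckZeroReg is set (a MUL is
// really a MADD with a zero addend):
//   %m:gpr32 = MADDWrrr %a, %b, $wzr   ; the "MUL"
//   %r:gpr32 = ADDWrr %m, %c           ; Root; %m has a single non-debug use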
6568static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
6569 unsigned CombineOpc, unsigned ZeroReg = 0,
6570 bool CheckZeroReg = false) {
6571 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6572 MachineInstr *MI = nullptr;
6573
6574 if (MO.isReg() && MO.getReg().isVirtual())
6575 MI = MRI.getUniqueVRegDef(Reg: MO.getReg());
6576 // And it needs to be in the trace (otherwise, it won't have a depth).
6577 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
6578 return false;
6579 // Must only be used by the user we combine with.
6580 if (!MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()))
6581 return false;
6582
6583 if (CheckZeroReg) {
6584 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
6585 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
6586 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
6587 // The third input reg must be zero.
6588 if (MI->getOperand(i: 3).getReg() != ZeroReg)
6589 return false;
6590 }
6591
6592 if (isCombineInstrSettingFlag(Opc: CombineOpc) &&
6593 MI->findRegisterDefOperandIdx(Reg: AArch64::NZCV, /*TRI=*/nullptr, isDead: true) == -1)
6594 return false;
6595
6596 return true;
6597}
6598
6599//
6600 // Is \p MO defined by an integer multiply and can it be combined?
6601static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
6602 unsigned MulOpc, unsigned ZeroReg) {
6603 return canCombine(MBB, MO, CombineOpc: MulOpc, ZeroReg, CheckZeroReg: true);
6604}
6605
6606//
6607 // Is \p MO defined by a floating-point multiply and can it be combined?
6608static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
6609 unsigned MulOpc) {
6610 return canCombine(MBB, MO, CombineOpc: MulOpc);
6611}
6612
6613// TODO: There are many more machine instruction opcodes to match:
6614// 1. Other data types (integer, vectors)
6615// 2. Other math / logic operations (xor, or)
6616// 3. Other forms of the same operation (intrinsics and other variants)
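// When this hook returns true the machine-combiner may reassociate a serial
// dependence chain into two shorter, independent chains, e.g. (illustrative):
//   t1 = FADDDrr a, b         t1 = FADDDrr a, b
//   t2 = FADDDrr t1, c   ==>  t2 = FADDDrr c, d
//   t3 = FADDDrr t2, d        t3 = FADDDrr t1, t2
// For the FP opcodes this is only sound with reassoc+nsz (or UnsafeFPMath),
// which is what the FP cases below check.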
6617bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
6618 bool Invert) const {
6619 if (Invert)
6620 return false;
6621 switch (Inst.getOpcode()) {
6622 // == Floating-point types ==
6623 // -- Floating-point instructions --
6624 case AArch64::FADDHrr:
6625 case AArch64::FADDSrr:
6626 case AArch64::FADDDrr:
6627 case AArch64::FMULHrr:
6628 case AArch64::FMULSrr:
6629 case AArch64::FMULDrr:
6630 case AArch64::FMULX16:
6631 case AArch64::FMULX32:
6632 case AArch64::FMULX64:
6633 // -- Advanced SIMD instructions --
6634 case AArch64::FADDv4f16:
6635 case AArch64::FADDv8f16:
6636 case AArch64::FADDv2f32:
6637 case AArch64::FADDv4f32:
6638 case AArch64::FADDv2f64:
6639 case AArch64::FMULv4f16:
6640 case AArch64::FMULv8f16:
6641 case AArch64::FMULv2f32:
6642 case AArch64::FMULv4f32:
6643 case AArch64::FMULv2f64:
6644 case AArch64::FMULXv4f16:
6645 case AArch64::FMULXv8f16:
6646 case AArch64::FMULXv2f32:
6647 case AArch64::FMULXv4f32:
6648 case AArch64::FMULXv2f64:
6649 // -- SVE instructions --
6650 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
6651 // in the SVE instruction set (though there are predicated ones).
6652 case AArch64::FADD_ZZZ_H:
6653 case AArch64::FADD_ZZZ_S:
6654 case AArch64::FADD_ZZZ_D:
6655 case AArch64::FMUL_ZZZ_H:
6656 case AArch64::FMUL_ZZZ_S:
6657 case AArch64::FMUL_ZZZ_D:
6658 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
6659 (Inst.getFlag(Flag: MachineInstr::MIFlag::FmReassoc) &&
6660 Inst.getFlag(Flag: MachineInstr::MIFlag::FmNsz));
6661
6662 // == Integer types ==
6663 // -- Base instructions --
6664 // Opcodes MULWrr and MULXrr don't exist because
6665 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6666 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6667 // The machine-combiner does not support three-source-operand machine
6668 // instructions, so we cannot reassociate MULs.
6669 case AArch64::ADDWrr:
6670 case AArch64::ADDXrr:
6671 case AArch64::ANDWrr:
6672 case AArch64::ANDXrr:
6673 case AArch64::ORRWrr:
6674 case AArch64::ORRXrr:
6675 case AArch64::EORWrr:
6676 case AArch64::EORXrr:
6677 case AArch64::EONWrr:
6678 case AArch64::EONXrr:
6679 // -- Advanced SIMD instructions --
6680 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6681 // in the Advanced SIMD instruction set.
6682 case AArch64::ADDv8i8:
6683 case AArch64::ADDv16i8:
6684 case AArch64::ADDv4i16:
6685 case AArch64::ADDv8i16:
6686 case AArch64::ADDv2i32:
6687 case AArch64::ADDv4i32:
6688 case AArch64::ADDv1i64:
6689 case AArch64::ADDv2i64:
6690 case AArch64::MULv8i8:
6691 case AArch64::MULv16i8:
6692 case AArch64::MULv4i16:
6693 case AArch64::MULv8i16:
6694 case AArch64::MULv2i32:
6695 case AArch64::MULv4i32:
6696 case AArch64::ANDv8i8:
6697 case AArch64::ANDv16i8:
6698 case AArch64::ORRv8i8:
6699 case AArch64::ORRv16i8:
6700 case AArch64::EORv8i8:
6701 case AArch64::EORv16i8:
6702 // -- SVE instructions --
6703 case AArch64::ADD_ZZZ_B:
6704 case AArch64::ADD_ZZZ_H:
6705 case AArch64::ADD_ZZZ_S:
6706 case AArch64::ADD_ZZZ_D:
6707 case AArch64::MUL_ZZZ_B:
6708 case AArch64::MUL_ZZZ_H:
6709 case AArch64::MUL_ZZZ_S:
6710 case AArch64::MUL_ZZZ_D:
6711 case AArch64::AND_ZZZ:
6712 case AArch64::ORR_ZZZ:
6713 case AArch64::EOR_ZZZ:
6714 return true;
6715
6716 default:
6717 return false;
6718 }
6719}
6720
6721/// Find instructions that can be turned into madd.
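/// For example (illustrative):
///   %m = MADDWrrr %a, %b, $wzr   ; MUL %a, %b
///   %r = ADDWrr %m, %c
/// is recorded as MULADDW_OP1 so that genAlternativeCodeSequence() can later
/// rewrite the pair into a single %r = MADDWrrr %a, %b, %c.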
6722static bool getMaddPatterns(MachineInstr &Root,
6723 SmallVectorImpl<unsigned> &Patterns) {
6724 unsigned Opc = Root.getOpcode();
6725 MachineBasicBlock &MBB = *Root.getParent();
6726 bool Found = false;
6727
6728 if (!isCombineInstrCandidate(Opc))
6729 return false;
6730 if (isCombineInstrSettingFlag(Opc)) {
6731 int Cmp_NZCV =
6732 Root.findRegisterDefOperandIdx(Reg: AArch64::NZCV, /*TRI=*/nullptr, isDead: true);
6733 // Bail out when NZCV is live.
6734 if (Cmp_NZCV == -1)
6735 return false;
6736 unsigned NewOpc = convertToNonFlagSettingOpc(MI: Root);
6737 // Bail out when the opcode cannot be converted.
6738 // CHECKME: do we miss any cases for opcode conversion?
6739 if (NewOpc == Opc)
6740 return false;
6741 Opc = NewOpc;
6742 }
6743
6744 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6745 unsigned Pattern) {
6746 if (canCombineWithMUL(MBB, MO&: Root.getOperand(i: Operand), MulOpc: Opcode, ZeroReg)) {
6747 Patterns.push_back(Elt: Pattern);
6748 Found = true;
6749 }
6750 };
6751
6752 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6753 if (canCombine(MBB, MO&: Root.getOperand(i: Operand), CombineOpc: Opcode)) {
6754 Patterns.push_back(Elt: Pattern);
6755 Found = true;
6756 }
6757 };
6758
6759 typedef AArch64MachineCombinerPattern MCP;
6760
6761 switch (Opc) {
6762 default:
6763 break;
6764 case AArch64::ADDWrr:
6765 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6766 "ADDWrr does not have register operands");
6767 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6768 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6769 break;
6770 case AArch64::ADDXrr:
6771 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6772 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6773 break;
6774 case AArch64::SUBWrr:
6775 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6776 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6777 break;
6778 case AArch64::SUBXrr:
6779 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6780 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6781 break;
6782 case AArch64::ADDWri:
6783 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6784 break;
6785 case AArch64::ADDXri:
6786 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6787 break;
6788 case AArch64::SUBWri:
6789 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6790 break;
6791 case AArch64::SUBXri:
6792 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6793 break;
6794 case AArch64::ADDv8i8:
6795 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6796 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6797 break;
6798 case AArch64::ADDv16i8:
6799 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6800 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6801 break;
6802 case AArch64::ADDv4i16:
6803 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6804 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6805 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6806 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6807 break;
6808 case AArch64::ADDv8i16:
6809 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6810 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6811 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6812 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6813 break;
6814 case AArch64::ADDv2i32:
6815 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6816 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6817 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6818 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6819 break;
6820 case AArch64::ADDv4i32:
6821 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
6822 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
6823 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
6824 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
6825 break;
6826 case AArch64::SUBv8i8:
6827 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
6828 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
6829 break;
6830 case AArch64::SUBv16i8:
6831 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
6832 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
6833 break;
6834 case AArch64::SUBv4i16:
6835 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
6836 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
6837 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
6838 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
6839 break;
6840 case AArch64::SUBv8i16:
6841 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
6842 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
6843 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
6844 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
6845 break;
6846 case AArch64::SUBv2i32:
6847 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
6848 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
6849 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
6850 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
6851 break;
6852 case AArch64::SUBv4i32:
6853 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
6854 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
6855 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
6856 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
6857 break;
6858 }
6859 return Found;
6860}
6861
6862bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
6863 switch (Opcode) {
6864 default:
6865 break;
6866 case AArch64::UABALB_ZZZ_D:
6867 case AArch64::UABALB_ZZZ_H:
6868 case AArch64::UABALB_ZZZ_S:
6869 case AArch64::UABALT_ZZZ_D:
6870 case AArch64::UABALT_ZZZ_H:
6871 case AArch64::UABALT_ZZZ_S:
6872 case AArch64::SABALB_ZZZ_D:
6873 case AArch64::SABALB_ZZZ_S:
6874 case AArch64::SABALB_ZZZ_H:
6875 case AArch64::SABALT_ZZZ_D:
6876 case AArch64::SABALT_ZZZ_S:
6877 case AArch64::SABALT_ZZZ_H:
6878 case AArch64::UABALv16i8_v8i16:
6879 case AArch64::UABALv2i32_v2i64:
6880 case AArch64::UABALv4i16_v4i32:
6881 case AArch64::UABALv4i32_v2i64:
6882 case AArch64::UABALv8i16_v4i32:
6883 case AArch64::UABALv8i8_v8i16:
6884 case AArch64::UABAv16i8:
6885 case AArch64::UABAv2i32:
6886 case AArch64::UABAv4i16:
6887 case AArch64::UABAv4i32:
6888 case AArch64::UABAv8i16:
6889 case AArch64::UABAv8i8:
6890 case AArch64::SABALv16i8_v8i16:
6891 case AArch64::SABALv2i32_v2i64:
6892 case AArch64::SABALv4i16_v4i32:
6893 case AArch64::SABALv4i32_v2i64:
6894 case AArch64::SABALv8i16_v4i32:
6895 case AArch64::SABALv8i8_v8i16:
6896 case AArch64::SABAv16i8:
6897 case AArch64::SABAv2i32:
6898 case AArch64::SABAv4i16:
6899 case AArch64::SABAv4i32:
6900 case AArch64::SABAv8i16:
6901 case AArch64::SABAv8i8:
6902 return true;
6903 }
6904
6905 return false;
6906}
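// Illustrative use of isAccumulationOpcode() above together with
// getAccumulationStartOpcode() and getReduceOpcodeForAccumulator(): the
// generic machine-combiner can split an accumulation chain such as
//   acc1 = UABALv8i8_v8i16 acc0, a0, b0
//   acc2 = UABALv8i8_v8i16 acc1, a1, b1
// into independent chains that start with UABDLv8i8_v8i16 and are summed at
// the end with ADDv8i16 (a sketch of the intent, not the exact MIR produced).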
6907
6908unsigned AArch64InstrInfo::getAccumulationStartOpcode(
6909 unsigned AccumulationOpcode) const {
6910 switch (AccumulationOpcode) {
6911 default:
6912 llvm_unreachable("Unsupported accumulation Opcode!");
6913 case AArch64::UABALB_ZZZ_D:
6914 return AArch64::UABDLB_ZZZ_D;
6915 case AArch64::UABALB_ZZZ_H:
6916 return AArch64::UABDLB_ZZZ_H;
6917 case AArch64::UABALB_ZZZ_S:
6918 return AArch64::UABDLB_ZZZ_S;
6919 case AArch64::UABALT_ZZZ_D:
6920 return AArch64::UABDLT_ZZZ_D;
6921 case AArch64::UABALT_ZZZ_H:
6922 return AArch64::UABDLT_ZZZ_H;
6923 case AArch64::UABALT_ZZZ_S:
6924 return AArch64::UABDLT_ZZZ_S;
6925 case AArch64::UABALv16i8_v8i16:
6926 return AArch64::UABDLv16i8_v8i16;
6927 case AArch64::UABALv2i32_v2i64:
6928 return AArch64::UABDLv2i32_v2i64;
6929 case AArch64::UABALv4i16_v4i32:
6930 return AArch64::UABDLv4i16_v4i32;
6931 case AArch64::UABALv4i32_v2i64:
6932 return AArch64::UABDLv4i32_v2i64;
6933 case AArch64::UABALv8i16_v4i32:
6934 return AArch64::UABDLv8i16_v4i32;
6935 case AArch64::UABALv8i8_v8i16:
6936 return AArch64::UABDLv8i8_v8i16;
6937 case AArch64::UABAv16i8:
6938 return AArch64::UABDv16i8;
6939 case AArch64::UABAv2i32:
6940 return AArch64::UABDv2i32;
6941 case AArch64::UABAv4i16:
6942 return AArch64::UABDv4i16;
6943 case AArch64::UABAv4i32:
6944 return AArch64::UABDv4i32;
6945 case AArch64::UABAv8i16:
6946 return AArch64::UABDv8i16;
6947 case AArch64::UABAv8i8:
6948 return AArch64::UABDv8i8;
6949 case AArch64::SABALB_ZZZ_D:
6950 return AArch64::SABDLB_ZZZ_D;
6951 case AArch64::SABALB_ZZZ_S:
6952 return AArch64::SABDLB_ZZZ_S;
6953 case AArch64::SABALB_ZZZ_H:
6954 return AArch64::SABDLB_ZZZ_H;
6955 case AArch64::SABALT_ZZZ_D:
6956 return AArch64::SABDLT_ZZZ_D;
6957 case AArch64::SABALT_ZZZ_S:
6958 return AArch64::SABDLT_ZZZ_S;
6959 case AArch64::SABALT_ZZZ_H:
6960 return AArch64::SABDLT_ZZZ_H;
6961 case AArch64::SABALv16i8_v8i16:
6962 return AArch64::SABDLv16i8_v8i16;
6963 case AArch64::SABALv2i32_v2i64:
6964 return AArch64::SABDLv2i32_v2i64;
6965 case AArch64::SABALv4i16_v4i32:
6966 return AArch64::SABDLv4i16_v4i32;
6967 case AArch64::SABALv4i32_v2i64:
6968 return AArch64::SABDLv4i32_v2i64;
6969 case AArch64::SABALv8i16_v4i32:
6970 return AArch64::SABDLv8i16_v4i32;
6971 case AArch64::SABALv8i8_v8i16:
6972 return AArch64::SABDLv8i8_v8i16;
6973 case AArch64::SABAv16i8:
6974 return AArch64::SABDv16i8;
6975 case AArch64::SABAv2i32:
6976 return AArch64::SABDv2i32;
6977 case AArch64::SABAv4i16:
6978 return AArch64::SABDv4i16;
6979 case AArch64::SABAv4i32:
6980 return AArch64::SABDv4i32;
6981 case AArch64::SABAv8i16:
6982 return AArch64::SABDv8i16;
6983 case AArch64::SABAv8i8:
6984 return AArch64::SABDv8i8;
6985 }
6986}
6987
6988/// Floating-Point Support
6989
6990 /// Find instructions that can be turned into a fused multiply-add/subtract.
6991static bool getFMAPatterns(MachineInstr &Root,
6992 SmallVectorImpl<unsigned> &Patterns) {
6993
6994 if (!isCombineInstrCandidateFP(Inst: Root))
6995 return false;
6996
6997 MachineBasicBlock &MBB = *Root.getParent();
6998 bool Found = false;
6999
7000 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7001 if (canCombineWithFMUL(MBB, MO&: Root.getOperand(i: Operand), MulOpc: Opcode)) {
7002 Patterns.push_back(Elt: Pattern);
7003 return true;
7004 }
7005 return false;
7006 };
7007
7008 typedef AArch64MachineCombinerPattern MCP;
7009
7010 switch (Root.getOpcode()) {
7011 default:
7012 assert(false && "Unsupported FP instruction in combiner\n");
7013 break;
7014 case AArch64::FADDHrr:
7015 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7016 "FADDHrr does not have register operands");
7017
7018 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
7019 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
7020 break;
7021 case AArch64::FADDSrr:
7022 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7023 "FADDSrr does not have register operands");
7024
7025 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
7026 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
7027
7028 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
7029 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
7030 break;
7031 case AArch64::FADDDrr:
7032 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
7033 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
7034
7035 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
7036 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
7037 break;
7038 case AArch64::FADDv4f16:
7039 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
7040 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
7041
7042 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
7043 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
7044 break;
7045 case AArch64::FADDv8f16:
7046 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
7047 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
7048
7049 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
7050 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
7051 break;
7052 case AArch64::FADDv2f32:
7053 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
7054 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
7055
7056 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
7057 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
7058 break;
7059 case AArch64::FADDv2f64:
7060 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
7061 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
7062
7063 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
7064 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
7065 break;
7066 case AArch64::FADDv4f32:
7067 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
7068 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
7069
7070 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
7071 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
7072 break;
7073 case AArch64::FSUBHrr:
7074 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
7075 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
7076 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
7077 break;
7078 case AArch64::FSUBSrr:
7079 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
7080
7081 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
7082 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
7083
7084 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
7085 break;
7086 case AArch64::FSUBDrr:
7087 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
7088
7089 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
7090 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
7091
7092 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
7093 break;
7094 case AArch64::FSUBv4f16:
7095 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
7096 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
7097
7098 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
7099 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
7100 break;
7101 case AArch64::FSUBv8f16:
7102 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
7103 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
7104
7105 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
7106 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
7107 break;
7108 case AArch64::FSUBv2f32:
7109 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
7110 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
7111
7112 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
7113 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
7114 break;
7115 case AArch64::FSUBv2f64:
7116 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
7117 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
7118
7119 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
7120 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
7121 break;
7122 case AArch64::FSUBv4f32:
7123 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
7124 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
7125
7126 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
7127 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
7128 break;
7129 }
7130 return Found;
7131}
7132
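/// Find FMULs fed by a DUP of a vector lane; these can later be rewritten
/// into the indexed FMUL form (see genIndexedMultiply), e.g. (illustrative):
///   %d = DUPv2i32lane %v, 1
///   %r = FMULv2f32 %x, %d    ==>   %r = FMULv2i32_indexed %x, %v, 1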
7133static bool getFMULPatterns(MachineInstr &Root,
7134 SmallVectorImpl<unsigned> &Patterns) {
7135 MachineBasicBlock &MBB = *Root.getParent();
7136 bool Found = false;
7137
7138 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
7139 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7140 MachineOperand &MO = Root.getOperand(i: Operand);
7141 MachineInstr *MI = nullptr;
7142 if (MO.isReg() && MO.getReg().isVirtual())
7143 MI = MRI.getUniqueVRegDef(Reg: MO.getReg());
7144 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
7145 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
7146 MI->getOperand(i: 1).getReg().isVirtual())
7147 MI = MRI.getUniqueVRegDef(Reg: MI->getOperand(i: 1).getReg());
7148 if (MI && MI->getOpcode() == Opcode) {
7149 Patterns.push_back(Elt: Pattern);
7150 return true;
7151 }
7152 return false;
7153 };
7154
7155 typedef AArch64MachineCombinerPattern MCP;
7156
7157 switch (Root.getOpcode()) {
7158 default:
7159 return false;
7160 case AArch64::FMULv2f32:
7161 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
7162 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
7163 break;
7164 case AArch64::FMULv2f64:
7165 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
7166 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
7167 break;
7168 case AArch64::FMULv4f16:
7169 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
7170 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
7171 break;
7172 case AArch64::FMULv4f32:
7173 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
7174 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
7175 break;
7176 case AArch64::FMULv8f16:
7177 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
7178 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
7179 break;
7180 }
7181
7182 return Found;
7183}
7184
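/// Find an FNEG of an FMADD result that can be folded into a single FNMADD,
/// e.g. (illustrative; both instructions must carry the contract and nsz
/// fast-math flags):
///   %m = FMADDDrrr %a, %b, %c
///   %r = FNEGDr %m            ==>   %r = FNMADDDrrr %a, %b, %c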
7185static bool getFNEGPatterns(MachineInstr &Root,
7186 SmallVectorImpl<unsigned> &Patterns) {
7187 unsigned Opc = Root.getOpcode();
7188 MachineBasicBlock &MBB = *Root.getParent();
7189 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7190
7191 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
7192 MachineOperand &MO = Root.getOperand(i: 1);
7193 MachineInstr *MI = MRI.getUniqueVRegDef(Reg: MO.getReg());
7194 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
7195 MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()) &&
7196 Root.getFlag(Flag: MachineInstr::MIFlag::FmContract) &&
7197 Root.getFlag(Flag: MachineInstr::MIFlag::FmNsz) &&
7198 MI->getFlag(Flag: MachineInstr::MIFlag::FmContract) &&
7199 MI->getFlag(Flag: MachineInstr::MIFlag::FmNsz)) {
7200 Patterns.push_back(Elt: Pattern);
7201 return true;
7202 }
7203 return false;
7204 };
7205
7206 switch (Opc) {
7207 default:
7208 break;
7209 case AArch64::FNEGDr:
7210 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
7211 case AArch64::FNEGSr:
7212 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
7213 }
7214
7215 return false;
7216}
7217
7218/// Return true when a code sequence can improve throughput. It
7219/// should be called only for instructions in loops.
7220/// \param Pattern - combiner pattern
7221bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
7222 switch (Pattern) {
7223 default:
7224 break;
7225 case AArch64MachineCombinerPattern::FMULADDH_OP1:
7226 case AArch64MachineCombinerPattern::FMULADDH_OP2:
7227 case AArch64MachineCombinerPattern::FMULSUBH_OP1:
7228 case AArch64MachineCombinerPattern::FMULSUBH_OP2:
7229 case AArch64MachineCombinerPattern::FMULADDS_OP1:
7230 case AArch64MachineCombinerPattern::FMULADDS_OP2:
7231 case AArch64MachineCombinerPattern::FMULSUBS_OP1:
7232 case AArch64MachineCombinerPattern::FMULSUBS_OP2:
7233 case AArch64MachineCombinerPattern::FMULADDD_OP1:
7234 case AArch64MachineCombinerPattern::FMULADDD_OP2:
7235 case AArch64MachineCombinerPattern::FMULSUBD_OP1:
7236 case AArch64MachineCombinerPattern::FMULSUBD_OP2:
7237 case AArch64MachineCombinerPattern::FNMULSUBH_OP1:
7238 case AArch64MachineCombinerPattern::FNMULSUBS_OP1:
7239 case AArch64MachineCombinerPattern::FNMULSUBD_OP1:
7240 case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1:
7241 case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2:
7242 case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1:
7243 case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2:
7244 case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1:
7245 case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2:
7246 case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1:
7247 case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2:
7248 case AArch64MachineCombinerPattern::FMLAv4f16_OP2:
7249 case AArch64MachineCombinerPattern::FMLAv4f16_OP1:
7250 case AArch64MachineCombinerPattern::FMLAv8f16_OP1:
7251 case AArch64MachineCombinerPattern::FMLAv8f16_OP2:
7252 case AArch64MachineCombinerPattern::FMLAv2f32_OP2:
7253 case AArch64MachineCombinerPattern::FMLAv2f32_OP1:
7254 case AArch64MachineCombinerPattern::FMLAv2f64_OP1:
7255 case AArch64MachineCombinerPattern::FMLAv2f64_OP2:
7256 case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1:
7257 case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2:
7258 case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1:
7259 case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2:
7260 case AArch64MachineCombinerPattern::FMLAv4f32_OP1:
7261 case AArch64MachineCombinerPattern::FMLAv4f32_OP2:
7262 case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1:
7263 case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2:
7264 case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1:
7265 case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2:
7266 case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1:
7267 case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2:
7268 case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2:
7269 case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2:
7270 case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2:
7271 case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2:
7272 case AArch64MachineCombinerPattern::FMLSv4f16_OP1:
7273 case AArch64MachineCombinerPattern::FMLSv4f16_OP2:
7274 case AArch64MachineCombinerPattern::FMLSv8f16_OP1:
7275 case AArch64MachineCombinerPattern::FMLSv8f16_OP2:
7276 case AArch64MachineCombinerPattern::FMLSv2f32_OP2:
7277 case AArch64MachineCombinerPattern::FMLSv2f64_OP2:
7278 case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2:
7279 case AArch64MachineCombinerPattern::FMLSv4f32_OP2:
7280 case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1:
7281 case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2:
7282 case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1:
7283 case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2:
7284 case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1:
7285 case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2:
7286 case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1:
7287 case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2:
7288 case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1:
7289 case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2:
7290 case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
7291 case AArch64MachineCombinerPattern::MULADDv8i8_OP2:
7292 case AArch64MachineCombinerPattern::MULADDv16i8_OP1:
7293 case AArch64MachineCombinerPattern::MULADDv16i8_OP2:
7294 case AArch64MachineCombinerPattern::MULADDv4i16_OP1:
7295 case AArch64MachineCombinerPattern::MULADDv4i16_OP2:
7296 case AArch64MachineCombinerPattern::MULADDv8i16_OP1:
7297 case AArch64MachineCombinerPattern::MULADDv8i16_OP2:
7298 case AArch64MachineCombinerPattern::MULADDv2i32_OP1:
7299 case AArch64MachineCombinerPattern::MULADDv2i32_OP2:
7300 case AArch64MachineCombinerPattern::MULADDv4i32_OP1:
7301 case AArch64MachineCombinerPattern::MULADDv4i32_OP2:
7302 case AArch64MachineCombinerPattern::MULSUBv8i8_OP1:
7303 case AArch64MachineCombinerPattern::MULSUBv8i8_OP2:
7304 case AArch64MachineCombinerPattern::MULSUBv16i8_OP1:
7305 case AArch64MachineCombinerPattern::MULSUBv16i8_OP2:
7306 case AArch64MachineCombinerPattern::MULSUBv4i16_OP1:
7307 case AArch64MachineCombinerPattern::MULSUBv4i16_OP2:
7308 case AArch64MachineCombinerPattern::MULSUBv8i16_OP1:
7309 case AArch64MachineCombinerPattern::MULSUBv8i16_OP2:
7310 case AArch64MachineCombinerPattern::MULSUBv2i32_OP1:
7311 case AArch64MachineCombinerPattern::MULSUBv2i32_OP2:
7312 case AArch64MachineCombinerPattern::MULSUBv4i32_OP1:
7313 case AArch64MachineCombinerPattern::MULSUBv4i32_OP2:
7314 case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1:
7315 case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2:
7316 case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1:
7317 case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2:
7318 case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1:
7319 case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2:
7320 case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1:
7321 case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2:
7322 case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
7323 case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
7324 case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
7325 case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
7326 case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
7327 case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
7328 case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
7329 case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
7330 return true;
7331 } // end switch (Pattern)
7332 return false;
7333}
7334
7335/// Find other MI combine patterns.
7336static bool getMiscPatterns(MachineInstr &Root,
7337 SmallVectorImpl<unsigned> &Patterns) {
7338 // A - (B + C) ==> (A - B) - C or (A - C) - B
7339 unsigned Opc = Root.getOpcode();
7340 MachineBasicBlock &MBB = *Root.getParent();
7341
7342 switch (Opc) {
7343 case AArch64::SUBWrr:
7344 case AArch64::SUBSWrr:
7345 case AArch64::SUBXrr:
7346 case AArch64::SUBSXrr:
7347 // Found candidate root.
7348 break;
7349 default:
7350 return false;
7351 }
7352
7353 if (isCombineInstrSettingFlag(Opc) &&
7354 Root.findRegisterDefOperandIdx(Reg: AArch64::NZCV, /*TRI=*/nullptr, isDead: true) ==
7355 -1)
7356 return false;
7357
7358 if (canCombine(MBB, MO&: Root.getOperand(i: 2), CombineOpc: AArch64::ADDWrr) ||
7359 canCombine(MBB, MO&: Root.getOperand(i: 2), CombineOpc: AArch64::ADDSWrr) ||
7360 canCombine(MBB, MO&: Root.getOperand(i: 2), CombineOpc: AArch64::ADDXrr) ||
7361 canCombine(MBB, MO&: Root.getOperand(i: 2), CombineOpc: AArch64::ADDSXrr)) {
7362 Patterns.push_back(Elt: AArch64MachineCombinerPattern::SUBADD_OP1);
7363 Patterns.push_back(Elt: AArch64MachineCombinerPattern::SUBADD_OP2);
7364 return true;
7365 }
7366
7367 return false;
7368}
7369
7370CombinerObjective
7371AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
7372 switch (Pattern) {
7373 case AArch64MachineCombinerPattern::SUBADD_OP1:
7374 case AArch64MachineCombinerPattern::SUBADD_OP2:
7375 return CombinerObjective::MustReduceDepth;
7376 default:
7377 return TargetInstrInfo::getCombinerObjective(Pattern);
7378 }
7379}
7380
7381/// Return true when there is potentially a faster code sequence for an
7382 /// instruction chain ending in \p Root. All potential patterns are listed in
7383 /// the \p Patterns vector. Patterns should be sorted in priority order since
7384 /// the pattern evaluator stops checking as soon as it finds a faster sequence.
7385
7386bool AArch64InstrInfo::getMachineCombinerPatterns(
7387 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
7388 bool DoRegPressureReduce) const {
7389 // Integer patterns
7390 if (getMaddPatterns(Root, Patterns))
7391 return true;
7392 // Floating point patterns
7393 if (getFMULPatterns(Root, Patterns))
7394 return true;
7395 if (getFMAPatterns(Root, Patterns))
7396 return true;
7397 if (getFNEGPatterns(Root, Patterns))
7398 return true;
7399
7400 // Other patterns
7401 if (getMiscPatterns(Root, Patterns))
7402 return true;
7403
7404 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
7405 DoRegPressureReduce);
7406}
7407
7408enum class FMAInstKind { Default, Indexed, Accumulator };
7409/// genFusedMultiply - Generate fused multiply instructions.
7410/// This function supports both integer and floating point instructions.
7411/// A typical example:
7412/// F|MUL I=A,B,0
7413/// F|ADD R,I,C
7414/// ==> F|MADD R,A,B,C
7415/// \param MF Containing MachineFunction
7416/// \param MRI Register information
7417/// \param TII Target information
7418/// \param Root is the F|ADD instruction
7419/// \param [out] InsInstrs is a vector of machine instructions and will
7420/// contain the generated madd instruction
7421/// \param IdxMulOpd is index of operand in Root that is the result of
7422/// the F|MUL. In the example above IdxMulOpd is 1.
7423 /// \param MaddOpc the opcode of the f|madd instruction
7424 /// \param RC Register class of operands
7425 /// \param kind the kind of FMA instruction (addressing mode) to be generated
7426/// \param ReplacedAddend is the result register from the instruction
7427/// replacing the non-combined operand, if any.
7428static MachineInstr *
7429genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
7430 const TargetInstrInfo *TII, MachineInstr &Root,
7431 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
7432 unsigned MaddOpc, const TargetRegisterClass *RC,
7433 FMAInstKind kind = FMAInstKind::Default,
7434 const Register *ReplacedAddend = nullptr) {
7435 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7436
7437 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
7438 MachineInstr *MUL = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: IdxMulOpd).getReg());
7439 Register ResultReg = Root.getOperand(i: 0).getReg();
7440 Register SrcReg0 = MUL->getOperand(i: 1).getReg();
7441 bool Src0IsKill = MUL->getOperand(i: 1).isKill();
7442 Register SrcReg1 = MUL->getOperand(i: 2).getReg();
7443 bool Src1IsKill = MUL->getOperand(i: 2).isKill();
7444
7445 Register SrcReg2;
7446 bool Src2IsKill;
7447 if (ReplacedAddend) {
7448 // If we just generated a new addend, we must be its only use.
7449 SrcReg2 = *ReplacedAddend;
7450 Src2IsKill = true;
7451 } else {
7452 SrcReg2 = Root.getOperand(i: IdxOtherOpd).getReg();
7453 Src2IsKill = Root.getOperand(i: IdxOtherOpd).isKill();
7454 }
7455
7456 if (ResultReg.isVirtual())
7457 MRI.constrainRegClass(Reg: ResultReg, RC);
7458 if (SrcReg0.isVirtual())
7459 MRI.constrainRegClass(Reg: SrcReg0, RC);
7460 if (SrcReg1.isVirtual())
7461 MRI.constrainRegClass(Reg: SrcReg1, RC);
7462 if (SrcReg2.isVirtual())
7463 MRI.constrainRegClass(Reg: SrcReg2, RC);
7464
7465 MachineInstrBuilder MIB;
7466 if (kind == FMAInstKind::Default)
7467 MIB = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MaddOpc), DestReg: ResultReg)
7468 .addReg(RegNo: SrcReg0, flags: getKillRegState(B: Src0IsKill))
7469 .addReg(RegNo: SrcReg1, flags: getKillRegState(B: Src1IsKill))
7470 .addReg(RegNo: SrcReg2, flags: getKillRegState(B: Src2IsKill));
7471 else if (kind == FMAInstKind::Indexed)
7472 MIB = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MaddOpc), DestReg: ResultReg)
7473 .addReg(RegNo: SrcReg2, flags: getKillRegState(B: Src2IsKill))
7474 .addReg(RegNo: SrcReg0, flags: getKillRegState(B: Src0IsKill))
7475 .addReg(RegNo: SrcReg1, flags: getKillRegState(B: Src1IsKill))
7476 .addImm(Val: MUL->getOperand(i: 3).getImm());
7477 else if (kind == FMAInstKind::Accumulator)
7478 MIB = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MaddOpc), DestReg: ResultReg)
7479 .addReg(RegNo: SrcReg2, flags: getKillRegState(B: Src2IsKill))
7480 .addReg(RegNo: SrcReg0, flags: getKillRegState(B: Src0IsKill))
7481 .addReg(RegNo: SrcReg1, flags: getKillRegState(B: Src1IsKill));
7482 else
7483 assert(false && "Invalid FMA instruction kind \n");
7484 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
7485 InsInstrs.push_back(Elt: MIB);
7486 return MUL;
7487}
7488
7489static MachineInstr *
7490genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
7491 const TargetInstrInfo *TII, MachineInstr &Root,
7492 SmallVectorImpl<MachineInstr *> &InsInstrs) {
7493 MachineInstr *MAD = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: 1).getReg());
7494
7495 unsigned Opc = 0;
7496 const TargetRegisterClass *RC = MRI.getRegClass(Reg: MAD->getOperand(i: 0).getReg());
7497 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
7498 Opc = AArch64::FNMADDSrrr;
7499 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
7500 Opc = AArch64::FNMADDDrrr;
7501 else
7502 return nullptr;
7503
7504 Register ResultReg = Root.getOperand(i: 0).getReg();
7505 Register SrcReg0 = MAD->getOperand(i: 1).getReg();
7506 Register SrcReg1 = MAD->getOperand(i: 2).getReg();
7507 Register SrcReg2 = MAD->getOperand(i: 3).getReg();
7508 bool Src0IsKill = MAD->getOperand(i: 1).isKill();
7509 bool Src1IsKill = MAD->getOperand(i: 2).isKill();
7510 bool Src2IsKill = MAD->getOperand(i: 3).isKill();
7511 if (ResultReg.isVirtual())
7512 MRI.constrainRegClass(Reg: ResultReg, RC);
7513 if (SrcReg0.isVirtual())
7514 MRI.constrainRegClass(Reg: SrcReg0, RC);
7515 if (SrcReg1.isVirtual())
7516 MRI.constrainRegClass(Reg: SrcReg1, RC);
7517 if (SrcReg2.isVirtual())
7518 MRI.constrainRegClass(Reg: SrcReg2, RC);
7519
7520 MachineInstrBuilder MIB =
7521 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: Opc), DestReg: ResultReg)
7522 .addReg(RegNo: SrcReg0, flags: getKillRegState(B: Src0IsKill))
7523 .addReg(RegNo: SrcReg1, flags: getKillRegState(B: Src1IsKill))
7524 .addReg(RegNo: SrcReg2, flags: getKillRegState(B: Src2IsKill));
7525 InsInstrs.push_back(Elt: MIB);
7526
7527 return MAD;
7528}
7529
7530/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
7531static MachineInstr *
7532genIndexedMultiply(MachineInstr &Root,
7533 SmallVectorImpl<MachineInstr *> &InsInstrs,
7534 unsigned IdxDupOp, unsigned MulOpc,
7535 const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
7536 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
7537 "Invalid index of FMUL operand");
7538
7539 MachineFunction &MF = *Root.getMF();
7540 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
7541
7542 MachineInstr *Dup =
7543 MF.getRegInfo().getUniqueVRegDef(Reg: Root.getOperand(i: IdxDupOp).getReg());
7544
7545 if (Dup->getOpcode() == TargetOpcode::COPY)
7546 Dup = MRI.getUniqueVRegDef(Reg: Dup->getOperand(i: 1).getReg());
7547
7548 Register DupSrcReg = Dup->getOperand(i: 1).getReg();
7549 MRI.clearKillFlags(Reg: DupSrcReg);
7550 MRI.constrainRegClass(Reg: DupSrcReg, RC);
7551
7552 unsigned DupSrcLane = Dup->getOperand(i: 2).getImm();
7553
7554 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
7555 MachineOperand &MulOp = Root.getOperand(i: IdxMulOp);
7556
7557 Register ResultReg = Root.getOperand(i: 0).getReg();
7558
7559 MachineInstrBuilder MIB;
7560 MIB = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MulOpc), DestReg: ResultReg)
7561 .add(MO: MulOp)
7562 .addReg(RegNo: DupSrcReg)
7563 .addImm(Val: DupSrcLane);
7564
7565 InsInstrs.push_back(Elt: MIB);
7566 return &Root;
7567}
7568
7569/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
7570/// instructions.
7571///
7572/// \see genFusedMultiply
7573static MachineInstr *genFusedMultiplyAcc(
7574 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
7575 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
7576 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
7577 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7578 kind: FMAInstKind::Accumulator);
7579}
7580
7581/// genNeg - Helper to generate an intermediate negation of the second operand
7582/// of Root
7583static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
7584 const TargetInstrInfo *TII, MachineInstr &Root,
7585 SmallVectorImpl<MachineInstr *> &InsInstrs,
7586 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
7587 unsigned MnegOpc, const TargetRegisterClass *RC) {
7588 Register NewVR = MRI.createVirtualRegister(RegClass: RC);
7589 MachineInstrBuilder MIB =
7590 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MnegOpc), DestReg: NewVR)
7591 .add(MO: Root.getOperand(i: 2));
7592 InsInstrs.push_back(Elt: MIB);
7593
7594 assert(InstrIdxForVirtReg.empty());
7595 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
7596
7597 return NewVR;
7598}
7599
7600/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
7601/// instructions with an additional negation of the accumulator
7602static MachineInstr *genFusedMultiplyAccNeg(
7603 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
7604 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
7605 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
7606 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
7607 assert(IdxMulOpd == 1);
7608
7609 Register NewVR =
7610 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
7611 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7612 kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
7613}
7614
7615/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
7616/// instructions.
7617///
7618/// \see genFusedMultiply
7619static MachineInstr *genFusedMultiplyIdx(
7620 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
7621 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
7622 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
7623 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7624 kind: FMAInstKind::Indexed);
7625}
7626
7627 /// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
7628/// instructions with an additional negation of the accumulator
7629static MachineInstr *genFusedMultiplyIdxNeg(
7630 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
7631 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
7632 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
7633 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
7634 assert(IdxMulOpd == 1);
7635
7636 Register NewVR =
7637 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
7638
7639 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7640 kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
7641}
7642
7643/// genMaddR - Generate madd instruction and combine mul and add using
7644/// an extra virtual register
7645/// Example - an ADD intermediate needs to be stored in a register:
7646/// MUL I=A,B,0
7647/// ADD R,I,Imm
7648/// ==> ORR V, ZR, Imm
7649/// ==> MADD R,A,B,V
7650/// \param MF Containing MachineFunction
7651/// \param MRI Register information
7652/// \param TII Target information
7653/// \param Root is the ADD instruction
7654/// \param [out] InsInstrs is a vector of machine instructions and will
7655/// contain the generated madd instruction
7656/// \param IdxMulOpd is index of operand in Root that is the result of
7657/// the MUL. In the example above IdxMulOpd is 1.
7658 /// \param MaddOpc the opcode of the madd instruction
7659/// \param VR is a virtual register that holds the value of an ADD operand
7660/// (V in the example above).
7661/// \param RC Register class of operands
7662static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
7663 const TargetInstrInfo *TII, MachineInstr &Root,
7664 SmallVectorImpl<MachineInstr *> &InsInstrs,
7665 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
7666 const TargetRegisterClass *RC) {
7667 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7668
7669 MachineInstr *MUL = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: IdxMulOpd).getReg());
7670 Register ResultReg = Root.getOperand(i: 0).getReg();
7671 Register SrcReg0 = MUL->getOperand(i: 1).getReg();
7672 bool Src0IsKill = MUL->getOperand(i: 1).isKill();
7673 Register SrcReg1 = MUL->getOperand(i: 2).getReg();
7674 bool Src1IsKill = MUL->getOperand(i: 2).isKill();
7675
7676 if (ResultReg.isVirtual())
7677 MRI.constrainRegClass(Reg: ResultReg, RC);
7678 if (SrcReg0.isVirtual())
7679 MRI.constrainRegClass(Reg: SrcReg0, RC);
7680 if (SrcReg1.isVirtual())
7681 MRI.constrainRegClass(Reg: SrcReg1, RC);
7682 if (Register::isVirtualRegister(Reg: VR))
7683 MRI.constrainRegClass(Reg: VR, RC);
7684
7685 MachineInstrBuilder MIB =
7686 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MaddOpc), DestReg: ResultReg)
7687 .addReg(RegNo: SrcReg0, flags: getKillRegState(B: Src0IsKill))
7688 .addReg(RegNo: SrcReg1, flags: getKillRegState(B: Src1IsKill))
7689 .addReg(RegNo: VR);
7690 // Insert the MADD
7691 InsInstrs.push_back(Elt: MIB);
7692 return MUL;
7693}
7694
7695/// Do the following transformation
7696/// A - (B + C) ==> (A - B) - C
7697/// A - (B + C) ==> (A - C) - B
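/// For example (illustrative), with IdxOpd1 == 1:
///   %add = ADDWrr %b, %c
///   %r   = SUBWrr %a, %add
/// ==>
///   %t = SUBWrr %a, %b
///   %r = SUBWrr %t, %c
/// The nsw/nuw flags are dropped because the new intermediate value may wrap.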
7698static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
7699 const TargetInstrInfo *TII, MachineInstr &Root,
7700 SmallVectorImpl<MachineInstr *> &InsInstrs,
7701 SmallVectorImpl<MachineInstr *> &DelInstrs,
7702 unsigned IdxOpd1,
7703 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
7704 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
7705 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
7706 MachineInstr *AddMI = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: 2).getReg());
7707
7708 Register ResultReg = Root.getOperand(i: 0).getReg();
7709 Register RegA = Root.getOperand(i: 1).getReg();
7710 bool RegAIsKill = Root.getOperand(i: 1).isKill();
7711 Register RegB = AddMI->getOperand(i: IdxOpd1).getReg();
7712 bool RegBIsKill = AddMI->getOperand(i: IdxOpd1).isKill();
7713 Register RegC = AddMI->getOperand(i: IdxOtherOpd).getReg();
7714 bool RegCIsKill = AddMI->getOperand(i: IdxOtherOpd).isKill();
7715 Register NewVR =
7716 MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: Root.getOperand(i: 2).getReg()));
7717
7718 unsigned Opcode = Root.getOpcode();
7719 if (Opcode == AArch64::SUBSWrr)
7720 Opcode = AArch64::SUBWrr;
7721 else if (Opcode == AArch64::SUBSXrr)
7722 Opcode = AArch64::SUBXrr;
7723 else
7724 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
7725 "Unexpected instruction opcode.");
7726
7727 uint32_t Flags = Root.mergeFlagsWith(Other: *AddMI);
7728 Flags &= ~MachineInstr::NoSWrap;
7729 Flags &= ~MachineInstr::NoUWrap;
7730
7731 MachineInstrBuilder MIB1 =
7732 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode), DestReg: NewVR)
7733 .addReg(RegNo: RegA, flags: getKillRegState(B: RegAIsKill))
7734 .addReg(RegNo: RegB, flags: getKillRegState(B: RegBIsKill))
7735 .setMIFlags(Flags);
7736 MachineInstrBuilder MIB2 =
7737 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode), DestReg: ResultReg)
7738 .addReg(RegNo: NewVR, flags: getKillRegState(B: true))
7739 .addReg(RegNo: RegC, flags: getKillRegState(B: RegCIsKill))
7740 .setMIFlags(Flags);
7741
7742 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
7743 InsInstrs.push_back(Elt: MIB1);
7744 InsInstrs.push_back(Elt: MIB2);
7745 DelInstrs.push_back(Elt: AddMI);
7746 DelInstrs.push_back(Elt: &Root);
7747}
7748
7749unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
7750 unsigned int AccumulatorOpCode) const {
7751 switch (AccumulatorOpCode) {
7752 case AArch64::UABALB_ZZZ_D:
7753 case AArch64::SABALB_ZZZ_D:
7754 case AArch64::UABALT_ZZZ_D:
7755 case AArch64::SABALT_ZZZ_D:
7756 return AArch64::ADD_ZZZ_D;
7757 case AArch64::UABALB_ZZZ_H:
7758 case AArch64::SABALB_ZZZ_H:
7759 case AArch64::UABALT_ZZZ_H:
7760 case AArch64::SABALT_ZZZ_H:
7761 return AArch64::ADD_ZZZ_H;
7762 case AArch64::UABALB_ZZZ_S:
7763 case AArch64::SABALB_ZZZ_S:
7764 case AArch64::UABALT_ZZZ_S:
7765 case AArch64::SABALT_ZZZ_S:
7766 return AArch64::ADD_ZZZ_S;
7767 case AArch64::UABALv16i8_v8i16:
7768 case AArch64::SABALv8i8_v8i16:
7769 case AArch64::SABAv8i16:
7770 case AArch64::UABAv8i16:
7771 return AArch64::ADDv8i16;
7772 case AArch64::SABALv2i32_v2i64:
7773 case AArch64::UABALv2i32_v2i64:
7774 case AArch64::SABALv4i32_v2i64:
7775 return AArch64::ADDv2i64;
7776 case AArch64::UABALv4i16_v4i32:
7777 case AArch64::SABALv4i16_v4i32:
7778 case AArch64::SABALv8i16_v4i32:
7779 case AArch64::SABAv4i32:
7780 case AArch64::UABAv4i32:
7781 return AArch64::ADDv4i32;
7782 case AArch64::UABALv4i32_v2i64:
7783 return AArch64::ADDv2i64;
7784 case AArch64::UABALv8i16_v4i32:
7785 return AArch64::ADDv4i32;
7786 case AArch64::UABALv8i8_v8i16:
7787 case AArch64::SABALv16i8_v8i16:
7788 return AArch64::ADDv8i16;
7789 case AArch64::UABAv16i8:
7790 case AArch64::SABAv16i8:
7791 return AArch64::ADDv16i8;
7792 case AArch64::UABAv4i16:
7793 case AArch64::SABAv4i16:
7794 return AArch64::ADDv4i16;
7795 case AArch64::UABAv2i32:
7796 case AArch64::SABAv2i32:
7797 return AArch64::ADDv2i32;
7798 case AArch64::UABAv8i8:
7799 case AArch64::SABAv8i8:
7800 return AArch64::ADDv8i8;
7801 default:
7802 llvm_unreachable("Unknown accumulator opcode");
7803 }
7804}
7805
7806/// When getMachineCombinerPatterns() finds potential patterns,
7807/// this function generates the instructions that could replace the
7808/// original code sequence
7809void AArch64InstrInfo::genAlternativeCodeSequence(
7810 MachineInstr &Root, unsigned Pattern,
7811 SmallVectorImpl<MachineInstr *> &InsInstrs,
7812 SmallVectorImpl<MachineInstr *> &DelInstrs,
7813 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
7814 MachineBasicBlock &MBB = *Root.getParent();
7815 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7816 MachineFunction &MF = *MBB.getParent();
7817 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
7818
7819 MachineInstr *MUL = nullptr;
7820 const TargetRegisterClass *RC;
7821 unsigned Opc;
7822 switch (Pattern) {
7823 default:
7824 // Reassociate instructions.
7825 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
7826 DelInstrs, InstIdxForVirtReg&: InstrIdxForVirtReg);
7827 return;
7828 case AArch64MachineCombinerPattern::SUBADD_OP1:
7829 // A - (B + C)
7830 // ==> (A - B) - C
7831 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, IdxOpd1: 1,
7832 InstrIdxForVirtReg);
7833 return;
7834 case AArch64MachineCombinerPattern::SUBADD_OP2:
7835 // A - (B + C)
7836 // ==> (A - C) - B
7837 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, IdxOpd1: 2,
7838 InstrIdxForVirtReg);
7839 return;
7840 case AArch64MachineCombinerPattern::MULADDW_OP1:
7841 case AArch64MachineCombinerPattern::MULADDX_OP1:
7842 // MUL I=A,B,0
7843 // ADD R,I,C
7844 // ==> MADD R,A,B,C
7845 // --- Create(MADD);
7846 if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP1) {
7847 Opc = AArch64::MADDWrrr;
7848 RC = &AArch64::GPR32RegClass;
7849 } else {
7850 Opc = AArch64::MADDXrrr;
7851 RC = &AArch64::GPR64RegClass;
7852 }
7853 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
7854 break;
7855 case AArch64MachineCombinerPattern::MULADDW_OP2:
7856 case AArch64MachineCombinerPattern::MULADDX_OP2:
7857 // MUL I=A,B,0
7858 // ADD R,C,I
7859 // ==> MADD R,A,B,C
7860 // --- Create(MADD);
7861 if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP2) {
7862 Opc = AArch64::MADDWrrr;
7863 RC = &AArch64::GPR32RegClass;
7864 } else {
7865 Opc = AArch64::MADDXrrr;
7866 RC = &AArch64::GPR64RegClass;
7867 }
7868 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7869 break;
7870 case AArch64MachineCombinerPattern::MULADDWI_OP1:
7871 case AArch64MachineCombinerPattern::MULADDXI_OP1: {
7872 // MUL I=A,B,0
7873 // ADD R,I,Imm
7874 // ==> MOV V, Imm
7875 // ==> MADD R,A,B,V
7876 // --- Create(MADD);
7877 const TargetRegisterClass *OrrRC;
7878 unsigned BitSize, OrrOpc, ZeroReg;
7879 if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1) {
7880 OrrOpc = AArch64::ORRWri;
7881 OrrRC = &AArch64::GPR32spRegClass;
7882 BitSize = 32;
7883 ZeroReg = AArch64::WZR;
7884 Opc = AArch64::MADDWrrr;
7885 RC = &AArch64::GPR32RegClass;
7886 } else {
7887 OrrOpc = AArch64::ORRXri;
7888 OrrRC = &AArch64::GPR64spRegClass;
7889 BitSize = 64;
7890 ZeroReg = AArch64::XZR;
7891 Opc = AArch64::MADDXrrr;
7892 RC = &AArch64::GPR64RegClass;
7893 }
7894 Register NewVR = MRI.createVirtualRegister(RegClass: OrrRC);
7895 uint64_t Imm = Root.getOperand(i: 2).getImm();
7896
7897 if (Root.getOperand(i: 3).isImm()) {
7898 unsigned Val = Root.getOperand(i: 3).getImm();
7899 Imm = Imm << Val;
7900 }
7901 uint64_t UImm = SignExtend64(X: Imm, B: BitSize);
7902    // Give up unless the immediate can be formed with a single instruction.
7903 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
7904 AArch64_IMM::expandMOVImm(Imm: UImm, BitSize, Insn);
7905 if (Insn.size() != 1)
7906 return;
7907 auto MovI = Insn.begin();
7908 MachineInstrBuilder MIB1;
7909 // MOV is an alias for one of three instructions: movz, movn, and orr.
7910 if (MovI->Opcode == OrrOpc)
7911 MIB1 = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: OrrOpc), DestReg: NewVR)
7912 .addReg(RegNo: ZeroReg)
7913 .addImm(Val: MovI->Op2);
7914 else {
7915 if (BitSize == 32)
7916 assert((MovI->Opcode == AArch64::MOVNWi ||
7917 MovI->Opcode == AArch64::MOVZWi) &&
7918 "Expected opcode");
7919 else
7920 assert((MovI->Opcode == AArch64::MOVNXi ||
7921 MovI->Opcode == AArch64::MOVZXi) &&
7922 "Expected opcode");
7923 MIB1 = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MovI->Opcode), DestReg: NewVR)
7924 .addImm(Val: MovI->Op1)
7925 .addImm(Val: MovI->Op2);
7926 }
7927 InsInstrs.push_back(Elt: MIB1);
7928 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
7929 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, VR: NewVR, RC);
7930 break;
7931 }
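  // For illustration only (registers and the immediate are arbitrary), the
  // MULADD*I_OP1 patterns above rewrite, e.g.,
  //   mul w8, w0, w1
  //   add w0, w8, #100
  // into
  //   mov  w9, #100
  //   madd w0, w0, w1, w9
  // The MULSUB*I_OP1 patterns below do the same with the negated immediate.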
7932 case AArch64MachineCombinerPattern::MULSUBW_OP1:
7933 case AArch64MachineCombinerPattern::MULSUBX_OP1: {
7934 // MUL I=A,B,0
7935 // SUB R,I, C
7936 // ==> SUB V, 0, C
7937 // ==> MADD R,A,B,V // = -C + A*B
7938 // --- Create(MADD);
7939 const TargetRegisterClass *SubRC;
7940 unsigned SubOpc, ZeroReg;
7941 if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP1) {
7942 SubOpc = AArch64::SUBWrr;
7943 SubRC = &AArch64::GPR32spRegClass;
7944 ZeroReg = AArch64::WZR;
7945 Opc = AArch64::MADDWrrr;
7946 RC = &AArch64::GPR32RegClass;
7947 } else {
7948 SubOpc = AArch64::SUBXrr;
7949 SubRC = &AArch64::GPR64spRegClass;
7950 ZeroReg = AArch64::XZR;
7951 Opc = AArch64::MADDXrrr;
7952 RC = &AArch64::GPR64RegClass;
7953 }
7954 Register NewVR = MRI.createVirtualRegister(RegClass: SubRC);
7955 // SUB NewVR, 0, C
7956 MachineInstrBuilder MIB1 =
7957 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: SubOpc), DestReg: NewVR)
7958 .addReg(RegNo: ZeroReg)
7959 .add(MO: Root.getOperand(i: 2));
7960 InsInstrs.push_back(Elt: MIB1);
7961 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
7962 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, VR: NewVR, RC);
7963 break;
7964 }
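  // For illustration only (registers are arbitrary), the MULSUB*_OP1 patterns
  // above rewrite, e.g.,
  //   mul w8, w0, w1
  //   sub w0, w8, w2
  // into
  //   neg  w9, w2          // sub w9, wzr, w2
  //   madd w0, w0, w1, w9  // w0 = -w2 + w0*w1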
7965 case AArch64MachineCombinerPattern::MULSUBW_OP2:
7966 case AArch64MachineCombinerPattern::MULSUBX_OP2:
7967 // MUL I=A,B,0
7968 // SUB R,C,I
7969 // ==> MSUB R,A,B,C (computes C - A*B)
7970 // --- Create(MSUB);
7971 if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP2) {
7972 Opc = AArch64::MSUBWrrr;
7973 RC = &AArch64::GPR32RegClass;
7974 } else {
7975 Opc = AArch64::MSUBXrrr;
7976 RC = &AArch64::GPR64RegClass;
7977 }
7978 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
7979 break;
7980 case AArch64MachineCombinerPattern::MULSUBWI_OP1:
7981 case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
7982 // MUL I=A,B,0
7983 // SUB R,I, Imm
7984 // ==> MOV V, -Imm
7985 // ==> MADD R,A,B,V // = -Imm + A*B
7986 // --- Create(MADD);
7987 const TargetRegisterClass *OrrRC;
7988 unsigned BitSize, OrrOpc, ZeroReg;
7989 if (Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
7990 OrrOpc = AArch64::ORRWri;
7991 OrrRC = &AArch64::GPR32spRegClass;
7992 BitSize = 32;
7993 ZeroReg = AArch64::WZR;
7994 Opc = AArch64::MADDWrrr;
7995 RC = &AArch64::GPR32RegClass;
7996 } else {
7997 OrrOpc = AArch64::ORRXri;
7998 OrrRC = &AArch64::GPR64spRegClass;
7999 BitSize = 64;
8000 ZeroReg = AArch64::XZR;
8001 Opc = AArch64::MADDXrrr;
8002 RC = &AArch64::GPR64RegClass;
8003 }
8004 Register NewVR = MRI.createVirtualRegister(RegClass: OrrRC);
8005 uint64_t Imm = Root.getOperand(i: 2).getImm();
8006 if (Root.getOperand(i: 3).isImm()) {
8007 unsigned Val = Root.getOperand(i: 3).getImm();
8008 Imm = Imm << Val;
8009 }
8010 uint64_t UImm = SignExtend64(X: -Imm, B: BitSize);
8011    // Give up unless the immediate can be formed with a single instruction.
8012 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
8013 AArch64_IMM::expandMOVImm(Imm: UImm, BitSize, Insn);
8014 if (Insn.size() != 1)
8015 return;
8016 auto MovI = Insn.begin();
8017 MachineInstrBuilder MIB1;
8018 // MOV is an alias for one of three instructions: movz, movn, and orr.
8019 if (MovI->Opcode == OrrOpc)
8020 MIB1 = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: OrrOpc), DestReg: NewVR)
8021 .addReg(RegNo: ZeroReg)
8022 .addImm(Val: MovI->Op2);
8023 else {
8024 if (BitSize == 32)
8025 assert((MovI->Opcode == AArch64::MOVNWi ||
8026 MovI->Opcode == AArch64::MOVZWi) &&
8027 "Expected opcode");
8028 else
8029 assert((MovI->Opcode == AArch64::MOVNXi ||
8030 MovI->Opcode == AArch64::MOVZXi) &&
8031 "Expected opcode");
8032 MIB1 = BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: MovI->Opcode), DestReg: NewVR)
8033 .addImm(Val: MovI->Op1)
8034 .addImm(Val: MovI->Op2);
8035 }
8036 InsInstrs.push_back(Elt: MIB1);
8037 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
8038 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, VR: NewVR, RC);
8039 break;
8040 }
8041 case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
8042 Opc = AArch64::MLAv8i8;
8043 RC = &AArch64::FPR64RegClass;
8044 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8045 break;
8046 case AArch64MachineCombinerPattern::MULADDv8i8_OP2:
8047 Opc = AArch64::MLAv8i8;
8048 RC = &AArch64::FPR64RegClass;
8049 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8050 break;
8051 case AArch64MachineCombinerPattern::MULADDv16i8_OP1:
8052 Opc = AArch64::MLAv16i8;
8053 RC = &AArch64::FPR128RegClass;
8054 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8055 break;
8056 case AArch64MachineCombinerPattern::MULADDv16i8_OP2:
8057 Opc = AArch64::MLAv16i8;
8058 RC = &AArch64::FPR128RegClass;
8059 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8060 break;
8061 case AArch64MachineCombinerPattern::MULADDv4i16_OP1:
8062 Opc = AArch64::MLAv4i16;
8063 RC = &AArch64::FPR64RegClass;
8064 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8065 break;
8066 case AArch64MachineCombinerPattern::MULADDv4i16_OP2:
8067 Opc = AArch64::MLAv4i16;
8068 RC = &AArch64::FPR64RegClass;
8069 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8070 break;
8071 case AArch64MachineCombinerPattern::MULADDv8i16_OP1:
8072 Opc = AArch64::MLAv8i16;
8073 RC = &AArch64::FPR128RegClass;
8074 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8075 break;
8076 case AArch64MachineCombinerPattern::MULADDv8i16_OP2:
8077 Opc = AArch64::MLAv8i16;
8078 RC = &AArch64::FPR128RegClass;
8079 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8080 break;
8081 case AArch64MachineCombinerPattern::MULADDv2i32_OP1:
8082 Opc = AArch64::MLAv2i32;
8083 RC = &AArch64::FPR64RegClass;
8084 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8085 break;
8086 case AArch64MachineCombinerPattern::MULADDv2i32_OP2:
8087 Opc = AArch64::MLAv2i32;
8088 RC = &AArch64::FPR64RegClass;
8089 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8090 break;
8091 case AArch64MachineCombinerPattern::MULADDv4i32_OP1:
8092 Opc = AArch64::MLAv4i32;
8093 RC = &AArch64::FPR128RegClass;
8094 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8095 break;
8096 case AArch64MachineCombinerPattern::MULADDv4i32_OP2:
8097 Opc = AArch64::MLAv4i32;
8098 RC = &AArch64::FPR128RegClass;
8099 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8100 break;
8101
8102 case AArch64MachineCombinerPattern::MULSUBv8i8_OP1:
8103 Opc = AArch64::MLAv8i8;
8104 RC = &AArch64::FPR64RegClass;
8105 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8106 InstrIdxForVirtReg, IdxMulOpd: 1, MaddOpc: Opc, MnegOpc: AArch64::NEGv8i8,
8107 RC);
8108 break;
8109 case AArch64MachineCombinerPattern::MULSUBv8i8_OP2:
8110 Opc = AArch64::MLSv8i8;
8111 RC = &AArch64::FPR64RegClass;
8112 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8113 break;
8114 case AArch64MachineCombinerPattern::MULSUBv16i8_OP1:
8115 Opc = AArch64::MLAv16i8;
8116 RC = &AArch64::FPR128RegClass;
8117 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8118 InstrIdxForVirtReg, IdxMulOpd: 1, MaddOpc: Opc, MnegOpc: AArch64::NEGv16i8,
8119 RC);
8120 break;
8121 case AArch64MachineCombinerPattern::MULSUBv16i8_OP2:
8122 Opc = AArch64::MLSv16i8;
8123 RC = &AArch64::FPR128RegClass;
8124 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8125 break;
8126 case AArch64MachineCombinerPattern::MULSUBv4i16_OP1:
8127 Opc = AArch64::MLAv4i16;
8128 RC = &AArch64::FPR64RegClass;
8129 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8130 InstrIdxForVirtReg, IdxMulOpd: 1, MaddOpc: Opc, MnegOpc: AArch64::NEGv4i16,
8131 RC);
8132 break;
8133 case AArch64MachineCombinerPattern::MULSUBv4i16_OP2:
8134 Opc = AArch64::MLSv4i16;
8135 RC = &AArch64::FPR64RegClass;
8136 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8137 break;
8138 case AArch64MachineCombinerPattern::MULSUBv8i16_OP1:
8139 Opc = AArch64::MLAv8i16;
8140 RC = &AArch64::FPR128RegClass;
8141 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8142 InstrIdxForVirtReg, IdxMulOpd: 1, MaddOpc: Opc, MnegOpc: AArch64::NEGv8i16,
8143 RC);
8144 break;
8145 case AArch64MachineCombinerPattern::MULSUBv8i16_OP2:
8146 Opc = AArch64::MLSv8i16;
8147 RC = &AArch64::FPR128RegClass;
8148 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8149 break;
8150 case AArch64MachineCombinerPattern::MULSUBv2i32_OP1:
8151 Opc = AArch64::MLAv2i32;
8152 RC = &AArch64::FPR64RegClass;
8153 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8154 InstrIdxForVirtReg, IdxMulOpd: 1, MaddOpc: Opc, MnegOpc: AArch64::NEGv2i32,
8155 RC);
8156 break;
8157 case AArch64MachineCombinerPattern::MULSUBv2i32_OP2:
8158 Opc = AArch64::MLSv2i32;
8159 RC = &AArch64::FPR64RegClass;
8160 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8161 break;
8162 case AArch64MachineCombinerPattern::MULSUBv4i32_OP1:
8163 Opc = AArch64::MLAv4i32;
8164 RC = &AArch64::FPR128RegClass;
8165 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8166 InstrIdxForVirtReg, IdxMulOpd: 1, MaddOpc: Opc, MnegOpc: AArch64::NEGv4i32,
8167 RC);
8168 break;
8169 case AArch64MachineCombinerPattern::MULSUBv4i32_OP2:
8170 Opc = AArch64::MLSv4i32;
8171 RC = &AArch64::FPR128RegClass;
8172 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8173 break;
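  // For illustration only (registers are arbitrary), the vector MULSUB*_OP1
  // patterns above negate the subtrahend and then accumulate with MLA, e.g.,
  //   mul v8.4s, v0.4s, v1.4s
  //   sub v0.4s, v8.4s, v2.4s
  // becomes
  //   neg v3.4s, v2.4s
  //   mla v3.4s, v0.4s, v1.4s  // v3 = -v2 + v0*v1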
8174
8175 case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1:
8176 Opc = AArch64::MLAv4i16_indexed;
8177 RC = &AArch64::FPR64RegClass;
8178 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8179 break;
8180 case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2:
8181 Opc = AArch64::MLAv4i16_indexed;
8182 RC = &AArch64::FPR64RegClass;
8183 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8184 break;
8185 case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1:
8186 Opc = AArch64::MLAv8i16_indexed;
8187 RC = &AArch64::FPR128RegClass;
8188 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8189 break;
8190 case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2:
8191 Opc = AArch64::MLAv8i16_indexed;
8192 RC = &AArch64::FPR128RegClass;
8193 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8194 break;
8195 case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1:
8196 Opc = AArch64::MLAv2i32_indexed;
8197 RC = &AArch64::FPR64RegClass;
8198 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8199 break;
8200 case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2:
8201 Opc = AArch64::MLAv2i32_indexed;
8202 RC = &AArch64::FPR64RegClass;
8203 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8204 break;
8205 case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1:
8206 Opc = AArch64::MLAv4i32_indexed;
8207 RC = &AArch64::FPR128RegClass;
8208 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8209 break;
8210 case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2:
8211 Opc = AArch64::MLAv4i32_indexed;
8212 RC = &AArch64::FPR128RegClass;
8213 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8214 break;
8215
8216 case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
8217 Opc = AArch64::MLAv4i16_indexed;
8218 RC = &AArch64::FPR64RegClass;
8219 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8220 InstrIdxForVirtReg, IdxMulOpd: 1, MaddOpc: Opc, MnegOpc: AArch64::NEGv4i16,
8221 RC);
8222 break;
8223 case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
8224 Opc = AArch64::MLSv4i16_indexed;
8225 RC = &AArch64::FPR64RegClass;
8226 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8227 break;
8228 case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
8229 Opc = AArch64::MLAv8i16_indexed;
8230 RC = &AArch64::FPR128RegClass;
8231 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8232 InstrIdxForVirtReg, IdxMulOpd: 1, MaddOpc: Opc, MnegOpc: AArch64::NEGv8i16,
8233 RC);
8234 break;
8235 case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
8236 Opc = AArch64::MLSv8i16_indexed;
8237 RC = &AArch64::FPR128RegClass;
8238 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8239 break;
8240 case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
8241 Opc = AArch64::MLAv2i32_indexed;
8242 RC = &AArch64::FPR64RegClass;
8243 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8244 InstrIdxForVirtReg, IdxMulOpd: 1, MaddOpc: Opc, MnegOpc: AArch64::NEGv2i32,
8245 RC);
8246 break;
8247 case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
8248 Opc = AArch64::MLSv2i32_indexed;
8249 RC = &AArch64::FPR64RegClass;
8250 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8251 break;
8252 case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
8253 Opc = AArch64::MLAv4i32_indexed;
8254 RC = &AArch64::FPR128RegClass;
8255 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8256 InstrIdxForVirtReg, IdxMulOpd: 1, MaddOpc: Opc, MnegOpc: AArch64::NEGv4i32,
8257 RC);
8258 break;
8259 case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
8260 Opc = AArch64::MLSv4i32_indexed;
8261 RC = &AArch64::FPR128RegClass;
8262 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8263 break;
8264
8265 // Floating Point Support
8266 case AArch64MachineCombinerPattern::FMULADDH_OP1:
8267 Opc = AArch64::FMADDHrrr;
8268 RC = &AArch64::FPR16RegClass;
8269 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8270 break;
8271 case AArch64MachineCombinerPattern::FMULADDS_OP1:
8272 Opc = AArch64::FMADDSrrr;
8273 RC = &AArch64::FPR32RegClass;
8274 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8275 break;
8276 case AArch64MachineCombinerPattern::FMULADDD_OP1:
8277 Opc = AArch64::FMADDDrrr;
8278 RC = &AArch64::FPR64RegClass;
8279 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8280 break;
8281
8282 case AArch64MachineCombinerPattern::FMULADDH_OP2:
8283 Opc = AArch64::FMADDHrrr;
8284 RC = &AArch64::FPR16RegClass;
8285 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8286 break;
8287 case AArch64MachineCombinerPattern::FMULADDS_OP2:
8288 Opc = AArch64::FMADDSrrr;
8289 RC = &AArch64::FPR32RegClass;
8290 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8291 break;
8292 case AArch64MachineCombinerPattern::FMULADDD_OP2:
8293 Opc = AArch64::FMADDDrrr;
8294 RC = &AArch64::FPR64RegClass;
8295 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8296 break;
8297
8298 case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1:
8299 Opc = AArch64::FMLAv1i32_indexed;
8300 RC = &AArch64::FPR32RegClass;
8301 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8302 kind: FMAInstKind::Indexed);
8303 break;
8304 case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2:
8305 Opc = AArch64::FMLAv1i32_indexed;
8306 RC = &AArch64::FPR32RegClass;
8307 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8308 kind: FMAInstKind::Indexed);
8309 break;
8310
8311 case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1:
8312 Opc = AArch64::FMLAv1i64_indexed;
8313 RC = &AArch64::FPR64RegClass;
8314 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8315 kind: FMAInstKind::Indexed);
8316 break;
8317 case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2:
8318 Opc = AArch64::FMLAv1i64_indexed;
8319 RC = &AArch64::FPR64RegClass;
8320 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8321 kind: FMAInstKind::Indexed);
8322 break;
8323
8324 case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1:
8325 RC = &AArch64::FPR64RegClass;
8326 Opc = AArch64::FMLAv4i16_indexed;
8327 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8328 kind: FMAInstKind::Indexed);
8329 break;
8330 case AArch64MachineCombinerPattern::FMLAv4f16_OP1:
8331 RC = &AArch64::FPR64RegClass;
8332 Opc = AArch64::FMLAv4f16;
8333 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8334 kind: FMAInstKind::Accumulator);
8335 break;
8336 case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2:
8337 RC = &AArch64::FPR64RegClass;
8338 Opc = AArch64::FMLAv4i16_indexed;
8339 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8340 kind: FMAInstKind::Indexed);
8341 break;
8342 case AArch64MachineCombinerPattern::FMLAv4f16_OP2:
8343 RC = &AArch64::FPR64RegClass;
8344 Opc = AArch64::FMLAv4f16;
8345 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8346 kind: FMAInstKind::Accumulator);
8347 break;
8348
8349 case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1:
8350 case AArch64MachineCombinerPattern::FMLAv2f32_OP1:
8351 RC = &AArch64::FPR64RegClass;
8352 if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
8353 Opc = AArch64::FMLAv2i32_indexed;
8354 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8355 kind: FMAInstKind::Indexed);
8356 } else {
8357 Opc = AArch64::FMLAv2f32;
8358 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8359 kind: FMAInstKind::Accumulator);
8360 }
8361 break;
8362 case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2:
8363 case AArch64MachineCombinerPattern::FMLAv2f32_OP2:
8364 RC = &AArch64::FPR64RegClass;
8365 if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
8366 Opc = AArch64::FMLAv2i32_indexed;
8367 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8368 kind: FMAInstKind::Indexed);
8369 } else {
8370 Opc = AArch64::FMLAv2f32;
8371 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8372 kind: FMAInstKind::Accumulator);
8373 }
8374 break;
8375
8376 case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1:
8377 RC = &AArch64::FPR128RegClass;
8378 Opc = AArch64::FMLAv8i16_indexed;
8379 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8380 kind: FMAInstKind::Indexed);
8381 break;
8382 case AArch64MachineCombinerPattern::FMLAv8f16_OP1:
8383 RC = &AArch64::FPR128RegClass;
8384 Opc = AArch64::FMLAv8f16;
8385 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8386 kind: FMAInstKind::Accumulator);
8387 break;
8388 case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2:
8389 RC = &AArch64::FPR128RegClass;
8390 Opc = AArch64::FMLAv8i16_indexed;
8391 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8392 kind: FMAInstKind::Indexed);
8393 break;
8394 case AArch64MachineCombinerPattern::FMLAv8f16_OP2:
8395 RC = &AArch64::FPR128RegClass;
8396 Opc = AArch64::FMLAv8f16;
8397 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8398 kind: FMAInstKind::Accumulator);
8399 break;
8400
8401 case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1:
8402 case AArch64MachineCombinerPattern::FMLAv2f64_OP1:
8403 RC = &AArch64::FPR128RegClass;
8404 if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
8405 Opc = AArch64::FMLAv2i64_indexed;
8406 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8407 kind: FMAInstKind::Indexed);
8408 } else {
8409 Opc = AArch64::FMLAv2f64;
8410 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8411 kind: FMAInstKind::Accumulator);
8412 }
8413 break;
8414 case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2:
8415 case AArch64MachineCombinerPattern::FMLAv2f64_OP2:
8416 RC = &AArch64::FPR128RegClass;
8417 if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
8418 Opc = AArch64::FMLAv2i64_indexed;
8419 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8420 kind: FMAInstKind::Indexed);
8421 } else {
8422 Opc = AArch64::FMLAv2f64;
8423 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8424 kind: FMAInstKind::Accumulator);
8425 }
8426 break;
8427
8428 case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1:
8429 case AArch64MachineCombinerPattern::FMLAv4f32_OP1:
8430 RC = &AArch64::FPR128RegClass;
8431 if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
8432 Opc = AArch64::FMLAv4i32_indexed;
8433 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8434 kind: FMAInstKind::Indexed);
8435 } else {
8436 Opc = AArch64::FMLAv4f32;
8437 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8438 kind: FMAInstKind::Accumulator);
8439 }
8440 break;
8441
8442 case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2:
8443 case AArch64MachineCombinerPattern::FMLAv4f32_OP2:
8444 RC = &AArch64::FPR128RegClass;
8445 if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
8446 Opc = AArch64::FMLAv4i32_indexed;
8447 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8448 kind: FMAInstKind::Indexed);
8449 } else {
8450 Opc = AArch64::FMLAv4f32;
8451 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8452 kind: FMAInstKind::Accumulator);
8453 }
8454 break;
8455
8456 case AArch64MachineCombinerPattern::FMULSUBH_OP1:
8457 Opc = AArch64::FNMSUBHrrr;
8458 RC = &AArch64::FPR16RegClass;
8459 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8460 break;
8461 case AArch64MachineCombinerPattern::FMULSUBS_OP1:
8462 Opc = AArch64::FNMSUBSrrr;
8463 RC = &AArch64::FPR32RegClass;
8464 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8465 break;
8466 case AArch64MachineCombinerPattern::FMULSUBD_OP1:
8467 Opc = AArch64::FNMSUBDrrr;
8468 RC = &AArch64::FPR64RegClass;
8469 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8470 break;
8471
8472 case AArch64MachineCombinerPattern::FNMULSUBH_OP1:
8473 Opc = AArch64::FNMADDHrrr;
8474 RC = &AArch64::FPR16RegClass;
8475 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8476 break;
8477 case AArch64MachineCombinerPattern::FNMULSUBS_OP1:
8478 Opc = AArch64::FNMADDSrrr;
8479 RC = &AArch64::FPR32RegClass;
8480 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8481 break;
8482 case AArch64MachineCombinerPattern::FNMULSUBD_OP1:
8483 Opc = AArch64::FNMADDDrrr;
8484 RC = &AArch64::FPR64RegClass;
8485 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC);
8486 break;
8487
8488 case AArch64MachineCombinerPattern::FMULSUBH_OP2:
8489 Opc = AArch64::FMSUBHrrr;
8490 RC = &AArch64::FPR16RegClass;
8491 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8492 break;
8493 case AArch64MachineCombinerPattern::FMULSUBS_OP2:
8494 Opc = AArch64::FMSUBSrrr;
8495 RC = &AArch64::FPR32RegClass;
8496 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8497 break;
8498 case AArch64MachineCombinerPattern::FMULSUBD_OP2:
8499 Opc = AArch64::FMSUBDrrr;
8500 RC = &AArch64::FPR64RegClass;
8501 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC);
8502 break;
8503
8504 case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2:
8505 Opc = AArch64::FMLSv1i32_indexed;
8506 RC = &AArch64::FPR32RegClass;
8507 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8508 kind: FMAInstKind::Indexed);
8509 break;
8510
8511 case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2:
8512 Opc = AArch64::FMLSv1i64_indexed;
8513 RC = &AArch64::FPR64RegClass;
8514 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8515 kind: FMAInstKind::Indexed);
8516 break;
8517
8518 case AArch64MachineCombinerPattern::FMLSv4f16_OP1:
8519 case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
8520 RC = &AArch64::FPR64RegClass;
8521 Register NewVR = MRI.createVirtualRegister(RegClass: RC);
8522 MachineInstrBuilder MIB1 =
8523 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: AArch64::FNEGv4f16), DestReg: NewVR)
8524 .add(MO: Root.getOperand(i: 2));
8525 InsInstrs.push_back(Elt: MIB1);
8526 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
8527 if (Pattern == AArch64MachineCombinerPattern::FMLSv4f16_OP1) {
8528 Opc = AArch64::FMLAv4f16;
8529 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8530 kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
8531 } else {
8532 Opc = AArch64::FMLAv4i16_indexed;
8533 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8534 kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
8535 }
8536 break;
8537 }
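  // For illustration only (registers are arbitrary; the .4h forms require full
  // FP16 support), the FMLS*_OP1 patterns negate the subtrahend and then use
  // FMLA, e.g.,
  //   fmul v8.4h, v0.4h, v1.4h
  //   fsub v0.4h, v8.4h, v2.4h
  // becomes
  //   fneg v3.4h, v2.4h
  //   fmla v3.4h, v0.4h, v1.4h  // v3 = -v2 + v0*v1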
8538 case AArch64MachineCombinerPattern::FMLSv4f16_OP2:
8539 RC = &AArch64::FPR64RegClass;
8540 Opc = AArch64::FMLSv4f16;
8541 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8542 kind: FMAInstKind::Accumulator);
8543 break;
8544 case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2:
8545 RC = &AArch64::FPR64RegClass;
8546 Opc = AArch64::FMLSv4i16_indexed;
8547 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8548 kind: FMAInstKind::Indexed);
8549 break;
8550
8551 case AArch64MachineCombinerPattern::FMLSv2f32_OP2:
8552 case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2:
8553 RC = &AArch64::FPR64RegClass;
8554 if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
8555 Opc = AArch64::FMLSv2i32_indexed;
8556 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8557 kind: FMAInstKind::Indexed);
8558 } else {
8559 Opc = AArch64::FMLSv2f32;
8560 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8561 kind: FMAInstKind::Accumulator);
8562 }
8563 break;
8564
8565 case AArch64MachineCombinerPattern::FMLSv8f16_OP1:
8566 case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
8567 RC = &AArch64::FPR128RegClass;
8568 Register NewVR = MRI.createVirtualRegister(RegClass: RC);
8569 MachineInstrBuilder MIB1 =
8570 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: AArch64::FNEGv8f16), DestReg: NewVR)
8571 .add(MO: Root.getOperand(i: 2));
8572 InsInstrs.push_back(Elt: MIB1);
8573 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
8574 if (Pattern == AArch64MachineCombinerPattern::FMLSv8f16_OP1) {
8575 Opc = AArch64::FMLAv8f16;
8576 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8577 kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
8578 } else {
8579 Opc = AArch64::FMLAv8i16_indexed;
8580 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8581 kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
8582 }
8583 break;
8584 }
8585 case AArch64MachineCombinerPattern::FMLSv8f16_OP2:
8586 RC = &AArch64::FPR128RegClass;
8587 Opc = AArch64::FMLSv8f16;
8588 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8589 kind: FMAInstKind::Accumulator);
8590 break;
8591 case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2:
8592 RC = &AArch64::FPR128RegClass;
8593 Opc = AArch64::FMLSv8i16_indexed;
8594 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8595 kind: FMAInstKind::Indexed);
8596 break;
8597
8598 case AArch64MachineCombinerPattern::FMLSv2f64_OP2:
8599 case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2:
8600 RC = &AArch64::FPR128RegClass;
8601 if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
8602 Opc = AArch64::FMLSv2i64_indexed;
8603 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8604 kind: FMAInstKind::Indexed);
8605 } else {
8606 Opc = AArch64::FMLSv2f64;
8607 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8608 kind: FMAInstKind::Accumulator);
8609 }
8610 break;
8611
8612 case AArch64MachineCombinerPattern::FMLSv4f32_OP2:
8613 case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2:
8614 RC = &AArch64::FPR128RegClass;
8615 if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
8616 Opc = AArch64::FMLSv4i32_indexed;
8617 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8618 kind: FMAInstKind::Indexed);
8619 } else {
8620 Opc = AArch64::FMLSv4f32;
8621 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 2, MaddOpc: Opc, RC,
8622 kind: FMAInstKind::Accumulator);
8623 }
8624 break;
8625 case AArch64MachineCombinerPattern::FMLSv2f32_OP1:
8626 case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
8627 RC = &AArch64::FPR64RegClass;
8628 Register NewVR = MRI.createVirtualRegister(RegClass: RC);
8629 MachineInstrBuilder MIB1 =
8630 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: AArch64::FNEGv2f32), DestReg: NewVR)
8631 .add(MO: Root.getOperand(i: 2));
8632 InsInstrs.push_back(Elt: MIB1);
8633 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
8634 if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
8635 Opc = AArch64::FMLAv2i32_indexed;
8636 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8637 kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
8638 } else {
8639 Opc = AArch64::FMLAv2f32;
8640 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8641 kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
8642 }
8643 break;
8644 }
8645 case AArch64MachineCombinerPattern::FMLSv4f32_OP1:
8646 case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
8647 RC = &AArch64::FPR128RegClass;
8648 Register NewVR = MRI.createVirtualRegister(RegClass: RC);
8649 MachineInstrBuilder MIB1 =
8650 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: AArch64::FNEGv4f32), DestReg: NewVR)
8651 .add(MO: Root.getOperand(i: 2));
8652 InsInstrs.push_back(Elt: MIB1);
8653 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
8654 if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
8655 Opc = AArch64::FMLAv4i32_indexed;
8656 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8657 kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
8658 } else {
8659 Opc = AArch64::FMLAv4f32;
8660 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8661 kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
8662 }
8663 break;
8664 }
8665 case AArch64MachineCombinerPattern::FMLSv2f64_OP1:
8666 case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
8667 RC = &AArch64::FPR128RegClass;
8668 Register NewVR = MRI.createVirtualRegister(RegClass: RC);
8669 MachineInstrBuilder MIB1 =
8670 BuildMI(MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: AArch64::FNEGv2f64), DestReg: NewVR)
8671 .add(MO: Root.getOperand(i: 2));
8672 InsInstrs.push_back(Elt: MIB1);
8673 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
8674 if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
8675 Opc = AArch64::FMLAv2i64_indexed;
8676 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8677 kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
8678 } else {
8679 Opc = AArch64::FMLAv2f64;
8680 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: 1, MaddOpc: Opc, RC,
8681 kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
8682 }
8683 break;
8684 }
8685 case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1:
8686 case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2: {
8687 unsigned IdxDupOp =
8688 (Pattern == AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1
8689 : 2;
8690 genIndexedMultiply(Root, InsInstrs, IdxDupOp, MulOpc: AArch64::FMULv2i32_indexed,
8691 RC: &AArch64::FPR128RegClass, MRI);
8692 break;
8693 }
8694 case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1:
8695 case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2: {
8696 unsigned IdxDupOp =
8697 (Pattern == AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1
8698 : 2;
8699 genIndexedMultiply(Root, InsInstrs, IdxDupOp, MulOpc: AArch64::FMULv2i64_indexed,
8700 RC: &AArch64::FPR128RegClass, MRI);
8701 break;
8702 }
8703 case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1:
8704 case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2: {
8705 unsigned IdxDupOp =
8706 (Pattern == AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1
8707 : 2;
8708 genIndexedMultiply(Root, InsInstrs, IdxDupOp, MulOpc: AArch64::FMULv4i16_indexed,
8709 RC: &AArch64::FPR128_loRegClass, MRI);
8710 break;
8711 }
8712 case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1:
8713 case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2: {
8714 unsigned IdxDupOp =
8715 (Pattern == AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1
8716 : 2;
8717 genIndexedMultiply(Root, InsInstrs, IdxDupOp, MulOpc: AArch64::FMULv4i32_indexed,
8718 RC: &AArch64::FPR128RegClass, MRI);
8719 break;
8720 }
8721 case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1:
8722 case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2: {
8723 unsigned IdxDupOp =
8724 (Pattern == AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1
8725 : 2;
8726 genIndexedMultiply(Root, InsInstrs, IdxDupOp, MulOpc: AArch64::FMULv8i16_indexed,
8727 RC: &AArch64::FPR128_loRegClass, MRI);
8728 break;
8729 }
8730 case AArch64MachineCombinerPattern::FNMADD: {
8731 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
8732 break;
8733 }
8734
8735 } // end switch (Pattern)
8736 // Record MUL and ADD/SUB for deletion
8737 if (MUL)
8738 DelInstrs.push_back(Elt: MUL);
8739 DelInstrs.push_back(Elt: &Root);
8740
8741 // Set the flags on the inserted instructions to be the merged flags of the
8742 // instructions that we have combined.
8743 uint32_t Flags = Root.getFlags();
8744 if (MUL)
8745 Flags = Root.mergeFlagsWith(Other: *MUL);
8746 for (auto *MI : InsInstrs)
8747 MI->setFlags(Flags);
8748}
8749
8750/// Replace a csinc-branch sequence with a simple conditional branch.
8751///
8752/// Examples:
8753/// 1. \code
8754/// csinc w9, wzr, wzr, <condition code>
8755/// tbnz w9, #0, 0x44
8756/// \endcode
8757/// to
8758/// \code
8759/// b.<inverted condition code>
8760/// \endcode
8761///
8762/// 2. \code
8763/// csinc w9, wzr, wzr, <condition code>
8764/// tbz w9, #0, 0x44
8765/// \endcode
8766/// to
8767/// \code
8768/// b.<condition code>
8769/// \endcode
8770///
8771/// Replace a compare-and-branch sequence with a TBZ/TBNZ instruction when
8772/// the compare's constant operand is a power of 2.
8773///
8774/// Examples:
8775/// \code
8776/// and w8, w8, #0x400
8777/// cbnz w8, L1
8778/// \endcode
8779/// to
8780/// \code
8781/// tbnz w8, #10, L1
8782/// \endcode
8783///
8784/// \param MI Conditional Branch
8785/// \return True when the simple conditional branch is generated
8786///
8787bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
8788 bool IsNegativeBranch = false;
8789 bool IsTestAndBranch = false;
8790 unsigned TargetBBInMI = 0;
8791 switch (MI.getOpcode()) {
8792 default:
8793 llvm_unreachable("Unknown branch instruction?");
8794 case AArch64::Bcc:
8795 case AArch64::CBWPri:
8796 case AArch64::CBXPri:
8797 case AArch64::CBWPrr:
8798 case AArch64::CBXPrr:
8799 return false;
8800 case AArch64::CBZW:
8801 case AArch64::CBZX:
8802 TargetBBInMI = 1;
8803 break;
8804 case AArch64::CBNZW:
8805 case AArch64::CBNZX:
8806 TargetBBInMI = 1;
8807 IsNegativeBranch = true;
8808 break;
8809 case AArch64::TBZW:
8810 case AArch64::TBZX:
8811 TargetBBInMI = 2;
8812 IsTestAndBranch = true;
8813 break;
8814 case AArch64::TBNZW:
8815 case AArch64::TBNZX:
8816 TargetBBInMI = 2;
8817 IsNegativeBranch = true;
8818 IsTestAndBranch = true;
8819 break;
8820 }
8821 // So we increment a zero register and test for bits other
8822 // than bit 0? Conservatively bail out in case the verifier
8823 // missed this case.
8824 if (IsTestAndBranch && MI.getOperand(i: 1).getImm())
8825 return false;
8826
8827 // Find Definition.
8828 assert(MI.getParent() && "Incomplete machine instruction\n");
8829 MachineBasicBlock *MBB = MI.getParent();
8830 MachineFunction *MF = MBB->getParent();
8831 MachineRegisterInfo *MRI = &MF->getRegInfo();
8832 Register VReg = MI.getOperand(i: 0).getReg();
8833 if (!VReg.isVirtual())
8834 return false;
8835
8836 MachineInstr *DefMI = MRI->getVRegDef(Reg: VReg);
8837
8838 // Look through COPY instructions to find definition.
8839 while (DefMI->isCopy()) {
8840 Register CopyVReg = DefMI->getOperand(i: 1).getReg();
8841 if (!MRI->hasOneNonDBGUse(RegNo: CopyVReg))
8842 return false;
8843 if (!MRI->hasOneDef(RegNo: CopyVReg))
8844 return false;
8845 DefMI = MRI->getVRegDef(Reg: CopyVReg);
8846 }
8847
8848 switch (DefMI->getOpcode()) {
8849 default:
8850 return false;
8851 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
8852 case AArch64::ANDWri:
8853 case AArch64::ANDXri: {
8854 if (IsTestAndBranch)
8855 return false;
8856 if (DefMI->getParent() != MBB)
8857 return false;
8858 if (!MRI->hasOneNonDBGUse(RegNo: VReg))
8859 return false;
8860
8861 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
8862 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
8863 val: DefMI->getOperand(i: 2).getImm(), regSize: Is32Bit ? 32 : 64);
8864 if (!isPowerOf2_64(Value: Mask))
8865 return false;
8866
8867 MachineOperand &MO = DefMI->getOperand(i: 1);
8868 Register NewReg = MO.getReg();
8869 if (!NewReg.isVirtual())
8870 return false;
8871
8872 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
8873
8874 MachineBasicBlock &RefToMBB = *MBB;
8875 MachineBasicBlock *TBB = MI.getOperand(i: 1).getMBB();
8876 DebugLoc DL = MI.getDebugLoc();
8877 unsigned Imm = Log2_64(Value: Mask);
8878 unsigned Opc = (Imm < 32)
8879 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
8880 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
8881 MachineInstr *NewMI = BuildMI(BB&: RefToMBB, I&: MI, MIMD: DL, MCID: get(Opcode: Opc))
8882 .addReg(RegNo: NewReg)
8883 .addImm(Val: Imm)
8884 .addMBB(MBB: TBB);
8885    // The register now lives on to the TBZ/TBNZ.
8886 MO.setIsKill(false);
8887
8888    // Bit positions smaller than 32 must always use the 32-bit
8889    // variant (W), because the 64-bit variant cannot encode
8890    // them.
8891    // Therefore, if the input register is 64-bit, we need to take
8892    // its 32-bit sub-register.
8893 if (!Is32Bit && Imm < 32)
8894 NewMI->getOperand(i: 0).setSubReg(AArch64::sub_32);
8895 MI.eraseFromParent();
8896 return true;
8897 }
8898 // Look for CSINC
8899 case AArch64::CSINCWr:
8900 case AArch64::CSINCXr: {
8901 if (!(DefMI->getOperand(i: 1).getReg() == AArch64::WZR &&
8902 DefMI->getOperand(i: 2).getReg() == AArch64::WZR) &&
8903 !(DefMI->getOperand(i: 1).getReg() == AArch64::XZR &&
8904 DefMI->getOperand(i: 2).getReg() == AArch64::XZR))
8905 return false;
8906
8907 if (DefMI->findRegisterDefOperandIdx(Reg: AArch64::NZCV, /*TRI=*/nullptr,
8908 isDead: true) != -1)
8909 return false;
8910
8911 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(i: 3).getImm();
8912 // Convert only when the condition code is not modified between
8913 // the CSINC and the branch. The CC may be used by other
8914 // instructions in between.
8915 if (areCFlagsAccessedBetweenInstrs(From: DefMI, To: MI, TRI: &getRegisterInfo(), AccessToCheck: AK_Write))
8916 return false;
8917 MachineBasicBlock &RefToMBB = *MBB;
8918 MachineBasicBlock *TBB = MI.getOperand(i: TargetBBInMI).getMBB();
8919 DebugLoc DL = MI.getDebugLoc();
8920 if (IsNegativeBranch)
8921 CC = AArch64CC::getInvertedCondCode(Code: CC);
8922 BuildMI(BB&: RefToMBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::Bcc)).addImm(Val: CC).addMBB(MBB: TBB);
8923 MI.eraseFromParent();
8924 return true;
8925 }
8926 }
8927}
8928
8929std::pair<unsigned, unsigned>
8930AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
8931 const unsigned Mask = AArch64II::MO_FRAGMENT;
8932 return std::make_pair(x: TF & Mask, y: TF & ~Mask);
8933}
8934
8935ArrayRef<std::pair<unsigned, const char *>>
8936AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
8937 using namespace AArch64II;
8938
8939 static const std::pair<unsigned, const char *> TargetFlags[] = {
8940 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
8941 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
8942 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
8943 {MO_HI12, "aarch64-hi12"}};
8944 return ArrayRef(TargetFlags);
8945}
8946
8947ArrayRef<std::pair<unsigned, const char *>>
8948AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
8949 using namespace AArch64II;
8950
8951 static const std::pair<unsigned, const char *> TargetFlags[] = {
8952 {MO_COFFSTUB, "aarch64-coffstub"},
8953 {MO_GOT, "aarch64-got"},
8954 {MO_NC, "aarch64-nc"},
8955 {MO_S, "aarch64-s"},
8956 {MO_TLS, "aarch64-tls"},
8957 {MO_DLLIMPORT, "aarch64-dllimport"},
8958 {MO_PREL, "aarch64-prel"},
8959 {MO_TAGGED, "aarch64-tagged"},
8960 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
8961 };
8962 return ArrayRef(TargetFlags);
8963}
8964
8965ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
8966AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
8967 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8968 {{MOSuppressPair, "aarch64-suppress-pair"},
8969 {MOStridedAccess, "aarch64-strided-access"}};
8970 return ArrayRef(TargetFlags);
8971}
8972
8973/// Constants defining how certain sequences should be outlined.
8974/// This encompasses how an outlined function should be called, and what kind of
8975/// frame should be emitted for that outlined function.
8976///
8977/// \p MachineOutlinerDefault implies that the function should be called with
8978/// a save and restore of LR to the stack.
8979///
8980/// That is,
8981///
8982/// I1 Save LR OUTLINED_FUNCTION:
8983/// I2 --> BL OUTLINED_FUNCTION I1
8984/// I3 Restore LR I2
8985/// I3
8986/// RET
8987///
8988/// * Call construction overhead: 3 (save + BL + restore)
8989/// * Frame construction overhead: 1 (ret)
8990/// * Requires stack fixups? Yes
8991///
8992/// \p MachineOutlinerTailCall implies that the function is being created from
8993/// a sequence of instructions ending in a return.
8994///
8995/// That is,
8996///
8997/// I1 OUTLINED_FUNCTION:
8998/// I2 --> B OUTLINED_FUNCTION I1
8999/// RET I2
9000/// RET
9001///
9002/// * Call construction overhead: 1 (B)
9003/// * Frame construction overhead: 0 (Return included in sequence)
9004/// * Requires stack fixups? No
9005///
9006/// \p MachineOutlinerNoLRSave implies that the function should be called using
9007/// a BL instruction, but doesn't require LR to be saved and restored. This
9008/// happens when LR is known to be dead.
9009///
9010/// That is,
9011///
9012/// I1 OUTLINED_FUNCTION:
9013/// I2 --> BL OUTLINED_FUNCTION I1
9014/// I3 I2
9015/// I3
9016/// RET
9017///
9018/// * Call construction overhead: 1 (BL)
9019/// * Frame construction overhead: 1 (RET)
9020/// * Requires stack fixups? No
9021///
9022/// \p MachineOutlinerThunk implies that the function is being created from
9023/// a sequence of instructions ending in a call. The outlined function is
9024/// called with a BL instruction, and the outlined function tail-calls the
9025/// original call destination.
9026///
9027/// That is,
9028///
9029/// I1 OUTLINED_FUNCTION:
9030/// I2 --> BL OUTLINED_FUNCTION I1
9031/// BL f I2
9032/// B f
9033/// * Call construction overhead: 1 (BL)
9034/// * Frame construction overhead: 0
9035/// * Requires stack fixups? No
9036///
9037/// \p MachineOutlinerRegSave implies that the function should be called with a
9038/// save and restore of LR to an available register. This allows us to avoid
9039/// stack fixups. Note that this outlining variant is compatible with the
9040/// NoLRSave case.
9041///
9042/// That is,
9043///
9044/// I1 Save LR OUTLINED_FUNCTION:
9045/// I2 --> BL OUTLINED_FUNCTION I1
9046/// I3 Restore LR I2
9047/// I3
9048/// RET
9049///
9050/// * Call construction overhead: 3 (save + BL + restore)
9051/// * Frame construction overhead: 1 (ret)
9052/// * Requires stack fixups? No
9053enum MachineOutlinerClass {
9054 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
9055 MachineOutlinerTailCall, /// Only emit a branch.
9056 MachineOutlinerNoLRSave, /// Emit a call and return.
9057 MachineOutlinerThunk, /// Emit a call and tail-call.
9058 MachineOutlinerRegSave /// Same as default, but save to a register.
9059};
9060
9061enum MachineOutlinerMBBFlags {
9062 LRUnavailableSomewhere = 0x2,
9063 HasCalls = 0x4,
9064 UnsafeRegsDead = 0x8
9065};
9066
9067Register
9068AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
9069 MachineFunction *MF = C.getMF();
9070 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
9071 const AArch64RegisterInfo *ARI =
9072 static_cast<const AArch64RegisterInfo *>(&TRI);
9073 // Check if there is an available register across the sequence that we can
9074 // use.
9075 for (unsigned Reg : AArch64::GPR64RegClass) {
9076 if (!ARI->isReservedReg(MF: *MF, Reg) &&
9077 Reg != AArch64::LR && // LR is not reserved, but don't use it.
9078 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
9079 Reg != AArch64::X17 && // Ditto for X17.
9080 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
9081 C.isAvailableInsideSeq(Reg, TRI))
9082 return Reg;
9083 }
9084 return Register();
9085}
9086
9087static bool
9088outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
9089 const outliner::Candidate &b) {
9090 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9091 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9092
9093 return MFIa->shouldSignReturnAddress(SpillsLR: false) == MFIb->shouldSignReturnAddress(SpillsLR: false) &&
9094 MFIa->shouldSignReturnAddress(SpillsLR: true) == MFIb->shouldSignReturnAddress(SpillsLR: true);
9095}
9096
9097static bool
9098outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
9099 const outliner::Candidate &b) {
9100 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9101 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9102
9103 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
9104}
9105
9106static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
9107 const outliner::Candidate &b) {
9108 const AArch64Subtarget &SubtargetA =
9109 a.getMF()->getSubtarget<AArch64Subtarget>();
9110 const AArch64Subtarget &SubtargetB =
9111 b.getMF()->getSubtarget<AArch64Subtarget>();
9112 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
9113}
9114
9115std::optional<std::unique_ptr<outliner::OutlinedFunction>>
9116AArch64InstrInfo::getOutliningCandidateInfo(
9117 const MachineModuleInfo &MMI,
9118 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
9119 unsigned MinRepeats) const {
9120 unsigned SequenceSize = 0;
9121 for (auto &MI : RepeatedSequenceLocs[0])
9122 SequenceSize += getInstSizeInBytes(MI);
9123
9124 unsigned NumBytesToCreateFrame = 0;
9125
9126 // We only allow outlining for functions having exactly matching return
9127 // address signing attributes, i.e., all share the same value for the
9128 // attribute "sign-return-address" and all share the same type of key they
9129 // are signed with.
9130  // Additionally, we require that either all functions support v8.3a features
9131  // or none do. Otherwise an outlined function could get signed using dedicated
9132  // v8.3 instructions while being called from a function that doesn't support
9133  // v8.3 instructions, which would be invalid.
9134 if (std::adjacent_find(
9135 first: RepeatedSequenceLocs.begin(), last: RepeatedSequenceLocs.end(),
9136 binary_pred: [](const outliner::Candidate &a, const outliner::Candidate &b) {
9137 // Return true if a and b are non-equal w.r.t. return address
9138 // signing or support of v8.3a features
9139 if (outliningCandidatesSigningScopeConsensus(a, b) &&
9140 outliningCandidatesSigningKeyConsensus(a, b) &&
9141 outliningCandidatesV8_3OpsConsensus(a, b)) {
9142 return false;
9143 }
9144 return true;
9145 }) != RepeatedSequenceLocs.end()) {
9146 return std::nullopt;
9147 }
9148
9149  // Since at this point all candidates agree on their return address signing,
9150  // picking just one is fine. If the candidate functions potentially sign their
9151 // return addresses, the outlined function should do the same. Note that in
9152 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
9153 // not certainly true that the outlined function will have to sign its return
9154 // address but this decision is made later, when the decision to outline
9155 // has already been made.
9156 // The same holds for the number of additional instructions we need: On
9157 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
9158 // necessary. However, at this point we don't know if the outlined function
9159 // will have a RET instruction so we assume the worst.
9160 const TargetRegisterInfo &TRI = getRegisterInfo();
9161 // Performing a tail call may require extra checks when PAuth is enabled.
9162 // If PAuth is disabled, set it to zero for uniformity.
9163 unsigned NumBytesToCheckLRInTCEpilogue = 0;
9164 if (RepeatedSequenceLocs[0]
9165 .getMF()
9166 ->getInfo<AArch64FunctionInfo>()
9167 ->shouldSignReturnAddress(SpillsLR: true)) {
9168 // One PAC and one AUT instructions
9169 NumBytesToCreateFrame += 8;
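    // Illustrative only: with return-address signing enabled, the outlined
    // frame is expected to bracket its body roughly as
    //   OUTLINED_FUNCTION:
    //     paciasp            // pacibsp when signing with the B key
    //     ...
    //     autiasp
    //     ret                // or retaa/retab on v8.3a, folding the AUT
    // which is why 8 bytes (one PAC and one AUT) are added above.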
9170
9171 // PAuth is enabled - set extra tail call cost, if any.
9172 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
9173 MF: *RepeatedSequenceLocs[0].getMF());
9174 NumBytesToCheckLRInTCEpilogue =
9175 AArch64PAuth::getCheckerSizeInBytes(Method: LRCheckMethod);
9176 // Checking the authenticated LR value may significantly impact
9177 // SequenceSize, so account for it for more precise results.
9178 if (isTailCallReturnInst(MI: RepeatedSequenceLocs[0].back()))
9179 SequenceSize += NumBytesToCheckLRInTCEpilogue;
9180
9181    // We have to check whether SP-modifying instructions would get outlined.
9182    // If so, we only allow outlining when SP is unchanged overall: matching
9183    // sub and add instructions are okay to outline; all other SP modifications
9184    // are not (see the illustrative example after the lambda below).
9185 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
9186 int SPValue = 0;
9187 for (auto &MI : C) {
9188 if (MI.modifiesRegister(Reg: AArch64::SP, TRI: &TRI)) {
9189 switch (MI.getOpcode()) {
9190 case AArch64::ADDXri:
9191 case AArch64::ADDWri:
9192 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9193 assert(MI.getOperand(2).isImm() &&
9194 "Expected operand to be immediate");
9195 assert(MI.getOperand(1).isReg() &&
9196 "Expected operand to be a register");
9197 // Check if the add just increments sp. If so, we search for
9198 // matching sub instructions that decrement sp. If not, the
9199 // modification is illegal
9200 if (MI.getOperand(i: 1).getReg() == AArch64::SP)
9201 SPValue += MI.getOperand(i: 2).getImm();
9202 else
9203 return true;
9204 break;
9205 case AArch64::SUBXri:
9206 case AArch64::SUBWri:
9207 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9208 assert(MI.getOperand(2).isImm() &&
9209 "Expected operand to be immediate");
9210 assert(MI.getOperand(1).isReg() &&
9211 "Expected operand to be a register");
9212 // Check if the sub just decrements sp. If so, we search for
9213 // matching add instructions that increment sp. If not, the
9214 // modification is illegal
9215 if (MI.getOperand(i: 1).getReg() == AArch64::SP)
9216 SPValue -= MI.getOperand(i: 2).getImm();
9217 else
9218 return true;
9219 break;
9220 default:
9221 return true;
9222 }
9223 }
9224 }
9225 if (SPValue)
9226 return true;
9227 return false;
9228 };
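    // For illustration only: a candidate containing the balanced pair
    //   sub sp, sp, #32
    //   ...
    //   add sp, sp, #32
    // remains acceptable, whereas an unmatched "sub sp, sp, #32" (or any
    // other kind of write to SP) disqualifies the candidate.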
9229 // Remove candidates with illegal stack modifying instructions
9230 llvm::erase_if(C&: RepeatedSequenceLocs, P: hasIllegalSPModification);
9231
9232 // If the sequence doesn't have enough candidates left, then we're done.
9233 if (RepeatedSequenceLocs.size() < MinRepeats)
9234 return std::nullopt;
9235 }
9236
9237 // Properties about candidate MBBs that hold for all of them.
9238 unsigned FlagsSetInAll = 0xF;
9239
9240 // Intersect each candidate's flags; FlagsSetInAll keeps only the properties common to all of them.
9241 for (outliner::Candidate &C : RepeatedSequenceLocs)
9242 FlagsSetInAll &= C.Flags;
9243
9244 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
9245
9246 // Helper lambda which sets call information for every candidate.
9247 auto SetCandidateCallInfo =
9248 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
9249 for (outliner::Candidate &C : RepeatedSequenceLocs)
9250 C.setCallInfo(CID: CallID, CO: NumBytesForCall);
9251 };
9252
9253 unsigned FrameID = MachineOutlinerDefault;
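// Non-tail-call frames end in a 4-byte RET (see buildOutlinedFrame); this is
// reset below for tail calls and thunks.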
9254 NumBytesToCreateFrame += 4;
9255
9256 bool HasBTI = any_of(Range&: RepeatedSequenceLocs, P: [](outliner::Candidate &C) {
9257 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
9258 });
9259
9260 // We check to see if CFI instructions are present, and if they are,
9261 // we count the CFI instructions in the candidate sequence.
9262 unsigned CFICount = 0;
9263 for (auto &I : RepeatedSequenceLocs[0]) {
9264 if (I.isCFIInstruction())
9265 CFICount++;
9266 }
9267
9268 // We compare the number of CFI instructions found to the number of CFI
9269 // instructions in the parent function for each candidate. We must check this
9270 // since if we outline one of the CFI instructions in a function, we have to
9271 // outline them all for correctness. If we do not, the address offsets will be
9272 // incorrect between the two sections of the program.
9273 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9274 std::vector<MCCFIInstruction> CFIInstructions =
9275 C.getMF()->getFrameInstructions();
9276
9277 if (CFICount > 0 && CFICount != CFIInstructions.size())
9278 return std::nullopt;
9279 }
9280
9281 // Returns true if an instruction is safe to fix up, false otherwise.
9282 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
9283 if (MI.isCall())
9284 return true;
9285
9286 if (!MI.modifiesRegister(Reg: AArch64::SP, TRI: &TRI) &&
9287 !MI.readsRegister(Reg: AArch64::SP, TRI: &TRI))
9288 return true;
9289
9290 // Any modification of SP will break our code to save/restore LR.
9291 // FIXME: We could handle some instructions which add a constant
9292 // offset to SP, with a bit more work.
9293 if (MI.modifiesRegister(Reg: AArch64::SP, TRI: &TRI))
9294 return false;
9295
9296 // At this point, we have a stack instruction that we might need to
9297 // fix up. We'll handle it if it's a load or store.
9298 if (MI.mayLoadOrStore()) {
9299 const MachineOperand *Base; // Filled with the base operand of MI.
9300 int64_t Offset; // Filled with the offset of MI.
9301 bool OffsetIsScalable;
9302
9303 // Does it allow us to offset the base operand and is the base the
9304 // register SP?
9305 if (!getMemOperandWithOffset(MI, BaseOp&: Base, Offset, OffsetIsScalable, TRI: &TRI) ||
9306 !Base->isReg() || Base->getReg() != AArch64::SP)
9307 return false;
9308
9309 // Fix-up code below assumes byte offsets.
9310 if (OffsetIsScalable)
9311 return false;
9312
9313 // Find the minimum/maximum offset for this instruction and check
9314 // if fixing it up would be in range.
9315 int64_t MinOffset,
9316 MaxOffset; // Unscaled offsets for the instruction.
9317 // The scale to multiply the offsets by.
9318 TypeSize Scale(0U, false), DummyWidth(0U, false);
9319 getMemOpInfo(Opcode: MI.getOpcode(), Scale, Width&: DummyWidth, MinOffset, MaxOffset);
9320
9321 Offset += 16; // Update the offset to what it would be if we outlined.
9322 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
9323 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
9324 return false;
9325
9326 // It's in range, so we can outline it.
9327 return true;
9328 }
9329
9330 // FIXME: Add handling for instructions like "add x0, sp, #8".
9331
9332 // We can't fix it up, so don't outline it.
9333 return false;
9334 };
9335
9336 // True if it's possible to fix up each stack instruction in this sequence.
9337 // Important for frames/call variants that modify the stack.
9338 bool AllStackInstrsSafe =
9339 llvm::all_of(Range&: RepeatedSequenceLocs[0], P: IsSafeToFixup);
9340
9341 // If the last instruction in any candidate is a terminator, then we should
9342 // tail call all of the candidates.
9343 if (RepeatedSequenceLocs[0].back().isTerminator()) {
9344 FrameID = MachineOutlinerTailCall;
9345 NumBytesToCreateFrame = 0;
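// The call is a single 4-byte branch, plus any LR-check code emitted in the
// tail-call epilogue.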
9346 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
9347 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
9348 }
9349
9350 else if (LastInstrOpcode == AArch64::BL ||
9351 ((LastInstrOpcode == AArch64::BLR ||
9352 LastInstrOpcode == AArch64::BLRNoIP) &&
9353 !HasBTI)) {
9354 // FIXME: Do we need to check if the code after this uses the value of LR?
9355 FrameID = MachineOutlinerThunk;
9356 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
9357 SetCandidateCallInfo(MachineOutlinerThunk, 4);
9358 }
9359
9360 else {
9361 // We need to decide how to emit calls + frames. We can always emit the same
9362 // frame if we don't need to save to the stack. If we have to save to the
9363 // stack, then we need a different frame.
9364 unsigned NumBytesNoStackCalls = 0;
9365 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
9366
9367 // Check if we have to save LR.
9368 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9369 bool LRAvailable =
9370 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
9371 ? C.isAvailableAcrossAndOutOfSeq(Reg: AArch64::LR, TRI)
9372 : true;
9373 // If we have a noreturn caller, then we're going to be conservative and
9374 // say that we have to save LR. If we don't have a ret at the end of the
9375 // block, then we can't reason about liveness accurately.
9376 //
9377 // FIXME: We can probably do better than always disabling this in
9378 // noreturn functions by fixing up the liveness info.
9379 bool IsNoReturn =
9380 C.getMF()->getFunction().hasFnAttribute(Kind: Attribute::NoReturn);
9381
9382 // Is LR available? If so, we don't need a save.
9383 if (LRAvailable && !IsNoReturn) {
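// Just the 4-byte BL; no LR save is needed at this call site.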
9384 NumBytesNoStackCalls += 4;
9385 C.setCallInfo(CID: MachineOutlinerNoLRSave, CO: 4);
9386 CandidatesWithoutStackFixups.push_back(x: C);
9387 }
9388
9389 // Is an unused register available? If so, we won't modify the stack, so
9390 // we can outline with the same frame type as those that don't save LR.
9391 else if (findRegisterToSaveLRTo(C)) {
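// 12 bytes: copy LR into the free register, BL, copy it back
// (see insertOutlinedCall).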
9392 NumBytesNoStackCalls += 12;
9393 C.setCallInfo(CID: MachineOutlinerRegSave, CO: 12);
9394 CandidatesWithoutStackFixups.push_back(x: C);
9395 }
9396
9397 // Is SP used in the sequence at all? If not, we don't have to modify
9398 // the stack, so we are guaranteed to get the same frame.
9399 else if (C.isAvailableInsideSeq(Reg: AArch64::SP, TRI)) {
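// 12 bytes: save LR to the stack before the BL and restore it afterwards.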
9400 NumBytesNoStackCalls += 12;
9401 C.setCallInfo(CID: MachineOutlinerDefault, CO: 12);
9402 CandidatesWithoutStackFixups.push_back(x: C);
9403 }
9404
9405 // If we outline this, we need to modify the stack. Pretend we don't
9406 // outline it by charging its full size to the no-stack-call total.
9407 else {
9408 NumBytesNoStackCalls += SequenceSize;
9409 }
9410 }
9411
9412 // If there are no places where we have to save LR, then note that we
9413 // don't have to update the stack. Otherwise, give every candidate the
9414 // default call type, as long as it's safe to do so.
9415 if (!AllStackInstrsSafe ||
9416 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
9417 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
9418 FrameID = MachineOutlinerNoLRSave;
9419 if (RepeatedSequenceLocs.size() < MinRepeats)
9420 return std::nullopt;
9421 } else {
9422 SetCandidateCallInfo(MachineOutlinerDefault, 12);
9423
9424 // Bugzilla ID: 46767
9425 // TODO: Check if fixing up the stack more than once is safe so we can
9426 // outline these.
9427 //
9428 // An outline resulting in a caller that requires stack fixups at the
9429 // callsite to a callee that also requires stack fixups can happen when
9430 // there are no available registers at the candidate callsite for a
9431 // candidate that itself also has calls.
9432 //
9433 // In other words, if function_containing_sequence in the following pseudo
9434 // assembly requires that we save LR at the point of the call, but there
9435 // are no available registers, then we save using SP, and as a
9436 // result the SP offsets require stack fixups by multiples of 16.
9437 //
9438 // function_containing_sequence:
9439 // ...
9440 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9441 // call OUTLINED_FUNCTION_N
9442 // restore LR from SP
9443 // ...
9444 //
9445 // OUTLINED_FUNCTION_N:
9446 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9447 // ...
9448 // bl foo
9449 // restore LR from SP
9450 // ret
9451 //
9452 // Because the code to handle more than one stack fixup does not
9453 // currently have the proper checks for legality, these cases will assert
9454 // in the AArch64 MachineOutliner. This is because the code to do this
9455 // needs more hardening, testing, and better checks that the generated code
9456 // is legal, and because it is only verified to handle a single pass of
9457 // stack fixup.
9458 //
9459 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
9460 // these cases until they are known to be handled. Bugzilla 46767 is
9461 // referenced in comments at the assert site.
9462 //
9463 // To avoid asserting (or generating illegal code in no-assert builds),
9464 // we remove all candidates which would need more than one stack fixup by
9465 // pruning the cases where the candidate has calls while also having no
9466 // available LR and no available general purpose registers to copy
9467 // LR to (i.e. one extra stack save/restore).
9468 //
9469 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9470 erase_if(C&: RepeatedSequenceLocs, P: [this, &TRI](outliner::Candidate &C) {
9471 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
9472 return (llvm::any_of(Range&: C, P: IsCall)) &&
9473 (!C.isAvailableAcrossAndOutOfSeq(Reg: AArch64::LR, TRI) ||
9474 !findRegisterToSaveLRTo(C));
9475 });
9476 }
9477 }
9478
9479 // If we dropped all of the candidates, bail out here.
9480 if (RepeatedSequenceLocs.size() < MinRepeats)
9481 return std::nullopt;
9482 }
9483
9484 // Does every candidate's MBB contain a call? If so, then we might have a call
9485 // in the range.
9486 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9487 // Check if the range contains a call. These require a save + restore of the
9488 // link register.
9489 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
9490 bool ModStackToSaveLR = false;
9491 if (any_of(Range: drop_end(RangeOrContainer&: FirstCand),
9492 P: [](const MachineInstr &MI) { return MI.isCall(); }))
9493 ModStackToSaveLR = true;
9494
9495 // Handle the last instruction separately. If this is a tail call, then the
9496 // last instruction is a call. We don't want to save + restore in this case.
9497 // However, it could be possible that the last instruction is a call without
9498 // it being valid to tail call this sequence. We should consider this as
9499 // well.
9500 else if (FrameID != MachineOutlinerThunk &&
9501 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
9502 ModStackToSaveLR = true;
9503
9504 if (ModStackToSaveLR) {
9505 // We can't fix up the stack. Bail out.
9506 if (!AllStackInstrsSafe)
9507 return std::nullopt;
9508
9509 // Save + restore LR.
9510 NumBytesToCreateFrame += 8;
9511 }
9512 }
9513
9514 // If we have CFI instructions, we can only outline if the outlined section
9515 // can be a tail call
9516 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
9517 return std::nullopt;
9518
9519 return std::make_unique<outliner::OutlinedFunction>(
9520 args&: RepeatedSequenceLocs, args&: SequenceSize, args&: NumBytesToCreateFrame, args&: FrameID);
9521}
9522
9523void AArch64InstrInfo::mergeOutliningCandidateAttributes(
9524 Function &F, std::vector<outliner::Candidate> &Candidates) const {
9525 // If a bunch of candidates reach this point, they must agree on their return
9526 // address signing. It is therefore enough to just consider the signing
9527 // behaviour of one of them.
9528 const auto &CFn = Candidates.front().getMF()->getFunction();
9529
9530 if (CFn.hasFnAttribute(Kind: "ptrauth-returns"))
9531 F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "ptrauth-returns"));
9532 if (CFn.hasFnAttribute(Kind: "ptrauth-auth-traps"))
9533 F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "ptrauth-auth-traps"));
9534 // Since all candidates belong to the same module, just copy the
9535 // function-level attributes of an arbitrary function.
9536 if (CFn.hasFnAttribute(Kind: "sign-return-address"))
9537 F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "sign-return-address"));
9538 if (CFn.hasFnAttribute(Kind: "sign-return-address-key"))
9539 F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "sign-return-address-key"));
9540
9541 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
9542}
9543
9544bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
9545 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
9546 const Function &F = MF.getFunction();
9547
9548 // Can F be deduplicated by the linker? If it can, don't outline from it.
9549 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
9550 return false;
9551
9552 // Don't outline from functions with section markings; the program could
9553 // expect that all the code is in the named section.
9554 // FIXME: Allow outlining from multiple functions with the same section
9555 // marking.
9556 if (F.hasSection())
9557 return false;
9558
9559 // Outlining from functions with redzones is unsafe since the outliner may
9560 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
9561 // outline from it.
9562 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
9563 if (!AFI || AFI->hasRedZone().value_or(u: true))
9564 return false;
9565
9566 // FIXME: Determine whether it is safe to outline from functions which contain
9567 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
9568 // outlined together and ensure it is safe to outline with async unwind info,
9569 // required for saving & restoring VG around calls.
9570 if (AFI->hasStreamingModeChanges())
9571 return false;
9572
9573 // FIXME: Teach the outliner to generate/handle Windows unwind info.
9574 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
9575 return false;
9576
9577 // It's safe to outline from MF.
9578 return true;
9579}
9580
9581SmallVector<std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
9582AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
9583 unsigned &Flags) const {
9584 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
9585 "Must track liveness!");
9586 SmallVector<
9587 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
9588 Ranges;
9589 // According to the AArch64 Procedure Call Standard, the following are
9590 // undefined on entry/exit from a function call:
9591 //
9592 // * Registers x16, x17, (and thus w16, w17)
9593 // * Condition codes (and thus the NZCV register)
9594 //
9595 // If any of these registers are used inside or live across an outlined
9596 // function, then they may be modified later, either by the compiler or
9597 // some other tool (like the linker).
9598 //
9599 // To avoid outlining in these situations, partition each block into ranges
9600 // where these registers are dead. We will only outline from those ranges.
9601 LiveRegUnits LRU(getRegisterInfo());
9602 auto AreAllUnsafeRegsDead = [&LRU]() {
9603 return LRU.available(Reg: AArch64::W16) && LRU.available(Reg: AArch64::W17) &&
9604 LRU.available(Reg: AArch64::NZCV);
9605 };
9606
9607 // We need to know if LR is live across an outlining boundary later on in
9608 // order to decide how we'll create the outlined call, frame, etc.
9609 //
9610 // It's pretty expensive to check this for *every candidate* within a block.
9611 // That's some potentially n^2 behaviour, since in the worst case, we'd need
9612 // to compute liveness from the end of the block for O(n) candidates within
9613 // the block.
9614 //
9615 // So, to improve the average case, let's keep track of liveness from the end
9616 // of the block to the beginning of *every outlinable range*. If we know that
9617 // LR is available in every range we could outline from, then we know that
9618 // we don't need to check liveness for any candidate within that range.
9619 bool LRAvailableEverywhere = true;
9620 // Compute liveness bottom-up.
9621 LRU.addLiveOuts(MBB);
9622 // Update flags that require info about the entire MBB.
9623 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
9624 if (MI.isCall() && !MI.isTerminator())
9625 Flags |= MachineOutlinerMBBFlags::HasCalls;
9626 };
9627 // Range: [RangeBegin, RangeEnd)
9628 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
9629 unsigned RangeLen;
9630 auto CreateNewRangeStartingAt =
9631 [&RangeBegin, &RangeEnd,
9632 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
9633 RangeBegin = NewBegin;
9634 RangeEnd = std::next(x: RangeBegin);
9635 RangeLen = 0;
9636 };
9637 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
9638 // At least one unsafe register is not dead. We do not want to outline at
9639 // this point. If it is long enough to outline from, save the range
9640 // [RangeBegin, RangeEnd).
9641 if (RangeLen > 1)
9642 Ranges.push_back(Elt: std::make_pair(x&: RangeBegin, y&: RangeEnd));
9643 };
9644 // Find the first point where all unsafe registers are dead.
9645 // FIND: <safe instr> <-- end of first potential range
9646 // SKIP: <unsafe def>
9647 // SKIP: ... everything between ...
9648 // SKIP: <unsafe use>
9649 auto FirstPossibleEndPt = MBB.instr_rbegin();
9650 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
9651 LRU.stepBackward(MI: *FirstPossibleEndPt);
9652 // Update flags that impact how we outline across the entire block,
9653 // regardless of safety.
9654 UpdateWholeMBBFlags(*FirstPossibleEndPt);
9655 if (AreAllUnsafeRegsDead())
9656 break;
9657 }
9658 // If we exhausted the entire block, we have no safe ranges to outline.
9659 if (FirstPossibleEndPt == MBB.instr_rend())
9660 return Ranges;
9661 // Current range.
9662 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
9663 // FirstPossibleEndPt points to the first place (searching bottom-up) where
9664 // all unsafe registers are dead (if there is any such point). Begin
9665 // partitioning the MBB into ranges.
9666 for (auto &MI : make_range(x: FirstPossibleEndPt, y: MBB.instr_rend())) {
9667 LRU.stepBackward(MI);
9668 UpdateWholeMBBFlags(MI);
9669 if (!AreAllUnsafeRegsDead()) {
9670 SaveRangeIfNonEmpty();
9671 CreateNewRangeStartingAt(MI.getIterator());
9672 continue;
9673 }
9674 LRAvailableEverywhere &= LRU.available(Reg: AArch64::LR);
9675 RangeBegin = MI.getIterator();
9676 ++RangeLen;
9677 }
9678 // The above loop misses the last (or only) range. If we are still safe, then
9679 // let's save the range.
9680 if (AreAllUnsafeRegsDead())
9681 SaveRangeIfNonEmpty();
9682 if (Ranges.empty())
9683 return Ranges;
9684 // We found the ranges bottom-up, but the mapping expects them top-down.
9685 // Reverse the order.
9686 std::reverse(first: Ranges.begin(), last: Ranges.end());
9687 // If there is at least one outlinable range where LR is unavailable
9688 // somewhere, remember that.
9689 if (!LRAvailableEverywhere)
9690 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
9691 return Ranges;
9692}
9693
9694outliner::InstrType
9695AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
9696 MachineBasicBlock::iterator &MIT,
9697 unsigned Flags) const {
9698 MachineInstr &MI = *MIT;
9699
9700 // Don't outline anything used for return address signing. The outlined
9701 // function will get signed later if needed
9702 switch (MI.getOpcode()) {
9703 case AArch64::PACM:
9704 case AArch64::PACIASP:
9705 case AArch64::PACIBSP:
9706 case AArch64::PACIASPPC:
9707 case AArch64::PACIBSPPC:
9708 case AArch64::AUTIASP:
9709 case AArch64::AUTIBSP:
9710 case AArch64::AUTIASPPCi:
9711 case AArch64::AUTIASPPCr:
9712 case AArch64::AUTIBSPPCi:
9713 case AArch64::AUTIBSPPCr:
9714 case AArch64::RETAA:
9715 case AArch64::RETAB:
9716 case AArch64::RETAASPPCi:
9717 case AArch64::RETAASPPCr:
9718 case AArch64::RETABSPPCi:
9719 case AArch64::RETABSPPCr:
9720 case AArch64::EMITBKEY:
9721 case AArch64::PAUTH_PROLOGUE:
9722 case AArch64::PAUTH_EPILOGUE:
9723 return outliner::InstrType::Illegal;
9724 }
9725
9726 // We can only outline these if we will tail call the outlined function, or
9727 // fix up the CFI offsets. Currently, CFI instructions are outlined only
9728 // when the outlined function is tail-called.
9729 //
9730 // FIXME: If the proper fixups for the offset are implemented, this should be
9731 // possible.
9732 if (MI.isCFIInstruction())
9733 return outliner::InstrType::Legal;
9734
9735 // Is this a terminator for a basic block?
9736 if (MI.isTerminator())
9737 // TargetInstrInfo::getOutliningType has already filtered out anything
9738 // that would break this, so we can allow it here.
9739 return outliner::InstrType::Legal;
9740
9741 // Make sure none of the operands are un-outlinable.
9742 for (const MachineOperand &MOP : MI.operands()) {
9743 // A check preventing CFI indices was here before, but only CFI
9744 // instructions should have those.
9745 assert(!MOP.isCFIIndex());
9746
9747 // If it uses LR or W30 explicitly, then don't touch it.
9748 if (MOP.isReg() && !MOP.isImplicit() &&
9749 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
9750 return outliner::InstrType::Illegal;
9751 }
9752
9753 // Special cases for instructions that can always be outlined, but will fail
9754 // the later tests, e.g. ADRPs, which are PC-relative, but can always
9755 // be outlined because they don't require a *specific* value to be in LR.
9756 if (MI.getOpcode() == AArch64::ADRP)
9757 return outliner::InstrType::Legal;
9758
9759 // If MI is a call we might be able to outline it. We don't want to outline
9760 // any calls that rely on the position of items on the stack. When we outline
9761 // something containing a call, we have to emit a save and restore of LR in
9762 // the outlined function. Currently, this always happens by saving LR to the
9763 // stack. Thus, if we outline, say, half the parameters for a function call
9764 // plus the call, then we'll break the callee's expectations for the layout
9765 // of the stack.
9766 //
9767 // FIXME: Allow calls to functions which construct a stack frame, as long
9768 // as they don't access arguments on the stack.
9769 // FIXME: Figure out some way to analyze functions defined in other modules.
9770 // We should be able to compute the memory usage based on the IR calling
9771 // convention, even if we can't see the definition.
9772 if (MI.isCall()) {
9773 // Get the function associated with the call. Look at each operand and find
9774 // the one that represents the callee and get its name.
9775 const Function *Callee = nullptr;
9776 for (const MachineOperand &MOP : MI.operands()) {
9777 if (MOP.isGlobal()) {
9778 Callee = dyn_cast<Function>(Val: MOP.getGlobal());
9779 break;
9780 }
9781 }
9782
9783 // Never outline calls to mcount. There isn't any rule that would require
9784 // this, but the Linux kernel's "ftrace" feature depends on it.
9785 if (Callee && Callee->getName() == "\01_mcount")
9786 return outliner::InstrType::Illegal;
9787
9788 // If we don't know anything about the callee, assume it depends on the
9789 // stack layout of the caller. In that case, it's only legal to outline
9790 // as a tail-call. Explicitly list the call instructions we know about so we
9791 // don't get unexpected results with call pseudo-instructions.
9792 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
9793 if (MI.getOpcode() == AArch64::BLR ||
9794 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
9795 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
9796
9797 if (!Callee)
9798 return UnknownCallOutlineType;
9799
9800 // We have a function we have information about. Check if it's something
9801 // we can safely outline.
9802 MachineFunction *CalleeMF = MMI.getMachineFunction(F: *Callee);
9803
9804 // We don't know what's going on with the callee at all. Don't touch it.
9805 if (!CalleeMF)
9806 return UnknownCallOutlineType;
9807
9808 // Check if we know anything about the callee saves on the function. If we
9809 // don't, then don't touch it, since that implies that we haven't
9810 // computed anything about its stack frame yet.
9811 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
9812 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
9813 MFI.getNumObjects() > 0)
9814 return UnknownCallOutlineType;
9815
9816 // At this point, we can say that CalleeMF ought to not pass anything on the
9817 // stack. Therefore, we can outline it.
9818 return outliner::InstrType::Legal;
9819 }
9820
9821 // Don't touch the link register or W30.
9822 if (MI.readsRegister(Reg: AArch64::W30, TRI: &getRegisterInfo()) ||
9823 MI.modifiesRegister(Reg: AArch64::W30, TRI: &getRegisterInfo()))
9824 return outliner::InstrType::Illegal;
9825
9826 // Don't outline BTI instructions, because that will prevent the outlining
9827 // site from being indirectly callable.
9828 if (hasBTISemantics(MI))
9829 return outliner::InstrType::Illegal;
9830
9831 return outliner::InstrType::Legal;
9832}
9833
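/// Fix up SP-relative memory offsets in the outlined code: LR is spilled with
/// a 16-byte push, so each SP-based immediate offset must be increased by 16.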
9834void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
9835 for (MachineInstr &MI : MBB) {
9836 const MachineOperand *Base;
9837 TypeSize Width(0, false);
9838 int64_t Offset;
9839 bool OffsetIsScalable;
9840
9841 // Is this a load or store with an immediate offset with SP as the base?
9842 if (!MI.mayLoadOrStore() ||
9843 !getMemOperandWithOffsetWidth(LdSt: MI, BaseOp&: Base, Offset, OffsetIsScalable, Width,
9844 TRI: &RI) ||
9845 (Base->isReg() && Base->getReg() != AArch64::SP))
9846 continue;
9847
9848 // It is, so we have to fix it up.
9849 TypeSize Scale(0U, false);
9850 int64_t Dummy1, Dummy2;
9851
9852 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(LdSt&: MI);
9853 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
9854 getMemOpInfo(Opcode: MI.getOpcode(), Scale, Width, MinOffset&: Dummy1, MaxOffset&: Dummy2);
9855 assert(Scale != 0 && "Unexpected opcode!");
9856 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
9857
9858 // We've pushed the return address to the stack, so add 16 to the offset.
9859 // This is safe, since we already checked if it would overflow when we
9860 // checked if this instruction was legal to outline.
9861 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
9862 StackOffsetOperand.setImm(NewImm);
9863 }
9864}
9865
9866static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
9867 const AArch64InstrInfo *TII,
9868 bool ShouldSignReturnAddr) {
9869 if (!ShouldSignReturnAddr)
9870 return;
9871
9872 BuildMI(BB&: MBB, I: MBB.begin(), MIMD: DebugLoc(), MCID: TII->get(Opcode: AArch64::PAUTH_PROLOGUE))
9873 .setMIFlag(MachineInstr::FrameSetup);
9874 BuildMI(BB&: MBB, I: MBB.getFirstInstrTerminator(), MIMD: DebugLoc(),
9875 MCID: TII->get(Opcode: AArch64::PAUTH_EPILOGUE))
9876 .setMIFlag(MachineInstr::FrameDestroy);
9877}
9878
9879void AArch64InstrInfo::buildOutlinedFrame(
9880 MachineBasicBlock &MBB, MachineFunction &MF,
9881 const outliner::OutlinedFunction &OF) const {
9882
9883 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
9884
9885 if (OF.FrameConstructionID == MachineOutlinerTailCall)
9886 FI->setOutliningStyle("Tail Call");
9887 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
9888 // For thunk outlining, rewrite the last instruction from a call to a
9889 // tail-call.
9890 MachineInstr *Call = &*--MBB.instr_end();
9891 unsigned TailOpcode;
9892 if (Call->getOpcode() == AArch64::BL) {
9893 TailOpcode = AArch64::TCRETURNdi;
9894 } else {
9895 assert(Call->getOpcode() == AArch64::BLR ||
9896 Call->getOpcode() == AArch64::BLRNoIP);
9897 TailOpcode = AArch64::TCRETURNriALL;
9898 }
9899 MachineInstr *TC = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: TailOpcode))
9900 .add(MO: Call->getOperand(i: 0))
9901 .addImm(Val: 0);
9902 MBB.insert(I: MBB.end(), MI: TC);
9903 Call->eraseFromParent();
9904
9905 FI->setOutliningStyle("Thunk");
9906 }
9907
9908 bool IsLeafFunction = true;
9909
9910 // Is there a call in the outlined range?
9911 auto IsNonTailCall = [](const MachineInstr &MI) {
9912 return MI.isCall() && !MI.isReturn();
9913 };
9914
9915 if (llvm::any_of(Range: MBB.instrs(), P: IsNonTailCall)) {
9916 // Fix up the instructions in the range, since we're going to modify the
9917 // stack.
9918
9919 // Bugzilla ID: 46767
9920 // TODO: Check if fixing up twice is safe so we can outline these.
9921 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
9922 "Can only fix up stack references once");
9923 fixupPostOutline(MBB);
9924
9925 IsLeafFunction = false;
9926
9927 // LR has to be a live in so that we can save it.
9928 if (!MBB.isLiveIn(Reg: AArch64::LR))
9929 MBB.addLiveIn(PhysReg: AArch64::LR);
9930
9931 MachineBasicBlock::iterator It = MBB.begin();
9932 MachineBasicBlock::iterator Et = MBB.end();
9933
9934 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
9935 OF.FrameConstructionID == MachineOutlinerThunk)
9936 Et = std::prev(x: MBB.end());
9937
9938 // Insert a save before the outlined region
9939 MachineInstr *STRXpre = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: AArch64::STRXpre))
9940 .addReg(RegNo: AArch64::SP, flags: RegState::Define)
9941 .addReg(RegNo: AArch64::LR)
9942 .addReg(RegNo: AArch64::SP)
9943 .addImm(Val: -16);
9944 It = MBB.insert(I: It, MI: STRXpre);
9945
9946 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
9947 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
9948
9949 // Add a CFI saying the stack was moved 16 B down.
9950 CFIBuilder.buildDefCFAOffset(Offset: 16);
9951
9952 // Add a CFI saying that the LR that we want to find is now 16 B higher
9953 // than before.
9954 CFIBuilder.buildOffset(Reg: AArch64::LR, Offset: -16);
9955 }
9956
9957 // Insert a restore before the terminator for the function.
9958 MachineInstr *LDRXpost = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: AArch64::LDRXpost))
9959 .addReg(RegNo: AArch64::SP, flags: RegState::Define)
9960 .addReg(RegNo: AArch64::LR, flags: RegState::Define)
9961 .addReg(RegNo: AArch64::SP)
9962 .addImm(Val: 16);
9963 Et = MBB.insert(I: Et, MI: LDRXpost);
9964 }
9965
9966 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(SpillsLR: !IsLeafFunction);
9967
9968 // If this is a tail call outlined function, then there's already a return.
9969 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
9970 OF.FrameConstructionID == MachineOutlinerThunk) {
9971 signOutlinedFunction(MF, MBB, TII: this, ShouldSignReturnAddr);
9972 return;
9973 }
9974
9975 // It's not a tail call, so we have to insert the return ourselves.
9976
9977 // LR has to be a live in so that we can return to it.
9978 if (!MBB.isLiveIn(Reg: AArch64::LR))
9979 MBB.addLiveIn(PhysReg: AArch64::LR);
9980
9981 MachineInstr *ret = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: AArch64::RET))
9982 .addReg(RegNo: AArch64::LR);
9983 MBB.insert(I: MBB.end(), MI: ret);
9984
9985 signOutlinedFunction(MF, MBB, TII: this, ShouldSignReturnAddr);
9986
9987 FI->setOutliningStyle("Function");
9988
9989 // Did we have to modify the stack by saving the link register?
9990 if (OF.FrameConstructionID != MachineOutlinerDefault)
9991 return;
9992
9993 // We modified the stack.
9994 // Walk over the basic block and fix up all the stack accesses.
9995 fixupPostOutline(MBB);
9996}
9997
9998MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
9999 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
10000 MachineFunction &MF, outliner::Candidate &C) const {
10001
10002 // Are we tail calling?
10003 if (C.CallConstructionID == MachineOutlinerTailCall) {
10004 // If yes, then we can just branch to the label.
10005 It = MBB.insert(I: It, MI: BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: AArch64::TCRETURNdi))
10006 .addGlobalAddress(GV: M.getNamedValue(Name: MF.getName()))
10007 .addImm(Val: 0));
10008 return It;
10009 }
10010
10011 // Are we saving the link register?
10012 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
10013 C.CallConstructionID == MachineOutlinerThunk) {
10014 // No, so just insert the call.
10015 It = MBB.insert(I: It, MI: BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: AArch64::BL))
10016 .addGlobalAddress(GV: M.getNamedValue(Name: MF.getName())));
10017 return It;
10018 }
10019
10020 // We want to return the spot where we inserted the call.
10021 MachineBasicBlock::iterator CallPt;
10022
10023 // Instructions for saving and restoring LR around the call instruction we're
10024 // going to insert.
10025 MachineInstr *Save;
10026 MachineInstr *Restore;
10027 // Can we save to a register?
10028 if (C.CallConstructionID == MachineOutlinerRegSave) {
10029 // FIXME: This logic should be sunk into a target-specific interface so that
10030 // we don't have to recompute the register.
10031 Register Reg = findRegisterToSaveLRTo(C);
10032 assert(Reg && "No callee-saved register available?");
10033
10034 // LR has to be a live in so that we can save it.
10035 if (!MBB.isLiveIn(Reg: AArch64::LR))
10036 MBB.addLiveIn(PhysReg: AArch64::LR);
10037
10038 // Save and restore LR from Reg.
10039 Save = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: AArch64::ORRXrs), DestReg: Reg)
10040 .addReg(RegNo: AArch64::XZR)
10041 .addReg(RegNo: AArch64::LR)
10042 .addImm(Val: 0);
10043 Restore = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: AArch64::ORRXrs), DestReg: AArch64::LR)
10044 .addReg(RegNo: AArch64::XZR)
10045 .addReg(RegNo: Reg)
10046 .addImm(Val: 0);
10047 } else {
10048 // We have the default case. Save and restore from SP.
10049 Save = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: AArch64::STRXpre))
10050 .addReg(RegNo: AArch64::SP, flags: RegState::Define)
10051 .addReg(RegNo: AArch64::LR)
10052 .addReg(RegNo: AArch64::SP)
10053 .addImm(Val: -16);
10054 Restore = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: AArch64::LDRXpost))
10055 .addReg(RegNo: AArch64::SP, flags: RegState::Define)
10056 .addReg(RegNo: AArch64::LR, flags: RegState::Define)
10057 .addReg(RegNo: AArch64::SP)
10058 .addImm(Val: 16);
10059 }
10060
10061 It = MBB.insert(I: It, MI: Save);
10062 It++;
10063
10064 // Insert the call.
10065 It = MBB.insert(I: It, MI: BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: AArch64::BL))
10066 .addGlobalAddress(GV: M.getNamedValue(Name: MF.getName())));
10067 CallPt = It;
10068 It++;
10069
10070 It = MBB.insert(I: It, MI: Restore);
10071 return CallPt;
10072}
10073
10074bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
10075 MachineFunction &MF) const {
10076 return MF.getFunction().hasMinSize();
10077}
10078
10079void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
10080 MachineBasicBlock::iterator Iter,
10081 DebugLoc &DL,
10082 bool AllowSideEffects) const {
10083 const MachineFunction &MF = *MBB.getParent();
10084 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
10085 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
10086
10087 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10088 BuildMI(BB&: MBB, I: Iter, MIMD: DL, MCID: get(Opcode: AArch64::MOVZXi), DestReg: Reg).addImm(Val: 0).addImm(Val: 0);
10089 } else if (STI.isSVEorStreamingSVEAvailable()) {
10090 BuildMI(BB&: MBB, I: Iter, MIMD: DL, MCID: get(Opcode: AArch64::DUP_ZI_D), DestReg: Reg)
10091 .addImm(Val: 0)
10092 .addImm(Val: 0);
10093 } else if (STI.isNeonAvailable()) {
10094 BuildMI(BB&: MBB, I: Iter, MIMD: DL, MCID: get(Opcode: AArch64::MOVIv2d_ns), DestReg: Reg)
10095 .addImm(Val: 0);
10096 } else {
10097 // This is a streaming-compatible function without SVE. We don't have full
10098 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
10099 // Since `movi v..` would be illegal here, use `fmov d..` instead.
10100 assert(STI.hasNEON() && "Expected to have NEON.");
10101 Register Reg64 = TRI.getSubReg(Reg, Idx: AArch64::dsub);
10102 BuildMI(BB&: MBB, I: Iter, MIMD: DL, MCID: get(Opcode: AArch64::FMOVD0), DestReg: Reg64);
10103 }
10104}
10105
10106std::optional<DestSourcePair>
10107AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
10108
10109 // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register
10110 // and a zero shift immediate are used as an alias for the mov instruction.
10111 if (((MI.getOpcode() == AArch64::ORRWrs &&
10112 MI.getOperand(i: 1).getReg() == AArch64::WZR &&
10113 MI.getOperand(i: 3).getImm() == 0x0) ||
10114 (MI.getOpcode() == AArch64::ORRWrr &&
10115 MI.getOperand(i: 1).getReg() == AArch64::WZR)) &&
10116 // Check that the w->w move is not a zero-extending w->x mov.
10117 (!MI.getOperand(i: 0).getReg().isVirtual() ||
10118 MI.getOperand(i: 0).getSubReg() == 0) &&
10119 (!MI.getOperand(i: 0).getReg().isPhysical() ||
10120 MI.findRegisterDefOperandIdx(Reg: getXRegFromWReg(Reg: MI.getOperand(i: 0).getReg()),
10121 /*TRI=*/nullptr) == -1))
10122 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 2)};
10123
10124 if (MI.getOpcode() == AArch64::ORRXrs &&
10125 MI.getOperand(i: 1).getReg() == AArch64::XZR &&
10126 MI.getOperand(i: 3).getImm() == 0x0)
10127 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 2)};
10128
10129 return std::nullopt;
10130}
10131
10132std::optional<DestSourcePair>
10133AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
10134 if ((MI.getOpcode() == AArch64::ORRWrs &&
10135 MI.getOperand(i: 1).getReg() == AArch64::WZR &&
10136 MI.getOperand(i: 3).getImm() == 0x0) ||
10137 (MI.getOpcode() == AArch64::ORRWrr &&
10138 MI.getOperand(i: 1).getReg() == AArch64::WZR))
10139 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 2)};
10140 return std::nullopt;
10141}
10142
10143std::optional<RegImmPair>
10144AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
10145 int Sign = 1;
10146 int64_t Offset = 0;
10147
10148 // TODO: Handle cases where Reg is a super- or sub-register of the
10149 // destination register.
10150 const MachineOperand &Op0 = MI.getOperand(i: 0);
10151 if (!Op0.isReg() || Reg != Op0.getReg())
10152 return std::nullopt;
10153
10154 switch (MI.getOpcode()) {
10155 default:
10156 return std::nullopt;
10157 case AArch64::SUBWri:
10158 case AArch64::SUBXri:
10159 case AArch64::SUBSWri:
10160 case AArch64::SUBSXri:
10161 Sign *= -1;
10162 [[fallthrough]];
10163 case AArch64::ADDSWri:
10164 case AArch64::ADDSXri:
10165 case AArch64::ADDWri:
10166 case AArch64::ADDXri: {
10167 // TODO: Third operand can be global address (usually some string).
10168 if (!MI.getOperand(i: 0).isReg() || !MI.getOperand(i: 1).isReg() ||
10169 !MI.getOperand(i: 2).isImm())
10170 return std::nullopt;
10171 int Shift = MI.getOperand(i: 3).getImm();
10172 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
10173 Offset = Sign * (MI.getOperand(i: 2).getImm() << Shift);
10174 }
10175 }
10176 return RegImmPair{MI.getOperand(i: 1).getReg(), Offset};
10177}
10178
10179/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
10180/// the destination register then, if possible, describe the value in terms of
10181/// the source register.
10182static std::optional<ParamLoadedValue>
10183describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
10184 const TargetInstrInfo *TII,
10185 const TargetRegisterInfo *TRI) {
10186 auto DestSrc = TII->isCopyLikeInstr(MI);
10187 if (!DestSrc)
10188 return std::nullopt;
10189
10190 Register DestReg = DestSrc->Destination->getReg();
10191 Register SrcReg = DestSrc->Source->getReg();
10192
10193 auto Expr = DIExpression::get(Context&: MI.getMF()->getFunction().getContext(), Elements: {});
10194
10195 // If the described register is the destination, just return the source.
10196 if (DestReg == DescribedReg)
10197 return ParamLoadedValue(MachineOperand::CreateReg(Reg: SrcReg, isDef: false), Expr);
10198
10199 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
10200 if (MI.getOpcode() == AArch64::ORRWrs &&
10201 TRI->isSuperRegister(RegA: DestReg, RegB: DescribedReg))
10202 return ParamLoadedValue(MachineOperand::CreateReg(Reg: SrcReg, isDef: false), Expr);
10203
10204 // We may need to describe the lower part of an ORRXrs move.
10205 if (MI.getOpcode() == AArch64::ORRXrs &&
10206 TRI->isSubRegister(RegA: DestReg, RegB: DescribedReg)) {
10207 Register SrcSubReg = TRI->getSubReg(Reg: SrcReg, Idx: AArch64::sub_32);
10208 return ParamLoadedValue(MachineOperand::CreateReg(Reg: SrcSubReg, isDef: false), Expr);
10209 }
10210
10211 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
10212 "Unhandled ORR[XW]rs copy case");
10213
10214 return std::nullopt;
10215}
10216
10217bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
10218 // Functions cannot be split to different sections on AArch64 if they have
10219 // a red zone. This is because relaxing a cross-section branch may require
10220 // incrementing the stack pointer to spill a register, which would overwrite
10221 // the red zone.
10222 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(u: true))
10223 return false;
10224
10225 return TargetInstrInfo::isFunctionSafeToSplit(MF);
10226}
10227
10228bool AArch64InstrInfo::isMBBSafeToSplitToCold(
10229 const MachineBasicBlock &MBB) const {
10230 // Asm Goto blocks can contain conditional branches to goto labels, which can
10231 // get moved out of range of the branch instruction.
10232 auto isAsmGoto = [](const MachineInstr &MI) {
10233 return MI.getOpcode() == AArch64::INLINEASM_BR;
10234 };
10235 if (llvm::any_of(Range: MBB, P: isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
10236 return false;
10237
10238 // Because jump tables are label-relative instead of table-relative, they all
10239 // must be in the same section or relocation fixup handling will fail.
10240
10241 // Check if MBB is a jump table target
10242 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
10243 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
10244 return llvm::is_contained(Range: JTE.MBBs, Element: &MBB);
10245 };
10246 if (MJTI != nullptr && llvm::any_of(Range: MJTI->getJumpTables(), P: containsMBB))
10247 return false;
10248
10249 // Check if MBB contains a jump table lookup
10250 for (const MachineInstr &MI : MBB) {
10251 switch (MI.getOpcode()) {
10252 case TargetOpcode::G_BRJT:
10253 case AArch64::JumpTableDest32:
10254 case AArch64::JumpTableDest16:
10255 case AArch64::JumpTableDest8:
10256 return false;
10257 default:
10258 continue;
10259 }
10260 }
10261
10262 // MBB isn't a special case, so it's safe to be split to the cold section.
10263 return true;
10264}
10265
10266std::optional<ParamLoadedValue>
10267AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
10268 Register Reg) const {
10269 const MachineFunction *MF = MI.getMF();
10270 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
10271 switch (MI.getOpcode()) {
10272 case AArch64::MOVZWi:
10273 case AArch64::MOVZXi: {
10274 // MOVZWi may be used for producing zero-extended 32-bit immediates in
10275 // 64-bit parameters, so we need to consider super-registers.
10276 if (!TRI->isSuperRegisterEq(RegA: MI.getOperand(i: 0).getReg(), RegB: Reg))
10277 return std::nullopt;
10278
10279 if (!MI.getOperand(i: 1).isImm())
10280 return std::nullopt;
10281 int64_t Immediate = MI.getOperand(i: 1).getImm();
10282 int Shift = MI.getOperand(i: 2).getImm();
10283 return ParamLoadedValue(MachineOperand::CreateImm(Val: Immediate << Shift),
10284 nullptr);
10285 }
10286 case AArch64::ORRWrs:
10287 case AArch64::ORRXrs:
10288 return describeORRLoadedValue(MI, DescribedReg: Reg, TII: this, TRI);
10289 }
10290
10291 return TargetInstrInfo::describeLoadedValue(MI, Reg);
10292}
10293
10294bool AArch64InstrInfo::isExtendLikelyToBeFolded(
10295 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
10296 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
10297 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
10298 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
10299
10300 // Anyexts are nops.
10301 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
10302 return true;
10303
10304 Register DefReg = ExtMI.getOperand(i: 0).getReg();
10305 if (!MRI.hasOneNonDBGUse(RegNo: DefReg))
10306 return false;
10307
10308 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
10309 // addressing mode.
10310 auto *UserMI = &*MRI.use_instr_nodbg_begin(RegNo: DefReg);
10311 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
10312}
10313
10314uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
10315 return get(Opcode: Opc).TSFlags & AArch64::ElementSizeMask;
10316}
10317
10318bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
10319 return get(Opcode: Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
10320}
10321
10322bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
10323 return get(Opcode: Opc).TSFlags & AArch64::InstrFlagIsWhile;
10324}
10325
10326unsigned int
10327AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
10328 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
10329}
10330
10331bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
10332 unsigned Scale) const {
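// An addressing mode can use either an immediate offset or a scaled register
// offset, but not both.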
10333 if (Offset && Scale)
10334 return false;
10335
10336 // Check Reg + Imm
10337 if (!Scale) {
10338 // 9-bit signed offset
10339 if (isInt<9>(x: Offset))
10340 return true;
10341
10342 // 12-bit unsigned offset
10343 unsigned Shift = Log2_64(Value: NumBytes);
10344 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
10345 // Must be a multiple of NumBytes (NumBytes is a power of 2)
10346 (Offset >> Shift) << Shift == Offset)
10347 return true;
10348 return false;
10349 }
10350
10351 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
10352 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
10353}
10354
10355unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
10356 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
10357 return AArch64::BLRNoIP;
10358 else
10359 return AArch64::BLR;
10360}
10361
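/// Emit a stack-probing loop: SP is dropped in chunks of the stack probe size
/// and each newly allocated chunk is touched (STR XZR) until SP reaches
/// \p TargetReg, which then becomes the new stack pointer.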
10362MachineBasicBlock::iterator
10363AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
10364 Register TargetReg, bool FrameSetup) const {
10365 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
10366
10367 MachineBasicBlock &MBB = *MBBI->getParent();
10368 MachineFunction &MF = *MBB.getParent();
10369 const AArch64InstrInfo *TII =
10370 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
10371 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
10372 DebugLoc DL = MBB.findDebugLoc(MBBI);
10373
10374 MachineFunction::iterator MBBInsertPoint = std::next(x: MBB.getIterator());
10375 MachineBasicBlock *LoopTestMBB =
10376 MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
10377 MF.insert(MBBI: MBBInsertPoint, MBB: LoopTestMBB);
10378 MachineBasicBlock *LoopBodyMBB =
10379 MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
10380 MF.insert(MBBI: MBBInsertPoint, MBB: LoopBodyMBB);
10381 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
10382 MF.insert(MBBI: MBBInsertPoint, MBB: ExitMBB);
10383 MachineInstr::MIFlag Flags =
10384 FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
10385
10386 // LoopTest:
10387 // SUB SP, SP, #ProbeSize
10388 emitFrameOffset(MBB&: *LoopTestMBB, MBBI: LoopTestMBB->end(), DL, DestReg: AArch64::SP,
10389 SrcReg: AArch64::SP, Offset: StackOffset::getFixed(Fixed: -ProbeSize), TII, Flag: Flags);
10390
10391 // CMP SP, TargetReg
10392 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: AArch64::SUBSXrx64),
10393 DestReg: AArch64::XZR)
10394 .addReg(RegNo: AArch64::SP)
10395 .addReg(RegNo: TargetReg)
10396 .addImm(Val: AArch64_AM::getArithExtendImm(ET: AArch64_AM::UXTX, Imm: 0))
10397 .setMIFlags(Flags);
10398
10399 // B.<Cond> LoopExit
10400 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: AArch64::Bcc))
10401 .addImm(Val: AArch64CC::LE)
10402 .addMBB(MBB: ExitMBB)
10403 .setMIFlags(Flags);
10404
10405 // STR XZR, [SP]
10406 BuildMI(BB&: *LoopBodyMBB, I: LoopBodyMBB->end(), MIMD: DL, MCID: TII->get(Opcode: AArch64::STRXui))
10407 .addReg(RegNo: AArch64::XZR)
10408 .addReg(RegNo: AArch64::SP)
10409 .addImm(Val: 0)
10410 .setMIFlags(Flags);
10411
10412 // B loop
10413 BuildMI(BB&: *LoopBodyMBB, I: LoopBodyMBB->end(), MIMD: DL, MCID: TII->get(Opcode: AArch64::B))
10414 .addMBB(MBB: LoopTestMBB)
10415 .setMIFlags(Flags);
10416
10417 // LoopExit:
10418 // MOV SP, TargetReg
10419 BuildMI(BB&: *ExitMBB, I: ExitMBB->end(), MIMD: DL, MCID: TII->get(Opcode: AArch64::ADDXri), DestReg: AArch64::SP)
10420 .addReg(RegNo: TargetReg)
10421 .addImm(Val: 0)
10422 .addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0))
10423 .setMIFlags(Flags);
10424
10425 // LDR XZR, [SP]
10426 BuildMI(BB&: *ExitMBB, I: ExitMBB->end(), MIMD: DL, MCID: TII->get(Opcode: AArch64::LDRXui))
10427 .addReg(RegNo: AArch64::XZR, flags: RegState::Define)
10428 .addReg(RegNo: AArch64::SP)
10429 .addImm(Val: 0)
10430 .setMIFlags(Flags);
10431
10432 ExitMBB->splice(Where: ExitMBB->end(), Other: &MBB, From: std::next(x: MBBI), To: MBB.end());
10433 ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB);
10434
10435 LoopTestMBB->addSuccessor(Succ: ExitMBB);
10436 LoopTestMBB->addSuccessor(Succ: LoopBodyMBB);
10437 LoopBodyMBB->addSuccessor(Succ: LoopTestMBB);
10438 MBB.addSuccessor(Succ: LoopTestMBB);
10439
10440 // Update liveins.
10441 if (MF.getRegInfo().reservedRegsFrozen())
10442 fullyRecomputeLiveIns(MBBs: {ExitMBB, LoopBodyMBB, LoopTestMBB});
10443
10444 return ExitMBB->begin();
10445}
10446
10447namespace {
10448class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
10449 MachineFunction *MF;
10450 const TargetInstrInfo *TII;
10451 const TargetRegisterInfo *TRI;
10452 MachineRegisterInfo &MRI;
10453
10454 /// The block of the loop
10455 MachineBasicBlock *LoopBB;
10456 /// The conditional branch of the loop
10457 MachineInstr *CondBranch;
10458 /// The compare instruction for loop control
10459 MachineInstr *Comp;
10460 /// The number of the operand of the loop counter value in Comp
10461 unsigned CompCounterOprNum;
10462 /// The instruction that updates the loop counter value
10463 MachineInstr *Update;
10464 /// The number of the operand of the loop counter value in Update
10465 unsigned UpdateCounterOprNum;
10466 /// The initial value of the loop counter
10467 Register Init;
10468 /// True iff Update is a predecessor of Comp
10469 bool IsUpdatePriorComp;
10470
10471 /// The normalized condition used by createTripCountGreaterCondition()
10472 SmallVector<MachineOperand, 4> Cond;
10473
10474public:
10475 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
10476 MachineInstr *Comp, unsigned CompCounterOprNum,
10477 MachineInstr *Update, unsigned UpdateCounterOprNum,
10478 Register Init, bool IsUpdatePriorComp,
10479 const SmallVectorImpl<MachineOperand> &Cond)
10480 : MF(Comp->getParent()->getParent()),
10481 TII(MF->getSubtarget().getInstrInfo()),
10482 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
10483 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
10484 CompCounterOprNum(CompCounterOprNum), Update(Update),
10485 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
10486 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
10487
10488 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
10489 // Make sure the loop-control instructions are placed in stage 0.
10490 // The predecessors of Comp are considered by the caller.
10491 return MI == Comp;
10492 }
10493
10494 std::optional<bool> createTripCountGreaterCondition(
10495 int TC, MachineBasicBlock &MBB,
10496 SmallVectorImpl<MachineOperand> &CondParam) override {
10497 // A branch instruction will be inserted as "if (Cond) goto epilogue".
10498 // Cond is normalized for such use.
10499 // The predecessors of the branch are assumed to have already been inserted.
10500 CondParam = Cond;
10501 return {};
10502 }
10503
10504 void createRemainingIterationsGreaterCondition(
10505 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10506 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
10507
10508 void setPreheader(MachineBasicBlock *NewPreheader) override {}
10509
10510 void adjustTripCount(int TripCountAdjust) override {}
10511
10512 bool isMVEExpanderSupported() override { return true; }
10513};
10514} // namespace
10515
10516/// Clone an instruction from MI. The register of ReplaceOprNum-th operand
10517/// is replaced by ReplaceReg. The output register is newly created.
10518/// The other operands are unchanged from MI.
10519static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
10520 Register ReplaceReg, MachineBasicBlock &MBB,
10521 MachineBasicBlock::iterator InsertTo) {
10522 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
10523 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
10524 const TargetRegisterInfo *TRI =
10525 MBB.getParent()->getSubtarget().getRegisterInfo();
10526 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(Orig: MI);
10527 Register Result = 0;
10528 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
10529 if (I == 0 && NewMI->getOperand(i: 0).getReg().isVirtual()) {
10530 Result = MRI.createVirtualRegister(
10531 RegClass: MRI.getRegClass(Reg: NewMI->getOperand(i: 0).getReg()));
10532 NewMI->getOperand(i: I).setReg(Result);
10533 } else if (I == ReplaceOprNum) {
10534 MRI.constrainRegClass(
10535 Reg: ReplaceReg,
10536 RC: TII->getRegClass(MCID: NewMI->getDesc(), OpNum: I, TRI, MF: *MBB.getParent()));
10537 NewMI->getOperand(i: I).setReg(ReplaceReg);
10538 }
10539 }
10540 MBB.insert(I: InsertTo, MI: NewMI);
10541 return Result;
10542}
10543
10544void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
10545 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10546 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
10547 // Create and accumulate conditions for next TC iterations.
10548 // Example:
10549 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
10550 // # iteration of the kernel
10551 //
10552 // # insert the following instructions
10553 // cond = CSINCXr 0, 0, C, implicit $nzcv
10554 // counter = ADDXri counter, 1 # clone from this->Update
10555 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
10556 // cond = CSINCXr cond, cond, C, implicit $nzcv
10557 // ... (repeat TC times)
10558 // SUBSXri cond, 0, implicit-def $nzcv
10559
10560 assert(CondBranch->getOpcode() == AArch64::Bcc);
10561 // CondCode to exit the loop
10562 AArch64CC::CondCode CC =
10563 (AArch64CC::CondCode)CondBranch->getOperand(i: 0).getImm();
10564 if (CondBranch->getOperand(i: 1).getMBB() == LoopBB)
10565 CC = AArch64CC::getInvertedCondCode(Code: CC);
10566
10567 // Accumulate conditions to exit the loop
10568 Register AccCond = AArch64::XZR;
10569
10570 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
10571 auto AccumulateCond = [&](Register CurCond,
10572 AArch64CC::CondCode CC) -> Register {
10573 Register NewCond = MRI.createVirtualRegister(RegClass: &AArch64::GPR64commonRegClass);
10574 BuildMI(BB&: MBB, I: MBB.end(), MIMD: Comp->getDebugLoc(), MCID: TII->get(Opcode: AArch64::CSINCXr))
10575 .addReg(RegNo: NewCond, flags: RegState::Define)
10576 .addReg(RegNo: CurCond)
10577 .addReg(RegNo: CurCond)
10578 .addImm(Val: AArch64CC::getInvertedCondCode(Code: CC));
10579 return NewCond;
10580 };
10581
10582 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
10583 // The Update and Comp for I==0 already exist in MBB
10584 // (MBB is an unrolled kernel)
10585 Register Counter;
10586 for (int I = 0; I <= TC; ++I) {
10587 Register NextCounter;
10588 if (I != 0)
10589 NextCounter =
10590 cloneInstr(MI: Comp, ReplaceOprNum: CompCounterOprNum, ReplaceReg: Counter, MBB, InsertTo: MBB.end());
10591
10592 AccCond = AccumulateCond(AccCond, CC);
10593
10594 if (I != TC) {
10595 if (I == 0) {
10596 if (Update != Comp && IsUpdatePriorComp) {
10597 Counter =
10598 LastStage0Insts[Comp]->getOperand(i: CompCounterOprNum).getReg();
10599 NextCounter = cloneInstr(MI: Update, ReplaceOprNum: UpdateCounterOprNum, ReplaceReg: Counter, MBB,
10600 InsertTo: MBB.end());
10601 } else {
10602 // can use already calculated value
10603 NextCounter = LastStage0Insts[Update]->getOperand(i: 0).getReg();
10604 }
10605 } else if (Update != Comp) {
10606 NextCounter =
10607 cloneInstr(MI: Update, ReplaceOprNum: UpdateCounterOprNum, ReplaceReg: Counter, MBB, InsertTo: MBB.end());
10608 }
10609 }
10610 Counter = NextCounter;
10611 }
  } else {
    Register Counter;
    if (LastStage0Insts.empty()) {
      // Use the initial counter value (this tests whether the trip count is
      // large enough for the pipelined code to be executed at all).
      Counter = Init;
      if (IsUpdatePriorComp)
        Counter =
            cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
    } else {
      // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
      Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
    }

    for (int I = 0; I <= TC; ++I) {
      Register NextCounter;
      NextCounter =
          cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
      AccCond = AccumulateCond(AccCond, CC);
      if (I != TC && Update != Comp)
        NextCounter =
            cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
      Counter = NextCounter;
    }
  }

  // If AccCond == 0, the number of remaining iterations is greater than TC.
  BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
      .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
      .addReg(AccCond)
      .addImm(0)
      .addImm(0);
  Cond.clear();
  Cond.push_back(MachineOperand::CreateImm(AArch64CC::EQ));
}

static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
                          Register &RegMBB, Register &RegOther) {
  assert(Phi.getNumOperands() == 5);
  if (Phi.getOperand(2).getMBB() == MBB) {
    RegMBB = Phi.getOperand(1).getReg();
    RegOther = Phi.getOperand(3).getReg();
  } else {
    assert(Phi.getOperand(4).getMBB() == MBB);
    RegMBB = Phi.getOperand(3).getReg();
    RegOther = Phi.getOperand(1).getReg();
  }
}
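
// Illustrative PHI operand layout assumed by extractPhiReg (hypothetical
// registers and blocks):
//   %res = PHI %a, %bb.preheader, %b, %bb.loop
// Operand 0 is the result, operands 1 and 3 are the incoming registers, and
// operands 2 and 4 are the corresponding predecessor blocks.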

static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
  if (!Reg.isVirtual())
    return false;
  const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  return MRI.getVRegDef(Reg)->getParent() != BB;
}

/// If Reg is an induction variable, return true and set the output
/// parameters.
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
                          MachineInstr *&UpdateInst,
                          unsigned &UpdateCounterOprNum, Register &InitReg,
                          bool &IsUpdatePriorComp) {
  // Example:
  //
  // Preheader:
  //   InitReg = ...
  // LoopBB:
  //   Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
  //   Reg = COPY Reg0 ; COPY is ignored.
  //   Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop-invariant value.
  //                     ; Reg is the value calculated in the previous
  //                     ; iteration, so IsUpdatePriorComp == false.
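  //
  // A contrasting, illustrative case (hypothetical registers): if the compare
  // reads the updated value instead, e.g.
  //
  // LoopBB:
  //   Reg0 = PHI (InitReg, Preheader), (Reg, LoopBB)
  //   Reg = ADD Reg0, #1 ; UpdateInst
  //
  // then the update is reached before the PHI while walking the use-def
  // chain, so IsUpdatePriorComp == true.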

  if (LoopBB->pred_size() != 2)
    return false;
  if (!Reg.isVirtual())
    return false;
  const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
  UpdateInst = nullptr;
  UpdateCounterOprNum = 0;
  InitReg = 0;
  IsUpdatePriorComp = true;
  Register CurReg = Reg;
  while (true) {
    MachineInstr *Def = MRI.getVRegDef(CurReg);
    if (Def->getParent() != LoopBB)
      return false;
    if (Def->isCopy()) {
      // Ignore copy instructions unless they contain subregisters
      if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
        return false;
      CurReg = Def->getOperand(1).getReg();
    } else if (Def->isPHI()) {
      if (InitReg != 0)
        return false;
      if (!UpdateInst)
        IsUpdatePriorComp = false;
      extractPhiReg(*Def, LoopBB, CurReg, InitReg);
    } else {
      if (UpdateInst)
        return false;
      switch (Def->getOpcode()) {
      case AArch64::ADDSXri:
      case AArch64::ADDSWri:
      case AArch64::SUBSXri:
      case AArch64::SUBSWri:
      case AArch64::ADDXri:
      case AArch64::ADDWri:
      case AArch64::SUBXri:
      case AArch64::SUBWri:
        UpdateInst = Def;
        UpdateCounterOprNum = 1;
        break;
      case AArch64::ADDSXrr:
      case AArch64::ADDSWrr:
      case AArch64::SUBSXrr:
      case AArch64::SUBSWrr:
      case AArch64::ADDXrr:
      case AArch64::ADDWrr:
      case AArch64::SUBXrr:
      case AArch64::SUBWrr:
        UpdateInst = Def;
        if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
          UpdateCounterOprNum = 1;
        else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
          UpdateCounterOprNum = 2;
        else
          return false;
        break;
      default:
        return false;
      }
      CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
    }

    if (!CurReg.isVirtual())
      return false;
    if (Reg == CurReg)
      break;
  }

  if (!UpdateInst)
    return false;

  return true;
}

std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
  // Accept loops that meet the following conditions (see the sketch below):
  // * The conditional branch is BCC.
  // * The compare instruction is ADDS/SUBS/WHILEXX.
  // * One operand of the compare is an induction variable and the other is a
  //   loop-invariant value.
  // * The induction variable is incremented/decremented by a single
  //   instruction.
  // * The loop does not contain calls or instructions which have unmodeled
  //   side effects.
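  //
  // An illustrative single-block loop shape that satisfies these conditions
  // (hypothetical virtual registers, %n a loop-invariant bound, simplified
  // MIR):
  //
  //   bb.loop:
  //     %iv = PHI %init, %bb.preheader, %iv.next, %bb.loop
  //     ...                          ; no calls, no unmodeled side effects
  //     %iv.next = ADDXri %iv, 1, 0
  //     dead $xzr = SUBSXrr %n, %iv.next, implicit-def $nzcv
  //     Bcc <cc>, %bb.loop, implicit $nzcv
  //     B %bb.exit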

  for (MachineInstr &MI : *LoopBB)
    if (MI.isCall() || MI.hasUnmodeledSideEffects())
      // This instruction may use NZCV, which interferes with the instruction
      // to be inserted for loop control.
      return nullptr;

  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
    return nullptr;

  // Infinite loops are not supported.
  if (TBB == LoopBB && FBB == LoopBB)
    return nullptr;

  // Must be a conditional branch.
  if (TBB != LoopBB && FBB == nullptr)
    return nullptr;

  assert((TBB == LoopBB || FBB == LoopBB) &&
         "The Loop must be a single-basic-block loop");

  MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
  const TargetRegisterInfo &TRI = getRegisterInfo();

  if (CondBranch->getOpcode() != AArch64::Bcc)
    return nullptr;

  // Normalization for createTripCountGreaterCondition().
  if (TBB == LoopBB)
    reverseBranchCondition(Cond);

  MachineInstr *Comp = nullptr;
  unsigned CompCounterOprNum = 0;
  for (MachineInstr &MI : reverse(*LoopBB)) {
    if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
      // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
      // operands is a loop-invariant value.

      switch (MI.getOpcode()) {
      case AArch64::SUBSXri:
      case AArch64::SUBSWri:
      case AArch64::ADDSXri:
      case AArch64::ADDSWri:
        Comp = &MI;
        CompCounterOprNum = 1;
        break;
      case AArch64::ADDSWrr:
      case AArch64::ADDSXrr:
      case AArch64::SUBSWrr:
      case AArch64::SUBSXrr:
        Comp = &MI;
        break;
      default:
        if (isWhileOpcode(MI.getOpcode())) {
          Comp = &MI;
          break;
        }
        return nullptr;
      }

      if (CompCounterOprNum == 0) {
        if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
          CompCounterOprNum = 2;
        else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
          CompCounterOprNum = 1;
        else
          return nullptr;
      }
      break;
    }
  }
  if (!Comp)
    return nullptr;

  MachineInstr *Update = nullptr;
  Register Init;
  bool IsUpdatePriorComp;
  unsigned UpdateCounterOprNum;
  if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
                     Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
    return nullptr;

  return std::make_unique<AArch64PipelinerLoopInfo>(
      LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
      Init, IsUpdatePriorComp, Cond);
}

/// verifyInstruction - Perform target specific instruction verification.
bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
                                         StringRef &ErrInfo) const {

  // Verify that immediate offsets on load/store instructions are within range.
  // Stack objects with an FI operand are excluded as they can be fixed up
  // during PEI.
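  // For example (illustrative): an LDRXui whose scaled immediate lies outside
  // [0, 4095] is reported here, whereas the same load whose base operand is a
  // frame index is skipped, since PEI will still legalize its offset.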
  TypeSize Scale(0U, false), Width(0U, false);
  int64_t MinOffset, MaxOffset;
  if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
    unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
    if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
      int64_t Imm = MI.getOperand(ImmIdx).getImm();
      if (Imm < MinOffset || Imm > MaxOffset) {
        ErrInfo = "Unexpected immediate on load/store instruction";
        return false;
      }
    }
  }
  return true;
}

#define GET_INSTRINFO_HELPERS
#define GET_INSTRMAP_INFO
#include "AArch64GenInstrInfo.inc"