//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H
14#define LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H
15
16#include "AArch64.h"
17#include "AArch64RegisterInfo.h"
18#include "llvm/CodeGen/TargetInstrInfo.h"
19#include "llvm/Support/TypeSize.h"
20#include <optional>
21
22#define GET_INSTRINFO_HEADER
23#include "AArch64GenInstrInfo.inc"
24
25namespace llvm {
26
27class AArch64Subtarget;
28
29static const MachineMemOperand::Flags MOSuppressPair =
30 MachineMemOperand::MOTargetFlag1;
31static const MachineMemOperand::Flags MOStridedAccess =
32 MachineMemOperand::MOTargetFlag2;
33
34#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"
35
36// AArch64 MachineCombiner patterns
37enum AArch64MachineCombinerPattern : unsigned {
38 // These are patterns used to reduce the length of dependence chain.
39 SUBADD_OP1 = MachineCombinerPattern::TARGET_PATTERN_START,
40 SUBADD_OP2,
41
42 // These are multiply-add patterns matched by the AArch64 machine combiner.
43 MULADDW_OP1,
44 MULADDW_OP2,
45 MULSUBW_OP1,
46 MULSUBW_OP2,
47 MULADDWI_OP1,
48 MULSUBWI_OP1,
49 MULADDX_OP1,
50 MULADDX_OP2,
51 MULSUBX_OP1,
52 MULSUBX_OP2,
53 MULADDXI_OP1,
54 MULSUBXI_OP1,
55 // NEON integers vectors
56 MULADDv8i8_OP1,
57 MULADDv8i8_OP2,
58 MULADDv16i8_OP1,
59 MULADDv16i8_OP2,
60 MULADDv4i16_OP1,
61 MULADDv4i16_OP2,
62 MULADDv8i16_OP1,
63 MULADDv8i16_OP2,
64 MULADDv2i32_OP1,
65 MULADDv2i32_OP2,
66 MULADDv4i32_OP1,
67 MULADDv4i32_OP2,
68
69 MULSUBv8i8_OP1,
70 MULSUBv8i8_OP2,
71 MULSUBv16i8_OP1,
72 MULSUBv16i8_OP2,
73 MULSUBv4i16_OP1,
74 MULSUBv4i16_OP2,
75 MULSUBv8i16_OP1,
76 MULSUBv8i16_OP2,
77 MULSUBv2i32_OP1,
78 MULSUBv2i32_OP2,
79 MULSUBv4i32_OP1,
80 MULSUBv4i32_OP2,
81
82 MULADDv4i16_indexed_OP1,
83 MULADDv4i16_indexed_OP2,
84 MULADDv8i16_indexed_OP1,
85 MULADDv8i16_indexed_OP2,
86 MULADDv2i32_indexed_OP1,
87 MULADDv2i32_indexed_OP2,
88 MULADDv4i32_indexed_OP1,
89 MULADDv4i32_indexed_OP2,
90
91 MULSUBv4i16_indexed_OP1,
92 MULSUBv4i16_indexed_OP2,
93 MULSUBv8i16_indexed_OP1,
94 MULSUBv8i16_indexed_OP2,
95 MULSUBv2i32_indexed_OP1,
96 MULSUBv2i32_indexed_OP2,
97 MULSUBv4i32_indexed_OP1,
98 MULSUBv4i32_indexed_OP2,
99
100 // Floating Point
101 FMULADDH_OP1,
102 FMULADDH_OP2,
103 FMULSUBH_OP1,
104 FMULSUBH_OP2,
105 FMULADDS_OP1,
106 FMULADDS_OP2,
107 FMULSUBS_OP1,
108 FMULSUBS_OP2,
109 FMULADDD_OP1,
110 FMULADDD_OP2,
111 FMULSUBD_OP1,
112 FMULSUBD_OP2,
113 FNMULSUBH_OP1,
114 FNMULSUBS_OP1,
115 FNMULSUBD_OP1,
116 FMLAv1i32_indexed_OP1,
117 FMLAv1i32_indexed_OP2,
118 FMLAv1i64_indexed_OP1,
119 FMLAv1i64_indexed_OP2,
120 FMLAv4f16_OP1,
121 FMLAv4f16_OP2,
122 FMLAv8f16_OP1,
123 FMLAv8f16_OP2,
124 FMLAv2f32_OP2,
125 FMLAv2f32_OP1,
126 FMLAv2f64_OP1,
127 FMLAv2f64_OP2,
128 FMLAv4i16_indexed_OP1,
129 FMLAv4i16_indexed_OP2,
130 FMLAv8i16_indexed_OP1,
131 FMLAv8i16_indexed_OP2,
132 FMLAv2i32_indexed_OP1,
133 FMLAv2i32_indexed_OP2,
134 FMLAv2i64_indexed_OP1,
135 FMLAv2i64_indexed_OP2,
136 FMLAv4f32_OP1,
137 FMLAv4f32_OP2,
138 FMLAv4i32_indexed_OP1,
139 FMLAv4i32_indexed_OP2,
140 FMLSv1i32_indexed_OP2,
141 FMLSv1i64_indexed_OP2,
142 FMLSv4f16_OP1,
143 FMLSv4f16_OP2,
144 FMLSv8f16_OP1,
145 FMLSv8f16_OP2,
146 FMLSv2f32_OP1,
147 FMLSv2f32_OP2,
148 FMLSv2f64_OP1,
149 FMLSv2f64_OP2,
150 FMLSv4i16_indexed_OP1,
151 FMLSv4i16_indexed_OP2,
152 FMLSv8i16_indexed_OP1,
153 FMLSv8i16_indexed_OP2,
154 FMLSv2i32_indexed_OP1,
155 FMLSv2i32_indexed_OP2,
156 FMLSv2i64_indexed_OP1,
157 FMLSv2i64_indexed_OP2,
158 FMLSv4f32_OP1,
159 FMLSv4f32_OP2,
160 FMLSv4i32_indexed_OP1,
161 FMLSv4i32_indexed_OP2,
162
163 FMULv2i32_indexed_OP1,
164 FMULv2i32_indexed_OP2,
165 FMULv2i64_indexed_OP1,
166 FMULv2i64_indexed_OP2,
167 FMULv4i16_indexed_OP1,
168 FMULv4i16_indexed_OP2,
169 FMULv4i32_indexed_OP1,
170 FMULv4i32_indexed_OP2,
171 FMULv8i16_indexed_OP1,
172 FMULv8i16_indexed_OP2,
173
174 FNMADD,
175
176 GATHER_LANE_i32,
177 GATHER_LANE_i16,
178 GATHER_LANE_i8
179};
180class AArch64InstrInfo final : public AArch64GenInstrInfo {
181 const AArch64RegisterInfo RI;
182 const AArch64Subtarget &Subtarget;
183
184public:
185 explicit AArch64InstrInfo(const AArch64Subtarget &STI);
186
187 /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
188 /// such, whenever a client has an instance of instruction info, it should
189 /// always be able to get register info as well (through this method).
190 const AArch64RegisterInfo &getRegisterInfo() const { return RI; }
191
192 unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
193
194 bool isAsCheapAsAMove(const MachineInstr &MI) const override;
195
196 bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg,
197 Register &DstReg, unsigned &SubIdx) const override;
198
199 bool
200 areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
201 const MachineInstr &MIb) const override;
202
203 Register isLoadFromStackSlot(const MachineInstr &MI,
204 int &FrameIndex) const override;
205 Register isStoreToStackSlot(const MachineInstr &MI,
206 int &FrameIndex) const override;
207
208 /// Check for post-frame ptr elimination stack locations as well. This uses a
209 /// heuristic so it isn't reliable for correctness.
210 Register isStoreToStackSlotPostFE(const MachineInstr &MI,
211 int &FrameIndex) const override;
212 /// Check for post-frame ptr elimination stack locations as well. This uses a
213 /// heuristic so it isn't reliable for correctness.
214 Register isLoadFromStackSlotPostFE(const MachineInstr &MI,
215 int &FrameIndex) const override;
216
217 /// Does this instruction set its full destination register to zero?
218 static bool isGPRZero(const MachineInstr &MI);
219
220 /// Does this instruction rename a GPR without modifying bits?
221 static bool isGPRCopy(const MachineInstr &MI);
222
223 /// Does this instruction rename an FPR without modifying bits?
224 static bool isFPRCopy(const MachineInstr &MI);
225
226 /// Return true if pairing the given load or store is hinted to be
227 /// unprofitable.
228 static bool isLdStPairSuppressed(const MachineInstr &MI);
229
230 /// Return true if the given load or store is a strided memory access.
231 static bool isStridedAccess(const MachineInstr &MI);
232
233 /// Return true if it has an unscaled load/store offset.
234 static bool hasUnscaledLdStOffset(unsigned Opc);
235 static bool hasUnscaledLdStOffset(MachineInstr &MI) {
236 return hasUnscaledLdStOffset(Opc: MI.getOpcode());
237 }
238
239 /// Returns the unscaled load/store for the scaled load/store opcode,
240 /// if there is a corresponding unscaled variant available.
241 static std::optional<unsigned> getUnscaledLdSt(unsigned Opc);
242
243 /// Scaling factor for (scaled or unscaled) load or store.
244 static int getMemScale(unsigned Opc);
245 static int getMemScale(const MachineInstr &MI) {
246 return getMemScale(Opc: MI.getOpcode());
247 }
248
249 /// Returns whether the instruction is a pre-indexed load.
250 static bool isPreLd(const MachineInstr &MI);
251
252 /// Returns whether the instruction is a pre-indexed store.
253 static bool isPreSt(const MachineInstr &MI);
254
255 /// Returns whether the instruction is a pre-indexed load/store.
256 static bool isPreLdSt(const MachineInstr &MI);
257
258 /// Returns whether the instruction is a zero-extending load.
259 static bool isZExtLoad(const MachineInstr &MI);
260
261 /// Returns whether the instruction is a sign-extending load.
262 static bool isSExtLoad(const MachineInstr &MI);
263
264 /// Returns whether the instruction is a paired load/store.
265 static bool isPairedLdSt(const MachineInstr &MI);
266
267 /// Returns the base register operator of a load/store.
268 static const MachineOperand &getLdStBaseOp(const MachineInstr &MI);
269
270 /// Returns the immediate offset operator of a load/store.
271 static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI);
272
273 /// Returns whether the physical register is FP or NEON.
274 static bool isFpOrNEON(Register Reg);
275
276 /// Returns the shift amount operator of a load/store.
277 static const MachineOperand &getLdStAmountOp(const MachineInstr &MI);
278
279 /// Returns whether the instruction is FP or NEON.
280 static bool isFpOrNEON(const MachineInstr &MI);
281
282 /// Returns whether the instruction is in H form (16 bit operands)
283 static bool isHForm(const MachineInstr &MI);
284
285 /// Returns whether the instruction is in Q form (128 bit operands)
286 static bool isQForm(const MachineInstr &MI);
287
288 /// Returns whether the instruction can be compatible with non-zero BTYPE.
289 static bool hasBTISemantics(const MachineInstr &MI);
290
291 /// Returns the index for the immediate for a given instruction.
292 static unsigned getLoadStoreImmIdx(unsigned Opc);
293
294 /// Return true if pairing the given load or store may be paired with another.
295 static bool isPairableLdStInst(const MachineInstr &MI);
296
297 /// Returns true if MI is one of the TCRETURN* instructions.
298 static bool isTailCallReturnInst(const MachineInstr &MI);
299
300 /// Return the opcode that set flags when possible. The caller is
301 /// responsible for ensuring the opc has a flag setting equivalent.
302 static unsigned convertToFlagSettingOpc(unsigned Opc);
303
304 /// Return true if this is a load/store that can be potentially paired/merged.
305 bool isCandidateToMergeOrPair(const MachineInstr &MI) const;
306
307 /// Hint that pairing the given load or store is unprofitable.
308 static void suppressLdStPair(MachineInstr &MI);
309
310 std::optional<ExtAddrMode>
311 getAddrModeFromMemoryOp(const MachineInstr &MemI,
312 const TargetRegisterInfo *TRI) const override;
313
314 bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
315 const MachineInstr &AddrI,
316 ExtAddrMode &AM) const override;
317
318 MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
319 const ExtAddrMode &AM) const override;
320
321 bool getMemOperandsWithOffsetWidth(
322 const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
323 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
324 const TargetRegisterInfo *TRI) const override;
325
326 /// If \p OffsetIsScalable is set to 'true', the offset is scaled by `vscale`.
327 /// This is true for some SVE instructions like ldr/str that have a
328 /// 'reg + imm' addressing mode where the immediate is an index to the
329 /// scalable vector located at 'reg + imm * vscale x #bytes'.
330 bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
331 const MachineOperand *&BaseOp,
332 int64_t &Offset, bool &OffsetIsScalable,
333 TypeSize &Width,
334 const TargetRegisterInfo *TRI) const;
335
336 /// Return the immediate offset of the base register in a load/store \p LdSt.
337 MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const;
338
339 /// Returns true if opcode \p Opc is a memory operation. If it is, set
340 /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
341 ///
342 /// For unscaled instructions, \p Scale is set to 1. All values are in bytes.
343 /// MinOffset/MaxOffset are the un-scaled limits of the immediate in the
344 /// instruction, the actual offset limit is [MinOffset*Scale,
345 /// MaxOffset*Scale].
346 static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width,
347 int64_t &MinOffset, int64_t &MaxOffset);
348
349 bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
350 int64_t Offset1, bool OffsetIsScalable1,
351 ArrayRef<const MachineOperand *> BaseOps2,
352 int64_t Offset2, bool OffsetIsScalable2,
353 unsigned ClusterSize,
354 unsigned NumBytes) const override;
355
356 void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
357 const DebugLoc &DL, MCRegister DestReg,
358 MCRegister SrcReg, bool KillSrc, unsigned Opcode,
359 llvm::ArrayRef<unsigned> Indices) const;
360 void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
361 const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
362 bool KillSrc, unsigned Opcode, unsigned ZeroReg,
363 llvm::ArrayRef<unsigned> Indices) const;
364 void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
365 const DebugLoc &DL, Register DestReg, Register SrcReg,
366 bool KillSrc, bool RenamableDest = false,
367 bool RenamableSrc = false) const override;
368
369 void storeRegToStackSlot(
370 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
371 bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
372 MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
373
374 void loadRegFromStackSlot(
375 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
376 Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
377 Register VReg, unsigned SubReg = 0,
378 MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
379
380 // This tells target independent code that it is okay to pass instructions
381 // with subreg operands to foldMemoryOperandImpl.
382 bool isSubregFoldable() const override { return true; }
383
384 using TargetInstrInfo::foldMemoryOperandImpl;
385 MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
386 ArrayRef<unsigned> Ops, int FrameIndex,
387 MachineInstr *&CopyMI,
388 LiveIntervals *LIS = nullptr,
389 VirtRegMap *VRM = nullptr) const override;
390
391 /// \returns true if a branch from an instruction with opcode \p BranchOpc
392 /// bytes is capable of jumping to a position \p BrOffset bytes away.
393 bool isBranchOffsetInRange(unsigned BranchOpc,
394 int64_t BrOffset) const override;
395
396 MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
397
398 void insertIndirectBranch(MachineBasicBlock &MBB,
399 MachineBasicBlock &NewDestBB,
400 MachineBasicBlock &RestoreBB, const DebugLoc &DL,
401 int64_t BrOffset, RegScavenger *RS) const override;
402
403 bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
404 MachineBasicBlock *&FBB,
405 SmallVectorImpl<MachineOperand> &Cond,
406 bool AllowModify = false) const override;
407 bool analyzeBranchPredicate(MachineBasicBlock &MBB,
408 MachineBranchPredicate &MBP,
409 bool AllowModify) const override;
410 unsigned removeBranch(MachineBasicBlock &MBB,
411 int *BytesRemoved = nullptr) const override;
412 unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
413 MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
414 const DebugLoc &DL,
415 int *BytesAdded = nullptr) const override;
416
417 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
418 analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
419
420 bool
421 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
422 bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
423 Register, Register, Register, int &, int &,
424 int &) const override;
425 void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
426 const DebugLoc &DL, Register DstReg,
427 ArrayRef<MachineOperand> Cond, Register TrueReg,
428 Register FalseReg) const override;
429
430 void insertNoop(MachineBasicBlock &MBB,
431 MachineBasicBlock::iterator MI) const override;
432
433 MCInst getNop() const override;
434
435 bool isSchedulingBoundary(const MachineInstr &MI,
436 const MachineBasicBlock *MBB,
437 const MachineFunction &MF) const override;
438
439 /// analyzeCompare - For a comparison instruction, return the source registers
440 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
441 /// Return true if the comparison instruction can be analyzed.
442 bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
443 Register &SrcReg2, int64_t &CmpMask,
444 int64_t &CmpValue) const override;
445 /// optimizeCompareInstr - Convert the instruction supplying the argument to
446 /// the comparison into one that sets the zero bit in the flags register.
447 bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
448 Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
449 const MachineRegisterInfo *MRI) const override;
450 bool optimizeCondBranch(MachineInstr &MI) const override;
451
452 CombinerObjective getCombinerObjective(unsigned Pattern) const override;
453 /// Return true when a code sequence can improve throughput. It
454 /// should be called only for instructions in loops.
455 /// \param Pattern - combiner pattern
456 bool isThroughputPattern(unsigned Pattern) const override;
457 /// Return true when there is potentially a faster code sequence
458 /// for an instruction chain ending in ``Root``. All potential patterns are
459 /// listed in the ``Patterns`` array.
460 bool getMachineCombinerPatterns(MachineInstr &Root,
461 SmallVectorImpl<unsigned> &Patterns,
462 bool DoRegPressureReduce) const override;
463 /// Return true when Inst is associative and commutative so that it can be
464 /// reassociated. If Invert is true, then the inverse of Inst operation must
465 /// be checked.
466 bool isAssociativeAndCommutative(const MachineInstr &Inst,
467 bool Invert) const override;
468
469 /// Returns true if \P Opcode is an instruction which performs accumulation
470 /// into a destination register.
471 bool isAccumulationOpcode(unsigned Opcode) const override;
472
473 /// Returns an opcode which defines the accumulator used by \P Opcode.
474 unsigned getAccumulationStartOpcode(unsigned Opcode) const override;
475
476 unsigned
477 getReduceOpcodeForAccumulator(unsigned int AccumulatorOpCode) const override;
478
479 /// When getMachineCombinerPatterns() finds patterns, this function
480 /// generates the instructions that could replace the original code
481 /// sequence
482 void genAlternativeCodeSequence(
483 MachineInstr &Root, unsigned Pattern,
484 SmallVectorImpl<MachineInstr *> &InsInstrs,
485 SmallVectorImpl<MachineInstr *> &DelInstrs,
486 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const override;
487 /// AArch64 supports MachineCombiner.
488 bool useMachineCombiner() const override;
489
490 bool expandPostRAPseudo(MachineInstr &MI) const override;
491
492 std::pair<unsigned, unsigned>
493 decomposeMachineOperandsTargetFlags(unsigned TF) const override;
494 ArrayRef<std::pair<unsigned, const char *>>
495 getSerializableDirectMachineOperandTargetFlags() const override;
496 ArrayRef<std::pair<unsigned, const char *>>
497 getSerializableBitmaskMachineOperandTargetFlags() const override;
498 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
499 getSerializableMachineMemOperandTargetFlags() const override;
500
501 bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
502 bool OutlineFromLinkOnceODRs) const override;
503 std::optional<std::unique_ptr<outliner::OutlinedFunction>>
504 getOutliningCandidateInfo(
505 const MachineModuleInfo &MMI,
506 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
507 unsigned MinRepeats) const override;
508 void mergeOutliningCandidateAttributes(
509 Function &F, std::vector<outliner::Candidate> &Candidates) const override;
510 outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI,
511 MachineBasicBlock::iterator &MIT,
512 unsigned Flags) const override;
513 SmallVector<
514 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
515 getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override;
516 void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
517 const outliner::OutlinedFunction &OF) const override;
518 MachineBasicBlock::iterator
519 insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
520 MachineBasicBlock::iterator &It, MachineFunction &MF,
521 outliner::Candidate &C) const override;
522 bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
523
524 void buildClearRegister(Register Reg, MachineBasicBlock &MBB,
525 MachineBasicBlock::iterator Iter, DebugLoc &DL,
526 bool AllowSideEffects = true) const override;
527
528 /// Returns the vector element size (B, H, S or D) of an SVE opcode.
529 uint64_t getElementSizeForOpcode(unsigned Opc) const;
530 /// Returns true if the opcode is for an SVE instruction that sets the
531 /// condition codes as if it's results had been fed to a PTEST instruction
532 /// along with the same general predicate.
533 bool isPTestLikeOpcode(unsigned Opc) const;
534 /// Returns true if the opcode is for an SVE WHILE## instruction.
535 bool isWhileOpcode(unsigned Opc) const;
536 /// Returns true if the instruction has a shift by immediate that can be
537 /// executed in one cycle less.
538 static bool isFalkorShiftExtFast(const MachineInstr &MI);
539 /// Return true if the instructions is a SEH instruction used for unwinding
540 /// on Windows.
541 static bool isSEHInstruction(const MachineInstr &MI);
542
543 std::optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
544 Register Reg) const override;
545
546 bool isFunctionSafeToSplit(const MachineFunction &MF) const override;
547
548 bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override;
549
550 std::optional<ParamLoadedValue>
551 describeLoadedValue(const MachineInstr &MI, Register Reg) const override;
552
553 unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
554
555 bool isExtendLikelyToBeFolded(MachineInstr &ExtMI,
556 MachineRegisterInfo &MRI) const override;
557
558 static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
559 int64_t &NumBytes,
560 int64_t &NumPredicateVectors,
561 int64_t &NumDataVectors);
562 static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset,
563 int64_t &ByteSized,
564 int64_t &VGSized);
565
566 // Return true if address of the form BaseReg + Scale * ScaledReg + Offset can
567 // be used for a load/store of NumBytes. BaseReg is always present and
568 // implicit.
569 bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
570 unsigned Scale) const;
571
572 // Decrement the SP, issuing probes along the way. `TargetReg` is the new top
573 // of the stack. `FrameSetup` is passed as true, if the allocation is a part
574 // of constructing the activation frame of a function.
575 MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI,
576 Register TargetReg,
577 bool FrameSetup) const;
578
579 static int
580 findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr);
581
582 /// Insert a `PAUTH_EPILOGUE` pseudo before the first terminator in \p MBB to
583 /// authenticate the return address. Adds an implicit def of X16 when the
584 /// branch protection uses PAuthLR but the subtarget lacks PAuthLR
585 /// instructions. If the epilogue has callee-popped argument stack to restore,
586 /// it additionally implicit defines X15 and X17 to cover clobbered registers
587 /// for the required sequence on subtargets both with and without PAuthLR
588 /// instructions.
589 void createPauthEpilogueInstr(MachineBasicBlock &MBB, DebugLoc DL) const;
590
591#define GET_INSTRINFO_HELPER_DECLS
592#include "AArch64GenInstrInfo.inc"
593
594protected:
595 /// If the specific machine instruction is an instruction that moves/copies
596 /// value from one register to another register return destination and source
597 /// registers as machine operands.
598 std::optional<DestSourcePair>
599 isCopyInstrImpl(const MachineInstr &MI) const override;
600 std::optional<DestSourcePair>
601 isCopyLikeInstrImpl(const MachineInstr &MI) const override;
602
603private:
604 /// Sets the offsets on outlined instructions in \p MBB which use SP
605 /// so that they will be valid post-outlining.
606 ///
607 /// \param MBB A \p MachineBasicBlock in an outlined function.
608 void fixupPostOutline(MachineBasicBlock &MBB) const;
609
610 void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL,
611 MachineBasicBlock *TBB,
612 ArrayRef<MachineOperand> Cond) const;
613 bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg,
614 const MachineRegisterInfo &MRI) const;
615 bool removeCmpToZeroOrOne(MachineInstr &CmpInstr, unsigned SrcReg,
616 int CmpValue, const MachineRegisterInfo &MRI) const;
617
618 /// Returns an unused general-purpose register which can be used for
619 /// constructing an outlined call if one exists. Returns 0 otherwise.
620 Register findRegisterToSaveLRTo(outliner::Candidate &C) const;
621
622 /// Remove a ptest of a predicate-generating operation that already sets, or
623 /// can be made to set, the condition codes in an identical manner
624 bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg,
625 unsigned PredReg,
626 const MachineRegisterInfo *MRI) const;
627 std::optional<unsigned>
628 canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
629 MachineInstr *Pred, const MachineRegisterInfo *MRI) const;
630
631 /// verifyInstruction - Perform target specific instruction verification.
632 bool verifyInstruction(const MachineInstr &MI,
633 StringRef &ErrInfo) const override;
634};
635
/// Records which of the four condition flags (N, Z, C, V) are used.
/// All flags start out as "not used".
struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  /// Merge \p UsedFlags into this set: a flag is used afterwards if it was
  /// used in either operand. Returns *this to allow chaining.
  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    N |= UsedFlags.N;
    Z |= UsedFlags.Z;
    C |= UsedFlags.C;
    V |= UsedFlags.V;
    return *this;
  }
};
652
653/// \returns Conditions flags used after \p CmpInstr in its MachineBB if NZCV
654/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
655/// \returns std::nullopt otherwise.
656///
657/// Collect instructions using that flags in \p CCUseInstrs if provided.
658std::optional<UsedNZCV>
659examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
660 const TargetRegisterInfo &TRI,
661 SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr);
662
663/// Return true if there is an instruction /after/ \p DefMI and before \p UseMI
664/// which either reads or clobbers NZCV.
665bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
666 const MachineInstr &UseMI,
667 const TargetRegisterInfo *TRI);
668
669MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg,
670 unsigned Reg, const StackOffset &Offset,
671 bool LastAdjustmentWasScalable = true);
672MCCFIInstruction
673createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg,
674 const StackOffset &OffsetFromDefCFA,
675 std::optional<int64_t> IncomingVGOffsetFromDefCFA);
676
677/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
678/// plus Offset. This is intended to be used from within the prolog/epilog
679/// insertion (PEI) pass, where a virtual scratch register may be allocated
680/// if necessary, to be replaced by the scavenger at the end of PEI.
681void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
682 const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
683 StackOffset Offset, const TargetInstrInfo *TII,
684 MachineInstr::MIFlag = MachineInstr::NoFlags,
685 bool SetNZCV = false, bool NeedsWinCFI = false,
686 bool *HasWinCFI = nullptr, bool EmitCFAOffset = false,
687 StackOffset InitialOffset = {},
688 unsigned FrameReg = AArch64::SP);
689
690/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
691/// FP. Return false if the offset could not be handled directly in MI, and
692/// return the left-over portion by reference.
693bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
694 unsigned FrameReg, StackOffset &Offset,
695 const AArch64InstrInfo *TII);
696
/// Used to report the frame offset status in isAArch64FrameOffsetLegal.
/// The non-zero values are distinct bits, so a result can be tested with `&`.
enum AArch64FrameOffsetStatus {
  AArch64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
  AArch64FrameOffsetIsLegal = 0x1,      ///< Offset is legal.
  AArch64FrameOffsetCanUpdate = 0x2     ///< Offset can apply, at least partly.
};
703
704/// Check if the @p Offset is a valid frame offset for @p MI.
705/// The returned value reports the validity of the frame offset for @p MI.
706/// It uses the values defined by AArch64FrameOffsetStatus for that.
707/// If result == AArch64FrameOffsetCannotUpdate, @p MI cannot be updated to
708/// use an offset.eq
709/// If result & AArch64FrameOffsetIsLegal, @p Offset can completely be
710/// rewritten in @p MI.
711/// If result & AArch64FrameOffsetCanUpdate, @p Offset contains the
712/// amount that is off the limit of the legal offset.
713/// If set, @p OutUseUnscaledOp will contain the whether @p MI should be
714/// turned into an unscaled operator, which opcode is in @p OutUnscaledOp.
715/// If set, @p EmittableOffset contains the amount that can be set in @p MI
716/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
717/// is a legal offset.
718int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset,
719 bool *OutUseUnscaledOp = nullptr,
720 unsigned *OutUnscaledOp = nullptr,
721 int64_t *EmittableOffset = nullptr);
722
723bool optimizeTerminators(MachineBasicBlock *MBB, const TargetInstrInfo &TII);
724
725static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; }
726
727static inline bool isCondBranchOpcode(int Opc) {
728 switch (Opc) {
729 case AArch64::Bcc:
730 case AArch64::CBZW:
731 case AArch64::CBZX:
732 case AArch64::CBNZW:
733 case AArch64::CBNZX:
734 case AArch64::TBZW:
735 case AArch64::TBZX:
736 case AArch64::TBNZW:
737 case AArch64::TBNZX:
738 case AArch64::CBWPri:
739 case AArch64::CBXPri:
740 case AArch64::CBBAssertExt:
741 case AArch64::CBHAssertExt:
742 case AArch64::CBWPrr:
743 case AArch64::CBXPrr:
744 return true;
745 default:
746 return false;
747 }
748}
749
750static inline bool isIndirectBranchOpcode(int Opc) {
751 switch (Opc) {
752 case AArch64::BR:
753 case AArch64::BRAA:
754 case AArch64::BRAB:
755 case AArch64::BRAAZ:
756 case AArch64::BRABZ:
757 return true;
758 }
759 return false;
760}
761
762static inline bool isIndirectCallOpcode(unsigned Opc) {
763 switch (Opc) {
764 case AArch64::BLR:
765 case AArch64::BLRAA:
766 case AArch64::BLRAB:
767 case AArch64::BLRAAZ:
768 case AArch64::BLRABZ:
769 return true;
770 default:
771 return false;
772 }
773}
774
775static inline bool isPTrueOpcode(unsigned Opc) {
776 switch (Opc) {
777 case AArch64::PTRUE_B:
778 case AArch64::PTRUE_H:
779 case AArch64::PTRUE_S:
780 case AArch64::PTRUE_D:
781 return true;
782 default:
783 return false;
784 }
785}
786
787/// Return opcode to be used for indirect calls.
788unsigned getBLRCallOpcode(const MachineFunction &MF);
789
790/// Return XPAC opcode to be used for a ptrauth strip using the given key.
791static inline unsigned getXPACOpcodeForKey(AArch64PACKey::ID K) {
792 using namespace AArch64PACKey;
793 switch (K) {
794 case IA: case IB: return AArch64::XPACI;
795 case DA: case DB: return AArch64::XPACD;
796 }
797 llvm_unreachable("Unhandled AArch64PACKey::ID enum");
798}
799
800/// Return AUT opcode to be used for a ptrauth auth using the given key, or its
801/// AUT*Z variant that doesn't take a discriminator operand, using zero instead.
802static inline unsigned getAUTOpcodeForKey(AArch64PACKey::ID K, bool Zero) {
803 using namespace AArch64PACKey;
804 switch (K) {
805 case IA: return Zero ? AArch64::AUTIZA : AArch64::AUTIA;
806 case IB: return Zero ? AArch64::AUTIZB : AArch64::AUTIB;
807 case DA: return Zero ? AArch64::AUTDZA : AArch64::AUTDA;
808 case DB: return Zero ? AArch64::AUTDZB : AArch64::AUTDB;
809 }
810 llvm_unreachable("Unhandled AArch64PACKey::ID enum");
811}
812
813/// Return PAC opcode to be used for a ptrauth sign using the given key, or its
814/// PAC*Z variant that doesn't take a discriminator operand, using zero instead.
815static inline unsigned getPACOpcodeForKey(AArch64PACKey::ID K, bool Zero) {
816 using namespace AArch64PACKey;
817 switch (K) {
818 case IA: return Zero ? AArch64::PACIZA : AArch64::PACIA;
819 case IB: return Zero ? AArch64::PACIZB : AArch64::PACIB;
820 case DA: return Zero ? AArch64::PACDZA : AArch64::PACDA;
821 case DB: return Zero ? AArch64::PACDZB : AArch64::PACDB;
822 }
823 llvm_unreachable("Unhandled AArch64PACKey::ID enum");
824}
825
826/// Return B(L)RA opcode to be used for an authenticated branch or call using
827/// the given key, or its B(L)RA*Z variant that doesn't take a discriminator
828/// operand, using zero instead.
829static inline unsigned getBranchOpcodeForKey(bool IsCall, AArch64PACKey::ID K,
830 bool Zero) {
831 using namespace AArch64PACKey;
832 static const unsigned BranchOpcode[2][2] = {
833 {AArch64::BRAA, AArch64::BRAAZ},
834 {AArch64::BRAB, AArch64::BRABZ},
835 };
836 static const unsigned CallOpcode[2][2] = {
837 {AArch64::BLRAA, AArch64::BLRAAZ},
838 {AArch64::BLRAB, AArch64::BLRABZ},
839 };
840
841 assert((K == IA || K == IB) && "B(L)RA* instructions require IA or IB key");
842 if (IsCall)
843 return CallOpcode[K == IB][Zero];
844 return BranchOpcode[K == IB][Zero];
845}
846
// Helper macros describing the TSFlags bit layout. Each one shifts a
// field-local value X to the bit offset of its field; the trailing comment
// gives the field's width and the resulting bit range. The macros are only
// used to define the constants in namespace AArch64 below, which #undef's
// them again once the constants are defined.
// struct TSFlags {
#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits, bits [2:0]
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bits, bits [6:3]
#define TSFLAG_FALSE_LANE_TYPE(X) ((X) << 7) // 2-bits, bits [8:7]
#define TSFLAG_INSTR_FLAGS(X) ((X) << 9) // 2-bits, bits [10:9]
#define TSFLAG_SME_MATRIX_TYPE(X) ((X) << 11) // 3-bits, bits [13:11]
// }
854
// Named constants for the fields of the TSFlags layout described by the
// TSFLAG_* macros, plus declarations of the SVE/SME opcode-mapping helpers.
namespace AArch64 {

// clang-format off
// Element-size field: a 3-bit value in bits [2:0]. ElementSizeMask covers the
// whole field; the remaining enumerators are the field values 0x0-0x4.
// NOTE(review): B/H/S/D presumably denote byte/halfword/word/doubleword
// elements -- confirm against the TableGen definitions.
enum ElementSizeType {
  ElementSizeMask = TSFLAG_ELEMENT_SIZE_TYPE(0x7),
  ElementSizeNone = TSFLAG_ELEMENT_SIZE_TYPE(0x0),
  ElementSizeB = TSFLAG_ELEMENT_SIZE_TYPE(0x1),
  ElementSizeH = TSFLAG_ELEMENT_SIZE_TYPE(0x2),
  ElementSizeS = TSFLAG_ELEMENT_SIZE_TYPE(0x3),
  ElementSizeD = TSFLAG_ELEMENT_SIZE_TYPE(0x4),
};

// Destructive-instruction-type field: a 4-bit value in bits [6:3].
// DestructiveInstTypeMask covers the whole field; the remaining enumerators
// are the field values 0x0-0xc, already shifted into position.
enum DestructiveInstType {
  DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0xf),
  NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
  DestructiveOther = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
  DestructiveUnary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x2),
  DestructiveBinaryImm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x3),
  DestructiveBinaryShImmUnpred = TSFLAG_DESTRUCTIVE_INST_TYPE(0x4),
  DestructiveBinary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x5),
  DestructiveBinaryComm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x6),
  DestructiveBinaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x7),
  DestructiveTernaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x8),
  Destructive2xRegImmUnpred = TSFLAG_DESTRUCTIVE_INST_TYPE(0x9),
  DestructiveUnaryPassthru = TSFLAG_DESTRUCTIVE_INST_TYPE(0xa),
  DestructivePredicate = TSFLAG_DESTRUCTIVE_INST_TYPE(0xb),
  DestructiveBinaryImmUnpred = TSFLAG_DESTRUCTIVE_INST_TYPE(0xc),
};

// False-lane-type field: a 2-bit value in bits [8:7]. FalseLanesMask covers
// the whole field. Only the field values 0x1 and 0x2 are named; there is no
// enumerator for the field value 0x0.
enum FalseLaneType {
  FalseLanesMask = TSFLAG_FALSE_LANE_TYPE(0x3),
  FalseLanesZero = TSFLAG_FALSE_LANE_TYPE(0x1),
  FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2),
};

// clang-format on

// NOTE: This is a bit field.
// Unlike the enumerated fields above, each constant here occupies its own
// bit: InstrFlagIsWhile is bit 9 and InstrFlagIsPTestLike is bit 10.
static const uint64_t InstrFlagIsWhile = TSFLAG_INSTR_FLAGS(0x1);
static const uint64_t InstrFlagIsPTestLike = TSFLAG_INSTR_FLAGS(0x2);

// SME-matrix-type field: a 3-bit value in bits [13:11]. SMEMatrixTypeMask
// covers the whole field; the remaining enumerators are the field values
// 0x0-0x6, already shifted into position.
enum SMEMatrixType {
  SMEMatrixTypeMask = TSFLAG_SME_MATRIX_TYPE(0x7),
  SMEMatrixNone = TSFLAG_SME_MATRIX_TYPE(0x0),
  SMEMatrixTileB = TSFLAG_SME_MATRIX_TYPE(0x1),
  SMEMatrixTileH = TSFLAG_SME_MATRIX_TYPE(0x2),
  SMEMatrixTileS = TSFLAG_SME_MATRIX_TYPE(0x3),
  SMEMatrixTileD = TSFLAG_SME_MATRIX_TYPE(0x4),
  SMEMatrixTileQ = TSFLAG_SME_MATRIX_TYPE(0x5),
  SMEMatrixArray = TSFLAG_SME_MATRIX_TYPE(0x6),
};

// The layout macros are only needed for the definitions above; remove them so
// they do not leak into files that include this header.
#undef TSFLAG_ELEMENT_SIZE_TYPE
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
#undef TSFLAG_FALSE_LANE_TYPE
#undef TSFLAG_INSTR_FLAGS
#undef TSFLAG_SME_MATRIX_TYPE

// Opcode-to-opcode mapping helpers for SVE instructions. Each takes an opcode
// and returns a signed 32-bit value.
// NOTE(review): only declarations are visible here; presumably these are
// implemented by TableGen-generated instruction-mapping tables and return a
// negative value when no mapping exists -- confirm against the definitions.
int32_t getSVEPseudoMap(uint32_t Opcode);
int32_t getSVERevInstr(uint32_t Opcode);
int32_t getSVENonRevInstr(uint32_t Opcode);

// Opcode-to-opcode mapping helper for SME instructions; same signature as the
// SVE helpers above.
int32_t getSMEPseudoMap(uint32_t Opcode);
} // end namespace AArch64
919
920} // end namespace llvm
921
922#endif
923