//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H

#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/TypeSize.h"
#include <optional>
#define GET_INSTRINFO_HEADER
#include "AArch64GenInstrInfo.inc"

namespace llvm {

class AArch64Subtarget;

static const MachineMemOperand::Flags MOSuppressPair =
    MachineMemOperand::MOTargetFlag1;
static const MachineMemOperand::Flags MOStridedAccess =
    MachineMemOperand::MOTargetFlag2;

#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"

// AArch64 MachineCombiner patterns
enum AArch64MachineCombinerPattern : unsigned {
  // These are patterns used to reduce the length of the dependence chain.
  SUBADD_OP1 = MachineCombinerPattern::TARGET_PATTERN_START,
  SUBADD_OP2,

  // These are multiply-add patterns matched by the AArch64 machine combiner.
  MULADDW_OP1,
  MULADDW_OP2,
  MULSUBW_OP1,
  MULSUBW_OP2,
  MULADDWI_OP1,
  MULSUBWI_OP1,
  MULADDX_OP1,
  MULADDX_OP2,
  MULSUBX_OP1,
  MULSUBX_OP2,
  MULADDXI_OP1,
  MULSUBXI_OP1,
  // NEON integer vectors
  MULADDv8i8_OP1,
  MULADDv8i8_OP2,
  MULADDv16i8_OP1,
  MULADDv16i8_OP2,
  MULADDv4i16_OP1,
  MULADDv4i16_OP2,
  MULADDv8i16_OP1,
  MULADDv8i16_OP2,
  MULADDv2i32_OP1,
  MULADDv2i32_OP2,
  MULADDv4i32_OP1,
  MULADDv4i32_OP2,

  MULSUBv8i8_OP1,
  MULSUBv8i8_OP2,
  MULSUBv16i8_OP1,
  MULSUBv16i8_OP2,
  MULSUBv4i16_OP1,
  MULSUBv4i16_OP2,
  MULSUBv8i16_OP1,
  MULSUBv8i16_OP2,
  MULSUBv2i32_OP1,
  MULSUBv2i32_OP2,
  MULSUBv4i32_OP1,
  MULSUBv4i32_OP2,

  MULADDv4i16_indexed_OP1,
  MULADDv4i16_indexed_OP2,
  MULADDv8i16_indexed_OP1,
  MULADDv8i16_indexed_OP2,
  MULADDv2i32_indexed_OP1,
  MULADDv2i32_indexed_OP2,
  MULADDv4i32_indexed_OP1,
  MULADDv4i32_indexed_OP2,

  MULSUBv4i16_indexed_OP1,
  MULSUBv4i16_indexed_OP2,
  MULSUBv8i16_indexed_OP1,
  MULSUBv8i16_indexed_OP2,
  MULSUBv2i32_indexed_OP1,
  MULSUBv2i32_indexed_OP2,
  MULSUBv4i32_indexed_OP1,
  MULSUBv4i32_indexed_OP2,

  // Floating Point
  FMULADDH_OP1,
  FMULADDH_OP2,
  FMULSUBH_OP1,
  FMULSUBH_OP2,
  FMULADDS_OP1,
  FMULADDS_OP2,
  FMULSUBS_OP1,
  FMULSUBS_OP2,
  FMULADDD_OP1,
  FMULADDD_OP2,
  FMULSUBD_OP1,
  FMULSUBD_OP2,
  FNMULSUBH_OP1,
  FNMULSUBS_OP1,
  FNMULSUBD_OP1,
  FMLAv1i32_indexed_OP1,
  FMLAv1i32_indexed_OP2,
  FMLAv1i64_indexed_OP1,
  FMLAv1i64_indexed_OP2,
  FMLAv4f16_OP1,
  FMLAv4f16_OP2,
  FMLAv8f16_OP1,
  FMLAv8f16_OP2,
  FMLAv2f32_OP2,
  FMLAv2f32_OP1,
  FMLAv2f64_OP1,
  FMLAv2f64_OP2,
  FMLAv4i16_indexed_OP1,
  FMLAv4i16_indexed_OP2,
  FMLAv8i16_indexed_OP1,
  FMLAv8i16_indexed_OP2,
  FMLAv2i32_indexed_OP1,
  FMLAv2i32_indexed_OP2,
  FMLAv2i64_indexed_OP1,
  FMLAv2i64_indexed_OP2,
  FMLAv4f32_OP1,
  FMLAv4f32_OP2,
  FMLAv4i32_indexed_OP1,
  FMLAv4i32_indexed_OP2,
  FMLSv1i32_indexed_OP2,
  FMLSv1i64_indexed_OP2,
  FMLSv4f16_OP1,
  FMLSv4f16_OP2,
  FMLSv8f16_OP1,
  FMLSv8f16_OP2,
  FMLSv2f32_OP1,
  FMLSv2f32_OP2,
  FMLSv2f64_OP1,
  FMLSv2f64_OP2,
  FMLSv4i16_indexed_OP1,
  FMLSv4i16_indexed_OP2,
  FMLSv8i16_indexed_OP1,
  FMLSv8i16_indexed_OP2,
  FMLSv2i32_indexed_OP1,
  FMLSv2i32_indexed_OP2,
  FMLSv2i64_indexed_OP1,
  FMLSv2i64_indexed_OP2,
  FMLSv4f32_OP1,
  FMLSv4f32_OP2,
  FMLSv4i32_indexed_OP1,
  FMLSv4i32_indexed_OP2,

  FMULv2i32_indexed_OP1,
  FMULv2i32_indexed_OP2,
  FMULv2i64_indexed_OP1,
  FMULv2i64_indexed_OP2,
  FMULv4i16_indexed_OP1,
  FMULv4i16_indexed_OP2,
  FMULv4i32_indexed_OP1,
  FMULv4i32_indexed_OP2,
  FMULv8i16_indexed_OP1,
  FMULv8i16_indexed_OP2,

  FNMADD,
};

class AArch64InstrInfo final : public AArch64GenInstrInfo {
  const AArch64RegisterInfo RI;
  const AArch64Subtarget &Subtarget;

public:
  explicit AArch64InstrInfo(const AArch64Subtarget &STI);

  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
  /// such, whenever a client has an instance of instruction info, it should
  /// always be able to get register info as well (through this method).
  const AArch64RegisterInfo &getRegisterInfo() const { return RI; }

  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool isAsCheapAsAMove(const MachineInstr &MI) const override;

  bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg,
                             Register &DstReg, unsigned &SubIdx) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  Register isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  Register isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  /// Does this instruction set its full destination register to zero?
  static bool isGPRZero(const MachineInstr &MI);

  /// Does this instruction rename a GPR without modifying bits?
  static bool isGPRCopy(const MachineInstr &MI);

  /// Does this instruction rename an FPR without modifying bits?
  static bool isFPRCopy(const MachineInstr &MI);

  /// Return true if pairing the given load or store is hinted to be
  /// unprofitable.
  static bool isLdStPairSuppressed(const MachineInstr &MI);

  /// Return true if the given load or store is a strided memory access.
  static bool isStridedAccess(const MachineInstr &MI);

  /// Return true if the given opcode has an unscaled load/store offset.
  static bool hasUnscaledLdStOffset(unsigned Opc);
  static bool hasUnscaledLdStOffset(MachineInstr &MI) {
    return hasUnscaledLdStOffset(MI.getOpcode());
  }

  /// Returns the unscaled load/store for the scaled load/store opcode,
  /// if there is a corresponding unscaled variant available.
  static std::optional<unsigned> getUnscaledLdSt(unsigned Opc);
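  // As a concrete illustration (a sketch; opcode names assumed from the
  // generated AArch64 instruction enum): the scaled 64-bit load LDRXui has
  // the unscaled counterpart LDURXi, so
  //   AArch64InstrInfo::getUnscaledLdSt(AArch64::LDRXui)
  // would yield AArch64::LDURXi, while an opcode with no unscaled twin
  // yields std::nullopt.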

  /// Scaling factor for (scaled or unscaled) load or store.
  static int getMemScale(unsigned Opc);
  static int getMemScale(const MachineInstr &MI) {
    return getMemScale(MI.getOpcode());
  }

  /// Returns whether the instruction is a pre-indexed load.
  static bool isPreLd(const MachineInstr &MI);

  /// Returns whether the instruction is a pre-indexed store.
  static bool isPreSt(const MachineInstr &MI);

  /// Returns whether the instruction is a pre-indexed load/store.
  static bool isPreLdSt(const MachineInstr &MI);

  /// Returns whether the instruction is a paired load/store.
  static bool isPairedLdSt(const MachineInstr &MI);

  /// Returns the base register operand of a load/store.
  static const MachineOperand &getLdStBaseOp(const MachineInstr &MI);

  /// Returns the immediate offset operand of a load/store.
  static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI);

  /// Returns whether the physical register is FP or NEON.
  static bool isFpOrNEON(Register Reg);

  /// Returns whether the instruction is FP or NEON.
  static bool isFpOrNEON(const MachineInstr &MI);

  /// Returns whether the instruction is in H form (16 bit operands)
  static bool isHForm(const MachineInstr &MI);

  /// Returns whether the instruction is in Q form (128 bit operands)
  static bool isQForm(const MachineInstr &MI);

  /// Returns whether the instruction can be compatible with non-zero BTYPE.
  static bool hasBTISemantics(const MachineInstr &MI);

  /// Returns the index for the immediate for a given instruction.
  static unsigned getLoadStoreImmIdx(unsigned Opc);

  /// Return true if the given load or store may be paired with another.
  static bool isPairableLdStInst(const MachineInstr &MI);

  /// Returns true if MI is one of the TCRETURN* instructions.
  static bool isTailCallReturnInst(const MachineInstr &MI);

  /// Return the opcode that sets flags when possible. The caller is
  /// responsible for ensuring the opc has a flag-setting equivalent.
  static unsigned convertToFlagSettingOpc(unsigned Opc);
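  // For instance (a sketch, assuming the usual TableGen'erated opcode names):
  // convertToFlagSettingOpc(AArch64::ADDWri) would yield AArch64::ADDSWri,
  // and AArch64::SUBXrr its flag-setting twin AArch64::SUBSXrr.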

  /// Return true if this is a load/store that can be potentially paired/merged.
  bool isCandidateToMergeOrPair(const MachineInstr &MI) const;

  /// Hint that pairing the given load or store is unprofitable.
  static void suppressLdStPair(MachineInstr &MI);

  std::optional<ExtAddrMode>
  getAddrModeFromMemoryOp(const MachineInstr &MemI,
                          const TargetRegisterInfo *TRI) const override;

  bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
                           const MachineInstr &AddrI,
                           ExtAddrMode &AM) const override;

  MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
                                 const ExtAddrMode &AM) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
      int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
      const TargetRegisterInfo *TRI) const override;

  /// If \p OffsetIsScalable is set to 'true', the offset is scaled by `vscale`.
  /// This is true for some SVE instructions like ldr/str that have a
  /// 'reg + imm' addressing mode where the immediate is an index to the
  /// scalable vector located at 'reg + imm * vscale x #bytes'.
  bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
                                    const MachineOperand *&BaseOp,
                                    int64_t &Offset, bool &OffsetIsScalable,
                                    TypeSize &Width,
                                    const TargetRegisterInfo *TRI) const;
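  // Scalable-offset example (a sketch): for an SVE fill such as
  // "ldr z0, [x0, #2, mul vl]", \p OffsetIsScalable is true and \p Offset is
  // 2; the accessed address is x0 + 2 * vscale * 16 bytes, i.e. one full SVE
  // data vector per unit of the immediate.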

  /// Return the immediate offset of the base register in a load/store \p LdSt.
  MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const;

  /// Returns true if opcode \p Opc is a memory operation. If it is, set
  /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
  ///
  /// For unscaled instructions, \p Scale is set to 1.
  static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width,
                           int64_t &MinOffset, int64_t &MaxOffset);
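  // Worked example (a sketch; the values follow the A64 encoding of the
  // scaled 64-bit load LDRXui):
  //   TypeSize Scale = TypeSize::getFixed(0), Width = TypeSize::getFixed(0);
  //   int64_t MinOffset, MaxOffset;
  //   if (AArch64InstrInfo::getMemOpInfo(AArch64::LDRXui, Scale, Width,
  //                                      MinOffset, MaxOffset))
  //     ; // Scale == 8, Width == 8, MinOffset == 0, MaxOffset == 4095,
  //       // with the offset measured in units of Scale.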

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override;

  void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, MCRegister DestReg,
                        MCRegister SrcReg, bool KillSrc, unsigned Opcode,
                        llvm::ArrayRef<unsigned> Indices) const;
  void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                       DebugLoc DL, unsigned DestReg, unsigned SrcReg,
                       bool KillSrc, unsigned Opcode, unsigned ZeroReg,
                       llvm::ArrayRef<unsigned> Indices) const;
  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc) const override;

  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, Register SrcReg,
                           bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI,
                           Register VReg) const override;

  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, Register DestReg,
                            int FrameIndex, const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI,
                            Register VReg) const override;

  // This tells target independent code that it is okay to pass instructions
  // with subreg operands to foldMemoryOperandImpl.
  bool isSubregFoldable() const override { return true; }

  using TargetInstrInfo::foldMemoryOperandImpl;
  MachineInstr *
  foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                        ArrayRef<unsigned> Ops,
                        MachineBasicBlock::iterator InsertPt, int FrameIndex,
                        LiveIntervals *LIS = nullptr,
                        VirtRegMap *VRM = nullptr) const override;

  /// \returns true if a branch with opcode \p BranchOpc is capable of jumping
  /// to a position \p BrOffset bytes away.
  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;
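  // For reference (a sketch; ranges follow the A64 branch encodings): B
  // reaches +/-128 MiB (imm26), Bcc/CBZ/CBNZ reach +/-1 MiB (imm19), and
  // TBZ/TBNZ only +/-32 KiB (imm14), so
  //   isBranchOffsetInRange(AArch64::TBZW, 2 * 1024 * 1024)
  // would be false while the same offset is in range for AArch64::B.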

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;
  bool analyzeBranchPredicate(MachineBasicBlock &MBB,
                              MachineBranchPredicate &MBP,
                              bool AllowModify) const override;
  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;
  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
  analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;

  bool
  reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
  bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
                       Register, Register, Register, int &, int &,
                       int &) const override;
  void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                    const DebugLoc &DL, Register DstReg,
                    ArrayRef<MachineOperand> Cond, Register TrueReg,
                    Register FalseReg) const override;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  MCInst getNop() const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;

  /// analyzeCompare - For a comparison instruction, return the source registers
  /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
  /// Return true if the comparison instruction can be analyzed.
  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;
  /// optimizeCompareInstr - Convert the instruction supplying the argument to
  /// the comparison into one that sets the zero bit in the flags register.
  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;
  bool optimizeCondBranch(MachineInstr &MI) const override;

  CombinerObjective getCombinerObjective(unsigned Pattern) const override;
  /// Return true when a code sequence can improve throughput. It
  /// should be called only for instructions in loops.
  /// \param Pattern - combiner pattern
  bool isThroughputPattern(unsigned Pattern) const override;
  /// Return true when there is potentially a faster code sequence
  /// for an instruction chain ending in ``Root``. All potential patterns are
  /// listed in the ``Patterns`` array.
  bool getMachineCombinerPatterns(MachineInstr &Root,
                                  SmallVectorImpl<unsigned> &Patterns,
                                  bool DoRegPressureReduce) const override;
  /// Return true when Inst is associative and commutative so that it can be
  /// reassociated. If Invert is true, then the inverse of Inst operation must
  /// be checked.
  bool isAssociativeAndCommutative(const MachineInstr &Inst,
                                   bool Invert) const override;
  /// When getMachineCombinerPatterns() finds patterns, this function generates
  /// the instructions that could replace the original code sequence.
  void genAlternativeCodeSequence(
      MachineInstr &Root, unsigned Pattern,
      SmallVectorImpl<MachineInstr *> &InsInstrs,
      SmallVectorImpl<MachineInstr *> &DelInstrs,
      DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
  /// AArch64 supports MachineCombiner.
  bool useMachineCombiner() const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;
  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;
  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableBitmaskMachineOperandTargetFlags() const override;
  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;

  bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
                                   bool OutlineFromLinkOnceODRs) const override;
  std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
      std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
  void mergeOutliningCandidateAttributes(
      Function &F, std::vector<outliner::Candidate> &Candidates) const override;
  outliner::InstrType
  getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
                       unsigned Flags) const override;
  SmallVector<
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
  getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override;
  void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
                          const outliner::OutlinedFunction &OF) const override;
  MachineBasicBlock::iterator
  insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator &It, MachineFunction &MF,
                     outliner::Candidate &C) const override;
  bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;

  void buildClearRegister(Register Reg, MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator Iter, DebugLoc &DL,
                          bool AllowSideEffects = true) const override;

  /// Returns the vector element size (B, H, S or D) of an SVE opcode.
  uint64_t getElementSizeForOpcode(unsigned Opc) const;
  /// Returns true if the opcode is for an SVE instruction that sets the
  /// condition codes as if its results had been fed to a PTEST instruction
  /// along with the same general predicate.
  bool isPTestLikeOpcode(unsigned Opc) const;
  /// Returns true if the opcode is for an SVE WHILE## instruction.
  bool isWhileOpcode(unsigned Opc) const;
  /// Returns true if the instruction has a shift by immediate that can be
  /// executed in one cycle less.
  static bool isFalkorShiftExtFast(const MachineInstr &MI);
  /// Return true if the instruction is a SEH instruction used for unwinding
  /// on Windows.
  static bool isSEHInstruction(const MachineInstr &MI);

  std::optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
                                           Register Reg) const override;

  bool isFunctionSafeToSplit(const MachineFunction &MF) const override;

  bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override;

  std::optional<ParamLoadedValue>
  describeLoadedValue(const MachineInstr &MI, Register Reg) const override;

  unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;

  bool isExtendLikelyToBeFolded(MachineInstr &ExtMI,
                                MachineRegisterInfo &MRI) const override;

  static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
                                                  int64_t &NumBytes,
                                                  int64_t &NumPredicateVectors,
                                                  int64_t &NumDataVectors);
  static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset,
                                                  int64_t &ByteSized,
                                                  int64_t &VGSized);
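  // Worked example (a sketch): 16 fixed plus 32 scalable bytes decompose into
  // NumBytes == 16, NumDataVectors == 2, and NumPredicateVectors == 0, since
  // an SVE data vector spans 16 scalable bytes and a predicate spans 2:
  //   int64_t NumBytes, NumPredicateVectors, NumDataVectors;
  //   AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
  //       StackOffset::get(16, 32), NumBytes, NumPredicateVectors,
  //       NumDataVectors);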

  // Return true if address of the form BaseReg + Scale * ScaledReg + Offset
  // can be used for a load/store of NumBytes. BaseReg is always present and
  // implicit.
  bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
                             unsigned Scale) const;
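  // For instance (a sketch): the 8-byte access "ldr x1, [x0, x2, lsl #3]"
  // corresponds to NumBytes == 8, Offset == 0, Scale == 8 and is accepted,
  // whereas Scale == 16 matches no legal shift for an 8-byte access and
  // would be rejected.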

  // Decrement the SP, issuing probes along the way. `TargetReg` is the new top
  // of the stack. `FrameSetup` is passed as true if the allocation is part of
  // constructing the activation frame of a function.
  MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI,
                                               Register TargetReg,
                                               bool FrameSetup) const;

#define GET_INSTRINFO_HELPER_DECLS
#include "AArch64GenInstrInfo.inc"

protected:
  /// If the specific machine instruction is an instruction that moves/copies
  /// a value from one register to another register, return the destination
  /// and source registers as machine operands.
  std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;
  std::optional<DestSourcePair>
  isCopyLikeInstrImpl(const MachineInstr &MI) const override;

private:
  unsigned getInstBundleLength(const MachineInstr &MI) const;

  /// Sets the offsets on outlined instructions in \p MBB which use SP
  /// so that they will be valid post-outlining.
  ///
  /// \param MBB A \p MachineBasicBlock in an outlined function.
  void fixupPostOutline(MachineBasicBlock &MBB) const;

  void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL,
                             MachineBasicBlock *TBB,
                             ArrayRef<MachineOperand> Cond) const;
  bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg,
                           const MachineRegisterInfo &MRI) const;
  bool removeCmpToZeroOrOne(MachineInstr &CmpInstr, unsigned SrcReg,
                            int CmpValue, const MachineRegisterInfo &MRI) const;

  /// Returns an unused general-purpose register which can be used for
  /// constructing an outlined call if one exists. Returns 0 otherwise.
  Register findRegisterToSaveLRTo(outliner::Candidate &C) const;

  /// Remove a ptest of a predicate-generating operation that already sets, or
  /// can be made to set, the condition codes in an identical manner.
  bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg,
                          unsigned PredReg,
                          const MachineRegisterInfo *MRI) const;
  std::optional<unsigned>
  canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
                      MachineInstr *Pred, const MachineRegisterInfo *MRI) const;
};

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
/// \returns std::nullopt otherwise.
///
/// Collects instructions that use those flags in \p CCUseInstrs if provided.
std::optional<UsedNZCV>
examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
                 const TargetRegisterInfo &TRI,
                 SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr);
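// Illustrative use (a sketch; MI, Cmp, and TRI are assumed to be in hand):
//   if (std::optional<UsedNZCV> Used = examineCFlagsUse(MI, Cmp, TRI))
//     if (!Used->C && !Used->V)
//       ; // Only N and/or Z are consumed downstream, so a flag-setting op
//         // producing just those bits could stand in for the compare.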

/// Return true if there is an instruction /after/ \p DefMI and before \p UseMI
/// which either reads or clobbers NZCV.
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
                                     const MachineInstr &UseMI,
                                     const TargetRegisterInfo *TRI);

MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg,
                              unsigned Reg, const StackOffset &Offset,
                              bool LastAdjustmentWasScalable = true);
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg,
                                 const StackOffset &OffsetFromDefCFA);

/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
/// plus Offset. This is intended to be used from within the prolog/epilog
/// insertion (PEI) pass, where a virtual scratch register may be allocated
/// if necessary, to be replaced by the scavenger at the end of PEI.
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                     StackOffset Offset, const TargetInstrInfo *TII,
                     MachineInstr::MIFlag = MachineInstr::NoFlags,
                     bool SetNZCV = false, bool NeedsWinCFI = false,
                     bool *HasWinCFI = nullptr, bool EmitCFAOffset = false,
                     StackOffset InitialOffset = {},
                     unsigned FrameReg = AArch64::SP);
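// Minimal usage sketch (MBB, MBBI, DL, and a TargetInstrInfo pointer TII are
// assumed to be in scope): materialize "SP -= 16 fixed + 32 scalable bytes"
// while building a prologue:
//   emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
//                   StackOffset::get(-16, -32), TII,
//                   MachineInstr::FrameSetup);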

/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
/// FP. Return false if the offset could not be handled directly in MI, and
/// return the left-over portion by reference.
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                              unsigned FrameReg, StackOffset &Offset,
                              const AArch64InstrInfo *TII);

/// Use to report the frame offset status in isAArch64FrameOffsetLegal.
enum AArch64FrameOffsetStatus {
  AArch64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
  AArch64FrameOffsetIsLegal = 0x1,      ///< Offset is legal.
  AArch64FrameOffsetCanUpdate = 0x2     ///< Offset can apply, at least partly.
};

/// Check if the @p Offset is a valid frame offset for @p MI.
/// The returned value reports the validity of the frame offset for @p MI.
/// It uses the values defined by AArch64FrameOffsetStatus for that.
/// If result == AArch64FrameOffsetCannotUpdate, @p MI cannot be updated to
/// use an offset.
/// If result & AArch64FrameOffsetIsLegal, @p Offset can completely be
/// rewritten in @p MI.
/// If result & AArch64FrameOffsetCanUpdate, @p Offset contains the
/// amount that is off the limit of the legal offset.
/// If set, @p OutUseUnscaledOp will contain whether @p MI should be
/// turned into an unscaled operator, whose opcode is in @p OutUnscaledOp.
/// If set, @p EmittableOffset contains the amount that can be set in @p MI
/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
/// is a legal offset.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset,
                              bool *OutUseUnscaledOp = nullptr,
                              unsigned *OutUnscaledOp = nullptr,
                              int64_t *EmittableOffset = nullptr);
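// Typical use (a sketch; MI and Offset assumed in hand): probe how much of
// the offset the instruction can absorb directly:
//   bool UseUnscaledOp = false;
//   unsigned UnscaledOp = 0;
//   int64_t Emittable = 0;
//   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
//                                          &UnscaledOp, &Emittable);
//   if (Status & AArch64FrameOffsetCanUpdate)
//     ; // Emittable is the legal part (encoded with UnscaledOp if
//       // UseUnscaledOp); Offset now holds only the leftover amount.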

static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; }

static inline bool isCondBranchOpcode(int Opc) {
  switch (Opc) {
  case AArch64::Bcc:
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    return true;
  default:
    return false;
  }
}

static inline bool isIndirectBranchOpcode(int Opc) {
  switch (Opc) {
  case AArch64::BR:
  case AArch64::BRAA:
  case AArch64::BRAB:
  case AArch64::BRAAZ:
  case AArch64::BRABZ:
    return true;
  }
  return false;
}

static inline bool isPTrueOpcode(unsigned Opc) {
  switch (Opc) {
  case AArch64::PTRUE_B:
  case AArch64::PTRUE_H:
  case AArch64::PTRUE_S:
  case AArch64::PTRUE_D:
    return true;
  default:
    return false;
  }
}

/// Return opcode to be used for indirect calls.
unsigned getBLRCallOpcode(const MachineFunction &MF);

/// Return XPAC opcode to be used for a ptrauth strip using the given key.
static inline unsigned getXPACOpcodeForKey(AArch64PACKey::ID K) {
  using namespace AArch64PACKey;
  switch (K) {
  case IA: case IB: return AArch64::XPACI;
  case DA: case DB: return AArch64::XPACD;
  }
  llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}

/// Return AUT opcode to be used for a ptrauth auth using the given key, or its
/// AUT*Z variant that doesn't take a discriminator operand, using zero instead.
static inline unsigned getAUTOpcodeForKey(AArch64PACKey::ID K, bool Zero) {
  using namespace AArch64PACKey;
  switch (K) {
  case IA: return Zero ? AArch64::AUTIZA : AArch64::AUTIA;
  case IB: return Zero ? AArch64::AUTIZB : AArch64::AUTIB;
  case DA: return Zero ? AArch64::AUTDZA : AArch64::AUTDA;
  case DB: return Zero ? AArch64::AUTDZB : AArch64::AUTDB;
  }
  llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}

/// Return PAC opcode to be used for a ptrauth sign using the given key, or its
/// PAC*Z variant that doesn't take a discriminator operand, using zero instead.
static inline unsigned getPACOpcodeForKey(AArch64PACKey::ID K, bool Zero) {
  using namespace AArch64PACKey;
  switch (K) {
  case IA: return Zero ? AArch64::PACIZA : AArch64::PACIA;
  case IB: return Zero ? AArch64::PACIZB : AArch64::PACIB;
  case DA: return Zero ? AArch64::PACDZA : AArch64::PACDA;
  case DB: return Zero ? AArch64::PACDZB : AArch64::PACDB;
  }
  llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}

// struct TSFlags {
#define TSFLAG_ELEMENT_SIZE_TYPE(X)      (X)        // 3-bits
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3)  // 4-bits
#define TSFLAG_FALSE_LANE_TYPE(X)       ((X) << 7)  // 2-bits
#define TSFLAG_INSTR_FLAGS(X)           ((X) << 9)  // 2-bits
#define TSFLAG_SME_MATRIX_TYPE(X)       ((X) << 11) // 3-bits
// }

namespace AArch64 {

enum ElementSizeType {
  ElementSizeMask = TSFLAG_ELEMENT_SIZE_TYPE(0x7),
  ElementSizeNone = TSFLAG_ELEMENT_SIZE_TYPE(0x0),
  ElementSizeB    = TSFLAG_ELEMENT_SIZE_TYPE(0x1),
  ElementSizeH    = TSFLAG_ELEMENT_SIZE_TYPE(0x2),
  ElementSizeS    = TSFLAG_ELEMENT_SIZE_TYPE(0x3),
  ElementSizeD    = TSFLAG_ELEMENT_SIZE_TYPE(0x4),
};
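// For example (a sketch mirroring getElementSizeForOpcode above; Desc is
// assumed to be the instruction's MCInstrDesc): the element size is recovered
// by masking the TableGen'erated TSFlags value:
//   uint64_t ElemSize = Desc.TSFlags & AArch64::ElementSizeMask;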

enum DestructiveInstType {
  DestructiveInstTypeMask       = TSFLAG_DESTRUCTIVE_INST_TYPE(0xf),
  NotDestructive                = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
  DestructiveOther              = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
  DestructiveUnary              = TSFLAG_DESTRUCTIVE_INST_TYPE(0x2),
  DestructiveBinaryImm          = TSFLAG_DESTRUCTIVE_INST_TYPE(0x3),
  DestructiveBinaryShImmUnpred  = TSFLAG_DESTRUCTIVE_INST_TYPE(0x4),
  DestructiveBinary             = TSFLAG_DESTRUCTIVE_INST_TYPE(0x5),
  DestructiveBinaryComm         = TSFLAG_DESTRUCTIVE_INST_TYPE(0x6),
  DestructiveBinaryCommWithRev  = TSFLAG_DESTRUCTIVE_INST_TYPE(0x7),
  DestructiveTernaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x8),
  DestructiveUnaryPassthru      = TSFLAG_DESTRUCTIVE_INST_TYPE(0x9),
};

enum FalseLaneType {
  FalseLanesMask  = TSFLAG_FALSE_LANE_TYPE(0x3),
  FalseLanesZero  = TSFLAG_FALSE_LANE_TYPE(0x1),
  FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2),
};

// NOTE: This is a bit field.
static const uint64_t InstrFlagIsWhile     = TSFLAG_INSTR_FLAGS(0x1);
static const uint64_t InstrFlagIsPTestLike = TSFLAG_INSTR_FLAGS(0x2);

enum SMEMatrixType {
  SMEMatrixTypeMask = TSFLAG_SME_MATRIX_TYPE(0x7),
  SMEMatrixNone     = TSFLAG_SME_MATRIX_TYPE(0x0),
  SMEMatrixTileB    = TSFLAG_SME_MATRIX_TYPE(0x1),
  SMEMatrixTileH    = TSFLAG_SME_MATRIX_TYPE(0x2),
  SMEMatrixTileS    = TSFLAG_SME_MATRIX_TYPE(0x3),
  SMEMatrixTileD    = TSFLAG_SME_MATRIX_TYPE(0x4),
  SMEMatrixTileQ    = TSFLAG_SME_MATRIX_TYPE(0x5),
  SMEMatrixArray    = TSFLAG_SME_MATRIX_TYPE(0x6),
};

#undef TSFLAG_ELEMENT_SIZE_TYPE
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
#undef TSFLAG_FALSE_LANE_TYPE
#undef TSFLAG_INSTR_FLAGS
#undef TSFLAG_SME_MATRIX_TYPE

int getSVEPseudoMap(uint16_t Opcode);
int getSVERevInstr(uint16_t Opcode);
int getSVENonRevInstr(uint16_t Opcode);

int getSMEPseudoMap(uint16_t Opcode);

} // end namespace AArch64

} // end namespace llvm

#endif