//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H

#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/TypeSize.h"
#include <optional>

#define GET_INSTRINFO_HEADER
#include "AArch64GenInstrInfo.inc"

namespace llvm {

class AArch64Subtarget;

static const MachineMemOperand::Flags MOSuppressPair =
    MachineMemOperand::MOTargetFlag1;
static const MachineMemOperand::Flags MOStridedAccess =
    MachineMemOperand::MOTargetFlag2;
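
// A usage sketch (illustrative only, not the sole query pattern): these
// target flags live on an instruction's MachineMemOperand, e.g.
//   bool Suppressed = MI.hasOneMemOperand() &&
//                     ((*MI.memoperands_begin())->getFlags() & MOSuppressPair);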

#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"

// AArch64 MachineCombiner patterns
enum AArch64MachineCombinerPattern : unsigned {
  // These are patterns used to reduce the length of the dependence chain.
  SUBADD_OP1 = MachineCombinerPattern::TARGET_PATTERN_START,
  SUBADD_OP2,

  // These are multiply-add patterns matched by the AArch64 machine combiner.
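  // The OP1/OP2 suffix records which source operand of the add/sub is the
  // multiply; e.g. MULADDW_OP1 is intended to cover a 32-bit add whose first
  // operand is produced by a multiply, letting the combiner fuse the pair
  // into a single multiply-add.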
  MULADDW_OP1,
  MULADDW_OP2,
  MULSUBW_OP1,
  MULSUBW_OP2,
  MULADDWI_OP1,
  MULSUBWI_OP1,
  MULADDX_OP1,
  MULADDX_OP2,
  MULSUBX_OP1,
  MULSUBX_OP2,
  MULADDXI_OP1,
  MULSUBXI_OP1,
  // NEON integer vectors
  MULADDv8i8_OP1,
  MULADDv8i8_OP2,
  MULADDv16i8_OP1,
  MULADDv16i8_OP2,
  MULADDv4i16_OP1,
  MULADDv4i16_OP2,
  MULADDv8i16_OP1,
  MULADDv8i16_OP2,
  MULADDv2i32_OP1,
  MULADDv2i32_OP2,
  MULADDv4i32_OP1,
  MULADDv4i32_OP2,

  MULSUBv8i8_OP1,
  MULSUBv8i8_OP2,
  MULSUBv16i8_OP1,
  MULSUBv16i8_OP2,
  MULSUBv4i16_OP1,
  MULSUBv4i16_OP2,
  MULSUBv8i16_OP1,
  MULSUBv8i16_OP2,
  MULSUBv2i32_OP1,
  MULSUBv2i32_OP2,
  MULSUBv4i32_OP1,
  MULSUBv4i32_OP2,

  MULADDv4i16_indexed_OP1,
  MULADDv4i16_indexed_OP2,
  MULADDv8i16_indexed_OP1,
  MULADDv8i16_indexed_OP2,
  MULADDv2i32_indexed_OP1,
  MULADDv2i32_indexed_OP2,
  MULADDv4i32_indexed_OP1,
  MULADDv4i32_indexed_OP2,

  MULSUBv4i16_indexed_OP1,
  MULSUBv4i16_indexed_OP2,
  MULSUBv8i16_indexed_OP1,
  MULSUBv8i16_indexed_OP2,
  MULSUBv2i32_indexed_OP1,
  MULSUBv2i32_indexed_OP2,
  MULSUBv4i32_indexed_OP1,
  MULSUBv4i32_indexed_OP2,

  // Floating Point
  FMULADDH_OP1,
  FMULADDH_OP2,
  FMULSUBH_OP1,
  FMULSUBH_OP2,
  FMULADDS_OP1,
  FMULADDS_OP2,
  FMULSUBS_OP1,
  FMULSUBS_OP2,
  FMULADDD_OP1,
  FMULADDD_OP2,
  FMULSUBD_OP1,
  FMULSUBD_OP2,
  FNMULSUBH_OP1,
  FNMULSUBS_OP1,
  FNMULSUBD_OP1,
  FMLAv1i32_indexed_OP1,
  FMLAv1i32_indexed_OP2,
  FMLAv1i64_indexed_OP1,
  FMLAv1i64_indexed_OP2,
  FMLAv4f16_OP1,
  FMLAv4f16_OP2,
  FMLAv8f16_OP1,
  FMLAv8f16_OP2,
  FMLAv2f32_OP2,
  FMLAv2f32_OP1,
  FMLAv2f64_OP1,
  FMLAv2f64_OP2,
  FMLAv4i16_indexed_OP1,
  FMLAv4i16_indexed_OP2,
  FMLAv8i16_indexed_OP1,
  FMLAv8i16_indexed_OP2,
  FMLAv2i32_indexed_OP1,
  FMLAv2i32_indexed_OP2,
  FMLAv2i64_indexed_OP1,
  FMLAv2i64_indexed_OP2,
  FMLAv4f32_OP1,
  FMLAv4f32_OP2,
  FMLAv4i32_indexed_OP1,
  FMLAv4i32_indexed_OP2,
  FMLSv1i32_indexed_OP2,
  FMLSv1i64_indexed_OP2,
  FMLSv4f16_OP1,
  FMLSv4f16_OP2,
  FMLSv8f16_OP1,
  FMLSv8f16_OP2,
  FMLSv2f32_OP1,
  FMLSv2f32_OP2,
  FMLSv2f64_OP1,
  FMLSv2f64_OP2,
  FMLSv4i16_indexed_OP1,
  FMLSv4i16_indexed_OP2,
  FMLSv8i16_indexed_OP1,
  FMLSv8i16_indexed_OP2,
  FMLSv2i32_indexed_OP1,
  FMLSv2i32_indexed_OP2,
  FMLSv2i64_indexed_OP1,
  FMLSv2i64_indexed_OP2,
  FMLSv4f32_OP1,
  FMLSv4f32_OP2,
  FMLSv4i32_indexed_OP1,
  FMLSv4i32_indexed_OP2,

  FMULv2i32_indexed_OP1,
  FMULv2i32_indexed_OP2,
  FMULv2i64_indexed_OP1,
  FMULv2i64_indexed_OP2,
  FMULv4i16_indexed_OP1,
  FMULv4i16_indexed_OP2,
  FMULv4i32_indexed_OP1,
  FMULv4i32_indexed_OP2,
  FMULv8i16_indexed_OP1,
  FMULv8i16_indexed_OP2,

  FNMADD,
};

class AArch64InstrInfo final : public AArch64GenInstrInfo {
  const AArch64RegisterInfo RI;
  const AArch64Subtarget &Subtarget;

public:
  explicit AArch64InstrInfo(const AArch64Subtarget &STI);

  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
  /// such, whenever a client has an instance of instruction info, it should
  /// always be able to get register info as well (through this method).
  const AArch64RegisterInfo &getRegisterInfo() const { return RI; }

  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool isAsCheapAsAMove(const MachineInstr &MI) const override;

  bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg,
                             Register &DstReg, unsigned &SubIdx) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  Register isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  Register isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  /// Does this instruction set its full destination register to zero?
  static bool isGPRZero(const MachineInstr &MI);

  /// Does this instruction rename a GPR without modifying bits?
  static bool isGPRCopy(const MachineInstr &MI);

  /// Does this instruction rename an FPR without modifying bits?
  static bool isFPRCopy(const MachineInstr &MI);

  /// Return true if pairing the given load or store is hinted to be
  /// unprofitable.
  static bool isLdStPairSuppressed(const MachineInstr &MI);

  /// Return true if the given load or store is a strided memory access.
  static bool isStridedAccess(const MachineInstr &MI);

  /// Return true if it has an unscaled load/store offset.
  static bool hasUnscaledLdStOffset(unsigned Opc);
  static bool hasUnscaledLdStOffset(MachineInstr &MI) {
    return hasUnscaledLdStOffset(MI.getOpcode());
  }

  /// Returns the unscaled load/store for the scaled load/store opcode,
  /// if there is a corresponding unscaled variant available.
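  /// (For example, the scaled AArch64::LDRXui is expected to map to the
  /// unscaled AArch64::LDURXi.)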
  static std::optional<unsigned> getUnscaledLdSt(unsigned Opc);

  /// Scaling factor for (scaled or unscaled) load or store.
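  /// (E.g. 1 for LDRBBui, 4 for LDRWui, 8 for LDRXui, 16 for LDRQui.)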
  static int getMemScale(unsigned Opc);
  static int getMemScale(const MachineInstr &MI) {
    return getMemScale(MI.getOpcode());
  }

  /// Returns whether the instruction is a pre-indexed load.
  static bool isPreLd(const MachineInstr &MI);

  /// Returns whether the instruction is a pre-indexed store.
  static bool isPreSt(const MachineInstr &MI);

  /// Returns whether the instruction is a pre-indexed load/store.
  static bool isPreLdSt(const MachineInstr &MI);

  /// Returns whether the instruction is a paired load/store.
  static bool isPairedLdSt(const MachineInstr &MI);

  /// Returns the base register operand of a load/store.
  static const MachineOperand &getLdStBaseOp(const MachineInstr &MI);

  /// Returns the immediate offset operand of a load/store.
  static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI);

  /// Returns whether the physical register is FP or NEON.
  static bool isFpOrNEON(Register Reg);

  /// Returns the shift amount operand of a load/store.
  static const MachineOperand &getLdStAmountOp(const MachineInstr &MI);

  /// Returns whether the instruction is FP or NEON.
  static bool isFpOrNEON(const MachineInstr &MI);

  /// Returns whether the instruction is in H form (16 bit operands)
  static bool isHForm(const MachineInstr &MI);

  /// Returns whether the instruction is in Q form (128 bit operands)
  static bool isQForm(const MachineInstr &MI);

  /// Returns whether the instruction can be compatible with non-zero BTYPE.
  static bool hasBTISemantics(const MachineInstr &MI);

  /// Returns the index for the immediate for a given instruction.
  static unsigned getLoadStoreImmIdx(unsigned Opc);

  /// Return true if the given load or store may be paired with another.
  static bool isPairableLdStInst(const MachineInstr &MI);

  /// Returns true if MI is one of the TCRETURN* instructions.
  static bool isTailCallReturnInst(const MachineInstr &MI);

  /// Return the opcode that sets flags when possible. The caller is
  /// responsible for ensuring the opc has a flag-setting equivalent.
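  /// (For example, AArch64::ADDWri is expected to map to AArch64::ADDSWri.)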
  static unsigned convertToFlagSettingOpc(unsigned Opc);

  /// Return true if this is a load/store that can be potentially paired/merged.
  bool isCandidateToMergeOrPair(const MachineInstr &MI) const;

  /// Hint that pairing the given load or store is unprofitable.
  static void suppressLdStPair(MachineInstr &MI);

  std::optional<ExtAddrMode>
  getAddrModeFromMemoryOp(const MachineInstr &MemI,
                          const TargetRegisterInfo *TRI) const override;

  bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
                           const MachineInstr &AddrI,
                           ExtAddrMode &AM) const override;

  MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
                                 const ExtAddrMode &AM) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
      int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
      const TargetRegisterInfo *TRI) const override;

  /// If \p OffsetIsScalable is set to 'true', the offset is scaled by `vscale`.
  /// This is true for some SVE instructions like ldr/str that have a
  /// 'reg + imm' addressing mode where the immediate is an index to the
  /// scalable vector located at 'reg + imm * vscale x #bytes'.
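  /// For example, an SVE vector load with immediate 2 addresses the vector at
  /// 'reg + 2 * vscale * 16' bytes, i.e. two whole vector registers past 'reg'.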
  bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
                                    const MachineOperand *&BaseOp,
                                    int64_t &Offset, bool &OffsetIsScalable,
                                    TypeSize &Width,
                                    const TargetRegisterInfo *TRI) const;

  /// Return the immediate offset of the base register in a load/store \p LdSt.
  MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const;

  /// Returns true if opcode \p Opc is a memory operation. If it is, set
  /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
  ///
  /// For unscaled instructions, \p Scale is set to 1. All values are in bytes.
  /// MinOffset/MaxOffset are the un-scaled limits of the immediate in the
  /// instruction; the actual offset limit is [MinOffset*Scale,
  /// MaxOffset*Scale].
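  ///
  /// As an illustration (the implementation holds the authoritative table):
  /// the scaled AArch64::LDRXui has Scale = Width = 8 with an immediate range
  /// of [0, 4095], i.e. byte offsets [0, 32760], while the unscaled
  /// AArch64::LDURXi has Scale = 1 and a range of [-256, 255].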
  static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width,
                           int64_t &MinOffset, int64_t &MaxOffset);

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override;

  void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, MCRegister DestReg,
                        MCRegister SrcReg, bool KillSrc, unsigned Opcode,
                        llvm::ArrayRef<unsigned> Indices) const;
  void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                       const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                       bool KillSrc, unsigned Opcode, unsigned ZeroReg,
                       llvm::ArrayRef<unsigned> Indices) const;
  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                   const DebugLoc &DL, Register DestReg, Register SrcReg,
                   bool KillSrc, bool RenamableDest = false,
                   bool RenamableSrc = false) const override;

  void storeRegToStackSlot(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
      const TargetRegisterInfo *TRI, Register VReg,
      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

  void loadRegFromStackSlot(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
      Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
      const TargetRegisterInfo *TRI, Register VReg,
      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

  // This tells target independent code that it is okay to pass instructions
  // with subreg operands to foldMemoryOperandImpl.
  bool isSubregFoldable() const override { return true; }

  using TargetInstrInfo::foldMemoryOperandImpl;
  MachineInstr *
  foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                        ArrayRef<unsigned> Ops,
                        MachineBasicBlock::iterator InsertPt, int FrameIndex,
                        LiveIntervals *LIS = nullptr,
                        VirtRegMap *VRM = nullptr) const override;

  /// \returns true if a branch from an instruction with opcode \p BranchOpc
  /// is capable of jumping to a position \p BrOffset bytes away.
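  /// (For instance, an unconditional B reaches roughly +/-128 MiB, a Bcc
  /// +/-1 MiB, and a TBZ/TBNZ only +/-32 KiB.)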
  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;
  bool analyzeBranchPredicate(MachineBasicBlock &MBB,
                              MachineBranchPredicate &MBP,
                              bool AllowModify) const override;
  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;
  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
  analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;

  bool
  reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
  bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
                       Register, Register, Register, int &, int &,
                       int &) const override;
  void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                    const DebugLoc &DL, Register DstReg,
                    ArrayRef<MachineOperand> Cond, Register TrueReg,
                    Register FalseReg) const override;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  MCInst getNop() const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;

  /// analyzeCompare - For a comparison instruction, return the source registers
  /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
  /// Return true if the comparison instruction can be analyzed.
  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;
  /// optimizeCompareInstr - Convert the instruction supplying the argument to
  /// the comparison into one that sets the zero bit in the flags register.
  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;
  bool optimizeCondBranch(MachineInstr &MI) const override;

  CombinerObjective getCombinerObjective(unsigned Pattern) const override;
  /// Return true when a code sequence can improve throughput. It
  /// should be called only for instructions in loops.
  /// \param Pattern - combiner pattern
  bool isThroughputPattern(unsigned Pattern) const override;
  /// Return true when there is potentially a faster code sequence
  /// for an instruction chain ending in ``Root``. All potential patterns are
  /// listed in the ``Patterns`` array.
  bool getMachineCombinerPatterns(MachineInstr &Root,
                                  SmallVectorImpl<unsigned> &Patterns,
                                  bool DoRegPressureReduce) const override;
  /// Return true when Inst is associative and commutative so that it can be
  /// reassociated. If Invert is true, then the inverse of Inst operation must
  /// be checked.
  bool isAssociativeAndCommutative(const MachineInstr &Inst,
                                   bool Invert) const override;

  /// Returns true if \p Opcode is an instruction which performs accumulation
  /// into a destination register.
  bool isAccumulationOpcode(unsigned Opcode) const override;

  /// Returns an opcode which defines the accumulator used by \p Opcode.
  unsigned getAccumulationStartOpcode(unsigned Opcode) const override;

  unsigned
  getReduceOpcodeForAccumulator(unsigned int AccumulatorOpCode) const override;

  /// When getMachineCombinerPatterns() finds patterns, this function
  /// generates the instructions that could replace the original code
  /// sequence.
  void genAlternativeCodeSequence(
      MachineInstr &Root, unsigned Pattern,
      SmallVectorImpl<MachineInstr *> &InsInstrs,
      SmallVectorImpl<MachineInstr *> &DelInstrs,
      DenseMap<Register, unsigned> &InstrIdxForVirtReg) const override;
  /// AArch64 supports MachineCombiner.
  bool useMachineCombiner() const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;
  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;
  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableBitmaskMachineOperandTargetFlags() const override;
  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;

  bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
                                   bool OutlineFromLinkOnceODRs) const override;
  std::optional<std::unique_ptr<outliner::OutlinedFunction>>
  getOutliningCandidateInfo(
      const MachineModuleInfo &MMI,
      std::vector<outliner::Candidate> &RepeatedSequenceLocs,
      unsigned MinRepeats) const override;
  void mergeOutliningCandidateAttributes(
      Function &F, std::vector<outliner::Candidate> &Candidates) const override;
  outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI,
                                           MachineBasicBlock::iterator &MIT,
                                           unsigned Flags) const override;
  SmallVector<
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
  getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override;
  void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
                          const outliner::OutlinedFunction &OF) const override;
  MachineBasicBlock::iterator
  insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator &It, MachineFunction &MF,
                     outliner::Candidate &C) const override;
  bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;

  void buildClearRegister(Register Reg, MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator Iter, DebugLoc &DL,
                          bool AllowSideEffects = true) const override;

  /// Returns the vector element size (B, H, S or D) of an SVE opcode.
  uint64_t getElementSizeForOpcode(unsigned Opc) const;
  /// Returns true if the opcode is for an SVE instruction that sets the
  /// condition codes as if its results had been fed to a PTEST instruction
  /// along with the same general predicate.
  bool isPTestLikeOpcode(unsigned Opc) const;
  /// Returns true if the opcode is for an SVE WHILE## instruction.
  bool isWhileOpcode(unsigned Opc) const;
  /// Returns true if the instruction has a shift by immediate that can be
  /// executed in one cycle less.
  static bool isFalkorShiftExtFast(const MachineInstr &MI);
  /// Return true if the instruction is an SEH instruction used for unwinding
  /// on Windows.
  static bool isSEHInstruction(const MachineInstr &MI);

  std::optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
                                           Register Reg) const override;

  bool isFunctionSafeToSplit(const MachineFunction &MF) const override;

  bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override;

  std::optional<ParamLoadedValue>
  describeLoadedValue(const MachineInstr &MI, Register Reg) const override;

  unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;

  bool isExtendLikelyToBeFolded(MachineInstr &ExtMI,
                                MachineRegisterInfo &MRI) const override;

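  /// Split \p Offset into a fixed byte part plus counts of SVE predicate and
  /// data vectors. As an illustration (predicate vectors occupy 2 scalable
  /// bytes, data vectors 16): a purely scalable offset of 16 scalable bytes is
  /// expected to decompose into NumDataVectors = 1 and NumPredicateVectors = 0.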
  static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
                                                  int64_t &NumBytes,
                                                  int64_t &NumPredicateVectors,
                                                  int64_t &NumDataVectors);
  static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset,
                                                  int64_t &ByteSized,
                                                  int64_t &VGSized);

  // Return true if an address of the form BaseReg + Scale * ScaledReg + Offset
  // can be used for a load/store of NumBytes. BaseReg is always present and
  // implicit.
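  // E.g. whether 'X0 + (X1 << 3)' can address an 8-byte load roughly
  // corresponds to isLegalAddressingMode(/*NumBytes=*/8, /*Offset=*/0,
  // /*Scale=*/8).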
  bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
                             unsigned Scale) const;

  // Decrement the SP, issuing probes along the way. `TargetReg` is the new top
  // of the stack. `FrameSetup` is true if the allocation is part of
  // constructing the activation frame of a function.
  MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI,
                                               Register TargetReg,
                                               bool FrameSetup) const;

#define GET_INSTRINFO_HELPER_DECLS
#include "AArch64GenInstrInfo.inc"

protected:
  /// If the specific machine instruction is an instruction that moves/copies
  /// a value from one register to another, return the destination and source
  /// registers as machine operands.
  std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;
  std::optional<DestSourcePair>
  isCopyLikeInstrImpl(const MachineInstr &MI) const override;

private:
  unsigned getInstBundleLength(const MachineInstr &MI) const;

  /// Sets the offsets on outlined instructions in \p MBB which use SP
  /// so that they will be valid post-outlining.
  ///
  /// \param MBB A \p MachineBasicBlock in an outlined function.
  void fixupPostOutline(MachineBasicBlock &MBB) const;

  void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL,
                             MachineBasicBlock *TBB,
                             ArrayRef<MachineOperand> Cond) const;
  bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg,
                           const MachineRegisterInfo &MRI) const;
  bool removeCmpToZeroOrOne(MachineInstr &CmpInstr, unsigned SrcReg,
                            int CmpValue, const MachineRegisterInfo &MRI) const;

  /// Returns an unused general-purpose register which can be used for
  /// constructing an outlined call if one exists. Returns 0 otherwise.
  Register findRegisterToSaveLRTo(outliner::Candidate &C) const;

  /// Remove a ptest of a predicate-generating operation that already sets, or
  /// can be made to set, the condition codes in an identical manner.
  bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg,
                          unsigned PredReg,
                          const MachineRegisterInfo *MRI) const;
  std::optional<unsigned>
  canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
                      MachineInstr *Pred, const MachineRegisterInfo *MRI) const;

  /// verifyInstruction - Perform target specific instruction verification.
  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;
};

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

/// \returns the condition flags used after \p CmpInstr in its MachineBB if the
/// NZCV flags are not live into any successor of the block containing both
/// \p CmpInstr and \p MI; \returns std::nullopt otherwise.
///
/// Collects the instructions using those flags in \p CCUseInstrs if provided.
std::optional<UsedNZCV>
examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
                 const TargetRegisterInfo &TRI,
                 SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr);

/// Return true if there is an instruction /after/ \p DefMI and before \p UseMI
/// which either reads or clobbers NZCV.
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
                                     const MachineInstr &UseMI,
                                     const TargetRegisterInfo *TRI);

MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg,
                              unsigned Reg, const StackOffset &Offset,
                              bool LastAdjustmentWasScalable = true);
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg,
                                 const StackOffset &OffsetFromDefCFA);

/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
/// plus Offset. This is intended to be used from within the prolog/epilog
/// insertion (PEI) pass, where a virtual scratch register may be allocated
/// if necessary, to be replaced by the scavenger at the end of PEI.
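///
/// A minimal usage sketch (the register choice is illustrative only): to set
/// x9 to 'SP + 16', one could emit
/// \code
///   emitFrameOffset(MBB, MBBI, DL, AArch64::X9, AArch64::SP,
///                   StackOffset::getFixed(16), TII);
/// \endcode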
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                     StackOffset Offset, const TargetInstrInfo *TII,
                     MachineInstr::MIFlag = MachineInstr::NoFlags,
                     bool SetNZCV = false, bool NeedsWinCFI = false,
                     bool *HasWinCFI = nullptr, bool EmitCFAOffset = false,
                     StackOffset InitialOffset = {},
                     unsigned FrameReg = AArch64::SP);

/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
/// FP. Return false if the offset could not be handled directly in MI, and
/// return the left-over portion by reference.
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                              unsigned FrameReg, StackOffset &Offset,
                              const AArch64InstrInfo *TII);

/// Use to report the frame offset status in isAArch64FrameOffsetLegal.
enum AArch64FrameOffsetStatus {
  AArch64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
  AArch64FrameOffsetIsLegal = 0x1,      ///< Offset is legal.
  AArch64FrameOffsetCanUpdate = 0x2     ///< Offset can apply, at least partly.
};

/// Check if the @p Offset is a valid frame offset for @p MI.
/// The returned value reports the validity of the frame offset for @p MI.
/// It uses the values defined by AArch64FrameOffsetStatus for that.
/// If result == AArch64FrameOffsetCannotUpdate, @p MI cannot be updated to
/// use an offset.
/// If result & AArch64FrameOffsetIsLegal, @p Offset can be completely
/// rewritten in @p MI.
/// If result & AArch64FrameOffsetCanUpdate, @p Offset contains the
/// amount that lies beyond the limit of the legal offset.
/// If set, @p OutUseUnscaledOp will indicate whether @p MI should be turned
/// into an unscaled operation, whose opcode is returned in @p OutUnscaledOp.
/// If set, @p EmittableOffset contains the amount that can be set in @p MI
/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
/// is a legal offset.
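///
/// Callers typically test the returned bitmask, e.g. (a sketch):
/// \code
///   int Status = isAArch64FrameOffsetLegal(MI, Offset);
///   if (Status & AArch64FrameOffsetIsLegal) {
///     // Offset can be rewritten into MI as-is.
///   }
/// \endcode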
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset,
                              bool *OutUseUnscaledOp = nullptr,
                              unsigned *OutUnscaledOp = nullptr,
                              int64_t *EmittableOffset = nullptr);

static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; }

static inline bool isCondBranchOpcode(int Opc) {
  switch (Opc) {
  case AArch64::Bcc:
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
  case AArch64::CBWPri:
  case AArch64::CBXPri:
  case AArch64::CBWPrr:
  case AArch64::CBXPrr:
    return true;
  default:
    return false;
  }
}

static inline bool isIndirectBranchOpcode(int Opc) {
  switch (Opc) {
  case AArch64::BR:
  case AArch64::BRAA:
  case AArch64::BRAB:
  case AArch64::BRAAZ:
  case AArch64::BRABZ:
    return true;
  }
  return false;
}

static inline bool isIndirectCallOpcode(unsigned Opc) {
  switch (Opc) {
  case AArch64::BLR:
  case AArch64::BLRAA:
  case AArch64::BLRAB:
  case AArch64::BLRAAZ:
  case AArch64::BLRABZ:
    return true;
  default:
    return false;
  }
}

static inline bool isPTrueOpcode(unsigned Opc) {
  switch (Opc) {
  case AArch64::PTRUE_B:
  case AArch64::PTRUE_H:
  case AArch64::PTRUE_S:
  case AArch64::PTRUE_D:
    return true;
  default:
    return false;
  }
}

/// Return opcode to be used for indirect calls.
unsigned getBLRCallOpcode(const MachineFunction &MF);

/// Return XPAC opcode to be used for a ptrauth strip using the given key.
static inline unsigned getXPACOpcodeForKey(AArch64PACKey::ID K) {
  using namespace AArch64PACKey;
  switch (K) {
  case IA: case IB: return AArch64::XPACI;
  case DA: case DB: return AArch64::XPACD;
  }
  llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}

/// Return AUT opcode to be used for a ptrauth auth using the given key, or its
/// AUT*Z variant that doesn't take a discriminator operand, using zero instead.
static inline unsigned getAUTOpcodeForKey(AArch64PACKey::ID K, bool Zero) {
  using namespace AArch64PACKey;
  switch (K) {
  case IA: return Zero ? AArch64::AUTIZA : AArch64::AUTIA;
  case IB: return Zero ? AArch64::AUTIZB : AArch64::AUTIB;
  case DA: return Zero ? AArch64::AUTDZA : AArch64::AUTDA;
  case DB: return Zero ? AArch64::AUTDZB : AArch64::AUTDB;
  }
  llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}

/// Return PAC opcode to be used for a ptrauth sign using the given key, or its
/// PAC*Z variant that doesn't take a discriminator operand, using zero instead.
static inline unsigned getPACOpcodeForKey(AArch64PACKey::ID K, bool Zero) {
  using namespace AArch64PACKey;
  switch (K) {
  case IA: return Zero ? AArch64::PACIZA : AArch64::PACIA;
  case IB: return Zero ? AArch64::PACIZB : AArch64::PACIB;
  case DA: return Zero ? AArch64::PACDZA : AArch64::PACDA;
  case DB: return Zero ? AArch64::PACDZB : AArch64::PACDB;
  }
  llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}
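
// For instance, reading the tables above: getPACOpcodeForKey(AArch64PACKey::IA,
// /*Zero=*/false) yields AArch64::PACIA, and
// getAUTOpcodeForKey(AArch64PACKey::DA, /*Zero=*/true) yields AArch64::AUTDZA.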

// struct TSFlags {
#define TSFLAG_ELEMENT_SIZE_TYPE(X)      (X)        // 3-bits
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3)  // 4-bits
#define TSFLAG_FALSE_LANE_TYPE(X)       ((X) << 7)  // 2-bits
#define TSFLAG_INSTR_FLAGS(X)           ((X) << 9)  // 2-bits
#define TSFLAG_SME_MATRIX_TYPE(X)       ((X) << 11) // 3-bits
// }
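
// Worked example of the packing above: an instruction tagged with
// ElementSizeS (0x3, bits 0-2) and DestructiveBinary (0x5 << 3 = 0x28,
// bits 3-6) carries TSFlags of (0x3 | 0x28) = 0x2B in these fields.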

namespace AArch64 {

enum ElementSizeType {
  ElementSizeMask = TSFLAG_ELEMENT_SIZE_TYPE(0x7),
  ElementSizeNone = TSFLAG_ELEMENT_SIZE_TYPE(0x0),
  ElementSizeB = TSFLAG_ELEMENT_SIZE_TYPE(0x1),
  ElementSizeH = TSFLAG_ELEMENT_SIZE_TYPE(0x2),
  ElementSizeS = TSFLAG_ELEMENT_SIZE_TYPE(0x3),
  ElementSizeD = TSFLAG_ELEMENT_SIZE_TYPE(0x4),
};

enum DestructiveInstType {
  DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0xf),
  NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
  DestructiveOther = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
  DestructiveUnary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x2),
  DestructiveBinaryImm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x3),
  DestructiveBinaryShImmUnpred = TSFLAG_DESTRUCTIVE_INST_TYPE(0x4),
  DestructiveBinary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x5),
  DestructiveBinaryComm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x6),
  DestructiveBinaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x7),
  DestructiveTernaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x8),
  DestructiveUnaryPassthru = TSFLAG_DESTRUCTIVE_INST_TYPE(0x9),
};

enum FalseLaneType {
  FalseLanesMask = TSFLAG_FALSE_LANE_TYPE(0x3),
  FalseLanesZero = TSFLAG_FALSE_LANE_TYPE(0x1),
  FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2),
};

// NOTE: This is a bit field.
static const uint64_t InstrFlagIsWhile = TSFLAG_INSTR_FLAGS(0x1);
static const uint64_t InstrFlagIsPTestLike = TSFLAG_INSTR_FLAGS(0x2);

enum SMEMatrixType {
  SMEMatrixTypeMask = TSFLAG_SME_MATRIX_TYPE(0x7),
  SMEMatrixNone = TSFLAG_SME_MATRIX_TYPE(0x0),
  SMEMatrixTileB = TSFLAG_SME_MATRIX_TYPE(0x1),
  SMEMatrixTileH = TSFLAG_SME_MATRIX_TYPE(0x2),
  SMEMatrixTileS = TSFLAG_SME_MATRIX_TYPE(0x3),
  SMEMatrixTileD = TSFLAG_SME_MATRIX_TYPE(0x4),
  SMEMatrixTileQ = TSFLAG_SME_MATRIX_TYPE(0x5),
  SMEMatrixArray = TSFLAG_SME_MATRIX_TYPE(0x6),
};

#undef TSFLAG_ELEMENT_SIZE_TYPE
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
#undef TSFLAG_FALSE_LANE_TYPE
#undef TSFLAG_INSTR_FLAGS
#undef TSFLAG_SME_MATRIX_TYPE

int getSVEPseudoMap(uint16_t Opcode);
int getSVERevInstr(uint16_t Opcode);
int getSVENonRevInstr(uint16_t Opcode);

int getSMEPseudoMap(uint16_t Opcode);
} // end namespace AArch64

} // end namespace llvm

#endif