1//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Interface definition for SIInstrInfo.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
15#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
16
17#include "AMDGPUMIRFormatter.h"
18#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19#include "SIRegisterInfo.h"
20#include "Utils/AMDGPUBaseInfo.h"
21#include "llvm/ADT/SetVector.h"
22#include "llvm/CodeGen/TargetInstrInfo.h"
23#include "llvm/CodeGen/TargetSchedule.h"
24
25#define GET_INSTRINFO_HEADER
26#include "AMDGPUGenInstrInfo.inc"
27
28namespace llvm {
29
30class APInt;
31class GCNSubtarget;
32class LiveVariables;
33class MachineDominatorTree;
34class MachineRegisterInfo;
35class RegScavenger;
36class SIMachineFunctionInfo;
37class TargetRegisterClass;
38class ScheduleHazardRecognizer;
39
/// Default limit, in dwords, used when clustering memory operations.
constexpr unsigned DefaultMemoryClusterDWordsLimit = 8;

/// Mark the MMO of a uniform load if there are no potentially clobbering stores
/// on any path from the start of an entry function to this load.
static const MachineMemOperand::Flags MONoClobber =
    MachineMemOperand::MOTargetFlag1;

/// Mark the MMO of a load as the last use.
static const MachineMemOperand::Flags MOLastUse =
    MachineMemOperand::MOTargetFlag2;

/// Mark the MMO of cooperative load/store atomics.
static const MachineMemOperand::Flags MOCooperative =
    MachineMemOperand::MOTargetFlag3;

/// Mark the MMO of accesses to memory locations that are
/// never written to by other threads.
static const MachineMemOperand::Flags MOThreadPrivate =
    MachineMemOperand::MOTargetFlag4;
59
60/// Utility to store machine instructions worklist.
61struct SIInstrWorklist {
62 SIInstrWorklist() = default;
63
64 void insert(MachineInstr *MI);
65
66 MachineInstr *top() const {
67 const auto *iter = InstrList.begin();
68 return *iter;
69 }
70
71 void erase_top() {
72 const auto *iter = InstrList.begin();
73 InstrList.erase(I: iter);
74 }
75
76 bool empty() const { return InstrList.empty(); }
77
78 void clear() {
79 InstrList.clear();
80 DeferredList.clear();
81 }
82
83 bool isDeferred(MachineInstr *MI);
84
85 SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }
86
87private:
88 /// InstrList contains the MachineInstrs.
89 SetVector<MachineInstr *> InstrList;
90 /// Deferred instructions are specific MachineInstr
91 /// that will be added by insert method.
92 SetVector<MachineInstr *> DeferredList;
93};
94
95class SIInstrInfo final : public AMDGPUGenInstrInfo {
  // Opaque bundle of updates produced by convertToThreeAddressImpl.
  struct ThreeAddressUpdates;

private:
  const SIRegisterInfo RI;
  const GCNSubtarget &ST;
  TargetSchedModel SchedModel;
  // Lazily-created MIR formatter; mutable because it is created on demand
  // from const accessors.
  mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;

  // The inverse predicate should have the negative value.
  enum BranchPredicate {
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };

  using SetVectorType = SmallSetVector<MachineInstr *, 32>;

  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);

public:
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              const MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  MachineOperand buildExtractSubRegOrImm(
      MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
      const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
      unsigned SubIdx, const TargetRegisterClass *SubRC) const;
131
private:
  // Helpers used when lowering scalar (SALU) instructions to VALU
  // equivalents; definitions live in SIInstrInfo.cpp.
  bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
                   bool NeedInversion) const;

  bool invertSCCUse(MachineInstr *SCCDef) const;

  void swapOperands(MachineInstr &Inst) const;

  std::pair<bool, MachineBasicBlock *>
  moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void lowerScalarAbsDiff(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          unsigned Opcode) const;

  void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode, bool Swap = false) const;

  void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          MachineDominatorTree *MDT) const;

  void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst,
                             MachineDominatorTree *MDT) const;

  void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
                            MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
                            MachineInstr &Inst) const;
  void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
  void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode,
                               MachineDominatorTree *MDT = nullptr) const;
  void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
                      MachineInstr &Inst) const;

  // Worklist-seeding helpers for the VALU-lowering pass.
  void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
                                    SIInstrWorklist &Worklist) const;

  void addSCCDefUsersToVALUWorklist(const MachineOperand &Op,
                                    MachineInstr &SCCDefInst,
                                    SIInstrWorklist &Worklist,
                                    Register NewCond = Register()) const;
  void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
                                SIInstrWorklist &Worklist) const;

  const TargetRegisterClass *
  getDestEquivalentVGPRClass(const MachineInstr &Inst) const;

  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                    const MachineInstr &MIb) const;

  Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;

  // Used by verifyInstruction to validate COPYs; fills ErrInfo on failure.
  bool verifyCopy(const MachineInstr &MI, const MachineRegisterInfo &MRI,
                  StringRef &ErrInfo) const;

  bool resultDependsOnExec(const MachineInstr &MI) const;

  MachineInstr *convertToThreeAddressImpl(MachineInstr &MI,
                                          ThreeAddressUpdates &Updates) const;

protected:
  /// If the specific machine instruction is an instruction that moves/copies
  /// value from one register to another register return destination and source
  /// registers as machine operands.
  std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;

  bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0,
                           AMDGPU::OpName Src0OpName, MachineOperand &Src1,
                           AMDGPU::OpName Src1OpName) const;
  bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx,
                     unsigned toIdx) const;
  MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                       unsigned OpIdx0,
                                       unsigned OpIdx1) const override;
225
public:
  // Target-specific MachineOperand flag values; encodings must fit in MO_MASK.
  enum TargetOperandFlags {
    MO_MASK = 0xf,

    MO_NONE = 0,
    // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL = 1,
    // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    MO_GOTPCREL32 = 2,
    MO_GOTPCREL32_LO = 2,
    // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    MO_GOTPCREL32_HI = 3,
    // MO_GOTPCREL64 -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL64 = 4,
    // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    MO_REL32 = 5,
    MO_REL32_LO = 5,
    // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    MO_REL32_HI = 6,
    MO_REL64 = 7,

    MO_FAR_BRANCH_OFFSET = 8,

    MO_ABS32_LO = 9,
    MO_ABS32_HI = 10,
    MO_ABS64 = 11,
  };

  explicit SIInstrInfo(const GCNSubtarget &ST);

  /// \returns the register info for the owning subtarget.
  const SIRegisterInfo &getRegisterInfo() const {
    return RI;
  }

  /// \returns the subtarget this instruction info was created for.
  const GCNSubtarget &getSubtarget() const {
    return ST;
  }
263
  bool isReMaterializableImpl(const MachineInstr &MI) const override;

  bool isIgnorableUse(const MachineOperand &MO) const override;

  bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo,
                    MachineCycleInfo *CI) const override;

  bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
                               int64_t &Offset1) const override;

  bool isGlobalMemoryObject(const MachineInstr *MI) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &LdSt,
      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
      bool &OffsetIsScalable, LocationSize &Width,
      const TargetRegisterInfo *TRI) const final;

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override;

  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                               int64_t Offset1, unsigned NumLoads) const override;

  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   const DebugLoc &DL, Register DestReg, Register SrcReg,
                   bool KillSrc, bool RenamableDest = false,
                   bool RenamableSrc = false) const override;

  const TargetRegisterClass *getPreferredSelectRegClass(
    unsigned Size) const;

  // Emit a compare of SrcReg against the immediate Value, returning the
  // condition register; see definitions for the exact opcodes used.
  Register insertNE(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  Register insertEQ(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg,
                               int64_t &ImmVal) const override;

  std::optional<int64_t> getImmOrMaterializedImm(MachineOperand &Op) const;

  // Spill opcode selection for vector registers, keyed on register class,
  // spill size, and per-function info.
  unsigned getVectorRegSpillSaveOpcode(Register Reg,
                                       const TargetRegisterClass *RC,
                                       unsigned Size,
                                       const SIMachineFunctionInfo &MFI) const;
  unsigned
  getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC,
                                 unsigned Size,
                                 const SIMachineFunctionInfo &MFI) const;

  void storeRegToStackSlot(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
      bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

  void loadRegFromStackSlot(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
      int FrameIndex, const TargetRegisterClass *RC, Register VReg,
      unsigned SubReg = 0,
      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     Register DestReg, unsigned SubIdx,
                     const MachineInstr &Orig) const override;

  // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
  // instructions. Returns a pair of generated instructions.
  // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case IR needs to be in SSA form
  // and a REG_SEQUENCE is produced to define the original register.
  std::pair<MachineInstr*, MachineInstr*>
  expandMovDPP64(MachineInstr &MI) const;

  // Returns an opcode that can be used to move a value to a \p DstRC
  // register. If there is no hardware instruction that can store to \p
  // DstRC, then AMDGPU::COPY is returned.
  unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

  const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
                                                     unsigned EltSize,
                                                     bool IsSGPR) const;

  const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
                                             bool IsIndirectSrc) const;
  LLVM_READONLY
  int commuteOpcode(unsigned Opc) const;
361 LLVM_READONLY
362 inline int commuteOpcode(const MachineInstr &MI) const {
363 return commuteOpcode(Opc: MI.getOpcode());
364 }
365
  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const override;

  bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const;

  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  /// Return whether the block terminates with a divergent branch.
  /// Note this only works before lowering the pseudo control flow
  /// instructions.
  bool hasDivergentBranch(const MachineBasicBlock *MBB) const;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  // Shared implementation behind analyzeBranch; starts the scan at \p I.
  bool analyzeBranchImpl(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I,
                         MachineBasicBlock *&TBB,
                         MachineBasicBlock *&FBB,
                         SmallVectorImpl<MachineOperand> &Cond,
                         bool AllowModify) const;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;

  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;

  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  bool reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const override;

  bool canInsertSelect(const MachineBasicBlock &MBB,
                       ArrayRef<MachineOperand> Cond, Register DstReg,
                       Register TrueReg, Register FalseReg, int &CondCycles,
                       int &TrueCycles, int &FalseCycles) const override;

  void insertSelect(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register DstReg, ArrayRef<MachineOperand> Cond,
                    Register TrueReg, Register FalseReg) const override;

  void insertVectorSelect(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, const DebugLoc &DL,
                          Register DstReg, ArrayRef<MachineOperand> Cond,
                          Register TrueReg, Register FalseReg) const;

  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;

  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  static bool isFoldableCopy(const MachineInstr &MI);
  static unsigned getFoldableCopySrcIdx(const MachineInstr &MI);

  void removeModOperands(MachineInstr &MI) const;

  void mutateAndCleanupImplicit(MachineInstr &MI,
                                const MCInstrDesc &NewDesc) const;

  /// Return the extracted immediate value in a subregister use from a constant
  /// materialized in a super register.
  ///
  /// e.g. %imm = S_MOV_B64 K[0:63]
  ///      USE %imm.sub1
  /// This will return K[32:63]
  static std::optional<int64_t> extractSubregFromImm(int64_t ImmVal,
                                                     unsigned SubRegIndex);

  bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                     MachineRegisterInfo *MRI) const final;

  unsigned getMachineCSELookAheadLimit() const override { return 500; }

  MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                      LiveIntervals *LIS) const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;
464
  // Instruction-format predicates. Each comes in two forms: a static overload
  // testing a MachineInstr's descriptor flags, and a member overload testing
  // an opcode through this target's instruction table.
  static bool isSALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SALU;
  }

  bool isSALU(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SALU;
  }

  static bool isVALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VALU;
  }

  bool isVALU(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VALU;
  }

  // "Image" covers the MIMG, VSAMPLE, and VIMAGE encodings.
  static bool isImage(const MachineInstr &MI) {
    return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI);
  }

  bool isImage(uint32_t Opcode) const {
    return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode);
  }

  // VMEM covers buffer, image, and FLAT memory instructions.
  static bool isVMEM(const MachineInstr &MI) {
    return isMUBUF(MI) || isMTBUF(MI) || isImage(MI) || isFLAT(MI);
  }

  bool isVMEM(uint32_t Opcode) const {
    return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode) ||
           isFLAT(Opcode);
  }

  static bool isSOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
  }

  bool isSOP1(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP1;
  }

  static bool isSOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
  }

  bool isSOP2(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP2;
  }

  static bool isSOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
  }

  bool isSOPC(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPC;
  }

  static bool isSOPK(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
  }

  bool isSOPK(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK;
  }

  static bool isSOPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
  }

  bool isSOPP(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPP;
  }

  static bool isPacked(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
  }

  bool isPacked(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
  }

  static bool isVOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
  }

  bool isVOP1(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP1;
  }

  static bool isVOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
  }

  bool isVOP2(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP2;
  }
561
562 static bool isVOP3(const MCInstrDesc &Desc) {
563 return Desc.TSFlags & SIInstrFlags::VOP3;
564 }
565
566 static bool isVOP3(const MachineInstr &MI) { return isVOP3(Desc: MI.getDesc()); }
567
568 bool isVOP3(uint32_t Opcode) const { return isVOP3(Desc: get(Opcode)); }
569
  static bool isSDWA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
  }

  bool isSDWA(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SDWA;
  }

  static bool isVOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
  }

  bool isVOPC(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOPC;
  }

  static bool isMUBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
  }

  bool isMUBUF(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
  }

  static bool isMTBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
  }

  bool isMTBUF(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
  }

  // Either buffer encoding (untyped MUBUF or typed MTBUF).
  static bool isBUF(const MachineInstr &MI) {
    return isMUBUF(MI) || isMTBUF(MI);
  }

  static bool isSMRD(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
  }

  bool isSMRD(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SMRD;
  }

  bool isBufferSMRD(const MachineInstr &MI) const;

  static bool isDS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DS;
  }

  bool isDS(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DS;
  }

  // LDS DMA: VALU-flagged buffer/FLAT transfers, or TENSOR_CNT instructions.
  static bool isLDSDMA(const MachineInstr &MI) {
    return (isVALU(MI) && (isMUBUF(MI) || isFLAT(MI))) ||
           (MI.getDesc().TSFlags & SIInstrFlags::TENSOR_CNT);
  }
628
629 bool isLDSDMA(uint32_t Opcode) {
630 return (isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode))) ||
631 (get(Opcode).TSFlags & SIInstrFlags::TENSOR_CNT);
632 }
633
  static bool isGWS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::GWS;
  }

  bool isGWS(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::GWS;
  }

  bool isAlwaysGDS(uint32_t Opcode) const;

  static bool isMIMG(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
  }

  bool isMIMG(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MIMG;
  }

  static bool isVIMAGE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE;
  }

  bool isVIMAGE(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VIMAGE;
  }

  static bool isVSAMPLE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE;
  }

  bool isVSAMPLE(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE;
  }

  static bool isGather4(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
  }

  bool isGather4(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Gather4;
  }

  static bool isFLAT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
  }

  // Is a FLAT encoded instruction which accesses a specific segment,
  // i.e. global_* or scratch_*.
  static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
    auto Flags = MI.getDesc().TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  bool isSegmentSpecificFLAT(uint32_t Opcode) const {
    auto Flags = get(Opcode).TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  static bool isFLATGlobal(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
  }

  bool isFLATGlobal(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
  }

  static bool isFLATScratch(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
  }

  bool isFLATScratch(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
  }

  // Any FLAT encoded instruction, including global_* and scratch_*.
  bool isFLAT(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FLAT;
  }

  /// \returns true for SCRATCH_ instructions, or FLAT/BUF instructions unless
  /// the MMOs do not include scratch.
  /// Conservatively correct; will return true if \p MI cannot be proven
  /// to not hit scratch.
  bool mayAccessScratch(const MachineInstr &MI) const;

  /// \returns true for FLAT instructions that can access VMEM.
  bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const;

  /// \returns true for FLAT instructions that can access LDS.
  bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;

  /// \returns true for the block (register-tuple) spill save/restore opcodes.
  static bool isBlockLoadStore(uint32_t Opcode) {
    switch (Opcode) {
    case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
    case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE:
    case AMDGPU::SCRATCH_STORE_BLOCK_SADDR:
    case AMDGPU::SCRATCH_LOAD_BLOCK_SADDR:
    case AMDGPU::SCRATCH_STORE_BLOCK_SVS:
    case AMDGPU::SCRATCH_LOAD_BLOCK_SVS:
      return true;
    default:
      return false;
    }
  }

  /// \returns true for SALU opcodes that set SCC to (result != 0).
  static bool setsSCCIfResultIsNonZero(const MachineInstr &MI) {
    switch (MI.getOpcode()) {
    case AMDGPU::S_ABSDIFF_I32:
    case AMDGPU::S_ABS_I32:
    case AMDGPU::S_AND_B32:
    case AMDGPU::S_AND_B64:
    case AMDGPU::S_ANDN2_B32:
    case AMDGPU::S_ANDN2_B64:
    case AMDGPU::S_ASHR_I32:
    case AMDGPU::S_ASHR_I64:
    case AMDGPU::S_BCNT0_I32_B32:
    case AMDGPU::S_BCNT0_I32_B64:
    case AMDGPU::S_BCNT1_I32_B32:
    case AMDGPU::S_BCNT1_I32_B64:
    case AMDGPU::S_BFE_I32:
    case AMDGPU::S_BFE_I64:
    case AMDGPU::S_BFE_U32:
    case AMDGPU::S_BFE_U64:
    case AMDGPU::S_LSHL_B32:
    case AMDGPU::S_LSHL_B64:
    case AMDGPU::S_LSHR_B32:
    case AMDGPU::S_LSHR_B64:
    case AMDGPU::S_NAND_B32:
    case AMDGPU::S_NAND_B64:
    case AMDGPU::S_NOR_B32:
    case AMDGPU::S_NOR_B64:
    case AMDGPU::S_NOT_B32:
    case AMDGPU::S_NOT_B64:
    case AMDGPU::S_OR_B32:
    case AMDGPU::S_OR_B64:
    case AMDGPU::S_ORN2_B32:
    case AMDGPU::S_ORN2_B64:
    case AMDGPU::S_QUADMASK_B32:
    case AMDGPU::S_QUADMASK_B64:
    case AMDGPU::S_WQM_B32:
    case AMDGPU::S_WQM_B64:
    case AMDGPU::S_XNOR_B32:
    case AMDGPU::S_XNOR_B64:
    case AMDGPU::S_XOR_B32:
    case AMDGPU::S_XOR_B64:
      return true;
    default:
      return false;
    }
  }

  static bool isEXP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::EXP;
  }
788
789 static bool isDualSourceBlendEXP(const MachineInstr &MI) {
790 if (!isEXP(MI))
791 return false;
792 unsigned Target = MI.getOperand(i: 0).getImm();
793 return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
794 Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
795 }
796
  bool isEXP(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::EXP;
  }

  static bool isAtomicNoRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  bool isAtomicNoRet(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  static bool isAtomicRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
  }

  bool isAtomicRet(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
  }

  // Atomic in either the returning or non-returning form.
  static bool isAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
                                   SIInstrFlags::IsAtomicNoRet);
  }

  bool isAtomic(uint32_t Opcode) const {
    return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
                                  SIInstrFlags::IsAtomicNoRet);
  }

  static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
    unsigned Opc = MI.getOpcode();
    // Exclude instructions that read FROM LDS (not write to it)
    return isLDSDMA(MI) && Opc != AMDGPU::BUFFER_STORE_LDS_DWORD &&
           Opc != AMDGPU::TENSOR_STORE_FROM_LDS_d2 &&
           Opc != AMDGPU::TENSOR_STORE_FROM_LDS_d4;
  }

  /// \returns true for the barrier opcodes that write SCC.
  static bool isSBarrierSCCWrite(unsigned Opcode) {
    return Opcode == AMDGPU::S_BARRIER_LEAVE ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0;
  }
840
841 static bool isCBranchVCCZRead(const MachineInstr &MI) {
842 unsigned Opc = MI.getOpcode();
843 return (Opc == AMDGPU::S_CBRANCH_VCCNZ || Opc == AMDGPU::S_CBRANCH_VCCZ) &&
844 !MI.getOperand(i: 1).isUndef();
845 }
846
  static bool isWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::WQM;
  }

  bool isWQM(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::WQM;
  }

  static bool isDisableWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
  }

  bool isDisableWQM(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
  }

  // SI_SPILL_S32_TO_VGPR and SI_RESTORE_S32_FROM_VGPR form a special case of
  // SGPRs spilling to VGPRs which are SGPR spills but from VALU instructions
  // therefore we need an explicit check for them since just checking if the
  // Spill bit is set and what instruction type it came from misclassifies
  // them.
  static bool isVGPRSpill(const MachineInstr &MI) {
    return MI.getOpcode() != AMDGPU::SI_SPILL_S32_TO_VGPR &&
           MI.getOpcode() != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
           (isSpill(MI) && isVALU(MI));
  }

  bool isVGPRSpill(uint32_t Opcode) const {
    return Opcode != AMDGPU::SI_SPILL_S32_TO_VGPR &&
           Opcode != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
           (isSpill(Opcode) && isVALU(Opcode));
  }

  static bool isSGPRSpill(const MachineInstr &MI) {
    return MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR ||
           MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
           (isSpill(MI) && isSALU(MI));
  }

  bool isSGPRSpill(uint32_t Opcode) const {
    return Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR ||
           Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
           (isSpill(Opcode) && isSALU(Opcode));
  }

  bool isSpill(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Spill;
  }

  static bool isSpill(const MCInstrDesc &Desc) {
    return Desc.TSFlags & SIInstrFlags::Spill;
  }
899
900 static bool isSpill(const MachineInstr &MI) { return isSpill(Desc: MI.getDesc()); }
901
  /// \returns true for the spill opcodes reserved for WWM registers.
  static bool isWWMRegSpillOpcode(uint32_t Opcode) {
    return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
  }

  /// \returns true for the compute-shader chain tail-call pseudos.
  static bool isChainCallOpcode(uint64_t Opcode) {
    return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 ||
           Opcode == AMDGPU::SI_CS_CHAIN_TC_W64;
  }

  static bool isDPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DPP;
  }

  bool isDPP(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DPP;
  }

  static bool isTRANS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
  }

  bool isTRANS(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TRANS;
  }

  static bool isVOP3P(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
  }

  bool isVOP3P(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
  }

  static bool isVINTRP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
  }

  bool isVINTRP(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
  }
945
946 static bool isMAI(const MCInstrDesc &Desc) {
947 return Desc.TSFlags & SIInstrFlags::IsMAI;
948 }
949
950 static bool isMAI(const MachineInstr &MI) { return isMAI(Desc: MI.getDesc()); }
951
952 bool isMAI(uint32_t Opcode) const { return isMAI(Desc: get(Opcode)); }
953
  // MFMA: any MAI instruction except the accumulator-VGPR read/write movs.
  static bool isMFMA(const MachineInstr &MI) {
    return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
  }

  bool isMFMA(uint32_t Opcode) const {
    return isMAI(Opcode) && Opcode != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
           Opcode != AMDGPU::V_ACCVGPR_READ_B32_e64;
  }

  static bool isDOT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isWMMA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
  }

  bool isWMMA(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
  }

  // Any matrix-multiply instruction: MFMA, WMMA, or sparse WMMA (SWMMAC).
  static bool isMFMAorWMMA(const MachineInstr &MI) {
    return isMFMA(MI) || isWMMA(MI) || isSWMMAC(MI);
  }

  bool isMFMAorWMMA(uint32_t Opcode) const {
    return isMFMA(Opcode) || isWMMA(Opcode) || isSWMMAC(Opcode);
  }

  static bool isSWMMAC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isSWMMAC(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isDOT(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
  }

  bool isXDLWMMA(const MachineInstr &MI) const;

  bool isXDL(const MachineInstr &MI) const;
999
1000 static bool isDGEMM(unsigned Opcode) { return AMDGPU::getMAIIsDGEMM(Opc: Opcode); }
1001
  static bool isLDSDIR(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
  }

  bool isLDSDIR(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
  }

  static bool isVINTERP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
  }

  bool isVINTERP(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
  }

  /// \returns true if \p MI executes on the scalar unit (SALU or SMRD).
  static bool isScalarUnit(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
  }

  static bool usesVM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
  }

  static bool usesLGKM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
  }

  static bool usesASYNC_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::ASYNC_CNT;
  }

  bool usesASYNC_CNT(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::ASYNC_CNT;
  }

  /// Most SOPK instructions treat the immediate as a signed 16-bit value;
  /// \returns true for the opcodes that zero-extend it instead.
  static bool sopkIsZext(unsigned Opcode) {
    return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 ||
           Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 ||
           Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 ||
           Opcode == AMDGPU::S_GETREG_B32 ||
           Opcode == AMDGPU::S_GETREG_B32_const;
  }
1047
  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
  static bool isScalarStore(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  bool isScalarStore(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  static bool isFixedSize(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  bool isFixedSize(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  static bool hasFPClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
  }

  bool hasFPClamp(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
  }

  static bool hasIntClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
  }

  /// \returns the subset of \p MI's TSFlags describing its clamp behavior
  /// (FP clamp, integer clamp, and lo/hi half clamping).
  uint64_t getClampMask(const MachineInstr &MI) const {
    const uint64_t ClampFlags = SIInstrFlags::FPClamp |
                                SIInstrFlags::IntClamp |
                                SIInstrFlags::ClampLo |
                                SIInstrFlags::ClampHi;
    return MI.getDesc().TSFlags & ClampFlags;
  }

  static bool usesFPDPRounding(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
  }

  bool usesFPDPRounding(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
  }

  static bool isFPAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
  }

  bool isFPAtomic(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
  }

  static bool isNeverUniform(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
  }
1105
  // Check to see if opcode is for a barrier start. Pre gfx12 this is just the
  // S_BARRIER, but after support for S_BARRIER_SIGNAL* / S_BARRIER_WAIT we want
  // to check for the barrier start (S_BARRIER_SIGNAL*)
  bool isBarrierStart(unsigned Opcode) const {
    return Opcode == AMDGPU::S_BARRIER ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
  }

  /// \returns true if \p Opcode is any barrier-related instruction: a barrier
  /// start, wait, init/join/leave, or a GWS barrier.
  bool isBarrier(unsigned Opcode) const {
    return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
           Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
           Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
           Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
           Opcode == AMDGPU::S_BARRIER_LEAVE || Opcode == AMDGPU::DS_GWS_INIT ||
           Opcode == AMDGPU::DS_GWS_BARRIER;
  }

  /// \returns true if \p Opc is one of the gfx12 global cache invalidate /
  /// writeback instructions.
  static bool isGFX12CacheInvOrWBInst(unsigned Opc) {
    return Opc == AMDGPU::GLOBAL_INV || Opc == AMDGPU::GLOBAL_WB ||
           Opc == AMDGPU::GLOBAL_WBINV;
  }

  /// \returns true if \p Opcode is a pseudo-scalar (V_S_*) f16
  /// transcendental instruction.
  static bool isF16PseudoScalarTrans(unsigned Opcode) {
    return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
           Opcode == AMDGPU::V_S_LOG_F16_e64 ||
           Opcode == AMDGPU::V_S_RCP_F16_e64 ||
           Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
           Opcode == AMDGPU::V_S_SQRT_F16_e64;
  }

  static bool doesNotReadTiedSource(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  bool doesNotReadTiedSource(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  /// \returns true if \p Opcode is one of the scheduling-directive pseudos
  /// (SCHED_BARRIER, SCHED_GROUP_BARRIER, IGLP_OPT).
  bool isIGLP(unsigned Opcode) const {
    return Opcode == AMDGPU::SCHED_BARRIER ||
           Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
  }
1151
1152 bool isIGLP(const MachineInstr &MI) const { return isIGLP(Opcode: MI.getOpcode()); }
1153
  /// Return true if the instruction is mutually exclusive with all non-IGLP
  /// DAG mutations, requiring all other mutations to be disabled.
  bool isIGLPMutationOnly(unsigned Opcode) const {
    return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
  }
1159
  /// Map a "_soft" waitcnt pseudo to its regular (hard) form.
  /// \returns \p Opcode unchanged if it is not a soft waitcnt.
  static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
    switch (Opcode) {
    case AMDGPU::S_WAITCNT_soft:
      return AMDGPU::S_WAITCNT;
    case AMDGPU::S_WAITCNT_VSCNT_soft:
      return AMDGPU::S_WAITCNT_VSCNT;
    case AMDGPU::S_WAIT_LOADCNT_soft:
      return AMDGPU::S_WAIT_LOADCNT;
    case AMDGPU::S_WAIT_STORECNT_soft:
      return AMDGPU::S_WAIT_STORECNT;
    case AMDGPU::S_WAIT_SAMPLECNT_soft:
      return AMDGPU::S_WAIT_SAMPLECNT;
    case AMDGPU::S_WAIT_BVHCNT_soft:
      return AMDGPU::S_WAIT_BVHCNT;
    case AMDGPU::S_WAIT_DSCNT_soft:
      return AMDGPU::S_WAIT_DSCNT;
    case AMDGPU::S_WAIT_KMCNT_soft:
      return AMDGPU::S_WAIT_KMCNT;
    case AMDGPU::S_WAIT_XCNT_soft:
      return AMDGPU::S_WAIT_XCNT;
    default:
      return Opcode;
    }
  }

  /// \returns true if \p Opcode, after stripping any "_soft" suffix, is a
  /// wait-count instruction of any flavor.
  static bool isWaitcnt(unsigned Opcode) {
    switch (getNonSoftWaitcntOpcode(Opcode)) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAIT_LOADCNT:
    case AMDGPU::S_WAIT_LOADCNT_DSCNT:
    case AMDGPU::S_WAIT_STORECNT:
    case AMDGPU::S_WAIT_STORECNT_DSCNT:
    case AMDGPU::S_WAIT_SAMPLECNT:
    case AMDGPU::S_WAIT_BVHCNT:
    case AMDGPU::S_WAIT_EXPCNT:
    case AMDGPU::S_WAIT_DSCNT:
    case AMDGPU::S_WAIT_KMCNT:
    case AMDGPU::S_WAIT_IDLE:
      return true;
    default:
      return false;
    }
  }
1207
1208 bool isVGPRCopy(const MachineInstr &MI) const {
1209 assert(isCopyInstr(MI));
1210 Register Dest = MI.getOperand(i: 0).getReg();
1211 const MachineFunction &MF = *MI.getMF();
1212 const MachineRegisterInfo &MRI = MF.getRegInfo();
1213 return !RI.isSGPRReg(MRI, Reg: Dest);
1214 }
1215
1216 bool hasVGPRUses(const MachineInstr &MI) const {
1217 const MachineFunction &MF = *MI.getMF();
1218 const MachineRegisterInfo &MRI = MF.getRegInfo();
1219 return llvm::any_of(Range: MI.explicit_uses(),
1220 P: [&MRI, this](const MachineOperand &MO) {
1221 return MO.isReg() && RI.isVGPR(MRI, Reg: MO.getReg());});
1222 }
1223
  /// Return true if the instruction modifies the mode register.
1225 static bool modifiesModeRegister(const MachineInstr &MI);
1226
1227 /// This function is used to determine if an instruction can be safely
1228 /// executed under EXEC = 0 without hardware error, indeterminate results,
1229 /// and/or visible effects on future vector execution or outside the shader.
1230 /// Note: as of 2024 the only use of this is SIPreEmitPeephole where it is
1231 /// used in removing branches over short EXEC = 0 sequences.
1232 /// As such it embeds certain assumptions which may not apply to every case
1233 /// of EXEC = 0 execution.
1234 bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
1235
1236 /// Returns true if the instruction could potentially depend on the value of
1237 /// exec. If false, exec dependencies may safely be ignored.
1238 bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;
1239
1240 bool isInlineConstant(const APInt &Imm) const;
1241
1242 bool isInlineConstant(const APFloat &Imm) const;
1243
1244 // Returns true if this non-register operand definitely does not need to be
1245 // encoded as a 32-bit literal. Note that this function handles all kinds of
1246 // operands, not just immediates.
1247 //
1248 // Some operands like FrameIndexes could resolve to an inline immediate value
1249 // that will not require an additional 4-bytes; this function assumes that it
1250 // will.
1251 bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const {
1252 if (!MO.isImm())
1253 return false;
1254 return isInlineConstant(ImmVal: MO.getImm(), OperandType);
1255 }
1256 bool isInlineConstant(int64_t ImmVal, uint8_t OperandType) const;
1257
1258 bool isInlineConstant(const MachineOperand &MO,
1259 const MCOperandInfo &OpInfo) const {
1260 return isInlineConstant(MO, OperandType: OpInfo.OperandType);
1261 }
1262
1263 /// \p returns true if \p UseMO is substituted with \p DefMO in \p MI it would
1264 /// be an inline immediate.
1265 bool isInlineConstant(const MachineInstr &MI,
1266 const MachineOperand &UseMO,
1267 const MachineOperand &DefMO) const {
1268 assert(UseMO.getParent() == &MI);
1269 int OpIdx = UseMO.getOperandNo();
1270 if (OpIdx >= MI.getDesc().NumOperands)
1271 return false;
1272
1273 return isInlineConstant(MO: DefMO, OpInfo: MI.getDesc().operands()[OpIdx]);
1274 }
1275
1276 /// \p returns true if the operand \p OpIdx in \p MI is a valid inline
1277 /// immediate.
1278 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
1279 const MachineOperand &MO = MI.getOperand(i: OpIdx);
1280 return isInlineConstant(MO, OperandType: MI.getDesc().operands()[OpIdx].OperandType);
1281 }
1282
1283 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
1284 int64_t ImmVal) const {
1285 if (OpIdx >= MI.getDesc().NumOperands)
1286 return false;
1287
1288 if (isCopyInstr(MI)) {
1289 unsigned Size = getOpSize(MI, OpNo: OpIdx);
1290 assert(Size == 8 || Size == 4);
1291
1292 uint8_t OpType = (Size == 8) ?
1293 AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
1294 return isInlineConstant(ImmVal, OperandType: OpType);
1295 }
1296
1297 return isInlineConstant(ImmVal, OperandType: MI.getDesc().operands()[OpIdx].OperandType);
1298 }
1299
1300 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
1301 const MachineOperand &MO) const {
1302 return isInlineConstant(MI, OpIdx, ImmVal: MO.getImm());
1303 }
1304
1305 bool isInlineConstant(const MachineOperand &MO) const {
1306 return isInlineConstant(MI: *MO.getParent(), OpIdx: MO.getOperandNo());
1307 }
1308
1309 bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
1310 const MachineOperand &MO) const;
1311
1312 bool isLiteralOperandLegal(const MCInstrDesc &InstDesc,
1313 const MCOperandInfo &OpInfo) const;
1314
1315 bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
1316 int64_t ImmVal) const;
1317
1318 bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
1319 const MachineOperand &MO) const {
1320 return isImmOperandLegal(InstDesc: MI.getDesc(), OpNo, MO);
1321 }
1322
1323 bool isNeverCoissue(MachineInstr &MI) const;
1324
1325 /// Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
1326 bool isLegalAV64PseudoImm(uint64_t Imm) const;
1327
1328 /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
1329 /// This function will return false if you pass it a 32-bit instruction.
1330 bool hasVALU32BitEncoding(unsigned Opcode) const;
1331
1332 bool physRegUsesConstantBus(const MachineOperand &Reg) const;
1333 bool regUsesConstantBus(const MachineOperand &Reg,
1334 const MachineRegisterInfo &MRI) const;
1335
1336 /// Returns true if this operand uses the constant bus.
1337 bool usesConstantBus(const MachineRegisterInfo &MRI,
1338 const MachineOperand &MO,
1339 const MCOperandInfo &OpInfo) const;
1340
1341 bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineInstr &MI,
1342 int OpIdx) const {
1343 return usesConstantBus(MRI, MO: MI.getOperand(i: OpIdx),
1344 OpInfo: MI.getDesc().operands()[OpIdx]);
1345 }
1346
1347 /// Return true if this instruction has any modifiers.
1348 /// e.g. src[012]_mod, omod, clamp.
1349 bool hasModifiers(unsigned Opcode) const;
1350
1351 bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const;
1352 bool hasAnyModifiersSet(const MachineInstr &MI) const;
1353
1354 bool canShrink(const MachineInstr &MI,
1355 const MachineRegisterInfo &MRI) const;
1356
1357 MachineInstr *buildShrunkInst(MachineInstr &MI,
1358 unsigned NewOpcode) const;
1359
1360 bool verifyInstruction(const MachineInstr &MI,
1361 StringRef &ErrInfo) const override;
1362
1363 unsigned getVALUOp(const MachineInstr &MI) const;
1364
1365 void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
1366 MachineBasicBlock::iterator MBBI,
1367 const DebugLoc &DL, Register Reg, bool IsSCCLive,
1368 SlotIndexes *Indexes = nullptr) const;
1369
1370 void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
1371 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
1372 Register Reg, SlotIndexes *Indexes = nullptr) const;
1373
1374 MachineInstr *getWholeWaveFunctionSetup(MachineFunction &MF) const;
1375
  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand, using the other operands to
  /// infer the correct register class.
1381 const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
1382 unsigned OpNo) const;
1383
1384 /// Return the size in bytes of the operand OpNo on the given
1385 // instruction opcode.
1386 unsigned getOpSize(uint32_t Opcode, unsigned OpNo) const {
1387 const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];
1388
1389 if (OpInfo.RegClass == -1) {
1390 // If this is an immediate operand, this must be a 32-bit literal.
1391 assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
1392 return 4;
1393 }
1394
1395 return RI.getRegSizeInBits(RC: *RI.getRegClass(i: getOpRegClassID(OpInfo))) / 8;
1396 }
1397
1398 /// This form should usually be preferred since it handles operands
1399 /// with unknown register classes.
1400 unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
1401 const MachineOperand &MO = MI.getOperand(i: OpNo);
1402 if (MO.isReg()) {
1403 if (unsigned SubReg = MO.getSubReg()) {
1404 return RI.getSubRegIdxSize(Idx: SubReg) / 8;
1405 }
1406 }
1407 return RI.getRegSizeInBits(RC: *getOpRegClass(MI, OpNo)) / 8;
1408 }
1409
1410 /// Legalize the \p OpIndex operand of this instruction by inserting
1411 /// a MOV. For example:
1412 /// ADD_I32_e32 VGPR0, 15
1413 /// to
1414 /// MOV VGPR1, 15
1415 /// ADD_I32_e32 VGPR0, VGPR1
1416 ///
1417 /// If the operand being legalized is a register, then a COPY will be used
1418 /// instead of MOV.
1419 void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;
1420
1421 /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
1422 /// for \p MI.
1423 bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
1424 const MachineOperand *MO = nullptr) const;
1425
1426 /// Check if \p MO would be a valid operand for the given operand
1427 /// definition \p OpInfo. Note this does not attempt to validate constant bus
1428 /// restrictions (e.g. literal constant usage).
1429 bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
1430 const MCOperandInfo &OpInfo,
1431 const MachineOperand &MO) const;
1432
1433 /// Check if \p MO (a register operand) is a legal register for the
1434 /// given operand description or operand index.
1435 /// The operand index version provide more legality checks
1436 bool isLegalRegOperand(const MachineRegisterInfo &MRI,
1437 const MCOperandInfo &OpInfo,
1438 const MachineOperand &MO) const;
1439 bool isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
1440 const MachineOperand &MO) const;
1441
1442 /// Check if \p MO would be a legal operand for gfx12+ packed math FP32
1443 /// instructions. Packed math FP32 instructions typically accept SGPRs or
1444 /// VGPRs as source operands. On gfx12+, if a source operand uses SGPRs, the
1445 /// HW can only read the first SGPR and use it for both the low and high
1446 /// operations.
1447 /// \p SrcN can be 0, 1, or 2, representing src0, src1, and src2,
1448 /// respectively. If \p MO is nullptr, the operand corresponding to SrcN will
1449 /// be used.
1450 bool isLegalGFX12PlusPackedMathFP32Operand(
1451 const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,
1452 const MachineOperand *MO = nullptr) const;
1453
1454 /// Legalize operands in \p MI by either commuting it or inserting a
1455 /// copy of src1.
1456 void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1457
1458 /// Fix operands in \p MI to satisfy constant bus requirements.
1459 void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1460
1461 /// Copy a value from a VGPR (\p SrcReg) to SGPR. The desired register class
1462 /// for the dst register (\p DstRC) can be optionally supplied. This function
  /// can only be used when it is known that the value in SrcReg is the same
  /// across all threads in the wave.
1465 /// \returns The SGPR register that \p SrcReg was copied to.
1466 Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
1467 MachineRegisterInfo &MRI,
1468 const TargetRegisterClass *DstRC = nullptr) const;
1469
1470 void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1471 void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1472
1473 void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
1474 MachineBasicBlock::iterator I,
1475 const TargetRegisterClass *DstRC,
1476 MachineOperand &Op, MachineRegisterInfo &MRI,
1477 const DebugLoc &DL) const;
1478
1479 /// Legalize all operands in this instruction. This function may create new
1480 /// instructions and control-flow around \p MI. If present, \p MDT is
1481 /// updated.
1482 /// \returns A new basic block that contains \p MI if new blocks were created.
1483 MachineBasicBlock *
1484 legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
1485
1486 /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
1487 /// was moved to VGPR. \returns true if succeeded.
1488 bool moveFlatAddrToVGPR(MachineInstr &Inst) const;
1489
1490 /// Fix operands in Inst to fix 16bit SALU to VALU lowering.
1491 void legalizeOperandsVALUt16(MachineInstr &Inst,
1492 MachineRegisterInfo &MRI) const;
1493 void legalizeOperandsVALUt16(MachineInstr &Inst, unsigned OpIdx,
1494 MachineRegisterInfo &MRI) const;
1495
  /// Replace the instruction's opcode with the equivalent VALU
  /// opcode. This function will also move the users of MachineInstructions
  /// in the \p WorkList to the VALU if necessary. If present, \p MDT is
  /// updated.
1500 void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;
1501
1502 void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
1503 MachineInstr &Inst) const;
1504
1505 void insertNoop(MachineBasicBlock &MBB,
1506 MachineBasicBlock::iterator MI) const override;
1507
1508 void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1509 unsigned Quantity) const override;
1510
1511 void insertReturn(MachineBasicBlock &MBB) const;
1512
  /// Build instructions that simulate the behavior of an `s_trap 2` instruction
1514 /// for hardware (namely, gfx11) that runs in PRIV=1 mode. There, s_trap is
1515 /// interpreted as a nop.
1516 MachineBasicBlock *insertSimulatedTrap(MachineRegisterInfo &MRI,
1517 MachineBasicBlock &MBB,
1518 MachineInstr &MI,
1519 const DebugLoc &DL) const;
1520
1521 /// Return the number of wait states that result from executing this
1522 /// instruction.
1523 static unsigned getNumWaitStates(const MachineInstr &MI);
1524
1525 /// Returns the operand named \p Op. If \p MI does not have an
1526 /// operand named \c Op, this function returns nullptr.
1527 LLVM_READONLY
1528 MachineOperand *getNamedOperand(MachineInstr &MI,
1529 AMDGPU::OpName OperandName) const;
1530
1531 LLVM_READONLY
1532 const MachineOperand *getNamedOperand(const MachineInstr &MI,
1533 AMDGPU::OpName OperandName) const {
1534 return getNamedOperand(MI&: const_cast<MachineInstr &>(MI), OperandName);
1535 }
1536
1537 /// Get required immediate operand
1538 int64_t getNamedImmOperand(const MachineInstr &MI,
1539 AMDGPU::OpName OperandName) const {
1540 int Idx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: OperandName);
1541 return MI.getOperand(i: Idx).getImm();
1542 }
1543
1544 uint64_t getDefaultRsrcDataFormat() const;
1545 uint64_t getScratchRsrcWords23() const;
1546
1547 bool isLowLatencyInstruction(const MachineInstr &MI) const;
1548 bool isHighLatencyDef(int Opc) const override;
1549
1550 /// Return the descriptor of the target-specific machine instruction
1551 /// that corresponds to the specified pseudo or native opcode.
1552 const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
1553 return get(Opcode: pseudoToMCOpcode(Opcode));
1554 }
1555
1556 Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1557 Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1558
1559 Register isLoadFromStackSlot(const MachineInstr &MI,
1560 int &FrameIndex) const override;
1561 Register isStoreToStackSlot(const MachineInstr &MI,
1562 int &FrameIndex) const override;
1563
1564 unsigned getInstBundleSize(const MachineInstr &MI) const;
1565 unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
1566
1567 bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
1568
1569 std::pair<unsigned, unsigned>
1570 decomposeMachineOperandsTargetFlags(unsigned TF) const override;
1571
1572 ArrayRef<std::pair<int, const char *>>
1573 getSerializableTargetIndices() const override;
1574
1575 ArrayRef<std::pair<unsigned, const char *>>
1576 getSerializableDirectMachineOperandTargetFlags() const override;
1577
1578 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
1579 getSerializableMachineMemOperandTargetFlags() const override;
1580
1581 ScheduleHazardRecognizer *
1582 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
1583 const ScheduleDAG *DAG) const override;
1584
1585 ScheduleHazardRecognizer *
1586 CreateTargetPostRAHazardRecognizer(const MachineFunction &MF,
1587 MachineLoopInfo *MLI) const override;
1588
1589 ScheduleHazardRecognizer *
1590 CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
1591 const ScheduleDAGMI *DAG) const override;
1592
1593 unsigned getLiveRangeSplitOpcode(Register Reg,
1594 const MachineFunction &MF) const override;
1595
1596 bool isBasicBlockPrologue(const MachineInstr &MI,
1597 Register Reg = Register()) const override;
1598
1599 bool canAddToBBProlog(const MachineInstr &MI) const;
1600
1601 MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
1602 MachineBasicBlock::iterator InsPt,
1603 const DebugLoc &DL, Register Src,
1604 Register Dst) const override;
1605
1606 MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
1607 MachineBasicBlock::iterator InsPt,
1608 const DebugLoc &DL, Register Src,
1609 unsigned SrcSubReg,
1610 Register Dst) const override;
1611
1612 bool isWave32() const;
1613
1614 /// Return a partially built integer add instruction without carry.
1615 /// Caller must add source operands.
1616 /// For pre-GFX9 it will generate unused carry destination operand.
1617 /// TODO: After GFX9 it should return a no-carry operation.
1618 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1619 MachineBasicBlock::iterator I,
1620 const DebugLoc &DL,
1621 Register DestReg) const;
1622
1623 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1624 MachineBasicBlock::iterator I,
1625 const DebugLoc &DL,
1626 Register DestReg,
1627 RegScavenger &RS) const;
1628
1629 static bool isKillTerminator(unsigned Opcode);
1630 const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
1631
1632 bool isLegalMUBUFImmOffset(unsigned Imm) const;
1633
1634 static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST);
1635
1636 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
1637 Align Alignment = Align(4)) const;
1638
1639 /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
1640 /// encoded instruction with the given \p FlatVariant.
1641 bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
1642 uint64_t FlatVariant) const;
1643
1644 /// Split \p COffsetVal into {immediate offset field, remainder offset}
1645 /// values.
1646 std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
1647 unsigned AddrSpace,
1648 uint64_t FlatVariant) const;
1649
1650 /// Returns true if negative offsets are allowed for the given \p FlatVariant.
1651 bool allowNegativeFlatOffset(uint64_t FlatVariant) const;
1652
1653 /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
1654 /// Return -1 if the target-specific opcode for the pseudo instruction does
1655 /// not exist. If Opcode is not a pseudo instruction, this is identity.
1656 int pseudoToMCOpcode(int Opcode) const;
1657
1658 /// \brief Check if this instruction should only be used by assembler.
1659 /// Return true if this opcode should not be used by codegen.
1660 bool isAsmOnlyOpcode(int MCOp) const;
1661
1662 void fixImplicitOperands(MachineInstr &MI) const;
1663
1664 MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
1665 ArrayRef<unsigned> Ops,
1666 MachineBasicBlock::iterator InsertPt,
1667 int FrameIndex,
1668 LiveIntervals *LIS = nullptr,
1669 VirtRegMap *VRM = nullptr) const override;
1670
1671 unsigned getInstrLatency(const InstrItineraryData *ItinData,
1672 const MachineInstr &MI,
1673 unsigned *PredCost = nullptr) const override;
1674
1675 const MachineOperand &getCalleeOperand(const MachineInstr &MI) const override;
1676
1677 InstructionUniformity
1678 getInstructionUniformity(const MachineInstr &MI) const final;
1679
1680 InstructionUniformity
1681 getGenericInstructionUniformity(const MachineInstr &MI) const;
1682
1683 const MIRFormatter *getMIRFormatter() const override;
1684
1685 static unsigned getDSShaderTypeValue(const MachineFunction &MF);
1686
  /// \returns the target scheduling model held by this instruction info.
  const TargetSchedModel &getSchedModel() const { return SchedModel; }
1688
1689 // FIXME: This should be removed
1690 // Enforce operand's \p OpName even alignment if required by target.
1691 // This is used if an operand is a 32 bit register but needs to be aligned
1692 // regardless.
1693 void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const;
1694};
1695
1696/// \brief Returns true if a reg:subreg pair P has a TRC class
1697inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
1698 const TargetRegisterClass &TRC,
1699 MachineRegisterInfo &MRI) {
1700 auto *RC = MRI.getRegClass(Reg: P.Reg);
1701 if (!P.SubReg)
1702 return RC == &TRC;
1703 auto *TRI = MRI.getTargetRegisterInfo();
1704 return RC == TRI->getMatchingSuperRegClass(A: RC, B: &TRC, Idx: P.SubReg);
1705}
1706
1707/// \brief Create RegSubRegPair from a register MachineOperand
1708inline
1709TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
1710 assert(O.isReg());
1711 return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
1712}
1713
1714/// \brief Return the SubReg component from REG_SEQUENCE
1715TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
1716 unsigned SubReg);
1717
1718/// \brief Return the defining instruction for a given reg:subreg pair
1719/// skipping copy like instructions and subreg-manipulation pseudos.
1720/// Following another subreg of a reg:subreg isn't supported.
1721MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
1722 const MachineRegisterInfo &MRI);
1723
1724/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
1725/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
1726/// attempt to track between blocks.
1727bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
1728 Register VReg,
1729 const MachineInstr &DefMI,
1730 const MachineInstr &UseMI);
1731
1732/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
1733/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
1734/// track between blocks.
1735bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
1736 Register VReg,
1737 const MachineInstr &DefMI);
1738
namespace AMDGPU {

  // Opcode-equivalence queries. Each function below maps an instruction
  // opcode to a related encoding/form of the same instruction (the
  // definitions are generated; see the AMDGPUGenInstrInfo.inc include at the
  // top of this file). By convention they return -1 when no such form exists
  // — explicitly documented for getIfAddr64Inst below; presumably the same
  // for the rest.

  /// \returns the 64-bit-encoded form (e.g. VOP3) of \p Opcode.
  LLVM_READONLY
  int32_t getVOPe64(uint32_t Opcode);

  /// \returns the 32-bit-encoded form of \p Opcode.
  LLVM_READONLY
  int32_t getVOPe32(uint32_t Opcode);

  /// \returns the SDWA form of \p Opcode.
  LLVM_READONLY
  int32_t getSDWAOp(uint32_t Opcode);

  /// \returns the 32-bit DPP form of \p Opcode.
  LLVM_READONLY
  int32_t getDPPOp32(uint32_t Opcode);

  /// \returns the 64-bit DPP form of \p Opcode.
  LLVM_READONLY
  int32_t getDPPOp64(uint32_t Opcode);

  /// \returns the basic (non-SDWA) form of an SDWA \p Opcode
  /// (inverse of getSDWAOp).
  LLVM_READONLY
  int32_t getBasicFromSDWAOp(uint32_t Opcode);

  /// \returns the operand-commuted ("reverse") counterpart of \p Opcode.
  LLVM_READONLY
  int32_t getCommuteRev(uint32_t Opcode);

  /// \returns the original opcode of a commuted ("reverse") \p Opcode
  /// (inverse of getCommuteRev).
  LLVM_READONLY
  int32_t getCommuteOrig(uint32_t Opcode);

  /// \returns the Addr64 form of \p Opcode.
  LLVM_READONLY
  int32_t getAddr64Inst(uint32_t Opcode);

  /// Check if \p Opcode is an Addr64 opcode.
  ///
  /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
  LLVM_READONLY
  int32_t getIfAddr64Inst(uint32_t Opcode);

  /// \returns the SOPK form of \p Opcode.
  LLVM_READONLY
  int32_t getSOPKOp(uint32_t Opcode);

  /// \returns SADDR form of a FLAT Global instruction given an \p Opcode
  /// of a VADDR form.
  LLVM_READONLY
  int32_t getGlobalSaddrOp(uint32_t Opcode);

  /// \returns VADDR form of a FLAT Global instruction given an \p Opcode
  /// of a SADDR form.
  LLVM_READONLY
  int32_t getGlobalVaddrOp(uint32_t Opcode);

  /// \returns the form of a V_CMPX instruction without the SDst operand.
  LLVM_READONLY
  int32_t getVCMPXNoSDstOp(uint32_t Opcode);

  /// \returns ST form with only immediate offset of a FLAT Scratch instruction
  /// given an \p Opcode of an SS (SADDR) form.
  LLVM_READONLY
  int32_t getFlatScratchInstSTfromSS(uint32_t Opcode);

  /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
  /// of an SVS (SADDR + VADDR) form.
  LLVM_READONLY
  int32_t getFlatScratchInstSVfromSVS(uint32_t Opcode);

  /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
  /// of an SV (VADDR) form.
  LLVM_READONLY
  int32_t getFlatScratchInstSSfromSV(uint32_t Opcode);

  /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
  /// of an SS (SADDR) form.
  LLVM_READONLY
  int32_t getFlatScratchInstSVfromSS(uint32_t Opcode);

  /// \returns earlyclobber version of a MAC MFMA, if one exists.
  LLVM_READONLY
  int32_t getMFMAEarlyClobberOp(uint32_t Opcode);

  /// \returns Version of an MFMA instruction which uses AGPRs for srcC and
  /// vdst, given an \p Opcode of an MFMA which uses VGPRs for srcC/vdst.
  LLVM_READONLY
  int32_t getMFMASrcCVDstAGPROp(uint32_t Opcode);

  /// \returns v_cmpx version of a v_cmp instruction.
  LLVM_READONLY
  int32_t getVCMPXOpFromVCMP(uint32_t Opcode);

  // Buffer resource descriptor fields. These values/shifts address the upper
  // 64 bits of the 128-bit descriptor, hence the "32 +" offsets.
  const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; // bits [47:44]
  const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19); // bit 51
  const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21); // bit 53
  const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23); // bit 55

} // end namespace AMDGPU
1829
namespace AMDGPU {
// Target-specific AsmPrinter comment flags. Values start at
// MachineInstr::TAsmComments, the first bit reserved for target use.
enum AsmComments {
  // For sgpr to vgpr spill instructions
  SGPR_SPILL = MachineInstr::TAsmComments
};
} // namespace AMDGPU
1836
namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer.
/// Each field occupies 4 bytes (consecutive offsets differ by 4).
enum Offsets {
  // Number of work groups in each dimension.
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  // Global (grid) size in each dimension.
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  // Work-group (local) size in each dimension.
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI
1855
1856} // end namespace llvm
1857
1858#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
1859