//===- AMDGPUGlobalISelUtils.cpp ---------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPURegisterBankInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

using namespace llvm;
using namespace AMDGPU;
using namespace MIPatternMatch;

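// Split Reg into a base register and a constant offset. A plain G_CONSTANT is
// returned as {Register(), Imm}; if no constant offset can be peeled off, the
// original register is returned with offset 0.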
std::pair<Register, unsigned>
AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg,
                                  GISelValueTracking *ValueTracking,
                                  bool CheckNUW) {
  MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
  if (Def->getOpcode() == TargetOpcode::G_CONSTANT) {
    unsigned Offset;
    const MachineOperand &Op = Def->getOperand(1);
    if (Op.isImm())
      Offset = Op.getImm();
    else
      Offset = Op.getCImm()->getZExtValue();

    return std::pair(Register(), Offset);
  }

  int64_t Offset;
  if (Def->getOpcode() == TargetOpcode::G_ADD) {
    // A 32-bit (address + offset) should not cause unsigned 32-bit integer
    // wraparound, because s_load instructions perform the addition in 64 bits.
    if (CheckNUW && !Def->getFlag(MachineInstr::NoUWrap)) {
      assert(MRI.getType(Reg).getScalarSizeInBits() == 32);
      return std::pair(Reg, 0);
    }
    // TODO: Handle G_OR used for add case
    if (mi_match(Def->getOperand(2).getReg(), MRI, m_ICst(Offset)))
      return std::pair(Def->getOperand(1).getReg(), Offset);

    // FIXME: matcher should ignore copies
    if (mi_match(Def->getOperand(2).getReg(), MRI, m_Copy(m_ICst(Offset))))
      return std::pair(Def->getOperand(1).getReg(), Offset);
  }

  Register Base;
  if (ValueTracking &&
      mi_match(Reg, MRI, m_GOr(m_Reg(Base), m_ICst(Offset))) &&
      ValueTracking->maskedValueIsZero(Base,
                                       APInt(32, Offset, /*isSigned=*/true)))
    return std::pair(Base, Offset);

  // Handle G_PTRTOINT (G_PTR_ADD base, const) case
  if (Def->getOpcode() == TargetOpcode::G_PTRTOINT) {
    MachineInstr *Base;
    if (mi_match(Def->getOperand(1).getReg(), MRI,
                 m_GPtrAdd(m_MInstr(Base), m_ICst(Offset)))) {
      // If Base was int converted to pointer, simply return int and offset.
      if (Base->getOpcode() == TargetOpcode::G_INTTOPTR)
        return std::pair(Base->getOperand(1).getReg(), Offset);

      // Register returned here will be of pointer type.
      return std::pair(Base->getOperand(0).getReg(), Offset);
    }
  }

  return std::pair(Reg, 0);
}

IntrinsicLaneMaskAnalyzer::IntrinsicLaneMaskAnalyzer(MachineFunction &MF)
    : MRI(MF.getRegInfo()) {
  initLaneMaskIntrinsics(MF);
}

bool IntrinsicLaneMaskAnalyzer::isS32S64LaneMask(Register Reg) const {
  return S32S64LaneMask.contains(Reg);
}

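// Walk the function and record lane-mask registers: the result and incoming
// mask operand of amdgcn.if.break intrinsics, and the results of SI_IF and
// SI_ELSE pseudos.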
void IntrinsicLaneMaskAnalyzer::initLaneMaskIntrinsics(MachineFunction &MF) {
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI);
      if (GI && GI->is(Intrinsic::amdgcn_if_break)) {
        S32S64LaneMask.insert(MI.getOperand(3).getReg());
        S32S64LaneMask.insert(MI.getOperand(0).getReg());
      }

      if (MI.getOpcode() == AMDGPU::SI_IF ||
          MI.getOpcode() == AMDGPU::SI_ELSE) {
        S32S64LaneMask.insert(MI.getOperand(0).getReg());
      }
    }
  }
}

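// Piece type used when splitting a value for lane reads: pairs of 16-bit
// vector elements, single elements for wider vectors, and 32-bit chunks for
// everything else.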
static LLT getReadAnyLaneSplitTy(LLT Ty) {
  if (Ty.isVector()) {
    LLT ElTy = Ty.getElementType();
    if (ElTy.getSizeInBits() == 16)
      return LLT::fixed_vector(2, ElTy);
    // S32, S64 or pointer
    return ElTy;
  }

  // Large scalars and 64-bit pointers
  return LLT::scalar(32);
}

template <typename ReadLaneFnTy>
static Register buildReadLane(MachineIRBuilder &, Register,
                              const RegisterBankInfo &, ReadLaneFnTy);

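// Unmerge VgprSrc into UnmergeTy pieces and read each piece back to SGPRs
// with BuildRL, collecting the resulting SGPR registers in SgprDstParts.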
template <typename ReadLaneFnTy>
static void
unmergeReadAnyLane(MachineIRBuilder &B, SmallVectorImpl<Register> &SgprDstParts,
                   LLT UnmergeTy, Register VgprSrc, const RegisterBankInfo &RBI,
                   ReadLaneFnTy BuildRL) {
  const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
  auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
  for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
    SgprDstParts.push_back(buildReadLane(B, Unmerge.getReg(i), RBI, BuildRL));
  }
}

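// Read a VGPR value into a newly created SGPR register. 32-bit values are
// read directly; larger values are split, read piecewise and re-merged.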
template <typename ReadLaneFnTy>
static Register buildReadLane(MachineIRBuilder &B, Register VgprSrc,
                              const RegisterBankInfo &RBI,
                              ReadLaneFnTy BuildRL) {
  LLT Ty = B.getMRI()->getType(VgprSrc);
  const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
  if (Ty.getSizeInBits() == 32) {
    Register SgprDst = B.getMRI()->createVirtualRegister({SgprRB, Ty});
    return BuildRL(B, SgprDst, VgprSrc).getReg(0);
  }

  SmallVector<Register, 8> SgprDstParts;
  unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
                     BuildRL);

  return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
}

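// Same as above, but the result is written into an existing SgprDst register
// instead of a newly created one.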
template <typename ReadLaneFnTy>
static void buildReadLane(MachineIRBuilder &B, Register SgprDst,
                          Register VgprSrc, const RegisterBankInfo &RBI,
                          ReadLaneFnTy BuildReadLane) {
  LLT Ty = B.getMRI()->getType(VgprSrc);
  if (Ty.getSizeInBits() == 32) {
    BuildReadLane(B, SgprDst, VgprSrc);
    return;
  }

  SmallVector<Register, 8> SgprDstParts;
  unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI,
                     BuildReadLane);

  B.buildMergeLikeInstr(SgprDst, SgprDstParts);
}

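// Copy VgprSrc into SgprDst using G_AMDGPU_READANYLANE instructions.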
void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
                              Register VgprSrc, const RegisterBankInfo &RBI) {
  return buildReadLane(
      B, SgprDst, VgprSrc, RBI,
      [](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
        return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
      });
}

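// Copy VgprSrc into SgprDst using the amdgcn.readfirstlane intrinsic.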
void AMDGPU::buildReadFirstLane(MachineIRBuilder &B, Register SgprDst,
                                Register VgprSrc, const RegisterBankInfo &RBI) {
  return buildReadLane(
      B, SgprDst, VgprSrc, RBI,
      [](MachineIRBuilder &B, Register SgprDst, Register VgprSrc) {
        return B.buildIntrinsic(Intrinsic::amdgcn_readfirstlane, SgprDst)
            .addReg(VgprSrc);
      });
}