1//===-- AMDGPURegBankLegalize.cpp -----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// Lower G_ instructions that can't be inst-selected with register bank
10/// assignment from AMDGPURegBankSelect based on machine uniformity info.
11/// Given types on all operands, some register bank assignments require lowering
12/// while others do not.
13/// Note: cases where all register bank assignments would require lowering are
14/// lowered in legalizer.
15/// For example vgpr S64 G_AND requires lowering to S32 while sgpr S64 does not.
16/// Eliminate sgpr S1 by lowering to sgpr S32.
17//
18//===----------------------------------------------------------------------===//
19
#include "AMDGPU.h"
#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPURegBankLegalizeHelper.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/InitializePasses.h"
#include <memory>
#include <mutex>
33
34#define DEBUG_TYPE "amdgpu-regbanklegalize"
35
36using namespace llvm;
37using namespace AMDGPU;
38using namespace llvm::MIPatternMatch;
39
40namespace {
41
42// AMDGPU-specific pattern matchers
43template <typename SrcTy>
44inline UnaryOp_match<SrcTy, AMDGPU::G_AMDGPU_READANYLANE>
45m_GAMDGPUReadAnyLane(const SrcTy &Src) {
46 return UnaryOp_match<SrcTy, AMDGPU::G_AMDGPU_READANYLANE>(Src);
47}
48
49class AMDGPURegBankLegalize : public MachineFunctionPass {
50public:
51 static char ID;
52
53public:
54 AMDGPURegBankLegalize() : MachineFunctionPass(ID) {}
55
56 bool runOnMachineFunction(MachineFunction &MF) override;
57
58 StringRef getPassName() const override {
59 return "AMDGPU Register Bank Legalize";
60 }
61
62 void getAnalysisUsage(AnalysisUsage &AU) const override {
63 AU.addRequired<TargetPassConfig>();
64 AU.addRequired<GISelCSEAnalysisWrapperPass>();
65 AU.addRequired<MachineUniformityAnalysisPass>();
66 MachineFunctionPass::getAnalysisUsage(AU);
67 }
68
69 // If there were no phis and we do waterfall expansion machine verifier would
70 // fail.
71 MachineFunctionProperties getClearedProperties() const override {
72 return MachineFunctionProperties().setNoPHIs();
73 }
74};
75
76} // End anonymous namespace.
77
// Register the pass and its analysis dependencies with the legacy pass
// manager; DEBUG_TYPE doubles as the command-line pass name.
INITIALIZE_PASS_BEGIN(AMDGPURegBankLegalize, DEBUG_TYPE,
                      "AMDGPU Register Bank Legalize", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
INITIALIZE_PASS_END(AMDGPURegBankLegalize, DEBUG_TYPE,
                    "AMDGPU Register Bank Legalize", false, false)

// Pass identification: the address of ID serves as the unique pass identity.
char AMDGPURegBankLegalize::ID = 0;

// Externally visible handle to this pass's identity (declared in AMDGPU.h).
char &llvm::AMDGPURegBankLegalizeID = AMDGPURegBankLegalize::ID;

// Factory used when building the AMDGPU GlobalISel pass pipeline.
FunctionPass *llvm::createAMDGPURegBankLegalizePass() {
  return new AMDGPURegBankLegalize();
}
93
94const RegBankLegalizeRules &getRules(const GCNSubtarget &ST,
95 MachineRegisterInfo &MRI) {
96 static std::mutex GlobalMutex;
97 static SmallDenseMap<unsigned, std::unique_ptr<RegBankLegalizeRules>>
98 CacheForRuleSet;
99 std::lock_guard<std::mutex> Lock(GlobalMutex);
100 auto [It, Inserted] = CacheForRuleSet.try_emplace(Key: ST.getGeneration());
101 if (Inserted)
102 It->second = std::make_unique<RegBankLegalizeRules>(args: ST, args&: MRI);
103 else
104 It->second->refreshRefs(ST: ST, MRI&: MRI);
105 return *It->second;
106}
107
108class AMDGPURegBankLegalizeCombiner {
109 MachineIRBuilder &B;
110 MachineRegisterInfo &MRI;
111 const SIRegisterInfo &TRI;
112 const RegisterBank *SgprRB;
113 const RegisterBank *VgprRB;
114 const RegisterBank *VccRB;
115
116 static constexpr LLT S1 = LLT::scalar(SizeInBits: 1);
117 static constexpr LLT S16 = LLT::scalar(SizeInBits: 16);
118 static constexpr LLT S32 = LLT::scalar(SizeInBits: 32);
119 static constexpr LLT S64 = LLT::scalar(SizeInBits: 64);
120
121public:
122 AMDGPURegBankLegalizeCombiner(MachineIRBuilder &B, const SIRegisterInfo &TRI,
123 const RegisterBankInfo &RBI)
124 : B(B), MRI(*B.getMRI()), TRI(TRI),
125 SgprRB(&RBI.getRegBank(ID: AMDGPU::SGPRRegBankID)),
126 VgprRB(&RBI.getRegBank(ID: AMDGPU::VGPRRegBankID)),
127 VccRB(&RBI.getRegBank(ID: AMDGPU::VCCRegBankID)) {};
128
129 bool isLaneMask(Register Reg);
130 std::pair<MachineInstr *, Register> tryMatch(Register Src, unsigned Opcode);
131 Register tryMatchUnmergeDefs(SmallVectorImpl<Register> &DefRegs);
132 SmallVector<Register> tryMatchMergeReadAnyLane(GMergeLikeInstr *Merge);
133 SmallVector<Register> getReadAnyLaneSrcs(Register Src);
134 void replaceRegWithOrBuildCopy(Register Dst, Register Src);
135
136 bool tryEliminateReadAnyLane(MachineInstr &Copy);
137 void tryCombineCopy(MachineInstr &MI);
138 void tryCombineS1AnyExt(MachineInstr &MI);
139};
140
141bool AMDGPURegBankLegalizeCombiner::isLaneMask(Register Reg) {
142 const RegisterBank *RB = MRI.getRegBankOrNull(Reg);
143 if (RB && RB->getID() == AMDGPU::VCCRegBankID)
144 return true;
145
146 const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
147 return RC && TRI.isSGPRClass(RC) && MRI.getType(Reg) == LLT::scalar(SizeInBits: 1);
148}
149
150std::pair<MachineInstr *, Register>
151AMDGPURegBankLegalizeCombiner::tryMatch(Register Src, unsigned Opcode) {
152 MachineInstr *MatchMI = MRI.getVRegDef(Reg: Src);
153 if (MatchMI->getOpcode() != Opcode)
154 return {nullptr, Register()};
155 return {MatchMI, MatchMI->getOperand(i: 1).getReg()};
156}
157
158// Check if all registers are from same unmerge and there is no shuffling.
159// Returns the unmerge source if both conditions are met.
160Register AMDGPURegBankLegalizeCombiner::tryMatchUnmergeDefs(
161 SmallVectorImpl<Register> &DefRegs) {
162 auto *UnMerge = getOpcodeDef<GUnmerge>(Reg: DefRegs[0], MRI);
163 if (!UnMerge || UnMerge->getNumDefs() != DefRegs.size())
164 return {};
165 for (unsigned I = 1; I < DefRegs.size(); ++I) {
166 if (UnMerge->getReg(Idx: I) != DefRegs[I])
167 return {};
168 }
169 return UnMerge->getSourceReg();
170}
171
172// Check if all merge sources are readanylanes and return the readanylane
173// sources if they are.
174SmallVector<Register> AMDGPURegBankLegalizeCombiner::tryMatchMergeReadAnyLane(
175 GMergeLikeInstr *Merge) {
176 SmallVector<Register> ReadAnyLaneSrcs;
177 for (unsigned i = 0; i < Merge->getNumSources(); ++i) {
178 Register Src;
179 if (!mi_match(R: Merge->getSourceReg(I: i), MRI,
180 P: m_GAMDGPUReadAnyLane(Src: m_Reg(R&: Src))))
181 return {};
182 ReadAnyLaneSrcs.push_back(Elt: Src);
183 }
184 return ReadAnyLaneSrcs;
185}
186
187SmallVector<Register>
188AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrcs(Register Src) {
189 // Src = G_AMDGPU_READANYLANE RALSrc
190 Register RALSrc;
191 if (mi_match(R: Src, MRI, P: m_GAMDGPUReadAnyLane(Src: m_Reg(R&: RALSrc))))
192 return {RALSrc};
193
194 // RALSrc = G_ANYEXT S16Src
195 // TruncSrc = G_AMDGPU_READANYLANE RALSrc
196 // Src = G_TRUNC TruncSrc
197 if (mi_match(R: Src, MRI,
198 P: m_GTrunc(Src: m_GAMDGPUReadAnyLane(Src: m_GAnyExt(Src: m_Reg(R&: RALSrc)))))) {
199 return {RALSrc};
200 }
201
202 // TruncSrc = G_AMDGPU_READANYLANE RALSrc
203 // AextSrc = G_TRUNC TruncSrc
204 // Src = G_ANYEXT AextSrc
205 if (mi_match(R: Src, MRI,
206 P: m_GAnyExt(Src: m_GTrunc(Src: m_GAMDGPUReadAnyLane(Src: m_Reg(R&: RALSrc)))))) {
207 return {RALSrc};
208 }
209
210 // Sgpr0 = G_AMDGPU_READANYLANE Vgpr0
211 // Sgpr1 = G_AMDGPU_READANYLANE Vgpr1
212 // ...
213 // Src = G_MERGE_LIKE Sgpr0, Sgpr1, ...
214 // Dst = COPY Src
215 if (auto *Merge = getOpcodeDef<GMergeLikeInstr>(Reg: Src, MRI)) {
216 SmallVector<Register> ReadAnyLaneSrcs = tryMatchMergeReadAnyLane(Merge);
217 if (ReadAnyLaneSrcs.empty())
218 return {};
219
220 // Vgpr0, Vgpr1, ... = G_UNMERGE_VALUES UnmergeSrc
221 if (Register UnmergeSrc = tryMatchUnmergeDefs(DefRegs&: ReadAnyLaneSrcs))
222 return {UnmergeSrc};
223
224 // Multiple ReadAnyLane vgpr sources, need to merge Vgpr0, Vgpr1, ...
225 return ReadAnyLaneSrcs;
226 }
227
228 // SrcRegIdx = G_AMDGPU_READANYLANE RALElSrc
229 // SourceReg G_MERGE_VALUES ..., SrcRegIdx, ...
230 // ..., Src, ... = G_UNMERGE_VALUES SourceReg
231 auto *UnMerge = getOpcodeDef<GUnmerge>(Reg: Src, MRI);
232 if (!UnMerge)
233 return {};
234
235 int Idx = UnMerge->findRegisterDefOperandIdx(Reg: Src, TRI: nullptr);
236 auto *Merge = getOpcodeDef<GMergeLikeInstr>(Reg: UnMerge->getSourceReg(), MRI);
237 if (!Merge || UnMerge->getNumDefs() != Merge->getNumSources())
238 return {};
239
240 Register SrcRegIdx = Merge->getSourceReg(I: Idx);
241 if (MRI.getType(Reg: Src) != MRI.getType(Reg: SrcRegIdx))
242 return {};
243
244 auto [RALEl, RALElSrc] = tryMatch(Src: SrcRegIdx, Opcode: AMDGPU::G_AMDGPU_READANYLANE);
245 if (RALEl)
246 return {RALElSrc};
247
248 return {};
249}
250
251void AMDGPURegBankLegalizeCombiner::replaceRegWithOrBuildCopy(Register Dst,
252 Register Src) {
253 if (Dst.isVirtual())
254 MRI.replaceRegWith(FromReg: Dst, ToReg: Src);
255 else
256 B.buildCopy(Res: Dst, Op: Src);
257}
258
259bool AMDGPURegBankLegalizeCombiner::tryEliminateReadAnyLane(
260 MachineInstr &Copy) {
261 Register Dst = Copy.getOperand(i: 0).getReg();
262 Register Src = Copy.getOperand(i: 1).getReg();
263
264 // Skip non-vgpr Dst
265 if (Dst.isVirtual() ? (MRI.getRegBankOrNull(Reg: Dst) != VgprRB)
266 : !TRI.isVGPR(MRI, Reg: Dst))
267 return false;
268
269 // Skip physical source registers and source registers with register class
270 if (!Src.isVirtual() || MRI.getRegClassOrNull(Reg: Src))
271 return false;
272
273 Register RALDst = Src;
274 MachineInstr &SrcMI = *MRI.getVRegDef(Reg: Src);
275 if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
276 RALDst = SrcMI.getOperand(i: 1).getReg();
277
278 B.setInstrAndDebugLoc(Copy);
279 SmallVector<Register> ReadAnyLaneSrcRegs = getReadAnyLaneSrcs(Src: RALDst);
280 if (ReadAnyLaneSrcRegs.empty())
281 return false;
282
283 Register ReadAnyLaneSrc;
284 if (ReadAnyLaneSrcRegs.size() == 1) {
285 ReadAnyLaneSrc = ReadAnyLaneSrcRegs[0];
286 } else {
287 // Multiple readanylane sources without a common unmerge, merge them.
288 auto Merge = B.buildMergeLikeInstr(Res: {VgprRB, MRI.getType(Reg: RALDst)},
289 Ops: ReadAnyLaneSrcRegs);
290 ReadAnyLaneSrc = Merge.getReg(Idx: 0);
291 }
292
293 if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
294 // Src = READANYLANE RALSrc Src = READANYLANE RALSrc
295 // Dst = Copy Src $Dst = Copy Src
296 // -> ->
297 // Dst = RALSrc $Dst = Copy RALSrc
298 replaceRegWithOrBuildCopy(Dst, Src: ReadAnyLaneSrc);
299 } else {
300 // RALDst = READANYLANE RALSrc RALDst = READANYLANE RALSrc
301 // Src = G_BITCAST RALDst Src = G_BITCAST RALDst
302 // Dst = Copy Src Dst = Copy Src
303 // -> ->
304 // NewVgpr = G_BITCAST RALDst NewVgpr = G_BITCAST RALDst
305 // Dst = NewVgpr $Dst = Copy NewVgpr
306 auto Bitcast = B.buildBitcast(Dst: {VgprRB, MRI.getType(Reg: Src)}, Src: ReadAnyLaneSrc);
307 replaceRegWithOrBuildCopy(Dst, Src: Bitcast.getReg(Idx: 0));
308 }
309
310 eraseInstr(MI&: Copy, MRI);
311 return true;
312}
313
314void AMDGPURegBankLegalizeCombiner::tryCombineCopy(MachineInstr &MI) {
315 if (tryEliminateReadAnyLane(Copy&: MI))
316 return;
317
318 Register Dst = MI.getOperand(i: 0).getReg();
319 Register Src = MI.getOperand(i: 1).getReg();
320 // Skip copies of physical registers.
321 if (!Dst.isVirtual() || !Src.isVirtual())
322 return;
323
324 // This is a cross bank copy, sgpr S1 to lane mask.
325 //
326 // %Src:sgpr(s1) = G_TRUNC %TruncS32Src:sgpr(s32)
327 // %Dst:lane-mask(s1) = COPY %Src:sgpr(s1)
328 // ->
329 // %BoolSrc:sgpr(s32) = G_AND %TruncS32Src:sgpr(s32), 1
330 // %Dst:lane-mask(s1) = G_AMDGPU_COPY_VCC_SCC %BoolSrc:sgpr(s32)
331 if (isLaneMask(Reg: Dst) && MRI.getRegBankOrNull(Reg: Src) == SgprRB) {
332 auto [Trunc, TruncS32Src] = tryMatch(Src, Opcode: AMDGPU::G_TRUNC);
333 assert(Trunc && MRI.getType(TruncS32Src) == S32 &&
334 "sgpr S1 must be result of G_TRUNC of sgpr S32");
335
336 B.setInstr(MI);
337 // Ensure that truncated bits in BoolSrc are 0.
338 auto One = B.buildConstant(Res: {SgprRB, S32}, Val: 1);
339 auto BoolSrc = B.buildAnd(Dst: {SgprRB, S32}, Src0: TruncS32Src, Src1: One);
340 B.buildInstr(Opc: AMDGPU::G_AMDGPU_COPY_VCC_SCC, DstOps: {Dst}, SrcOps: {BoolSrc});
341 eraseInstr(MI, MRI);
342 }
343}
344
345void AMDGPURegBankLegalizeCombiner::tryCombineS1AnyExt(MachineInstr &MI) {
346 // %Src:sgpr(S1) = G_TRUNC %TruncSrc
347 // %Dst = G_ANYEXT %Src:sgpr(S1)
348 // ->
349 // %Dst = G_... %TruncSrc
350 Register Dst = MI.getOperand(i: 0).getReg();
351 Register Src = MI.getOperand(i: 1).getReg();
352 if (MRI.getType(Reg: Src) != S1)
353 return;
354
355 auto [Trunc, TruncSrc] = tryMatch(Src, Opcode: AMDGPU::G_TRUNC);
356 if (!Trunc)
357 return;
358
359 LLT DstTy = MRI.getType(Reg: Dst);
360 LLT TruncSrcTy = MRI.getType(Reg: TruncSrc);
361
362 if (DstTy == TruncSrcTy) {
363 MRI.replaceRegWith(FromReg: Dst, ToReg: TruncSrc);
364 eraseInstr(MI, MRI);
365 return;
366 }
367
368 B.setInstr(MI);
369
370 if (DstTy == S32 && TruncSrcTy == S64) {
371 auto Unmerge = B.buildUnmerge(Attrs: {.RCOrRB: SgprRB, .Ty: S32}, Op: TruncSrc);
372 MRI.replaceRegWith(FromReg: Dst, ToReg: Unmerge.getReg(Idx: 0));
373 eraseInstr(MI, MRI);
374 return;
375 }
376
377 if (DstTy == S64 && TruncSrcTy == S32) {
378 B.buildMergeLikeInstr(Res: MI.getOperand(i: 0).getReg(),
379 Ops: {TruncSrc, B.buildUndef(Res: {SgprRB, S32})});
380 eraseInstr(MI, MRI);
381 return;
382 }
383
384 if (DstTy == S32 && TruncSrcTy == S16) {
385 B.buildAnyExt(Res: Dst, Op: TruncSrc);
386 eraseInstr(MI, MRI);
387 return;
388 }
389
390 if (DstTy == S16 && TruncSrcTy == S32) {
391 B.buildTrunc(Res: Dst, Op: TruncSrc);
392 eraseInstr(MI, MRI);
393 return;
394 }
395
396 llvm_unreachable("missing anyext + trunc combine");
397}
398
399// Search through MRI for virtual registers with sgpr register bank and S1 LLT.
400[[maybe_unused]] static Register getAnySgprS1(const MachineRegisterInfo &MRI) {
401 const LLT S1 = LLT::scalar(SizeInBits: 1);
402 for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {
403 Register Reg = Register::index2VirtReg(Index: i);
404 if (MRI.def_empty(RegNo: Reg) || MRI.getType(Reg) != S1)
405 continue;
406
407 const RegisterBank *RB = MRI.getRegBankOrNull(Reg);
408 if (RB && RB->getID() == AMDGPU::SGPRRegBankID) {
409 LLVM_DEBUG(dbgs() << "Warning: detected sgpr S1 register in: ";
410 MRI.getVRegDef(Reg)->dump(););
411 return Reg;
412 }
413 }
414
415 return {};
416}
417
418bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
419 if (MF.getProperties().hasFailedISel())
420 return false;
421
422 // Setup the instruction builder with CSE.
423 const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
424 GISelCSEAnalysisWrapper &Wrapper =
425 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
426 GISelCSEInfo &CSEInfo = Wrapper.get(CSEOpt: TPC.getCSEConfig());
427 GISelObserverWrapper Observer;
428 Observer.addObserver(O: &CSEInfo);
429
430 CSEMIRBuilder B(MF);
431 B.setCSEInfo(&CSEInfo);
432 B.setChangeObserver(Observer);
433
434 RAIIDelegateInstaller DelegateInstaller(MF, &Observer);
435 RAIIMFObserverInstaller MFObserverInstaller(MF, Observer);
436
437 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
438 MachineRegisterInfo &MRI = MF.getRegInfo();
439 const RegisterBankInfo &RBI = *ST.getRegBankInfo();
440 const MachineUniformityInfo &MUI =
441 getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
442
443 // RegBankLegalizeRules is initialized with assigning sets of IDs to opcodes.
444 const RegBankLegalizeRules &RBLRules = getRules(ST, MRI);
445
446 // Logic that does legalization based on IDs assigned to Opcode.
447 RegBankLegalizeHelper RBLHelper(B, MUI, RBI, RBLRules);
448
449 SmallVector<MachineInstr *> AllInst;
450
451 for (MachineBasicBlock &MBB : MF) {
452 for (MachineInstr &MI : MBB) {
453 AllInst.push_back(Elt: &MI);
454 }
455 }
456
457 for (MachineInstr *MI : AllInst) {
458 if (!MI->isPreISelOpcode())
459 continue;
460
461 unsigned Opc = MI->getOpcode();
462 // Insert point for use operands needs some calculation.
463 if (Opc == AMDGPU::G_PHI) {
464 if (!RBLHelper.applyMappingPHI(MI&: *MI))
465 return false;
466 continue;
467 }
468
469 // Opcodes that support pretty much all combinations of reg banks and LLTs
470 // (except S1). There is no point in writing rules for them.
471 if (Opc == AMDGPU::G_BUILD_VECTOR || Opc == AMDGPU::G_MERGE_VALUES ||
472 Opc == AMDGPU::G_CONCAT_VECTORS || Opc == AMDGPU::G_BITCAST) {
473 RBLHelper.applyMappingTrivial(MI&: *MI);
474 continue;
475 }
476
477 // Opcodes that also support S1.
478 if (Opc == G_FREEZE &&
479 MRI.getType(Reg: MI->getOperand(i: 0).getReg()) != LLT::scalar(SizeInBits: 1)) {
480 RBLHelper.applyMappingTrivial(MI&: *MI);
481 continue;
482 }
483
484 if ((Opc == AMDGPU::G_CONSTANT || Opc == AMDGPU::G_FCONSTANT ||
485 Opc == AMDGPU::G_IMPLICIT_DEF)) {
486 Register Dst = MI->getOperand(i: 0).getReg();
487 // Non S1 types are trivially accepted.
488 if (MRI.getType(Reg: Dst) != LLT::scalar(SizeInBits: 1)) {
489 assert(MRI.getRegBank(Dst)->getID() == AMDGPU::SGPRRegBankID);
490 continue;
491 }
492
493 // S1 rules are in RegBankLegalizeRules.
494 }
495
496 if (!RBLHelper.findRuleAndApplyMapping(MI&: *MI))
497 return false;
498 }
499
500 // Sgpr S1 clean up combines:
501 // - Sgpr S1(S32) to sgpr S1(S32) Copy: anyext + trunc combine.
502 // In RegBankLegalize 'S1 Dst' are legalized into S32 as
503 // 'S1Dst = Trunc S32Dst' and 'S1 Src' into 'S32Src = Anyext S1Src'.
504 // S1 Truncs and Anyexts that come from legalizer, that can have non-S32
505 // types e.g. S16 = Anyext S1 or S1 = Trunc S64, will also be cleaned up.
506 // - Sgpr S1(S32) to vcc Copy: G_AMDGPU_COPY_VCC_SCC combine.
507 // Divergent instruction uses sgpr S1 as input that should be lane mask(vcc)
508 // Legalizing this use creates sgpr S1(S32) to vcc Copy.
509
510 // Note: Remaining S1 copies, S1s are either sgpr S1(S32) or vcc S1:
511 // - Vcc to vcc Copy: nothing to do here, just a regular copy.
512 // - Vcc to sgpr S1 Copy: Should not exist in a form of COPY instruction(*).
513 // Note: For 'uniform-in-vcc to sgpr-S1 copy' G_AMDGPU_COPY_SCC_VCC is used
514 // instead. When only available instruction creates vcc result, use of
515 // UniformInVcc results in creating G_AMDGPU_COPY_SCC_VCC.
516
517 // (*)Explanation for 'sgpr S1(uniform) = COPY vcc(divergent)':
518 // Copy from divergent to uniform register indicates an error in either:
519 // - Uniformity analysis: Uniform instruction has divergent input. If one of
520 // the inputs is divergent, instruction should be divergent!
521 // - RegBankLegalizer not executing in waterfall loop (missing implementation)
522
523 AMDGPURegBankLegalizeCombiner Combiner(B, *ST.getRegisterInfo(), RBI);
524
525 for (MachineBasicBlock &MBB : MF) {
526 for (MachineInstr &MI : make_early_inc_range(Range&: MBB)) {
527 if (MI.getOpcode() == AMDGPU::COPY) {
528 Combiner.tryCombineCopy(MI);
529 continue;
530 }
531 if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
532 Combiner.tryCombineS1AnyExt(MI);
533 continue;
534 }
535 }
536 }
537
538 assert(!getAnySgprS1(MRI).isValid() &&
539 "Registers with sgpr reg bank and S1 LLT are not legal after "
540 "AMDGPURegBankLegalize. Should lower to sgpr S32");
541
542 return true;
543}
544