//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Threading.h"
#include <cassert>

#define GET_TARGET_REGBANK_IMPL
#include "AArch64GenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;
static const unsigned CustomMappingID = 1;

AArch64RegisterBankInfo::AArch64RegisterBankInfo(
    const TargetRegisterInfo &TRI) {
  static llvm::once_flag InitializeRegisterBankFlag;

  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed, the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor will become empty.

    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GPR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBGPR.getID()) == 128 &&
           "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the floating-point and
    // vector register classes.
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBFPR.getID()) == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(getMaximumSize(RBCCR.getID()) == 32 &&
           "CCR should hold up to 32-bit");

    // Check that the TableGen'ed-like file is in sync with our expectations.
    // First, the Idx.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

// Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

// Check the value mapping for 3-operand instructions where all the operands
// map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(AArch64::RBNameDst##RegBankID,    \
                                             AArch64::RBNameSrc##RegBankID,    \
                                             TypeSize::getFixed(Size));        \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 &&                                        \
           #RBNameDst #Size " Dst is incorrectly initialized");                \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 &&                                        \
           #RBNameSrc #Size " Src is incorrectly initialized");                \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && "FPR" #DstSize                         \
                                        " Dst is incorrectly initialized");    \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && "FPR" #SrcSize                         \
                                        " Src is incorrectly initialized");    \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}

unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
                                           const RegisterBank &B,
                                           const TypeSize Size) const {
  // What do we do with different sizes?
  // Copies are assumed to be of the same size.
  // Will introduce other hooks for different sizes:
  // * extract cost.
  // * build_sequence cost.

  // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
  // FIXME: This should be deduced from the scheduling model.
  if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
    // FMOVXDr or FMOVWSr.
    return 5;
  if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
    // FMOVDXr or FMOVSWr.
    return 4;

  return RegisterBankInfo::copyCost(A, B, Size);
}

const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT Ty) const {
  switch (RC.getID()) {
  case AArch64::GPR64sponlyRegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  default:
    return AArch64GenRegisterBankInfo::getRegBankFromRegClass(RC, Ty);
  }
}

RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR: {
    // A 32-bit or 64-bit G_OR can be mapped to either FPR or GPR for the same
    // cost.
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 3)
      break;
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
        /*NumOperands*/ 3);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
        /*NumOperands*/ 3);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
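  // Illustrative example for the G_OR case above (hypothetical MIR, not taken
  // from a specific test): for
  //   %res:_(s64) = G_OR %a:_(s64), %b:_(s64)
  // both alternatives are returned with the same cost, so the greedy mapper
  // can pick whichever avoids cross-bank copies with the surrounding code:
  //   all-GPR mapping (selects to e.g. ORRXrr), or
  //   all-FPR mapping (selects to a vector ORR).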
  case TargetOpcode::G_BITCAST: {
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    AltMappings.push_back(&GPRToFPRMapping);
    AltMappings.push_back(&FPRToGPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_LOAD: {
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getOperandsMapping(
            {getValueMapping(PMI_FirstGPR, Size),
             // Addresses are GPR 64-bit.
             getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getOperandsMapping(
            {getValueMapping(PMI_FirstFPR, Size),
             // Addresses are GPR 64-bit.
             getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
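  // Illustrative example for the G_LOAD case above (hypothetical MIR): for
  //   %val:_(s64) = G_LOAD %ptr(p0) :: (load (s64))
  // the GPR alternative would select to something like LDRXui and the FPR
  // alternative to LDRDui; the address operand stays on GPR in both cases.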
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}

void AArch64RegisterBankInfo::applyMappingImpl(
    MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_STORE: {
    Register Dst = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(Dst);
    if (MRI.getRegBank(Dst) == &AArch64::GPRRegBank && Ty.isScalar() &&
        Ty.getSizeInBits() < 32) {
      Builder.setInsertPt(*MI.getParent(), MI.getIterator());
      auto Ext = Builder.buildAnyExt(LLT::scalar(32), Dst);
      MI.getOperand(0).setReg(Ext.getReg(0));
      MRI.setRegBank(Ext.getReg(0), AArch64::GPRRegBank);
    }
    return applyDefaultMapping(OpdMapper);
  }
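  // For illustration only (hypothetical MIR), the G_STORE rewrite above turns
  //   G_STORE %val:gpr(s8), %ptr(p0) :: (store (s8))
  // into roughly
  //   %ext:gpr(s32) = G_ANYEXT %val(s8)
  //   G_STORE %ext(s32), %ptr(p0) :: (store (s8))
  // so the stored value lives in a 32-bit GPR while the memory size is kept.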
  case TargetOpcode::G_LOAD: {
    Register Dst = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(Dst);
    if (MRI.getRegBank(Dst) == &AArch64::GPRRegBank && Ty.isScalar() &&
        Ty.getSizeInBits() < 32) {
      Builder.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
      Register ExtReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
      Builder.buildTrunc(Dst, ExtReg);
      MI.getOperand(0).setReg(ExtReg);
      MRI.setRegBank(ExtReg, AArch64::GPRRegBank);
    }
    [[fallthrough]];
  }
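  // Similarly, for illustration (hypothetical MIR), the G_LOAD rewrite above
  // turns
  //   %val:gpr(s8) = G_LOAD %ptr(p0) :: (load (s8))
  // into roughly
  //   %wide:gpr(s32) = G_LOAD %ptr(p0) :: (load (s8))
  //   %val:gpr(s8) = G_TRUNC %wide(s32)
  // i.e. the load now defines a 32-bit GPR and the original narrow value is
  // truncated back out of it.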
  case TargetOpcode::G_OR:
  case TargetOpcode::G_BITCAST:
    // These IDs must match getInstrAlternativeMappings.
    assert((OpdMapper.getInstrMapping().getID() >= 1 &&
            OpdMapper.getInstrMapping().getID() <= 4) &&
           "Don't know how to handle that ID");
    return applyDefaultMapping(OpdMapper);
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    // Extend smaller gpr operands to 32 bit.
    Builder.setInsertPt(*MI.getParent(), MI.getIterator());
    auto Ext = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(2).getReg());
    MRI.setRegBank(Ext.getReg(0), getRegBank(AArch64::GPRRegBankID));
    MI.getOperand(2).setReg(Ext.getReg(0));
    return applyDefaultMapping(OpdMapper);
  }
  case AArch64::G_DUP: {
    // Extend smaller gpr sources to 32 bits.
    assert(MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() < 32 &&
           "Expected sources smaller than 32-bits");
    Builder.setInsertPt(*MI.getParent(), MI.getIterator());

    Register ConstReg;
    auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg());
    if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      auto CstVal = ConstMI->getOperand(1).getCImm()->getValue();
      ConstReg =
          Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0);
    } else {
      ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg())
                     .getReg(0);
    }
    MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID));
    MI.getOperand(1).setReg(ConstReg);
    return applyDefaultMapping(OpdMapper);
  }
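  // For illustration only (hypothetical MIR), the G_DUP rewrite above turns
  //   %c:gpr(s8) = G_CONSTANT i8 1
  //   %v:fpr(<8 x s8>) = G_DUP %c(s8)
  // into roughly
  //   %c32:gpr(s32) = G_CONSTANT i32 1
  //   %v:fpr(<8 x s8>) = G_DUP %c32(s32)
  // and simply anyextends non-constant sources instead of rebuilding the
  // constant.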
  default:
    llvm_unreachable("Don't know how to handle that operation");
  }
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or fewer operands");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  TypeSize Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands are using similar size and type.
  // Should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes is
  // different between the operands.
  // If we want to go to that level of detail, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same
  // for each type.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}
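
// For illustration only: under the rule above, a hypothetical
//   %d:_(v4s32) = G_FADD %a:_(v4s32), %b:_(v4s32)
// has a vector result (and an FP opcode), so all three operands receive the
// same FPR 128-bit value mapping, while a scalar integer G_ADD would receive
// the corresponding GPR mapping instead.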

/// \returns true if a given intrinsic only uses and defines FPRs.
static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
                          const MachineInstr &MI) {
  // TODO: Add more intrinsics.
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_uaddlv:
  case Intrinsic::aarch64_neon_uaddv:
  case Intrinsic::aarch64_neon_saddv:
  case Intrinsic::aarch64_neon_umaxv:
  case Intrinsic::aarch64_neon_smaxv:
  case Intrinsic::aarch64_neon_uminv:
  case Intrinsic::aarch64_neon_sminv:
  case Intrinsic::aarch64_neon_faddv:
  case Intrinsic::aarch64_neon_fmaxv:
  case Intrinsic::aarch64_neon_fminv:
  case Intrinsic::aarch64_neon_fmaxnmv:
  case Intrinsic::aarch64_neon_fminnmv:
  case Intrinsic::aarch64_neon_fmulx:
  case Intrinsic::aarch64_neon_frecpe:
  case Intrinsic::aarch64_neon_frecps:
  case Intrinsic::aarch64_neon_frecpx:
  case Intrinsic::aarch64_neon_frsqrte:
  case Intrinsic::aarch64_neon_frsqrts:
  case Intrinsic::aarch64_neon_facge:
  case Intrinsic::aarch64_neon_facgt:
  case Intrinsic::aarch64_neon_fabd:
  case Intrinsic::aarch64_neon_sqrdmlah:
  case Intrinsic::aarch64_neon_sqrdmlsh:
  case Intrinsic::aarch64_neon_sqrdmulh:
  case Intrinsic::aarch64_neon_sqadd:
  case Intrinsic::aarch64_neon_sqsub:
  case Intrinsic::aarch64_neon_srshl:
  case Intrinsic::aarch64_neon_urshl:
  case Intrinsic::aarch64_neon_sqshl:
  case Intrinsic::aarch64_neon_uqshl:
  case Intrinsic::aarch64_neon_sqrshl:
  case Intrinsic::aarch64_neon_uqrshl:
  case Intrinsic::aarch64_neon_ushl:
  case Intrinsic::aarch64_neon_sshl:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_crypto_sha1h:
  case Intrinsic::aarch64_crypto_sha1c:
  case Intrinsic::aarch64_crypto_sha1p:
  case Intrinsic::aarch64_crypto_sha1m:
  case Intrinsic::aarch64_sisd_fcvtxn:
  case Intrinsic::aarch64_sisd_fabd:
    return true;
  case Intrinsic::aarch64_neon_saddlv: {
    const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    return SrcTy.getElementType().getSizeInBits() >= 16 &&
           SrcTy.getElementCount().getFixedValue() >= 4;
  }
  }
}

bool AArch64RegisterBankInfo::isPHIWithFPConstraints(
    const MachineInstr &MI, const MachineRegisterInfo &MRI,
    const AArch64RegisterInfo &TRI, const unsigned Depth) const {
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
                [&](const MachineInstr &UseMI) {
                  if (onlyUsesFP(UseMI, MRI, TRI, Depth + 1))
                    return true;
                  return isPHIWithFPConstraints(UseMI, MRI, TRI, Depth + 1);
                });
}

bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI,
                                               const AArch64RegisterInfo &TRI,
                                               unsigned Depth) const {
  unsigned Op = MI.getOpcode();
  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
    return true;

  // Do we have an explicit floating point instruction?
  if (isPreISelGenericFloatingPointOpcode(Op))
    return true;

  // No. Check if we have a copy-like instruction. If we do, then we could
  // still be fed by floating point instructions.
  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
      !isPreISelGenericOptimizationHint(Op))
    return false;

  // Check if we already know the register bank.
  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
  if (RB == &AArch64::FPRRegBank)
    return true;
  if (RB == &AArch64::GPRRegBank)
    return false;

  // We don't know anything.
  //
  // If we have a phi, we may be able to infer that it will be assigned a FPR
  // based off of its inputs.
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
    return Op.isReg() &&
           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
  });
}

bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
                                         const MachineRegisterInfo &MRI,
                                         const AArch64RegisterInfo &TRI,
                                         unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
  case AArch64::G_PMULL:
  case AArch64::G_SLI:
  case AArch64::G_SRI:
    return true;
  case TargetOpcode::G_INTRINSIC:
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_fcvtas:
    case Intrinsic::aarch64_neon_fcvtau:
    case Intrinsic::aarch64_neon_fcvtzs:
    case Intrinsic::aarch64_neon_fcvtzu:
    case Intrinsic::aarch64_neon_fcvtms:
    case Intrinsic::aarch64_neon_fcvtmu:
    case Intrinsic::aarch64_neon_fcvtns:
    case Intrinsic::aarch64_neon_fcvtnu:
    case Intrinsic::aarch64_neon_fcvtps:
    case Intrinsic::aarch64_neon_fcvtpu:
      return true;
    default:
      break;
    }
    break;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
                                            const MachineRegisterInfo &MRI,
                                            const AArch64RegisterInfo &TRI,
                                            unsigned Depth) const {
  switch (MI.getOpcode()) {
  case AArch64::G_DUP:
  case AArch64::G_SADDLP:
  case AArch64::G_UADDLP:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
  case AArch64::G_SLI:
  case AArch64::G_SRI:
    return true;
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_ld1x2:
    case Intrinsic::aarch64_neon_ld1x3:
    case Intrinsic::aarch64_neon_ld1x4:
    case Intrinsic::aarch64_neon_ld2:
    case Intrinsic::aarch64_neon_ld2lane:
    case Intrinsic::aarch64_neon_ld2r:
    case Intrinsic::aarch64_neon_ld3:
    case Intrinsic::aarch64_neon_ld3lane:
    case Intrinsic::aarch64_neon_ld3r:
    case Intrinsic::aarch64_neon_ld4:
    case Intrinsic::aarch64_neon_ld4lane:
    case Intrinsic::aarch64_neon_ld4r:
      return true;
    default:
      break;
    }
    break;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::prefersFPUse(const MachineInstr &MI,
                                           const MachineRegisterInfo &MRI,
                                           const AArch64RegisterInfo &TRI,
                                           unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
    return MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() ==
           MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  }
  return onlyDefinesFP(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
  // GMemOperation because we also want to match indexed loads.
  auto *MemOp = cast<GMemOperation>(&MI);
  const Value *LdVal = MemOp->getMMO().getValue();
  if (!LdVal)
    return false;

  Type *EltTy = nullptr;
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
    EltTy = GV->getValueType();
    // Look at the first element of the struct to determine the type we are
    // loading.
    while (StructType *StructEltTy = dyn_cast<StructType>(EltTy)) {
      if (StructEltTy->getNumElements() == 0)
        break;
      EltTy = StructEltTy->getTypeAtIndex(0U);
    }
    // Look at the first element of the array to determine its type.
    if (isa<ArrayType>(EltTy))
      EltTy = EltTy->getArrayElementType();
  } else if (!isa<Constant>(LdVal)) {
    // FIXME: grubbing around uses is pretty ugly, but with no more
    // `getPointerElementType` there's not much else we can do.
    for (const auto *LdUser : LdVal->users()) {
      if (isa<LoadInst>(LdUser)) {
        EltTy = LdUser->getType();
        break;
      }
      if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
        EltTy = LdUser->getOperand(0)->getType();
        break;
      }
    }
  }
  return EltTy && EltTy->isFPOrFPVectorTy();
}
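
// For illustration only (hypothetical IR): a load whose MMO points at
//   @g = global { float, i32 } ...
// drills down to the first struct element, sees 'float', and is treated as an
// FP load, whereas a load from a plain i64 global would not be.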

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
      Opc == TargetOpcode::G_PHI) {
    const RegisterBankInfo::InstructionMapping &Mapping =
        getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();

  switch (Opc) {
    // G_{F|S|U}REM are not listed because they are not legal.
    // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_FPEXT: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    return getInstructionMapping(
        DefaultMappingID, /*Cost*/ 1,
        getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
        /*NumOperands*/ 2);
  }
  // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
      return getInstructionMapping(DefaultMappingID, 1,
                                   &ValMappings[Shift64Imm], 3);
    return getSameKindOfOperandsMapping(MI);
  }
  case TargetOpcode::COPY: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    // Check if one of the registers is not a generic register.
    if ((DstReg.isPhysical() || !MRI.getType(DstReg).isValid()) ||
        (SrcReg.isPhysical() || !MRI.getType(SrcReg).isValid())) {
      const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
      const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
      if (!DstRB)
        DstRB = SrcRB;
      else if (!SrcRB)
        SrcRB = DstRB;
      // If both RB are null that means both registers are generic.
      // We shouldn't be here.
      assert(DstRB && SrcRB && "Both RegBank were nullptr");
      TypeSize Size = getSizeInBits(DstReg, MRI, TRI);
      return getInstructionMapping(
          DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
          getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
          // We only care about the mapping of the destination.
          /*NumOperands*/ 1);
    }
    // Both registers are generic, use G_BITCAST.
    [[fallthrough]];
  }
  case TargetOpcode::G_BITCAST: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    TypeSize Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
    bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return getInstructionMapping(
        DefaultMappingID, copyCost(DstRB, SrcRB, Size),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        // We only care about the mapping of the destination for COPY.
        /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
  }
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();
  unsigned MappingID = DefaultMappingID;

  // Track the size and bank of each register. We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    if (!Ty.isValid())
      continue;
    OpSize[Idx] = Ty.getSizeInBits().getKnownMinValue();

    // As a top-level guess, vectors including both scalable and non-scalable
    // ones go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector())
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else if (isPreISelGenericFloatingPointOpcode(Opc) ||
             (MO.isDef() && onlyDefinesFP(MI, MRI, TRI)) ||
             (MO.isUse() && onlyUsesFP(MI, MRI, TRI)) ||
             Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // We want to select dup(load) into LD1R.
    if (ScalarDef->getOpcode() == TargetOpcode::G_LOAD)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    // s8 is an exception for G_DUP, which we always want on gpr.
    else if (ScalarTy.getSizeInBits() != 8 &&
             (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
              onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else {
      if (ScalarTy.getSizeInBits() < 32 &&
          getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank) {
        // Calls applyMappingImpl()
        MappingID = CustomMappingID;
      }
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    }
    break;
  }
  case TargetOpcode::G_TRUNC: {
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer to FP conversions don't necessarily cross from the GPR bank to
    // the FPR bank; they can also be done entirely within an FPR register.
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank &&
        MRI.getType(SrcReg).getSizeInBits() ==
            MRI.getType(MI.getOperand(0).getReg()).getSizeInBits())
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
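  // For illustration only: a hypothetical
  //   %f:_(s64) = G_SITOFP %x(s64)
  // whose source has already been assigned to FPR keeps both operands on FPR
  // (e.g. an FPR-to-FPR SCVTF), avoiding an extra GPR->FPR copy; otherwise
  // the result goes to FPR and the source stays on GPR.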
  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    LLT DstType = MRI.getType(MI.getOperand(0).getReg());
    if (DstType.isVector())
      break;
    if (DstType == LLT::scalar(16)) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
      break;
    }
    TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI);
    if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
        all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        prefersFPUse(UseMI, MRI, TRI);
               }))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use an FPR.
    AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross register bank copy and this is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          TypeSize::getFixed(OpSize[0]));
    break;
  case TargetOpcode::G_LOAD: {
    // Loading in the vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR) {
      Cost = 2;
      break;
    }

    if (cast<GLoad>(MI).isAtomic()) {
      // Atomics always use GPR destinations. Don't refine any further.
      OpRegBankIdx[0] = PMI_FirstGPR;
      if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() < 32)
        MappingID = CustomMappingID;
      break;
    }

    // Try to guess the type of the load from the MMO.
    if (isLoadFromFPType(MI)) {
      OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }

    // Check if that load feeds fp instructions.
    // In that case, we want the default mapping to be on FPR
    // instead of blindly mapping every scalar to GPR.
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 // If we have at least one direct or indirect use
                 // in an FP instruction,
                 // assume this was a floating point load in the IR. If it was
                 // not, we would have had a bitcast before reaching that
                 // instruction.
                 //
                 // Int->FP conversion operations are also captured in
                 // prefersFPUse().

                 if (isPHIWithFPConstraints(UseMI, MRI, TRI))
                   return true;

                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        prefersFPUse(UseMI, MRI, TRI);
               }))
      OpRegBankIdx[0] = PMI_FirstFPR;

    // On GPR, extend any load < 32 bits to 32 bits.
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (Ty.isScalar() && Ty.getSizeInBits() < 32)
      MappingID = CustomMappingID;
    break;
  }
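  // For illustration only: in a hypothetical sequence such as
  //   %v:_(s32) = G_LOAD %p(p0) :: (load (s32))
  //   %r:_(s32) = G_FADD %v, %w
  // the use in G_FADD makes the heuristic above put %v on FPR, so the load
  // can select to an FP/vector load (e.g. LDRSui) without a GPR->FPR copy.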
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (VReg) {
        MachineInstr *DefMI = MRI.getVRegDef(VReg);
        if (onlyDefinesFP(*DefMI, MRI, TRI)) {
          OpRegBankIdx[0] = PMI_FirstFPR;
          break;
        }
      }

      // On GPR, extend any store < 32 bits to 32 bits.
      LLT Ty = MRI.getType(MI.getOperand(0).getReg());
      if (Ty.isScalar() && Ty.getSizeInBits() < 32)
        MappingID = CustomMappingID;
    }
    break;
  case TargetOpcode::G_INDEXED_STORE:
    if (OpRegBankIdx[1] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(1).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[1] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_SEXTLOAD:
  case TargetOpcode::G_INDEXED_ZEXTLOAD:
    // These should always be GPR.
    OpRegBankIdx[0] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INDEXED_LOAD: {
    if (isLoadFromFPType(MI))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR.
    // Otherwise, everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since it's just whatever is in NZCV.
    // This isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }

    // If we have more FP constraints than not, then move everything over to
    // FPR.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};

    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to a FPR register bank, then make sure that
    // we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case AArch64::G_SQSHLU_I:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Shift Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;

  case TargetOpcode::G_INSERT_VECTOR_ELT:
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) ==
        &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else {
      // If the type is i8/i16, and the regbank will be GPR, then we change the
      // type to i32 in applyMappingImpl.
      LLT Ty = MRI.getType(MI.getOperand(2).getReg());
      if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16) {
        // Calls applyMappingImpl()
        MappingID = CustomMappingID;
      }
      OpRegBankIdx[2] = PMI_FirstGPR;
    }

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use fpr unless we know otherwise.
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (SrcTy.getSizeInBits() != 128)
      break;
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to a FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation. Or, if it's a type like s16 which
    // doesn't have an exact-size gpr register class. The exception is if the
    // build_vector has all constant operands, which may be better to leave as
    // gpr without copies, so it can be matched in imported patterns.
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to a FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
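  // For illustration only: a hypothetical all-constant
  //   %v:_(v4s16) = G_BUILD_VECTOR %c1, %c2, %c3, %c4   ; all G_CONSTANT
  // is deliberately left with its scalar operands on GPR by the early-exit
  // above so that imported selection patterns (e.g. ones that materialize the
  // whole constant vector) can still match it.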
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // Reductions produce a scalar value from a vector; the scalar should be
    // on the FPR bank.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    // These reductions also take a scalar accumulator input.
    // Assign them FPR for now.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_fcvtas:
    case Intrinsic::aarch64_neon_fcvtau:
    case Intrinsic::aarch64_neon_fcvtzs:
    case Intrinsic::aarch64_neon_fcvtzu:
    case Intrinsic::aarch64_neon_fcvtms:
    case Intrinsic::aarch64_neon_fcvtmu:
    case Intrinsic::aarch64_neon_fcvtns:
    case Intrinsic::aarch64_neon_fcvtnu:
    case Intrinsic::aarch64_neon_fcvtps:
    case Intrinsic::aarch64_neon_fcvtpu: {
      OpRegBankIdx[2] = PMI_FirstFPR;
      if (MRI.getType(MI.getOperand(0).getReg()).isVector()) {
        OpRegBankIdx[0] = PMI_FirstFPR;
        break;
      }
      TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
      TypeSize SrcSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, TRI);
      if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
          all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
                 [&](const MachineInstr &UseMI) {
                   return onlyUsesFP(UseMI, MRI, TRI) ||
                          prefersFPUse(UseMI, MRI, TRI);
                 }))
        OpRegBankIdx[0] = PMI_FirstFPR;
      else
        OpRegBankIdx[0] = PMI_FirstGPR;
      break;
    }
    case Intrinsic::aarch64_neon_vcvtfxs2fp:
    case Intrinsic::aarch64_neon_vcvtfxu2fp:
    case Intrinsic::aarch64_neon_vcvtfp2fxs:
    case Intrinsic::aarch64_neon_vcvtfp2fxu:
      // Override these intrinsics, because they would have a partial
      // mapping. This is needed for 'half' types, which otherwise don't
      // get legalised correctly.
      OpRegBankIdx[0] = PMI_FirstFPR;
      OpRegBankIdx[2] = PMI_FirstFPR;
      // OpRegBankIdx[1] is the intrinsic ID.
      // OpRegBankIdx[3] is an integer immediate.
      break;
    default: {
      // Check if we know that the intrinsic has any constraints on its
      // register banks. If it does, then update the mapping accordingly.
      unsigned Idx = 0;
      if (onlyDefinesFP(MI, MRI, TRI))
        for (const auto &Op : MI.defs()) {
          if (Op.isReg())
            OpRegBankIdx[Idx] = PMI_FirstFPR;
          ++Idx;
        }
      else
        Idx += MI.getNumExplicitDefs();

      if (onlyUsesFP(MI, MRI, TRI))
        for (const auto &Op : MI.explicit_uses()) {
          if (Op.isReg())
            OpRegBankIdx[Idx] = PMI_FirstFPR;
          ++Idx;
        }
      break;
    }
    }
    break;
  }
  }

  // Finally construct the computed mapping.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
      if (!Ty.isValid())
        continue;
      auto Mapping =
          getValueMapping(OpRegBankIdx[Idx], TypeSize::getFixed(OpSize[Idx]));
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(MappingID, Cost, getOperandsMapping(OpdsMapping),
                               NumOperands);
}